aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2017-11-23 10:29:05 -0500
committerThomas Gleixner <tglx@linutronix.de>2017-11-23 10:29:05 -0500
commit866c9b94ef968445c52214b3748ecc52a8491bca (patch)
tree1fd073acb9be8e89e77b35c41e2964ac6feabee6 /net
parentaea3706cfc4d952ed6d32b6d5845b5ecd99ed7f5 (diff)
parent841b86f3289dbe858daeceec36423d4ea286fac2 (diff)
Merge tag 'for-linus-timers-conversion-final-v4.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux into timers/urgent
Pull the last batch of manual timer conversions from Kees Cook: - final batch of "non trivial" timer conversions (multi-tree dependencies, things Coccinelle couldn't handle, etc). - treewide conversions via Coccinelle, in 4 steps: - DEFINE_TIMER() functions converted to struct timer_list * argument - init_timer() -> setup_timer() - setup_timer() -> timer_setup() - setup_timer() -> timer_setup() (with a single embedded structure) - deprecated timer API removals (init_timer(), setup_*timer()) - finalization of new API (remove global casts)
Diffstat (limited to 'net')
-rw-r--r--net/802/garp.c6
-rw-r--r--net/802/mrp.c13
-rw-r--r--net/8021q/vlan.c9
-rw-r--r--net/8021q/vlan.h2
-rw-r--r--net/8021q/vlan_netlink.c3
-rw-r--r--net/appletalk/aarp.c4
-rw-r--r--net/appletalk/ddp.c7
-rw-r--r--net/atm/clip.c4
-rw-r--r--net/atm/lec.c13
-rw-r--r--net/atm/mpc.c12
-rw-r--r--net/ax25/af_ax25.c7
-rw-r--r--net/ax25/ax25_ds_timer.c9
-rw-r--r--net/ax25/ax25_timer.c41
-rw-r--r--net/batman-adv/bat_iv_ogm.c24
-rw-r--r--net/batman-adv/bat_v.c11
-rw-r--r--net/batman-adv/bat_v_elp.c6
-rw-r--r--net/batman-adv/bat_v_ogm.c12
-rw-r--r--net/batman-adv/distributed-arp-table.c4
-rw-r--r--net/batman-adv/gateway_client.c8
-rw-r--r--net/batman-adv/gateway_common.c18
-rw-r--r--net/batman-adv/hard-interface.c14
-rw-r--r--net/batman-adv/icmp_socket.c4
-rw-r--r--net/batman-adv/main.c12
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c2
-rw-r--r--net/batman-adv/originator.c26
-rw-r--r--net/batman-adv/routing.c6
-rw-r--r--net/batman-adv/send.c6
-rw-r--r--net/batman-adv/soft-interface.c10
-rw-r--r--net/batman-adv/sysfs.c4
-rw-r--r--net/batman-adv/tp_meter.c16
-rw-r--r--net/bluetooth/a2mp.c2
-rw-r--r--net/bluetooth/amp.c4
-rw-r--r--net/bluetooth/ecdh_helper.c228
-rw-r--r--net/bluetooth/ecdh_helper.h9
-rw-r--r--net/bluetooth/hci_conn.c6
-rw-r--r--net/bluetooth/hci_core.c35
-rw-r--r--net/bluetooth/hci_event.c46
-rw-r--r--net/bluetooth/hci_request.c21
-rw-r--r--net/bluetooth/hci_request.h1
-rw-r--r--net/bluetooth/hci_sock.c17
-rw-r--r--net/bluetooth/hci_sysfs.c2
-rw-r--r--net/bluetooth/hidp/core.c7
-rw-r--r--net/bluetooth/mgmt.c57
-rw-r--r--net/bluetooth/rfcomm/core.c12
-rw-r--r--net/bluetooth/sco.c6
-rw-r--r--net/bluetooth/selftest.c48
-rw-r--r--net/bluetooth/smp.c149
-rw-r--r--net/bpf/test_run.c3
-rw-r--r--net/bridge/Makefile2
-rw-r--r--net/bridge/br.c2
-rw-r--r--net/bridge/br_arp_nd_proxy.c469
-rw-r--r--net/bridge/br_device.c27
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_if.c24
-rw-r--r--net/bridge/br_input.c77
-rw-r--r--net/bridge/br_ioctl.c4
-rw-r--r--net/bridge/br_mdb.c54
-rw-r--r--net/bridge/br_multicast.c141
-rw-r--r--net/bridge/br_netlink.c129
-rw-r--r--net/bridge/br_netlink_tunnel.c14
-rw-r--r--net/bridge/br_private.h40
-rw-r--r--net/bridge/br_private_tunnel.h3
-rw-r--r--net/bridge/br_stp.c6
-rw-r--r--net/bridge/br_stp_if.c4
-rw-r--r--net/bridge/br_stp_timer.c50
-rw-r--r--net/bridge/br_sysfs_if.c22
-rw-r--r--net/bridge/br_vlan.c78
-rw-r--r--net/bridge/netfilter/ebtables.c3
-rw-r--r--net/can/af_can.c4
-rw-r--r--net/can/af_can.h2
-rw-r--r--net/can/proc.c4
-rw-r--r--net/ceph/ceph_hash.c12
-rw-r--r--net/ceph/crypto.c4
-rw-r--r--net/ceph/messenger.c1
-rw-r--r--net/ceph/mon_client.c5
-rw-r--r--net/ceph/pagevec.c4
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c343
-rw-r--r--net/core/drop_monitor.c7
-rw-r--r--net/core/dst.c16
-rw-r--r--net/core/ethtool.c16
-rw-r--r--net/core/fib_notifier.c10
-rw-r--r--net/core/fib_rules.c15
-rw-r--r--net/core/filter.c336
-rw-r--r--net/core/flow_dissector.c130
-rw-r--r--net/core/gen_estimator.c6
-rw-r--r--net/core/lwt_bpf.c2
-rw-r--r--net/core/neighbour.c32
-rw-r--r--net/core/net-sysfs.c28
-rw-r--r--net/core/net-traces.c3
-rw-r--r--net/core/net_namespace.c1
-rw-r--r--net/core/pktgen.c16
-rw-r--r--net/core/rtnetlink.c448
-rw-r--r--net/core/skbuff.c66
-rw-r--r--net/core/sock.c37
-rw-r--r--net/dccp/ccids/ccid2.c10
-rw-r--r--net/dccp/ccids/ccid2.h1
-rw-r--r--net/dccp/ccids/ccid3.c11
-rw-r--r--net/dccp/ccids/ccid3.h1
-rw-r--r--net/dccp/ccids/lib/packet_history.c4
-rw-r--r--net/dccp/input.c1
-rw-r--r--net/dccp/options.c2
-rw-r--r--net/dccp/timer.c30
-rw-r--r--net/decnet/af_decnet.c7
-rw-r--r--net/decnet/dn_dev.c12
-rw-r--r--net/decnet/dn_nsp_in.c8
-rw-r--r--net/decnet/dn_nsp_out.c18
-rw-r--r--net/decnet/dn_route.c16
-rw-r--r--net/decnet/dn_table.c1
-rw-r--r--net/decnet/dn_timer.c8
-rw-r--r--net/dsa/Kconfig4
-rw-r--r--net/dsa/Makefile3
-rw-r--r--net/dsa/dsa.c105
-rw-r--r--net/dsa/dsa2.c909
-rw-r--r--net/dsa/dsa_priv.h71
-rw-r--r--net/dsa/legacy.c85
-rw-r--r--net/dsa/master.c143
-rw-r--r--net/dsa/port.c105
-rw-r--r--net/dsa/slave.c601
-rw-r--r--net/dsa/switch.c2
-rw-r--r--net/dsa/tag_brcm.c90
-rw-r--r--net/dsa/tag_dsa.c28
-rw-r--r--net/dsa/tag_edsa.c28
-rw-r--r--net/dsa/tag_ksz.c13
-rw-r--r--net/dsa/tag_lan9303.c55
-rw-r--r--net/dsa/tag_mtk.c20
-rw-r--r--net/dsa/tag_qca.c22
-rw-r--r--net/dsa/tag_trailer.c13
-rw-r--r--net/hsr/hsr_device.c9
-rw-r--r--net/hsr/hsr_framereg.c6
-rw-r--r--net/hsr/hsr_framereg.h2
-rw-r--r--net/ieee802154/6lowpan/reassembly.c5
-rw-r--r--net/ieee802154/netlink.c6
-rw-r--r--net/ipv4/af_inet.c12
-rw-r--r--net/ipv4/ah4.c2
-rw-r--r--net/ipv4/arp.c1
-rw-r--r--net/ipv4/devinet.c43
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/fib_frontend.c34
-rw-r--r--net/ipv4/fib_semantics.c19
-rw-r--r--net/ipv4/fib_trie.c42
-rw-r--r--net/ipv4/icmp.c20
-rw-r--r--net/ipv4/igmp.c20
-rw-r--r--net/ipv4/inet_connection_sock.c57
-rw-r--r--net/ipv4/inet_fragment.c4
-rw-r--r--net/ipv4/inet_timewait_sock.c10
-rw-r--r--net/ipv4/inetpeer.c11
-rw-r--r--net/ipv4/ip_fragment.c5
-rw-r--r--net/ipv4/ip_gre.c88
-rw-r--r--net/ipv4/ip_tunnel.c12
-rw-r--r--net/ipv4/ip_vti.c16
-rw-r--r--net/ipv4/ipip.c7
-rw-r--r--net/ipv4/ipmr.c271
-rw-r--r--net/ipv4/netfilter/arp_tables.c22
-rw-r--r--net/ipv4/netfilter/ip_tables.c23
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c28
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c3
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/route.c19
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c520
-rw-r--r--net/ipv4/tcp.c167
-rw-r--r--net/ipv4/tcp_cdg.c2
-rw-r--r--net/ipv4/tcp_cong.c76
-rw-r--r--net/ipv4/tcp_fastopen.c160
-rw-r--r--net/ipv4/tcp_input.c600
-rw-r--r--net/ipv4/tcp_ipv4.c69
-rw-r--r--net/ipv4/tcp_metrics.c22
-rw-r--r--net/ipv4/tcp_minisocks.c41
-rw-r--r--net/ipv4/tcp_nv.c47
-rw-r--r--net/ipv4/tcp_output.c332
-rw-r--r--net/ipv4/tcp_recovery.c102
-rw-r--r--net/ipv4/tcp_timer.c77
-rw-r--r--net/ipv4/tcp_vegas.c2
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv6/addrconf.c390
-rw-r--r--net/ipv6/addrconf_core.c9
-rw-r--r--net/ipv6/addrlabel.c146
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/ah6.c3
-rw-r--r--net/ipv6/esp6.c10
-rw-r--r--net/ipv6/exthdrs.c73
-rw-r--r--net/ipv6/exthdrs_core.c5
-rw-r--r--net/ipv6/icmp.c50
-rw-r--r--net/ipv6/ila/ila.h12
-rw-r--r--net/ipv6/ila/ila_common.c104
-rw-r--r--net/ipv6/ila/ila_lwt.c111
-rw-r--r--net/ipv6/ila/ila_xlat.c26
-rw-r--r--net/ipv6/ip6_fib.c677
-rw-r--r--net/ipv6/ip6_flowlabel.c4
-rw-r--r--net/ipv6/ip6_gre.c30
-rw-r--r--net/ipv6/ip6_tunnel.c89
-rw-r--r--net/ipv6/ip6_vti.c23
-rw-r--r--net/ipv6/ip6mr.c14
-rw-r--r--net/ipv6/ipv6_sockglue.c12
-rw-r--r--net/ipv6/mcast.c33
-rw-r--r--net/ipv6/ndisc.c18
-rw-r--r--net/ipv6/netfilter/ip6_tables.c22
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c24
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c5
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c3
-rw-r--r--net/ipv6/output_core.c31
-rw-r--r--net/ipv6/ping.c5
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/reassembly.c5
-rw-r--r--net/ipv6/route.c940
-rw-r--r--net/ipv6/sit.c40
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/sysctl_net_ipv6.c32
-rw-r--r--net/ipv6/tcp_ipv6.c17
-rw-r--r--net/ipv6/xfrm6_policy.c1
-rw-r--r--net/ipv6/xfrm6_tunnel.c8
-rw-r--r--net/ipx/af_ipx.c1
-rw-r--r--net/kcm/kcmsock.c2
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l2tp/l2tp_core.c83
-rw-r--r--net/l2tp/l2tp_core.h37
-rw-r--r--net/l2tp/l2tp_debugfs.c4
-rw-r--r--net/l2tp/l2tp_eth.c106
-rw-r--r--net/l2tp/l2tp_ip.c4
-rw-r--r--net/l2tp/l2tp_ip6.c4
-rw-r--r--net/l2tp/l2tp_netlink.c24
-rw-r--r--net/l2tp/l2tp_ppp.c320
-rw-r--r--net/lapb/lapb_iface.c4
-rw-r--r--net/lapb/lapb_timer.c18
-rw-r--r--net/llc/llc_c_ac.c27
-rw-r--r--net/llc/llc_conn.c12
-rw-r--r--net/mac80211/Makefile3
-rw-r--r--net/mac80211/aead_api.c (renamed from net/mac80211/aes_ccm.c)40
-rw-r--r--net/mac80211/aead_api.h27
-rw-r--r--net/mac80211/aes_ccm.h42
-rw-r--r--net/mac80211/aes_gcm.c109
-rw-r--r--net/mac80211/aes_gcm.h38
-rw-r--r--net/mac80211/agg-rx.c45
-rw-r--r--net/mac80211/agg-tx.c49
-rw-r--r--net/mac80211/ht.c12
-rw-r--r--net/mac80211/ibss.c7
-rw-r--r--net/mac80211/ieee80211_i.h5
-rw-r--r--net/mac80211/iface.c29
-rw-r--r--net/mac80211/led.c11
-rw-r--r--net/mac80211/main.c3
-rw-r--r--net/mac80211/mesh.c30
-rw-r--r--net/mac80211/mesh.h3
-rw-r--r--net/mac80211/mesh_hwmp.c12
-rw-r--r--net/mac80211/mesh_pathtbl.c3
-rw-r--r--net/mac80211/mesh_plink.c13
-rw-r--r--net/mac80211/mlme.c65
-rw-r--r--net/mac80211/ocb.c10
-rw-r--r--net/mac80211/scan.c37
-rw-r--r--net/mac80211/sta_info.c76
-rw-r--r--net/mac80211/sta_info.h16
-rw-r--r--net/mac80211/tx.c34
-rw-r--r--net/mac80211/util.c25
-rw-r--r--net/mac80211/vht.c10
-rw-r--r--net/mac80211/wpa.c4
-rw-r--r--net/mac802154/llsec.c14
-rw-r--r--net/mpls/Kconfig1
-rw-r--r--net/mpls/af_mpls.c36
-rw-r--r--net/ncsi/ncsi-aen.c15
-rw-r--r--net/ncsi/ncsi-manage.c93
-rw-r--r--net/ncsi/ncsi-rsp.c41
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h10
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h12
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c2
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c17
-rw-r--r--net/netfilter/ipset/pfxlen.c395
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c8
-rw-r--r--net/netfilter/nf_conntrack_core.c17
-rw-r--r--net/netfilter/nf_conntrack_expect.c7
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c81
-rw-r--r--net/netfilter/nf_conntrack_netlink.c12
-rw-r--r--net/netfilter/nf_conntrack_proto.c86
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c21
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c62
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c41
-rw-r--r--net/netfilter/nf_nat_core.c9
-rw-r--r--net/netfilter/nf_nat_ftp.c2
-rw-r--r--net/netfilter/nf_nat_irc.c2
-rw-r--r--net/netfilter/nf_tables_api.c195
-rw-r--r--net/netfilter/nfnetlink_log.c8
-rw-r--r--net/netfilter/nft_ct.c39
-rw-r--r--net/netfilter/nft_set_bitmap.c18
-rw-r--r--net/netfilter/nft_set_hash.c41
-rw-r--r--net/netfilter/nft_set_rbtree.c73
-rw-r--r--net/netfilter/x_tables.c21
-rw-r--r--net/netfilter/xt_IDLETIMER.c7
-rw-r--r--net/netfilter/xt_LED.c8
-rw-r--r--net/netfilter/xt_bpf.c2
-rw-r--r--net/netfilter/xt_connlimit.c55
-rw-r--r--net/netlabel/netlabel_addrlist.h4
-rw-r--r--net/netlink/af_netlink.c48
-rw-r--r--net/netlink/af_netlink.h1
-rw-r--r--net/netrom/af_netrom.c4
-rw-r--r--net/netrom/nr_in.c2
-rw-r--r--net/netrom/nr_loopback.c4
-rw-r--r--net/netrom/nr_route.c62
-rw-r--r--net/netrom/nr_timer.c48
-rw-r--r--net/nfc/core.c11
-rw-r--r--net/nfc/digital_core.c1
-rw-r--r--net/nfc/hci/core.c8
-rw-r--r--net/nfc/hci/llc_shdlc.c26
-rw-r--r--net/nfc/llcp_core.c16
-rw-r--r--net/nfc/nci/core.c14
-rw-r--r--net/nfc/netlink.c35
-rw-r--r--net/nsh/nsh.c60
-rw-r--r--net/openvswitch/Kconfig1
-rw-r--r--net/openvswitch/Makefile1
-rw-r--r--net/openvswitch/actions.c126
-rw-r--r--net/openvswitch/conntrack.c12
-rw-r--r--net/openvswitch/conntrack.h7
-rw-r--r--net/openvswitch/datapath.c90
-rw-r--r--net/openvswitch/datapath.h39
-rw-r--r--net/openvswitch/dp_notify.c4
-rw-r--r--net/openvswitch/flow.c51
-rw-r--r--net/openvswitch/flow.h7
-rw-r--r--net/openvswitch/flow_netlink.c405
-rw-r--r--net/openvswitch/flow_netlink.h5
-rw-r--r--net/openvswitch/meter.c597
-rw-r--r--net/openvswitch/meter.h54
-rw-r--r--net/openvswitch/vport-netdev.c3
-rw-r--r--net/packet/af_packet.c25
-rw-r--r--net/phonet/af_phonet.c17
-rw-r--r--net/phonet/datagram.c2
-rw-r--r--net/phonet/pep.c2
-rw-r--r--net/phonet/pn_dev.c3
-rw-r--r--net/qrtr/qrtr.c377
-rw-r--r--net/rds/ib.c11
-rw-r--r--net/rds/ib.h2
-rw-r--r--net/rds/ib_fmr.c4
-rw-r--r--net/rds/ib_rdma.c4
-rw-r--r--net/rose/af_rose.c17
-rw-r--r--net/rose/rose_in.c1
-rw-r--r--net/rose/rose_link.c16
-rw-r--r--net/rose/rose_loopback.c9
-rw-r--r--net/rose/rose_route.c10
-rw-r--r--net/rose/rose_timer.c39
-rw-r--r--net/rxrpc/af_rxrpc.c42
-rw-r--r--net/rxrpc/ar-internal.h1
-rw-r--r--net/rxrpc/call_event.c2
-rw-r--r--net/rxrpc/call_object.c8
-rw-r--r--net/rxrpc/input.c3
-rw-r--r--net/rxrpc/output.c19
-rw-r--r--net/rxrpc/peer_object.c13
-rw-r--r--net/rxrpc/recvmsg.c7
-rw-r--r--net/rxrpc/sendmsg.c108
-rw-r--r--net/sched/Kconfig11
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c221
-rw-r--r--net/sched/act_bpf.c4
-rw-r--r--net/sched/act_ife.c153
-rw-r--r--net/sched/act_meta_mark.c2
-rw-r--r--net/sched/act_meta_skbprio.c2
-rw-r--r--net/sched/act_meta_skbtcindex.c2
-rw-r--r--net/sched/act_mirred.c13
-rw-r--r--net/sched/act_vlan.c81
-rw-r--r--net/sched/cls_api.c318
-rw-r--r--net/sched/cls_basic.c38
-rw-r--r--net/sched/cls_bpf.c99
-rw-r--r--net/sched/cls_flow.c14
-rw-r--r--net/sched/cls_flower.c76
-rw-r--r--net/sched/cls_fw.c5
-rw-r--r--net/sched/cls_matchall.c58
-rw-r--r--net/sched/cls_tcindex.c5
-rw-r--r--net/sched/cls_u32.c187
-rw-r--r--net/sched/ematch.c2
-rw-r--r--net/sched/sch_api.c9
-rw-r--r--net/sched/sch_atm.c4
-rw-r--r--net/sched/sch_cbq.c3
-rw-r--r--net/sched/sch_cbs.c373
-rw-r--r--net/sched/sch_drr.c3
-rw-r--r--net/sched/sch_dsmark.c2
-rw-r--r--net/sched/sch_fq_codel.c3
-rw-r--r--net/sched/sch_generic.c70
-rw-r--r--net/sched/sch_hfsc.c5
-rw-r--r--net/sched/sch_htb.c10
-rw-r--r--net/sched/sch_ingress.c49
-rw-r--r--net/sched/sch_mq.c10
-rw-r--r--net/sched/sch_mqprio.c273
-rw-r--r--net/sched/sch_multiq.c3
-rw-r--r--net/sched/sch_netem.c167
-rw-r--r--net/sched/sch_pie.c10
-rw-r--r--net/sched/sch_prio.c3
-rw-r--r--net/sched/sch_qfq.c3
-rw-r--r--net/sched/sch_red.c93
-rw-r--r--net/sched/sch_sfb.c3
-rw-r--r--net/sched/sch_sfq.c13
-rw-r--r--net/sctp/Makefile3
-rw-r--r--net/sctp/associola.c3
-rw-r--r--net/sctp/chunk.c6
-rw-r--r--net/sctp/ipv6.c5
-rw-r--r--net/sctp/outqueue.c63
-rw-r--r--net/sctp/protocol.c7
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/sm_sideeffect.c88
-rw-r--r--net/sctp/socket.c246
-rw-r--r--net/sctp/stream.c234
-rw-r--r--net/sctp/stream_sched.c275
-rw-r--r--net/sctp/stream_sched_prio.c347
-rw-r--r--net/sctp/stream_sched_rr.c201
-rw-r--r--net/sctp/transport.c13
-rw-r--r--net/smc/af_smc.c42
-rw-r--r--net/smc/smc_cdc.c7
-rw-r--r--net/smc/smc_cdc.h3
-rw-r--r--net/smc/smc_close.c3
-rw-r--r--net/smc/smc_core.c12
-rw-r--r--net/smc/smc_ib.c26
-rw-r--r--net/smc/smc_tx.c6
-rw-r--r--net/socket.c1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c14
-rw-r--r--net/sunrpc/clnt.c14
-rw-r--r--net/sunrpc/rpc_pipe.c8
-rw-r--r--net/sunrpc/rpcb_clnt.c6
-rw-r--r--net/sunrpc/sched.c11
-rw-r--r--net/sunrpc/sunrpc_syms.c3
-rw-r--r--net/sunrpc/svc.c6
-rw-r--r--net/sunrpc/svc_xprt.c115
-rw-r--r--net/sunrpc/xprt.c10
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c6
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c19
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c27
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c363
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c6
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c11
-rw-r--r--net/sunrpc/xprtrdma/transport.c19
-rw-r--r--net/sunrpc/xprtrdma/verbs.c236
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h119
-rw-r--r--net/sunrpc/xprtsock.c4
-rw-r--r--net/switchdev/switchdev.c2
-rw-r--r--net/tipc/Makefile2
-rw-r--r--net/tipc/bcast.c18
-rw-r--r--net/tipc/core.h5
-rw-r--r--net/tipc/discover.c6
-rw-r--r--net/tipc/group.c871
-rw-r--r--net/tipc/group.h73
-rw-r--r--net/tipc/link.c34
-rw-r--r--net/tipc/monitor.c17
-rw-r--r--net/tipc/msg.c31
-rw-r--r--net/tipc/msg.h135
-rw-r--r--net/tipc/name_table.c176
-rw-r--r--net/tipc/name_table.h28
-rw-r--r--net/tipc/node.c52
-rw-r--r--net/tipc/node.h5
-rw-r--r--net/tipc/server.c121
-rw-r--r--net/tipc/server.h5
-rw-r--r--net/tipc/socket.c845
-rw-r--r--net/tipc/subscr.c6
-rw-r--r--net/tls/tls_main.c96
-rw-r--r--net/tls/tls_sw.c24
-rw-r--r--net/unix/af_unix.c1
-rw-r--r--net/vmw_vsock/Kconfig10
-rw-r--r--net/vmw_vsock/Makefile3
-rw-r--r--net/vmw_vsock/af_vsock.c73
-rw-r--r--net/vmw_vsock/diag.c186
-rw-r--r--net/vmw_vsock/hyperv_transport.c13
-rw-r--r--net/vmw_vsock/virtio_transport.c2
-rw-r--r--net/vmw_vsock/virtio_transport_common.c22
-rw-r--r--net/vmw_vsock/vmci_transport.c35
-rw-r--r--net/vmw_vsock/vmci_transport_notify.c2
-rw-r--r--net/vmw_vsock/vmci_transport_notify_qstate.c2
-rw-r--r--net/wireless/.gitignore3
-rw-r--r--net/wireless/Kconfig58
-rw-r--r--net/wireless/Makefile24
-rw-r--r--net/wireless/certs/sforshee.x509bin0 -> 680 bytes
-rw-r--r--net/wireless/chan.c4
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/core.h5
-rw-r--r--net/wireless/db.txt17
-rw-r--r--net/wireless/genregdb.awk158
-rw-r--r--net/wireless/lib80211.c11
-rw-r--r--net/wireless/nl80211.c229
-rw-r--r--net/wireless/nl80211.h2
-rw-r--r--net/wireless/reg.c492
-rw-r--r--net/wireless/reg.h14
-rw-r--r--net/wireless/regdb.h23
-rw-r--r--net/wireless/sme.c45
-rw-r--r--net/wireless/util.c202
-rw-r--r--net/x25/af_x25.c7
-rw-r--r--net/x25/x25_facilities.c2
-rw-r--r--net/x25/x25_in.c1
-rw-r--r--net/x25/x25_link.c8
-rw-r--r--net/x25/x25_timer.c18
-rw-r--r--net/xfrm/xfrm_policy.c47
-rw-r--r--net/xfrm/xfrm_state.c9
-rw-r--r--net/xfrm/xfrm_user.c105
494 files changed, 18202 insertions, 8639 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 2dac647ff420..7f50d47470bd 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -401,9 +401,9 @@ static void garp_join_timer_arm(struct garp_applicant *app)
401 mod_timer(&app->join_timer, jiffies + delay); 401 mod_timer(&app->join_timer, jiffies + delay);
402} 402}
403 403
404static void garp_join_timer(unsigned long data) 404static void garp_join_timer(struct timer_list *t)
405{ 405{
406 struct garp_applicant *app = (struct garp_applicant *)data; 406 struct garp_applicant *app = from_timer(app, t, join_timer);
407 407
408 spin_lock(&app->lock); 408 spin_lock(&app->lock);
409 garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU); 409 garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
@@ -584,7 +584,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
584 spin_lock_init(&app->lock); 584 spin_lock_init(&app->lock);
585 skb_queue_head_init(&app->queue); 585 skb_queue_head_init(&app->queue);
586 rcu_assign_pointer(dev->garp_port->applicants[appl->type], app); 586 rcu_assign_pointer(dev->garp_port->applicants[appl->type], app);
587 setup_timer(&app->join_timer, garp_join_timer, (unsigned long)app); 587 timer_setup(&app->join_timer, garp_join_timer, 0);
588 garp_join_timer_arm(app); 588 garp_join_timer_arm(app);
589 return 0; 589 return 0;
590 590
diff --git a/net/802/mrp.c b/net/802/mrp.c
index be4dd3165347..a808dd5bbb27 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -586,9 +586,9 @@ static void mrp_join_timer_arm(struct mrp_applicant *app)
586 mod_timer(&app->join_timer, jiffies + delay); 586 mod_timer(&app->join_timer, jiffies + delay);
587} 587}
588 588
589static void mrp_join_timer(unsigned long data) 589static void mrp_join_timer(struct timer_list *t)
590{ 590{
591 struct mrp_applicant *app = (struct mrp_applicant *)data; 591 struct mrp_applicant *app = from_timer(app, t, join_timer);
592 592
593 spin_lock(&app->lock); 593 spin_lock(&app->lock);
594 mrp_mad_event(app, MRP_EVENT_TX); 594 mrp_mad_event(app, MRP_EVENT_TX);
@@ -605,9 +605,9 @@ static void mrp_periodic_timer_arm(struct mrp_applicant *app)
605 jiffies + msecs_to_jiffies(mrp_periodic_time)); 605 jiffies + msecs_to_jiffies(mrp_periodic_time));
606} 606}
607 607
608static void mrp_periodic_timer(unsigned long data) 608static void mrp_periodic_timer(struct timer_list *t)
609{ 609{
610 struct mrp_applicant *app = (struct mrp_applicant *)data; 610 struct mrp_applicant *app = from_timer(app, t, periodic_timer);
611 611
612 spin_lock(&app->lock); 612 spin_lock(&app->lock);
613 mrp_mad_event(app, MRP_EVENT_PERIODIC); 613 mrp_mad_event(app, MRP_EVENT_PERIODIC);
@@ -865,10 +865,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
865 spin_lock_init(&app->lock); 865 spin_lock_init(&app->lock);
866 skb_queue_head_init(&app->queue); 866 skb_queue_head_init(&app->queue);
867 rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); 867 rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
868 setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); 868 timer_setup(&app->join_timer, mrp_join_timer, 0);
869 mrp_join_timer_arm(app); 869 mrp_join_timer_arm(app);
870 setup_timer(&app->periodic_timer, mrp_periodic_timer, 870 timer_setup(&app->periodic_timer, mrp_periodic_timer, 0);
871 (unsigned long)app);
872 mrp_periodic_timer_arm(app); 871 mrp_periodic_timer_arm(app);
873 return 0; 872 return 0;
874 873
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 4a72ee4e2ae9..8dfdd94e430f 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -138,7 +138,7 @@ int vlan_check_real_dev(struct net_device *real_dev,
138 return 0; 138 return 0;
139} 139}
140 140
141int register_vlan_dev(struct net_device *dev) 141int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
142{ 142{
143 struct vlan_dev_priv *vlan = vlan_dev_priv(dev); 143 struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
144 struct net_device *real_dev = vlan->real_dev; 144 struct net_device *real_dev = vlan->real_dev;
@@ -174,7 +174,7 @@ int register_vlan_dev(struct net_device *dev)
174 if (err < 0) 174 if (err < 0)
175 goto out_uninit_mvrp; 175 goto out_uninit_mvrp;
176 176
177 err = netdev_upper_dev_link(real_dev, dev); 177 err = netdev_upper_dev_link(real_dev, dev, extack);
178 if (err) 178 if (err)
179 goto out_unregister_netdev; 179 goto out_unregister_netdev;
180 180
@@ -270,7 +270,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
270 vlan->flags = VLAN_FLAG_REORDER_HDR; 270 vlan->flags = VLAN_FLAG_REORDER_HDR;
271 271
272 new_dev->rtnl_link_ops = &vlan_link_ops; 272 new_dev->rtnl_link_ops = &vlan_link_ops;
273 err = register_vlan_dev(new_dev); 273 err = register_vlan_dev(new_dev, NULL);
274 if (err < 0) 274 if (err < 0)
275 goto out_free_newdev; 275 goto out_free_newdev;
276 276
@@ -328,6 +328,9 @@ static void vlan_transfer_features(struct net_device *dev,
328 vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; 328 vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
329#endif 329#endif
330 330
331 vlandev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
332 vlandev->priv_flags |= (vlan->real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
333
331 netdev_update_features(vlandev); 334 netdev_update_features(vlandev);
332} 335}
333 336
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 0e7afdf86127..a8ba51030b75 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -108,7 +108,7 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
108int vlan_check_real_dev(struct net_device *real_dev, 108int vlan_check_real_dev(struct net_device *real_dev,
109 __be16 protocol, u16 vlan_id); 109 __be16 protocol, u16 vlan_id);
110void vlan_setup(struct net_device *dev); 110void vlan_setup(struct net_device *dev);
111int register_vlan_dev(struct net_device *dev); 111int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
112void unregister_vlan_dev(struct net_device *dev, struct list_head *head); 112void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
113bool vlan_dev_inherit_address(struct net_device *dev, 113bool vlan_dev_inherit_address(struct net_device *dev,
114 struct net_device *real_dev); 114 struct net_device *real_dev);
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 5e831de3103e..6689c0b272a7 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -143,6 +143,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
143 vlan->vlan_proto = proto; 143 vlan->vlan_proto = proto;
144 vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); 144 vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]);
145 vlan->real_dev = real_dev; 145 vlan->real_dev = real_dev;
146 dev->priv_flags |= (real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
146 vlan->flags = VLAN_FLAG_REORDER_HDR; 147 vlan->flags = VLAN_FLAG_REORDER_HDR;
147 148
148 err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id); 149 err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id);
@@ -160,7 +161,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
160 if (err < 0) 161 if (err < 0)
161 return err; 162 return err;
162 163
163 return register_vlan_dev(dev); 164 return register_vlan_dev(dev, extack);
164} 165}
165 166
166static inline size_t vlan_qos_map_size(unsigned int n) 167static inline size_t vlan_qos_map_size(unsigned int n)
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 8ad3ec2610b6..309d7dbb36e8 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -310,7 +310,7 @@ static void __aarp_expire_device(struct aarp_entry **n, struct net_device *dev)
310} 310}
311 311
312/* Handle the timer event */ 312/* Handle the timer event */
313static void aarp_expire_timeout(unsigned long unused) 313static void aarp_expire_timeout(struct timer_list *unused)
314{ 314{
315 int ct; 315 int ct;
316 316
@@ -884,7 +884,7 @@ void __init aarp_proto_init(void)
884 aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv); 884 aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv);
885 if (!aarp_dl) 885 if (!aarp_dl)
886 printk(KERN_CRIT "Unable to register AARP with SNAP.\n"); 886 printk(KERN_CRIT "Unable to register AARP with SNAP.\n");
887 setup_timer(&aarp_timer, aarp_expire_timeout, 0); 887 timer_setup(&aarp_timer, aarp_expire_timeout, 0);
888 aarp_timer.expires = jiffies + sysctl_aarp_expiry_time; 888 aarp_timer.expires = jiffies + sysctl_aarp_expiry_time;
889 add_timer(&aarp_timer); 889 add_timer(&aarp_timer);
890 register_netdevice_notifier(&aarp_notifier); 890 register_netdevice_notifier(&aarp_notifier);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 5d035c1f1156..03a9fc0771c0 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -158,9 +158,9 @@ found:
158 return s; 158 return s;
159} 159}
160 160
161static void atalk_destroy_timer(unsigned long data) 161static void atalk_destroy_timer(struct timer_list *t)
162{ 162{
163 struct sock *sk = (struct sock *)data; 163 struct sock *sk = from_timer(sk, t, sk_timer);
164 164
165 if (sk_has_allocations(sk)) { 165 if (sk_has_allocations(sk)) {
166 sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME; 166 sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME;
@@ -175,8 +175,7 @@ static inline void atalk_destroy_socket(struct sock *sk)
175 skb_queue_purge(&sk->sk_receive_queue); 175 skb_queue_purge(&sk->sk_receive_queue);
176 176
177 if (sk_has_allocations(sk)) { 177 if (sk_has_allocations(sk)) {
178 setup_timer(&sk->sk_timer, atalk_destroy_timer, 178 timer_setup(&sk->sk_timer, atalk_destroy_timer, 0);
179 (unsigned long)sk);
180 sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME; 179 sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME;
181 add_timer(&sk->sk_timer); 180 add_timer(&sk->sk_timer);
182 } else 181 } else
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 65f706e4344c..d4f6029d5109 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -153,7 +153,7 @@ static int neigh_check_cb(struct neighbour *n)
153 return 1; 153 return 1;
154} 154}
155 155
156static void idle_timer_check(unsigned long dummy) 156static void idle_timer_check(struct timer_list *unused)
157{ 157{
158 write_lock(&arp_tbl.lock); 158 write_lock(&arp_tbl.lock);
159 __neigh_for_each_release(&arp_tbl, neigh_check_cb); 159 __neigh_for_each_release(&arp_tbl, neigh_check_cb);
@@ -887,7 +887,7 @@ static int __init atm_clip_init(void)
887 register_netdevice_notifier(&clip_dev_notifier); 887 register_netdevice_notifier(&clip_dev_notifier);
888 register_inetaddr_notifier(&clip_inet_notifier); 888 register_inetaddr_notifier(&clip_inet_notifier);
889 889
890 setup_timer(&idle_timer, idle_timer_check, 0); 890 timer_setup(&idle_timer, idle_timer_check, 0);
891 891
892#ifdef CONFIG_PROC_FS 892#ifdef CONFIG_PROC_FS
893 { 893 {
diff --git a/net/atm/lec.c b/net/atm/lec.c
index a3d93a1bb133..6676e3433261 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1232,7 +1232,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr,
1232#define LEC_ARP_REFRESH_INTERVAL (3*HZ) 1232#define LEC_ARP_REFRESH_INTERVAL (3*HZ)
1233 1233
1234static void lec_arp_check_expire(struct work_struct *work); 1234static void lec_arp_check_expire(struct work_struct *work);
1235static void lec_arp_expire_arp(unsigned long data); 1235static void lec_arp_expire_arp(struct timer_list *t);
1236 1236
1237/* 1237/*
1238 * Arp table funcs 1238 * Arp table funcs
@@ -1559,8 +1559,7 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
1559 } 1559 }
1560 ether_addr_copy(to_return->mac_addr, mac_addr); 1560 ether_addr_copy(to_return->mac_addr, mac_addr);
1561 INIT_HLIST_NODE(&to_return->next); 1561 INIT_HLIST_NODE(&to_return->next);
1562 setup_timer(&to_return->timer, lec_arp_expire_arp, 1562 timer_setup(&to_return->timer, lec_arp_expire_arp, 0);
1563 (unsigned long)to_return);
1564 to_return->last_used = jiffies; 1563 to_return->last_used = jiffies;
1565 to_return->priv = priv; 1564 to_return->priv = priv;
1566 skb_queue_head_init(&to_return->tx_wait); 1565 skb_queue_head_init(&to_return->tx_wait);
@@ -1569,11 +1568,11 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
1569} 1568}
1570 1569
1571/* Arp sent timer expired */ 1570/* Arp sent timer expired */
1572static void lec_arp_expire_arp(unsigned long data) 1571static void lec_arp_expire_arp(struct timer_list *t)
1573{ 1572{
1574 struct lec_arp_table *entry; 1573 struct lec_arp_table *entry;
1575 1574
1576 entry = (struct lec_arp_table *)data; 1575 entry = from_timer(entry, t, timer);
1577 1576
1578 pr_debug("\n"); 1577 pr_debug("\n");
1579 if (entry->status == ESI_ARP_PENDING) { 1578 if (entry->status == ESI_ARP_PENDING) {
@@ -1591,10 +1590,10 @@ static void lec_arp_expire_arp(unsigned long data)
1591} 1590}
1592 1591
1593/* Unknown/unused vcc expire, remove associated entry */ 1592/* Unknown/unused vcc expire, remove associated entry */
1594static void lec_arp_expire_vcc(unsigned long data) 1593static void lec_arp_expire_vcc(struct timer_list *t)
1595{ 1594{
1596 unsigned long flags; 1595 unsigned long flags;
1597 struct lec_arp_table *to_remove = (struct lec_arp_table *)data; 1596 struct lec_arp_table *to_remove = from_timer(to_remove, t, timer);
1598 struct lec_priv *priv = to_remove->priv; 1597 struct lec_priv *priv = to_remove->priv;
1599 1598
1600 del_timer(&to_remove->timer); 1599 del_timer(&to_remove->timer);
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 63138c8c2269..7c6a1cc760a2 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -95,7 +95,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
95static int mpoa_event_listener(struct notifier_block *mpoa_notifier, 95static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
96 unsigned long event, void *dev); 96 unsigned long event, void *dev);
97static void mpc_timer_refresh(void); 97static void mpc_timer_refresh(void);
98static void mpc_cache_check(unsigned long checking_time); 98static void mpc_cache_check(struct timer_list *unused);
99 99
100static struct llc_snap_hdr llc_snap_mpoa_ctrl = { 100static struct llc_snap_hdr llc_snap_mpoa_ctrl = {
101 0xaa, 0xaa, 0x03, 101 0xaa, 0xaa, 0x03,
@@ -121,7 +121,7 @@ static struct notifier_block mpoa_notifier = {
121 121
122struct mpoa_client *mpcs = NULL; /* FIXME */ 122struct mpoa_client *mpcs = NULL; /* FIXME */
123static struct atm_mpoa_qos *qos_head = NULL; 123static struct atm_mpoa_qos *qos_head = NULL;
124static DEFINE_TIMER(mpc_timer, NULL); 124static DEFINE_TIMER(mpc_timer, mpc_cache_check);
125 125
126 126
127static struct mpoa_client *find_mpc_by_itfnum(int itf) 127static struct mpoa_client *find_mpc_by_itfnum(int itf)
@@ -799,7 +799,6 @@ static int atm_mpoa_mpoad_attach(struct atm_vcc *vcc, int arg)
799 int err; 799 int err;
800 800
801 if (mpcs == NULL) { 801 if (mpcs == NULL) {
802 init_timer(&mpc_timer);
803 mpc_timer_refresh(); 802 mpc_timer_refresh();
804 803
805 /* This lets us now how our LECs are doing */ 804 /* This lets us now how our LECs are doing */
@@ -1408,15 +1407,16 @@ static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action)
1408 msg_to_mpoad(msg, mpc); 1407 msg_to_mpoad(msg, mpc);
1409} 1408}
1410 1409
1410static unsigned long checking_time;
1411
1411static void mpc_timer_refresh(void) 1412static void mpc_timer_refresh(void)
1412{ 1413{
1413 mpc_timer.expires = jiffies + (MPC_P2 * HZ); 1414 mpc_timer.expires = jiffies + (MPC_P2 * HZ);
1414 mpc_timer.data = mpc_timer.expires; 1415 checking_time = mpc_timer.expires;
1415 mpc_timer.function = mpc_cache_check;
1416 add_timer(&mpc_timer); 1416 add_timer(&mpc_timer);
1417} 1417}
1418 1418
1419static void mpc_cache_check(unsigned long checking_time) 1419static void mpc_cache_check(struct timer_list *unused)
1420{ 1420{
1421 struct mpoa_client *mpc = mpcs; 1421 struct mpoa_client *mpc = mpcs;
1422 static unsigned long previous_resolving_check_time; 1422 static unsigned long previous_resolving_check_time;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index f3f9d18891de..06eac1f50c5e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -268,9 +268,9 @@ void ax25_destroy_socket(ax25_cb *);
268/* 268/*
269 * Handler for deferred kills. 269 * Handler for deferred kills.
270 */ 270 */
271static void ax25_destroy_timer(unsigned long data) 271static void ax25_destroy_timer(struct timer_list *t)
272{ 272{
273 ax25_cb *ax25=(ax25_cb *)data; 273 ax25_cb *ax25 = from_timer(ax25, t, dtimer);
274 struct sock *sk; 274 struct sock *sk;
275 275
276 sk=ax25->sk; 276 sk=ax25->sk;
@@ -326,8 +326,7 @@ void ax25_destroy_socket(ax25_cb *ax25)
326 if (ax25->sk != NULL) { 326 if (ax25->sk != NULL) {
327 if (sk_has_allocations(ax25->sk)) { 327 if (sk_has_allocations(ax25->sk)) {
328 /* Defer: outstanding buffers */ 328 /* Defer: outstanding buffers */
329 setup_timer(&ax25->dtimer, ax25_destroy_timer, 329 timer_setup(&ax25->dtimer, ax25_destroy_timer, 0);
330 (unsigned long)ax25);
331 ax25->dtimer.expires = jiffies + 2 * HZ; 330 ax25->dtimer.expires = jiffies + 2 * HZ;
332 add_timer(&ax25->dtimer); 331 add_timer(&ax25->dtimer);
333 } else { 332 } else {
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 5fb2104b7304..e9d11313d45b 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -29,7 +29,7 @@
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/interrupt.h> 30#include <linux/interrupt.h>
31 31
32static void ax25_ds_timeout(unsigned long); 32static void ax25_ds_timeout(struct timer_list *);
33 33
34/* 34/*
35 * Add DAMA slave timeout timer to timer list. 35 * Add DAMA slave timeout timer to timer list.
@@ -41,8 +41,7 @@ static void ax25_ds_timeout(unsigned long);
41 41
42void ax25_ds_setup_timer(ax25_dev *ax25_dev) 42void ax25_ds_setup_timer(ax25_dev *ax25_dev)
43{ 43{
44 setup_timer(&ax25_dev->dama.slave_timer, ax25_ds_timeout, 44 timer_setup(&ax25_dev->dama.slave_timer, ax25_ds_timeout, 0);
45 (unsigned long)ax25_dev);
46} 45}
47 46
48void ax25_ds_del_timer(ax25_dev *ax25_dev) 47void ax25_ds_del_timer(ax25_dev *ax25_dev)
@@ -66,9 +65,9 @@ void ax25_ds_set_timer(ax25_dev *ax25_dev)
66 * Silently discard all (slave) connections in case our master forgot us... 65 * Silently discard all (slave) connections in case our master forgot us...
67 */ 66 */
68 67
69static void ax25_ds_timeout(unsigned long arg) 68static void ax25_ds_timeout(struct timer_list *t)
70{ 69{
71 ax25_dev *ax25_dev = (struct ax25_dev *) arg; 70 ax25_dev *ax25_dev = from_timer(ax25_dev, t, dama.slave_timer);
72 ax25_cb *ax25; 71 ax25_cb *ax25;
73 72
74 if (ax25_dev == NULL || !ax25_dev->dama.slave) 73 if (ax25_dev == NULL || !ax25_dev->dama.slave)
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index 23a6f38a80bf..c47b7ee1e4da 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -33,20 +33,19 @@
33#include <linux/mm.h> 33#include <linux/mm.h>
34#include <linux/interrupt.h> 34#include <linux/interrupt.h>
35 35
36static void ax25_heartbeat_expiry(unsigned long); 36static void ax25_heartbeat_expiry(struct timer_list *);
37static void ax25_t1timer_expiry(unsigned long); 37static void ax25_t1timer_expiry(struct timer_list *);
38static void ax25_t2timer_expiry(unsigned long); 38static void ax25_t2timer_expiry(struct timer_list *);
39static void ax25_t3timer_expiry(unsigned long); 39static void ax25_t3timer_expiry(struct timer_list *);
40static void ax25_idletimer_expiry(unsigned long); 40static void ax25_idletimer_expiry(struct timer_list *);
41 41
42void ax25_setup_timers(ax25_cb *ax25) 42void ax25_setup_timers(ax25_cb *ax25)
43{ 43{
44 setup_timer(&ax25->timer, ax25_heartbeat_expiry, (unsigned long)ax25); 44 timer_setup(&ax25->timer, ax25_heartbeat_expiry, 0);
45 setup_timer(&ax25->t1timer, ax25_t1timer_expiry, (unsigned long)ax25); 45 timer_setup(&ax25->t1timer, ax25_t1timer_expiry, 0);
46 setup_timer(&ax25->t2timer, ax25_t2timer_expiry, (unsigned long)ax25); 46 timer_setup(&ax25->t2timer, ax25_t2timer_expiry, 0);
47 setup_timer(&ax25->t3timer, ax25_t3timer_expiry, (unsigned long)ax25); 47 timer_setup(&ax25->t3timer, ax25_t3timer_expiry, 0);
48 setup_timer(&ax25->idletimer, ax25_idletimer_expiry, 48 timer_setup(&ax25->idletimer, ax25_idletimer_expiry, 0);
49 (unsigned long)ax25);
50} 49}
51 50
52void ax25_start_heartbeat(ax25_cb *ax25) 51void ax25_start_heartbeat(ax25_cb *ax25)
@@ -120,10 +119,10 @@ unsigned long ax25_display_timer(struct timer_list *timer)
120 119
121EXPORT_SYMBOL(ax25_display_timer); 120EXPORT_SYMBOL(ax25_display_timer);
122 121
123static void ax25_heartbeat_expiry(unsigned long param) 122static void ax25_heartbeat_expiry(struct timer_list *t)
124{ 123{
125 int proto = AX25_PROTO_STD_SIMPLEX; 124 int proto = AX25_PROTO_STD_SIMPLEX;
126 ax25_cb *ax25 = (ax25_cb *)param; 125 ax25_cb *ax25 = from_timer(ax25, t, timer);
127 126
128 if (ax25->ax25_dev) 127 if (ax25->ax25_dev)
129 proto = ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]; 128 proto = ax25->ax25_dev->values[AX25_VALUES_PROTOCOL];
@@ -145,9 +144,9 @@ static void ax25_heartbeat_expiry(unsigned long param)
145 } 144 }
146} 145}
147 146
148static void ax25_t1timer_expiry(unsigned long param) 147static void ax25_t1timer_expiry(struct timer_list *t)
149{ 148{
150 ax25_cb *ax25 = (ax25_cb *)param; 149 ax25_cb *ax25 = from_timer(ax25, t, t1timer);
151 150
152 switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { 151 switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
153 case AX25_PROTO_STD_SIMPLEX: 152 case AX25_PROTO_STD_SIMPLEX:
@@ -164,9 +163,9 @@ static void ax25_t1timer_expiry(unsigned long param)
164 } 163 }
165} 164}
166 165
167static void ax25_t2timer_expiry(unsigned long param) 166static void ax25_t2timer_expiry(struct timer_list *t)
168{ 167{
169 ax25_cb *ax25 = (ax25_cb *)param; 168 ax25_cb *ax25 = from_timer(ax25, t, t2timer);
170 169
171 switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { 170 switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
172 case AX25_PROTO_STD_SIMPLEX: 171 case AX25_PROTO_STD_SIMPLEX:
@@ -183,9 +182,9 @@ static void ax25_t2timer_expiry(unsigned long param)
183 } 182 }
184} 183}
185 184
186static void ax25_t3timer_expiry(unsigned long param) 185static void ax25_t3timer_expiry(struct timer_list *t)
187{ 186{
188 ax25_cb *ax25 = (ax25_cb *)param; 187 ax25_cb *ax25 = from_timer(ax25, t, t3timer);
189 188
190 switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { 189 switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
191 case AX25_PROTO_STD_SIMPLEX: 190 case AX25_PROTO_STD_SIMPLEX:
@@ -204,9 +203,9 @@ static void ax25_t3timer_expiry(unsigned long param)
204 } 203 }
205} 204}
206 205
207static void ax25_idletimer_expiry(unsigned long param) 206static void ax25_idletimer_expiry(struct timer_list *t)
208{ 207{
209 ax25_cb *ax25 = (ax25_cb *)param; 208 ax25_cb *ax25 = from_timer(ax25, t, idletimer);
210 209
211 switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { 210 switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
212 case AX25_PROTO_STD_SIMPLEX: 211 case AX25_PROTO_STD_SIMPLEX:
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 83ba5483455a..1b659ab652fb 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -916,8 +916,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
916 u16 tvlv_len = 0; 916 u16 tvlv_len = 0;
917 unsigned long send_time; 917 unsigned long send_time;
918 918
919 if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || 919 if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
920 (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) 920 hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
921 return; 921 return;
922 922
923 /* the interface gets activated here to avoid race conditions between 923 /* the interface gets activated here to avoid race conditions between
@@ -1264,7 +1264,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1264 * drops as they can't send and receive at the same time. 1264 * drops as they can't send and receive at the same time.
1265 */ 1265 */
1266 tq_iface_penalty = BATADV_TQ_MAX_VALUE; 1266 tq_iface_penalty = BATADV_TQ_MAX_VALUE;
1267 if (if_outgoing && (if_incoming == if_outgoing) && 1267 if (if_outgoing && if_incoming == if_outgoing &&
1268 batadv_is_wifi_hardif(if_outgoing)) 1268 batadv_is_wifi_hardif(if_outgoing))
1269 tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE, 1269 tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
1270 bat_priv); 1270 bat_priv);
@@ -1369,7 +1369,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
1369 ret = BATADV_NEIGH_DUP; 1369 ret = BATADV_NEIGH_DUP;
1370 } else { 1370 } else {
1371 set_mark = 0; 1371 set_mark = 0;
1372 if (is_dup && (ret != BATADV_NEIGH_DUP)) 1372 if (is_dup && ret != BATADV_NEIGH_DUP)
1373 ret = BATADV_ORIG_DUP; 1373 ret = BATADV_ORIG_DUP;
1374 } 1374 }
1375 1375
@@ -1515,7 +1515,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
1515 /* drop packet if sender is not a direct neighbor and if we 1515 /* drop packet if sender is not a direct neighbor and if we
1516 * don't route towards it 1516 * don't route towards it
1517 */ 1517 */
1518 if (!is_single_hop_neigh && (!orig_neigh_router)) { 1518 if (!is_single_hop_neigh && !orig_neigh_router) {
1519 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 1519 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
1520 "Drop packet: OGM via unknown neighbor!\n"); 1520 "Drop packet: OGM via unknown neighbor!\n");
1521 goto out_neigh; 1521 goto out_neigh;
@@ -1535,7 +1535,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
1535 sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno); 1535 sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno);
1536 similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl; 1536 similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl;
1537 1537
1538 if (is_bidirect && ((dup_status == BATADV_NO_DUP) || 1538 if (is_bidirect && (dup_status == BATADV_NO_DUP ||
1539 (sameseq && similar_ttl))) { 1539 (sameseq && similar_ttl))) {
1540 batadv_iv_ogm_orig_update(bat_priv, orig_node, 1540 batadv_iv_ogm_orig_update(bat_priv, orig_node,
1541 orig_ifinfo, ethhdr, 1541 orig_ifinfo, ethhdr,
@@ -1553,8 +1553,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
1553 /* OGMs from secondary interfaces should only scheduled once 1553 /* OGMs from secondary interfaces should only scheduled once
1554 * per interface where it has been received, not multiple times 1554 * per interface where it has been received, not multiple times
1555 */ 1555 */
1556 if ((ogm_packet->ttl <= 2) && 1556 if (ogm_packet->ttl <= 2 &&
1557 (if_incoming != if_outgoing)) { 1557 if_incoming != if_outgoing) {
1558 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 1558 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
1559 "Drop packet: OGM from secondary interface and wrong outgoing interface\n"); 1559 "Drop packet: OGM from secondary interface and wrong outgoing interface\n");
1560 goto out_neigh; 1560 goto out_neigh;
@@ -1590,7 +1590,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
1590 if_incoming, if_outgoing); 1590 if_incoming, if_outgoing);
1591 1591
1592out_neigh: 1592out_neigh:
1593 if ((orig_neigh_node) && (!is_single_hop_neigh)) 1593 if (orig_neigh_node && !is_single_hop_neigh)
1594 batadv_orig_node_put(orig_neigh_node); 1594 batadv_orig_node_put(orig_neigh_node);
1595out: 1595out:
1596 if (router_ifinfo) 1596 if (router_ifinfo)
@@ -2523,9 +2523,9 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
2523 tmp_gw_factor *= 100 * 100; 2523 tmp_gw_factor *= 100 * 100;
2524 tmp_gw_factor >>= 18; 2524 tmp_gw_factor >>= 18;
2525 2525
2526 if ((tmp_gw_factor > max_gw_factor) || 2526 if (tmp_gw_factor > max_gw_factor ||
2527 ((tmp_gw_factor == max_gw_factor) && 2527 (tmp_gw_factor == max_gw_factor &&
2528 (tq_avg > max_tq))) { 2528 tq_avg > max_tq)) {
2529 if (curr_gw) 2529 if (curr_gw)
2530 batadv_gw_node_put(curr_gw); 2530 batadv_gw_node_put(curr_gw);
2531 curr_gw = gw_node; 2531 curr_gw = gw_node;
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 4e2724c5b33d..341ceab8338d 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -19,7 +19,6 @@
19#include "main.h" 19#include "main.h"
20 20
21#include <linux/atomic.h> 21#include <linux/atomic.h>
22#include <linux/bug.h>
23#include <linux/cache.h> 22#include <linux/cache.h>
24#include <linux/errno.h> 23#include <linux/errno.h>
25#include <linux/if_ether.h> 24#include <linux/if_ether.h>
@@ -623,11 +622,11 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1,
623 int ret = 0; 622 int ret = 0;
624 623
625 ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); 624 ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
626 if (WARN_ON(!ifinfo1)) 625 if (!ifinfo1)
627 goto err_ifinfo1; 626 goto err_ifinfo1;
628 627
629 ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); 628 ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
630 if (WARN_ON(!ifinfo2)) 629 if (!ifinfo2)
631 goto err_ifinfo2; 630 goto err_ifinfo2;
632 631
633 ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; 632 ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput;
@@ -649,11 +648,11 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1,
649 bool ret = false; 648 bool ret = false;
650 649
651 ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); 650 ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
652 if (WARN_ON(!ifinfo1)) 651 if (!ifinfo1)
653 goto err_ifinfo1; 652 goto err_ifinfo1;
654 653
655 ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); 654 ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
656 if (WARN_ON(!ifinfo2)) 655 if (!ifinfo2)
657 goto err_ifinfo2; 656 goto err_ifinfo2;
658 657
659 threshold = ifinfo1->bat_v.throughput / 4; 658 threshold = ifinfo1->bat_v.throughput / 4;
@@ -767,7 +766,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
767 if (batadv_v_gw_throughput_get(gw_node, &bw) < 0) 766 if (batadv_v_gw_throughput_get(gw_node, &bw) < 0)
768 goto next; 767 goto next;
769 768
770 if (curr_gw && (bw <= max_bw)) 769 if (curr_gw && bw <= max_bw)
771 goto next; 770 goto next;
772 771
773 if (curr_gw) 772 if (curr_gw)
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index bd1064d98e16..1de992c58b35 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -134,7 +134,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
134 hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX; 134 hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
135 135
136 throughput = link_settings.base.speed; 136 throughput = link_settings.base.speed;
137 if (throughput && (throughput != SPEED_UNKNOWN)) 137 if (throughput && throughput != SPEED_UNKNOWN)
138 return throughput * 10; 138 return throughput * 10;
139 } 139 }
140 140
@@ -263,8 +263,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
263 goto out; 263 goto out;
264 264
265 /* we are in the process of shutting this interface down */ 265 /* we are in the process of shutting this interface down */
266 if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || 266 if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
267 (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) 267 hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
268 goto out; 268 goto out;
269 269
270 /* the interface was enabled but may not be ready yet */ 270 /* the interface was enabled but may not be ready yet */
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 8be61734fc43..c251445a42a0 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -304,8 +304,8 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
304 * due to the store & forward characteristics of WIFI. 304 * due to the store & forward characteristics of WIFI.
305 * Very low throughput values are the exception. 305 * Very low throughput values are the exception.
306 */ 306 */
307 if ((throughput > 10) && 307 if (throughput > 10 &&
308 (if_incoming == if_outgoing) && 308 if_incoming == if_outgoing &&
309 !(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX)) 309 !(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX))
310 return throughput / 2; 310 return throughput / 2;
311 311
@@ -455,7 +455,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
455 /* drop packets with old seqnos, however accept the first packet after 455 /* drop packets with old seqnos, however accept the first packet after
456 * a host has been rebooted. 456 * a host has been rebooted.
457 */ 457 */
458 if ((seq_diff < 0) && !protection_started) 458 if (seq_diff < 0 && !protection_started)
459 goto out; 459 goto out;
460 460
461 neigh_node->last_seen = jiffies; 461 neigh_node->last_seen = jiffies;
@@ -568,8 +568,8 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
568 router_throughput = router_ifinfo->bat_v.throughput; 568 router_throughput = router_ifinfo->bat_v.throughput;
569 neigh_throughput = neigh_ifinfo->bat_v.throughput; 569 neigh_throughput = neigh_ifinfo->bat_v.throughput;
570 570
571 if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) && 571 if (neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF &&
572 (router_throughput >= neigh_throughput)) 572 router_throughput >= neigh_throughput)
573 goto out; 573 goto out;
574 } 574 }
575 575
@@ -621,7 +621,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
621 return; 621 return;
622 622
623 /* only unknown & newer OGMs contain TVLVs we are interested in */ 623 /* only unknown & newer OGMs contain TVLVs we are interested in */
624 if ((seqno_age > 0) && (if_outgoing == BATADV_IF_DEFAULT)) 624 if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT)
625 batadv_tvlv_containers_process(bat_priv, true, orig_node, 625 batadv_tvlv_containers_process(bat_priv, true, orig_node,
626 NULL, NULL, 626 NULL, NULL,
627 (unsigned char *)(ogm2 + 1), 627 (unsigned char *)(ogm2 + 1),
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index b6cfa78e9381..760c0de72582 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -492,8 +492,8 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
492 /* this is an hash collision with the temporary selected node. Choose 492 /* this is an hash collision with the temporary selected node. Choose
493 * the one with the lowest address 493 * the one with the lowest address
494 */ 494 */
495 if ((tmp_max == max) && max_orig_node && 495 if (tmp_max == max && max_orig_node &&
496 (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)) 496 batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
497 goto out; 497 goto out;
498 498
499 ret = true; 499 ret = true;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index de9955d5224d..10d521f0b17f 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -248,12 +248,12 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
248 } 248 }
249 } 249 }
250 250
251 if ((curr_gw) && (!next_gw)) { 251 if (curr_gw && !next_gw) {
252 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 252 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
253 "Removing selected gateway - no gateway in range\n"); 253 "Removing selected gateway - no gateway in range\n");
254 batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, 254 batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL,
255 NULL); 255 NULL);
256 } else if ((!curr_gw) && (next_gw)) { 256 } else if (!curr_gw && next_gw) {
257 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 257 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
258 "Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n", 258 "Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n",
259 next_gw->orig_node->orig, 259 next_gw->orig_node->orig,
@@ -411,8 +411,8 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
411 goto out; 411 goto out;
412 } 412 }
413 413
414 if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) && 414 if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) &&
415 (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))) 415 gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))
416 goto out; 416 goto out;
417 417
418 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 418 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 33940c5c74a8..2c26039c23fc 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -56,8 +56,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
56 if (strncasecmp(tmp_ptr, "mbit", 4) == 0) 56 if (strncasecmp(tmp_ptr, "mbit", 4) == 0)
57 bw_unit_type = BATADV_BW_UNIT_MBIT; 57 bw_unit_type = BATADV_BW_UNIT_MBIT;
58 58
59 if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) || 59 if (strncasecmp(tmp_ptr, "kbit", 4) == 0 ||
60 (bw_unit_type == BATADV_BW_UNIT_MBIT)) 60 bw_unit_type == BATADV_BW_UNIT_MBIT)
61 *tmp_ptr = '\0'; 61 *tmp_ptr = '\0';
62 } 62 }
63 63
@@ -190,7 +190,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
190 if (!up_new) 190 if (!up_new)
191 up_new = 1; 191 up_new = 1;
192 192
193 if ((down_curr == down_new) && (up_curr == up_new)) 193 if (down_curr == down_new && up_curr == up_new)
194 return count; 194 return count;
195 195
196 batadv_gw_reselect(bat_priv); 196 batadv_gw_reselect(bat_priv);
@@ -224,16 +224,16 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
224 /* only fetch the tvlv value if the handler wasn't called via the 224 /* only fetch the tvlv value if the handler wasn't called via the
225 * CIFNOTFND flag and if there is data to fetch 225 * CIFNOTFND flag and if there is data to fetch
226 */ 226 */
227 if ((flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) || 227 if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND ||
228 (tvlv_value_len < sizeof(gateway))) { 228 tvlv_value_len < sizeof(gateway)) {
229 gateway.bandwidth_down = 0; 229 gateway.bandwidth_down = 0;
230 gateway.bandwidth_up = 0; 230 gateway.bandwidth_up = 0;
231 } else { 231 } else {
232 gateway_ptr = tvlv_value; 232 gateway_ptr = tvlv_value;
233 gateway.bandwidth_down = gateway_ptr->bandwidth_down; 233 gateway.bandwidth_down = gateway_ptr->bandwidth_down;
234 gateway.bandwidth_up = gateway_ptr->bandwidth_up; 234 gateway.bandwidth_up = gateway_ptr->bandwidth_up;
235 if ((gateway.bandwidth_down == 0) || 235 if (gateway.bandwidth_down == 0 ||
236 (gateway.bandwidth_up == 0)) { 236 gateway.bandwidth_up == 0) {
237 gateway.bandwidth_down = 0; 237 gateway.bandwidth_down = 0;
238 gateway.bandwidth_up = 0; 238 gateway.bandwidth_up = 0;
239 } 239 }
@@ -242,8 +242,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
242 batadv_gw_node_update(bat_priv, orig, &gateway); 242 batadv_gw_node_update(bat_priv, orig, &gateway);
243 243
244 /* restart gateway selection */ 244 /* restart gateway selection */
245 if ((gateway.bandwidth_down != 0) && 245 if (gateway.bandwidth_down != 0 &&
246 (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)) 246 atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)
247 batadv_gw_check_election(bat_priv, orig); 247 batadv_gw_check_election(bat_priv, orig);
248} 248}
249 249
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index e348f76ea8c1..4e3d5340ad96 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -504,8 +504,8 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev)
504 504
505 rcu_read_lock(); 505 rcu_read_lock();
506 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { 506 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
507 if ((hard_iface->if_status != BATADV_IF_ACTIVE) && 507 if (hard_iface->if_status != BATADV_IF_ACTIVE &&
508 (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)) 508 hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
509 continue; 509 continue;
510 510
511 if (hard_iface->net_dev == net_dev) 511 if (hard_iface->net_dev == net_dev)
@@ -568,8 +568,8 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
568 568
569 rcu_read_lock(); 569 rcu_read_lock();
570 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { 570 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
571 if ((hard_iface->if_status != BATADV_IF_ACTIVE) && 571 if (hard_iface->if_status != BATADV_IF_ACTIVE &&
572 (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)) 572 hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
573 continue; 573 continue;
574 574
575 if (hard_iface->soft_iface != soft_iface) 575 if (hard_iface->soft_iface != soft_iface)
@@ -654,8 +654,8 @@ out:
654static void 654static void
655batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) 655batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
656{ 656{
657 if ((hard_iface->if_status != BATADV_IF_ACTIVE) && 657 if (hard_iface->if_status != BATADV_IF_ACTIVE &&
658 (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)) 658 hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
659 return; 659 return;
660 660
661 hard_iface->if_status = BATADV_IF_INACTIVE; 661 hard_iface->if_status = BATADV_IF_INACTIVE;
@@ -738,7 +738,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
738 bat_priv = netdev_priv(hard_iface->soft_iface); 738 bat_priv = netdev_priv(hard_iface->soft_iface);
739 739
740 ret = netdev_master_upper_dev_link(hard_iface->net_dev, 740 ret = netdev_master_upper_dev_link(hard_iface->net_dev,
741 soft_iface, NULL, NULL); 741 soft_iface, NULL, NULL, NULL);
742 if (ret) 742 if (ret)
743 goto err_dev; 743 goto err_dev;
744 744
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 8ead292886d1..bded31121d12 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -132,10 +132,10 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf,
132 size_t packet_len; 132 size_t packet_len;
133 int error; 133 int error;
134 134
135 if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0)) 135 if ((file->f_flags & O_NONBLOCK) && socket_client->queue_len == 0)
136 return -EAGAIN; 136 return -EAGAIN;
137 137
138 if ((!buf) || (count < sizeof(struct batadv_icmp_packet))) 138 if (!buf || count < sizeof(struct batadv_icmp_packet))
139 return -EINVAL; 139 return -EINVAL;
140 140
141 if (!access_ok(VERIFY_WRITE, buf, count)) 141 if (!access_ok(VERIFY_WRITE, buf, count))
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fb381fb26a66..4daed7ad46f2 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -73,8 +73,8 @@
73 * list traversals just rcu-locked 73 * list traversals just rcu-locked
74 */ 74 */
75struct list_head batadv_hardif_list; 75struct list_head batadv_hardif_list;
76static int (*batadv_rx_handler[256])(struct sk_buff *, 76static int (*batadv_rx_handler[256])(struct sk_buff *skb,
77 struct batadv_hard_iface *); 77 struct batadv_hard_iface *recv_if);
78 78
79unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 79unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
80 80
@@ -540,12 +540,12 @@ batadv_recv_handler_register(u8 packet_type,
540 int (*recv_handler)(struct sk_buff *, 540 int (*recv_handler)(struct sk_buff *,
541 struct batadv_hard_iface *)) 541 struct batadv_hard_iface *))
542{ 542{
543 int (*curr)(struct sk_buff *, 543 int (*curr)(struct sk_buff *skb,
544 struct batadv_hard_iface *); 544 struct batadv_hard_iface *recv_if);
545 curr = batadv_rx_handler[packet_type]; 545 curr = batadv_rx_handler[packet_type];
546 546
547 if ((curr != batadv_recv_unhandled_packet) && 547 if (curr != batadv_recv_unhandled_packet &&
548 (curr != batadv_recv_unhandled_unicast_packet)) 548 curr != batadv_recv_unhandled_unicast_packet)
549 return -EBUSY; 549 return -EBUSY;
550 550
551 batadv_rx_handler[packet_type] = recv_handler; 551 batadv_rx_handler[packet_type] = recv_handler;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 05cc7637c064..edb2f239d04d 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
24#define BATADV_DRIVER_DEVICE "batman-adv" 24#define BATADV_DRIVER_DEVICE "batman-adv"
25 25
26#ifndef BATADV_SOURCE_VERSION 26#ifndef BATADV_SOURCE_VERSION
27#define BATADV_SOURCE_VERSION "2017.3" 27#define BATADV_SOURCE_VERSION "2017.4"
28#endif 28#endif
29 29
30/* B.A.T.M.A.N. parameters */ 30/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index d327670641ac..e553a8770a89 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1126,7 +1126,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
1126 bool orig_initialized; 1126 bool orig_initialized;
1127 1127
1128 if (orig_mcast_enabled && tvlv_value && 1128 if (orig_mcast_enabled && tvlv_value &&
1129 (tvlv_value_len >= sizeof(mcast_flags))) 1129 tvlv_value_len >= sizeof(mcast_flags))
1130 mcast_flags = *(u8 *)tvlv_value; 1130 mcast_flags = *(u8 *)tvlv_value;
1131 1131
1132 spin_lock_bh(&orig->mcast_handler_lock); 1132 spin_lock_bh(&orig->mcast_handler_lock);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 8e2a4b205257..2967b86c13da 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1062,9 +1062,9 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
1062 continue; 1062 continue;
1063 1063
1064 /* don't purge if the interface is not (going) down */ 1064 /* don't purge if the interface is not (going) down */
1065 if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && 1065 if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
1066 (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && 1066 if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
1067 (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) 1067 if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
1068 continue; 1068 continue;
1069 1069
1070 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 1070 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1106,9 +1106,9 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
1106 continue; 1106 continue;
1107 1107
1108 /* don't purge if the interface is not (going) down */ 1108 /* don't purge if the interface is not (going) down */
1109 if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && 1109 if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
1110 (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && 1110 if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
1111 (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) 1111 if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
1112 continue; 1112 continue;
1113 1113
1114 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 1114 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1155,13 +1155,13 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
1155 last_seen = neigh_node->last_seen; 1155 last_seen = neigh_node->last_seen;
1156 if_incoming = neigh_node->if_incoming; 1156 if_incoming = neigh_node->if_incoming;
1157 1157
1158 if ((batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT)) || 1158 if (batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT) ||
1159 (if_incoming->if_status == BATADV_IF_INACTIVE) || 1159 if_incoming->if_status == BATADV_IF_INACTIVE ||
1160 (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || 1160 if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
1161 (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) { 1161 if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) {
1162 if ((if_incoming->if_status == BATADV_IF_INACTIVE) || 1162 if (if_incoming->if_status == BATADV_IF_INACTIVE ||
1163 (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || 1163 if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
1164 (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) 1164 if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)
1165 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 1165 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
1166 "neighbor purge: originator %pM, neighbor: %pM, iface: %s\n", 1166 "neighbor purge: originator %pM, neighbor: %pM, iface: %s\n",
1167 orig_node->orig, neigh_node->addr, 1167 orig_node->orig, neigh_node->addr,
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index f10e3ff26f9d..40d9bf3e5bfe 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -93,14 +93,14 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
93 batadv_orig_ifinfo_put(orig_ifinfo); 93 batadv_orig_ifinfo_put(orig_ifinfo);
94 94
95 /* route deleted */ 95 /* route deleted */
96 if ((curr_router) && (!neigh_node)) { 96 if (curr_router && !neigh_node) {
97 batadv_dbg(BATADV_DBG_ROUTES, bat_priv, 97 batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
98 "Deleting route towards: %pM\n", orig_node->orig); 98 "Deleting route towards: %pM\n", orig_node->orig);
99 batadv_tt_global_del_orig(bat_priv, orig_node, -1, 99 batadv_tt_global_del_orig(bat_priv, orig_node, -1,
100 "Deleted route towards originator"); 100 "Deleted route towards originator");
101 101
102 /* route added */ 102 /* route added */
103 } else if ((!curr_router) && (neigh_node)) { 103 } else if (!curr_router && neigh_node) {
104 batadv_dbg(BATADV_DBG_ROUTES, bat_priv, 104 batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
105 "Adding route towards: %pM (via %pM)\n", 105 "Adding route towards: %pM (via %pM)\n",
106 orig_node->orig, neigh_node->addr); 106 orig_node->orig, neigh_node->addr);
@@ -381,7 +381,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
381 /* add record route information if not full */ 381 /* add record route information if not full */
382 if ((icmph->msg_type == BATADV_ECHO_REPLY || 382 if ((icmph->msg_type == BATADV_ECHO_REPLY ||
383 icmph->msg_type == BATADV_ECHO_REQUEST) && 383 icmph->msg_type == BATADV_ECHO_REQUEST) &&
384 (skb->len >= sizeof(struct batadv_icmp_packet_rr))) { 384 skb->len >= sizeof(struct batadv_icmp_packet_rr)) {
385 if (skb_linearize(skb) < 0) 385 if (skb_linearize(skb) < 0)
386 goto free_skb; 386 goto free_skb;
387 387
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 054a65e6eb68..7895323fd2a7 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -142,7 +142,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
142#ifdef CONFIG_BATMAN_ADV_BATMAN_V 142#ifdef CONFIG_BATMAN_ADV_BATMAN_V
143 hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr); 143 hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr);
144 144
145 if ((hardif_neigh) && (ret != NET_XMIT_DROP)) 145 if (hardif_neigh && ret != NET_XMIT_DROP)
146 hardif_neigh->bat_v.last_unicast_tx = jiffies; 146 hardif_neigh->bat_v.last_unicast_tx = jiffies;
147 147
148 if (hardif_neigh) 148 if (hardif_neigh)
@@ -615,8 +615,8 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list,
615 * we delete only packets belonging to the given interface 615 * we delete only packets belonging to the given interface
616 */ 616 */
617 if (hard_iface && 617 if (hard_iface &&
618 (forw_packet->if_incoming != hard_iface) && 618 forw_packet->if_incoming != hard_iface &&
619 (forw_packet->if_outgoing != hard_iface)) 619 forw_packet->if_outgoing != hard_iface)
620 continue; 620 continue;
621 621
622 hlist_del(&forw_packet->list); 622 hlist_del(&forw_packet->list);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 10f7edfb176e..9f673cdfecf8 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -69,8 +69,8 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
69 int result; 69 int result;
70 70
71 /* TODO: We must check if we can release all references to non-payload 71 /* TODO: We must check if we can release all references to non-payload
72 * data using skb_header_release in our skbs to allow skb_cow_header to 72 * data using __skb_header_release in our skbs to allow skb_cow_header
73 * work optimally. This means that those skbs are not allowed to read 73 * to work optimally. This means that those skbs are not allowed to read
74 * or write any data which is before the current position of skb->data 74 * or write any data which is before the current position of skb->data
75 * after that call and thus allow other skbs with the same data buffer 75 * after that call and thus allow other skbs with the same data buffer
76 * to write freely in that area. 76 * to write freely in that area.
@@ -160,7 +160,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
160static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) 160static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
161{ 161{
162 /* check ranges */ 162 /* check ranges */
163 if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev))) 163 if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
164 return -EINVAL; 164 return -EINVAL;
165 165
166 dev->mtu = new_mtu; 166 dev->mtu = new_mtu;
@@ -863,11 +863,13 @@ free_bat_counters:
863 * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface 863 * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface
864 * @dev: batadv_soft_interface used as master interface 864 * @dev: batadv_soft_interface used as master interface
865 * @slave_dev: net_device which should become the slave interface 865 * @slave_dev: net_device which should become the slave interface
866 * @extack: extended ACK report struct
866 * 867 *
867 * Return: 0 if successful or error otherwise. 868 * Return: 0 if successful or error otherwise.
868 */ 869 */
869static int batadv_softif_slave_add(struct net_device *dev, 870static int batadv_softif_slave_add(struct net_device *dev,
870 struct net_device *slave_dev) 871 struct net_device *slave_dev,
872 struct netlink_ext_ack *extack)
871{ 873{
872 struct batadv_hard_iface *hard_iface; 874 struct batadv_hard_iface *hard_iface;
873 struct net *net = dev_net(dev); 875 struct net *net = dev_net(dev);
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 0ae8b30e4eaa..aa187fd42475 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -925,8 +925,8 @@ static int batadv_store_mesh_iface_finish(struct net_device *net_dev,
925 if (hard_iface->if_status == status_tmp) 925 if (hard_iface->if_status == status_tmp)
926 goto out; 926 goto out;
927 927
928 if ((hard_iface->soft_iface) && 928 if (hard_iface->soft_iface &&
929 (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)) 929 strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)
930 goto out; 930 goto out;
931 931
932 if (status_tmp == BATADV_IF_NOT_IN_USE) { 932 if (status_tmp == BATADV_IF_NOT_IN_USE) {
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index bfe8effe9238..15cd2139381e 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -488,9 +488,9 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
488 * Switch to Slow Start, set the ss_threshold to half of the current cwnd and 488 * Switch to Slow Start, set the ss_threshold to half of the current cwnd and
489 * reset the cwnd to 3*MSS 489 * reset the cwnd to 3*MSS
490 */ 490 */
491static void batadv_tp_sender_timeout(unsigned long arg) 491static void batadv_tp_sender_timeout(struct timer_list *t)
492{ 492{
493 struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg; 493 struct batadv_tp_vars *tp_vars = from_timer(tp_vars, t, timer);
494 struct batadv_priv *bat_priv = tp_vars->bat_priv; 494 struct batadv_priv *bat_priv = tp_vars->bat_priv;
495 495
496 if (atomic_read(&tp_vars->sending) == 0) 496 if (atomic_read(&tp_vars->sending) == 0)
@@ -1020,8 +1020,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
1020 atomic64_set(&tp_vars->tot_sent, 0); 1020 atomic64_set(&tp_vars->tot_sent, 0);
1021 1021
1022 kref_get(&tp_vars->refcount); 1022 kref_get(&tp_vars->refcount);
1023 setup_timer(&tp_vars->timer, batadv_tp_sender_timeout, 1023 timer_setup(&tp_vars->timer, batadv_tp_sender_timeout, 0);
1024 (unsigned long)tp_vars);
1025 1024
1026 tp_vars->bat_priv = bat_priv; 1025 tp_vars->bat_priv = bat_priv;
1027 tp_vars->start_time = jiffies; 1026 tp_vars->start_time = jiffies;
@@ -1109,9 +1108,9 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
1109 * reached without received ack 1108 * reached without received ack
1110 * @arg: address of the related tp_vars 1109 * @arg: address of the related tp_vars
1111 */ 1110 */
1112static void batadv_tp_receiver_shutdown(unsigned long arg) 1111static void batadv_tp_receiver_shutdown(struct timer_list *t)
1113{ 1112{
1114 struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg; 1113 struct batadv_tp_vars *tp_vars = from_timer(tp_vars, t, timer);
1115 struct batadv_tp_unacked *un, *safe; 1114 struct batadv_tp_unacked *un, *safe;
1116 struct batadv_priv *bat_priv; 1115 struct batadv_priv *bat_priv;
1117 1116
@@ -1206,7 +1205,7 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
1206 1205
1207 /* send the ack */ 1206 /* send the ack */
1208 r = batadv_send_skb_to_orig(skb, orig_node, NULL); 1207 r = batadv_send_skb_to_orig(skb, orig_node, NULL);
1209 if (unlikely(r < 0) || (r == NET_XMIT_DROP)) { 1208 if (unlikely(r < 0) || r == NET_XMIT_DROP) {
1210 ret = BATADV_TP_REASON_DST_UNREACHABLE; 1209 ret = BATADV_TP_REASON_DST_UNREACHABLE;
1211 goto out; 1210 goto out;
1212 } 1211 }
@@ -1373,8 +1372,7 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv,
1373 hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list); 1372 hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list);
1374 1373
1375 kref_get(&tp_vars->refcount); 1374 kref_get(&tp_vars->refcount);
1376 setup_timer(&tp_vars->timer, batadv_tp_receiver_shutdown, 1375 timer_setup(&tp_vars->timer, batadv_tp_receiver_shutdown, 0);
1377 (unsigned long)tp_vars);
1378 1376
1379 batadv_tp_reset_receiver_timer(tp_vars); 1377 batadv_tp_reset_receiver_timer(tp_vars);
1380 1378
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index aad994edd3bb..51c2cf2d8923 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -573,7 +573,7 @@ static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
573 hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK, 573 hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK,
574 &mgr->l2cap_conn->hcon->dst); 574 &mgr->l2cap_conn->hcon->dst);
575 if (!hcon) { 575 if (!hcon) {
576 BT_ERR("No phys link exist"); 576 bt_dev_err(hdev, "no phys link exist");
577 rsp.status = A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS; 577 rsp.status = A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS;
578 goto clean; 578 goto clean;
579 } 579 }
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index ebcab5bbadd7..78bec8df8525 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -187,7 +187,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
187 187
188 /* Legacy key */ 188 /* Legacy key */
189 if (conn->key_type < 3) { 189 if (conn->key_type < 3) {
190 BT_ERR("Legacy key type %d", conn->key_type); 190 bt_dev_err(hdev, "legacy key type %d", conn->key_type);
191 return -EACCES; 191 return -EACCES;
192 } 192 }
193 193
@@ -207,7 +207,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
207 /* Derive Generic AMP Link Key (gamp) */ 207 /* Derive Generic AMP Link Key (gamp) */
208 err = hmac_sha256(keybuf, HCI_AMP_LINK_KEY_SIZE, "gamp", 4, gamp_key); 208 err = hmac_sha256(keybuf, HCI_AMP_LINK_KEY_SIZE, "gamp", 4, gamp_key);
209 if (err) { 209 if (err) {
210 BT_ERR("Could not derive Generic AMP Key: err %d", err); 210 bt_dev_err(hdev, "could not derive Generic AMP Key: err %d", err);
211 return err; 211 return err;
212 } 212 }
213 213
diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c
index c7b1a9aee579..2155ce802877 100644
--- a/net/bluetooth/ecdh_helper.c
+++ b/net/bluetooth/ecdh_helper.c
@@ -23,7 +23,6 @@
23#include "ecdh_helper.h" 23#include "ecdh_helper.h"
24 24
25#include <linux/scatterlist.h> 25#include <linux/scatterlist.h>
26#include <crypto/kpp.h>
27#include <crypto/ecdh.h> 26#include <crypto/ecdh.h>
28 27
29struct ecdh_completion { 28struct ecdh_completion {
@@ -50,55 +49,35 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
50 out[i] = __swab64(in[ndigits - 1 - i]); 49 out[i] = __swab64(in[ndigits - 1 - i]);
51} 50}
52 51
53bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32], 52/* compute_ecdh_secret() - function assumes that the private key was
54 u8 secret[32]) 53 * already set.
54 * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
55 * @public_key: pair's ecc public key.
56 * secret: memory where the ecdh computed shared secret will be saved.
57 *
58 * Return: zero on success; error code in case of error.
59 */
60int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64],
61 u8 secret[32])
55{ 62{
56 struct crypto_kpp *tfm;
57 struct kpp_request *req; 63 struct kpp_request *req;
58 struct ecdh p; 64 u8 *tmp;
59 struct ecdh_completion result; 65 struct ecdh_completion result;
60 struct scatterlist src, dst; 66 struct scatterlist src, dst;
61 u8 *tmp, *buf; 67 int err;
62 unsigned int buf_len;
63 int err = -ENOMEM;
64 68
65 tmp = kmalloc(64, GFP_KERNEL); 69 tmp = kmalloc(64, GFP_KERNEL);
66 if (!tmp) 70 if (!tmp)
67 return false; 71 return -ENOMEM;
68 72
69 tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); 73 req = kpp_request_alloc(tfm, GFP_KERNEL);
70 if (IS_ERR(tfm)) { 74 if (!req) {
71 pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n", 75 err = -ENOMEM;
72 PTR_ERR(tfm));
73 goto free_tmp; 76 goto free_tmp;
74 } 77 }
75 78
76 req = kpp_request_alloc(tfm, GFP_KERNEL);
77 if (!req)
78 goto free_kpp;
79
80 init_completion(&result.completion); 79 init_completion(&result.completion);
81 80
82 /* Security Manager Protocol holds digits in litte-endian order
83 * while ECC API expect big-endian data
84 */
85 swap_digits((u64 *)private_key, (u64 *)tmp, 4);
86 p.key = (char *)tmp;
87 p.key_size = 32;
88 /* Set curve_id */
89 p.curve_id = ECC_CURVE_NIST_P256;
90 buf_len = crypto_ecdh_key_len(&p);
91 buf = kmalloc(buf_len, GFP_KERNEL);
92 if (!buf)
93 goto free_req;
94
95 crypto_ecdh_encode_key(buf, buf_len, &p);
96
97 /* Set A private Key */
98 err = crypto_kpp_set_secret(tfm, (void *)buf, buf_len);
99 if (err)
100 goto free_all;
101
102 swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */ 81 swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */
103 swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */ 82 swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */
104 83
@@ -123,104 +102,129 @@ bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
123 memcpy(secret, tmp, 32); 102 memcpy(secret, tmp, 32);
124 103
125free_all: 104free_all:
126 kzfree(buf);
127free_req:
128 kpp_request_free(req); 105 kpp_request_free(req);
129free_kpp:
130 crypto_free_kpp(tfm);
131free_tmp: 106free_tmp:
132 kfree(tmp); 107 kzfree(tmp);
133 return (err == 0); 108 return err;
134} 109}
135 110
136bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]) 111/* set_ecdh_privkey() - set or generate ecc private key.
112 *
113 * Function generates an ecc private key in the crypto subsystem when receiving
114 * a NULL private key or sets the received key when not NULL.
115 *
116 * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
117 * @private_key: user's ecc private key. When not NULL, the key is expected
118 * in little endian format.
119 *
120 * Return: zero on success; error code in case of error.
121 */
122int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 private_key[32])
123{
124 u8 *buf, *tmp = NULL;
125 unsigned int buf_len;
126 int err;
127 struct ecdh p = {0};
128
129 p.curve_id = ECC_CURVE_NIST_P256;
130
131 if (private_key) {
132 tmp = kmalloc(32, GFP_KERNEL);
133 if (!tmp)
134 return -ENOMEM;
135 swap_digits((u64 *)private_key, (u64 *)tmp, 4);
136 p.key = tmp;
137 p.key_size = 32;
138 }
139
140 buf_len = crypto_ecdh_key_len(&p);
141 buf = kmalloc(buf_len, GFP_KERNEL);
142 if (!buf) {
143 err = -ENOMEM;
144 goto free_tmp;
145 }
146
147 err = crypto_ecdh_encode_key(buf, buf_len, &p);
148 if (err)
149 goto free_all;
150
151 err = crypto_kpp_set_secret(tfm, buf, buf_len);
152 /* fall through */
153free_all:
154 kzfree(buf);
155free_tmp:
156 kzfree(tmp);
157 return err;
158}
159
160/* generate_ecdh_public_key() - function assumes that the private key was
161 * already set.
162 *
163 * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
164 * @public_key: memory where the computed ecc public key will be saved.
165 *
166 * Return: zero on success; error code in case of error.
167 */
168int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64])
137{ 169{
138 struct crypto_kpp *tfm;
139 struct kpp_request *req; 170 struct kpp_request *req;
140 struct ecdh p; 171 u8 *tmp;
141 struct ecdh_completion result; 172 struct ecdh_completion result;
142 struct scatterlist dst; 173 struct scatterlist dst;
143 u8 *tmp, *buf; 174 int err;
144 unsigned int buf_len;
145 int err = -ENOMEM;
146 const unsigned short max_tries = 16;
147 unsigned short tries = 0;
148 175
149 tmp = kmalloc(64, GFP_KERNEL); 176 tmp = kmalloc(64, GFP_KERNEL);
150 if (!tmp) 177 if (!tmp)
151 return false; 178 return -ENOMEM;
152 179
153 tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); 180 req = kpp_request_alloc(tfm, GFP_KERNEL);
154 if (IS_ERR(tfm)) { 181 if (!req) {
155 pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n", 182 err = -ENOMEM;
156 PTR_ERR(tfm));
157 goto free_tmp; 183 goto free_tmp;
158 } 184 }
159 185
160 req = kpp_request_alloc(tfm, GFP_KERNEL);
161 if (!req)
162 goto free_kpp;
163
164 init_completion(&result.completion); 186 init_completion(&result.completion);
187 sg_init_one(&dst, tmp, 64);
188 kpp_request_set_input(req, NULL, 0);
189 kpp_request_set_output(req, &dst, 64);
190 kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
191 ecdh_complete, &result);
165 192
166 /* Set curve_id */ 193 err = crypto_kpp_generate_public_key(req);
167 p.curve_id = ECC_CURVE_NIST_P256; 194 if (err == -EINPROGRESS) {
168 p.key_size = 32; 195 wait_for_completion(&result.completion);
169 buf_len = crypto_ecdh_key_len(&p); 196 err = result.err;
170 buf = kmalloc(buf_len, GFP_KERNEL); 197 }
171 if (!buf) 198 if (err < 0)
172 goto free_req; 199 goto free_all;
173 200
174 do { 201 /* The public key is handed back in little endian as expected by
175 if (tries++ >= max_tries) 202 * the Security Manager Protocol.
176 goto free_all;
177
178 /* Set private Key */
179 p.key = (char *)private_key;
180 crypto_ecdh_encode_key(buf, buf_len, &p);
181 err = crypto_kpp_set_secret(tfm, buf, buf_len);
182 if (err)
183 goto free_all;
184
185 sg_init_one(&dst, tmp, 64);
186 kpp_request_set_input(req, NULL, 0);
187 kpp_request_set_output(req, &dst, 64);
188 kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
189 ecdh_complete, &result);
190
191 err = crypto_kpp_generate_public_key(req);
192
193 if (err == -EINPROGRESS) {
194 wait_for_completion(&result.completion);
195 err = result.err;
196 }
197
198 /* Private key is not valid. Regenerate */
199 if (err == -EINVAL)
200 continue;
201
202 if (err < 0)
203 goto free_all;
204 else
205 break;
206
207 } while (true);
208
209 /* Keys are handed back in little endian as expected by Security
210 * Manager Protocol
211 */ 203 */
212 swap_digits((u64 *)tmp, (u64 *)public_key, 4); /* x */ 204 swap_digits((u64 *)tmp, (u64 *)public_key, 4); /* x */
213 swap_digits((u64 *)&tmp[32], (u64 *)&public_key[32], 4); /* y */ 205 swap_digits((u64 *)&tmp[32], (u64 *)&public_key[32], 4); /* y */
214 swap_digits((u64 *)private_key, (u64 *)tmp, 4);
215 memcpy(private_key, tmp, 32);
216 206
217free_all: 207free_all:
218 kzfree(buf);
219free_req:
220 kpp_request_free(req); 208 kpp_request_free(req);
221free_kpp:
222 crypto_free_kpp(tfm);
223free_tmp: 209free_tmp:
224 kfree(tmp); 210 kfree(tmp);
225 return (err == 0); 211 return err;
212}
213
214/* generate_ecdh_keys() - generate ecc key pair.
215 *
216 * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
217 * @public_key: memory where the computed ecc public key will be saved.
218 *
219 * Return: zero on success; error code in case of error.
220 */
221int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64])
222{
223 int err;
224
225 err = set_ecdh_privkey(tfm, NULL);
226 if (err)
227 return err;
228
229 return generate_ecdh_public_key(tfm, public_key);
226} 230}
diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h
index 7a423faf76e5..a6f8d03d4aaf 100644
--- a/net/bluetooth/ecdh_helper.h
+++ b/net/bluetooth/ecdh_helper.h
@@ -20,8 +20,11 @@
20 * COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 20 * COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
21 * SOFTWARE IS DISCLAIMED. 21 * SOFTWARE IS DISCLAIMED.
22 */ 22 */
23#include <crypto/kpp.h>
23#include <linux/types.h> 24#include <linux/types.h>
24 25
25bool compute_ecdh_secret(const u8 pub_a[64], const u8 priv_b[32], 26int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pair_public_key[64],
26 u8 secret[32]); 27 u8 secret[32]);
27bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]); 28int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 *private_key);
29int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]);
30int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index dc59eae54717..a9682534c377 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -729,8 +729,8 @@ static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode)
729 goto done; 729 goto done;
730 } 730 }
731 731
732 BT_ERR("HCI request failed to create LE connection: status 0x%2.2x", 732 bt_dev_err(hdev, "request failed to create LE connection: "
733 status); 733 "status 0x%2.2x", status);
734 734
735 if (!conn) 735 if (!conn)
736 goto done; 736 goto done;
@@ -907,7 +907,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
907 */ 907 */
908 if (hci_dev_test_flag(hdev, HCI_LE_SCAN) && 908 if (hci_dev_test_flag(hdev, HCI_LE_SCAN) &&
909 hdev->le_scan_type == LE_SCAN_ACTIVE) { 909 hdev->le_scan_type == LE_SCAN_ACTIVE) {
910 skb_queue_purge(&req.cmd_q); 910 hci_req_purge(&req);
911 hci_conn_del(conn); 911 hci_conn_del(conn);
912 return ERR_PTR(-EBUSY); 912 return ERR_PTR(-EBUSY);
913 } 913 }
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 6bc679cd3481..40d260f2bea5 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -267,7 +267,7 @@ static int hci_init1_req(struct hci_request *req, unsigned long opt)
267 amp_init1(req); 267 amp_init1(req);
268 break; 268 break;
269 default: 269 default:
270 BT_ERR("Unknown device type %d", hdev->dev_type); 270 bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type);
271 break; 271 break;
272 } 272 }
273 273
@@ -2150,8 +2150,7 @@ static void hci_error_reset(struct work_struct *work)
2150 if (hdev->hw_error) 2150 if (hdev->hw_error)
2151 hdev->hw_error(hdev, hdev->hw_error_code); 2151 hdev->hw_error(hdev, hdev->hw_error_code);
2152 else 2152 else
2153 BT_ERR("%s hardware error 0x%2.2x", hdev->name, 2153 bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
2154 hdev->hw_error_code);
2155 2154
2156 if (hci_dev_do_close(hdev)) 2155 if (hci_dev_do_close(hdev))
2157 return; 2156 return;
@@ -2524,9 +2523,9 @@ static void hci_cmd_timeout(struct work_struct *work)
2524 struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; 2523 struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
2525 u16 opcode = __le16_to_cpu(sent->opcode); 2524 u16 opcode = __le16_to_cpu(sent->opcode);
2526 2525
2527 BT_ERR("%s command 0x%4.4x tx timeout", hdev->name, opcode); 2526 bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode);
2528 } else { 2527 } else {
2529 BT_ERR("%s command tx timeout", hdev->name); 2528 bt_dev_err(hdev, "command tx timeout");
2530 } 2529 }
2531 2530
2532 atomic_set(&hdev->cmd_cnt, 1); 2531 atomic_set(&hdev->cmd_cnt, 1);
@@ -2858,7 +2857,7 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
2858 2857
2859 params = kzalloc(sizeof(*params), GFP_KERNEL); 2858 params = kzalloc(sizeof(*params), GFP_KERNEL);
2860 if (!params) { 2859 if (!params) {
2861 BT_ERR("Out of memory"); 2860 bt_dev_err(hdev, "out of memory");
2862 return NULL; 2861 return NULL;
2863 } 2862 }
2864 2863
@@ -3393,7 +3392,7 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
3393 3392
3394 err = hdev->send(hdev, skb); 3393 err = hdev->send(hdev, skb);
3395 if (err < 0) { 3394 if (err < 0) {
3396 BT_ERR("%s sending frame failed (%d)", hdev->name, err); 3395 bt_dev_err(hdev, "sending frame failed (%d)", err);
3397 kfree_skb(skb); 3396 kfree_skb(skb);
3398 } 3397 }
3399} 3398}
@@ -3408,7 +3407,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
3408 3407
3409 skb = hci_prepare_cmd(hdev, opcode, plen, param); 3408 skb = hci_prepare_cmd(hdev, opcode, plen, param);
3410 if (!skb) { 3409 if (!skb) {
3411 BT_ERR("%s no memory for command", hdev->name); 3410 bt_dev_err(hdev, "no memory for command");
3412 return -ENOMEM; 3411 return -ENOMEM;
3413 } 3412 }
3414 3413
@@ -3493,7 +3492,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue,
3493 hci_add_acl_hdr(skb, chan->handle, flags); 3492 hci_add_acl_hdr(skb, chan->handle, flags);
3494 break; 3493 break;
3495 default: 3494 default:
3496 BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type); 3495 bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
3497 return; 3496 return;
3498 } 3497 }
3499 3498
@@ -3618,7 +3617,7 @@ static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type,
3618 break; 3617 break;
3619 default: 3618 default:
3620 cnt = 0; 3619 cnt = 0;
3621 BT_ERR("Unknown link type"); 3620 bt_dev_err(hdev, "unknown link type %d", conn->type);
3622 } 3621 }
3623 3622
3624 q = cnt / num; 3623 q = cnt / num;
@@ -3635,15 +3634,15 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
3635 struct hci_conn_hash *h = &hdev->conn_hash; 3634 struct hci_conn_hash *h = &hdev->conn_hash;
3636 struct hci_conn *c; 3635 struct hci_conn *c;
3637 3636
3638 BT_ERR("%s link tx timeout", hdev->name); 3637 bt_dev_err(hdev, "link tx timeout");
3639 3638
3640 rcu_read_lock(); 3639 rcu_read_lock();
3641 3640
3642 /* Kill stalled connections */ 3641 /* Kill stalled connections */
3643 list_for_each_entry_rcu(c, &h->list, list) { 3642 list_for_each_entry_rcu(c, &h->list, list) {
3644 if (c->type == type && c->sent) { 3643 if (c->type == type && c->sent) {
3645 BT_ERR("%s killing stalled connection %pMR", 3644 bt_dev_err(hdev, "killing stalled connection %pMR",
3646 hdev->name, &c->dst); 3645 &c->dst);
3647 hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); 3646 hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
3648 } 3647 }
3649 } 3648 }
@@ -3724,7 +3723,7 @@ static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type,
3724 break; 3723 break;
3725 default: 3724 default:
3726 cnt = 0; 3725 cnt = 0;
3727 BT_ERR("Unknown link type"); 3726 bt_dev_err(hdev, "unknown link type %d", chan->conn->type);
3728 } 3727 }
3729 3728
3730 q = cnt / num; 3729 q = cnt / num;
@@ -4066,8 +4065,8 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
4066 l2cap_recv_acldata(conn, skb, flags); 4065 l2cap_recv_acldata(conn, skb, flags);
4067 return; 4066 return;
4068 } else { 4067 } else {
4069 BT_ERR("%s ACL packet for unknown connection handle %d", 4068 bt_dev_err(hdev, "ACL packet for unknown connection handle %d",
4070 hdev->name, handle); 4069 handle);
4071 } 4070 }
4072 4071
4073 kfree_skb(skb); 4072 kfree_skb(skb);
@@ -4097,8 +4096,8 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
4097 sco_recv_scodata(conn, skb); 4096 sco_recv_scodata(conn, skb);
4098 return; 4097 return;
4099 } else { 4098 } else {
4100 BT_ERR("%s SCO packet for unknown connection handle %d", 4099 bt_dev_err(hdev, "SCO packet for unknown connection handle %d",
4101 hdev->name, handle); 4100 handle);
4102 } 4101 }
4103 4102
4104 kfree_skb(skb); 4103 kfree_skb(skb);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0b4dba08a14e..cd3bbb766c24 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1188,7 +1188,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
1188 break; 1188 break;
1189 1189
1190 default: 1190 default:
1191 BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable); 1191 bt_dev_err(hdev, "use of reserved LE_Scan_Enable param %d",
1192 cp->enable);
1192 break; 1193 break;
1193 } 1194 }
1194 1195
@@ -1485,7 +1486,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
1485 conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr, 1486 conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr,
1486 HCI_ROLE_MASTER); 1487 HCI_ROLE_MASTER);
1487 if (!conn) 1488 if (!conn)
1488 BT_ERR("No memory for new connection"); 1489 bt_dev_err(hdev, "no memory for new connection");
1489 } 1490 }
1490 } 1491 }
1491 1492
@@ -2269,7 +2270,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
2269 conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr, 2270 conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr,
2270 HCI_ROLE_SLAVE); 2271 HCI_ROLE_SLAVE);
2271 if (!conn) { 2272 if (!conn) {
2272 BT_ERR("No memory for new connection"); 2273 bt_dev_err(hdev, "no memory for new connection");
2273 hci_dev_unlock(hdev); 2274 hci_dev_unlock(hdev);
2274 return; 2275 return;
2275 } 2276 }
@@ -2431,7 +2432,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2431 2432
2432 if (!hci_conn_ssp_enabled(conn) && 2433 if (!hci_conn_ssp_enabled(conn) &&
2433 test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) { 2434 test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) {
2434 BT_INFO("re-auth of legacy device is not possible."); 2435 bt_dev_info(hdev, "re-auth of legacy device is not possible.");
2435 } else { 2436 } else {
2436 set_bit(HCI_CONN_AUTH, &conn->flags); 2437 set_bit(HCI_CONN_AUTH, &conn->flags);
2437 conn->sec_level = conn->pending_sec_level; 2438 conn->sec_level = conn->pending_sec_level;
@@ -2535,8 +2536,7 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
2535 BT_DBG("%s status 0x%02x", hdev->name, status); 2536 BT_DBG("%s status 0x%02x", hdev->name, status);
2536 2537
2537 if (!skb || skb->len < sizeof(*rp)) { 2538 if (!skb || skb->len < sizeof(*rp)) {
2538 BT_ERR("%s invalid HCI Read Encryption Key Size response", 2539 bt_dev_err(hdev, "invalid read key size response");
2539 hdev->name);
2540 return; 2540 return;
2541 } 2541 }
2542 2542
@@ -2554,8 +2554,8 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
2554 * supported. 2554 * supported.
2555 */ 2555 */
2556 if (rp->status) { 2556 if (rp->status) {
2557 BT_ERR("%s failed to read key size for handle %u", hdev->name, 2557 bt_dev_err(hdev, "failed to read key size for handle %u",
2558 handle); 2558 handle);
2559 conn->enc_key_size = HCI_LINK_KEY_SIZE; 2559 conn->enc_key_size = HCI_LINK_KEY_SIZE;
2560 } else { 2560 } else {
2561 conn->enc_key_size = rp->key_size; 2561 conn->enc_key_size = rp->key_size;
@@ -2664,7 +2664,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
2664 hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp); 2664 hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp);
2665 2665
2666 if (hci_req_run_skb(&req, read_enc_key_size_complete)) { 2666 if (hci_req_run_skb(&req, read_enc_key_size_complete)) {
2667 BT_ERR("Sending HCI Read Encryption Key Size failed"); 2667 bt_dev_err(hdev, "sending read key size failed");
2668 conn->enc_key_size = HCI_LINK_KEY_SIZE; 2668 conn->enc_key_size = HCI_LINK_KEY_SIZE;
2669 goto notify; 2669 goto notify;
2670 } 2670 }
@@ -3197,7 +3197,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb)
3197 int i; 3197 int i;
3198 3198
3199 if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) { 3199 if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) {
3200 BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode); 3200 bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
3201 return; 3201 return;
3202 } 3202 }
3203 3203
@@ -3249,7 +3249,8 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb)
3249 break; 3249 break;
3250 3250
3251 default: 3251 default:
3252 BT_ERR("Unknown type %d conn %p", conn->type, conn); 3252 bt_dev_err(hdev, "unknown type %d conn %p",
3253 conn->type, conn);
3253 break; 3254 break;
3254 } 3255 }
3255 } 3256 }
@@ -3271,7 +3272,7 @@ static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev,
3271 return chan->conn; 3272 return chan->conn;
3272 break; 3273 break;
3273 default: 3274 default:
3274 BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type); 3275 bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
3275 break; 3276 break;
3276 } 3277 }
3277 3278
@@ -3284,7 +3285,7 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb)
3284 int i; 3285 int i;
3285 3286
3286 if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) { 3287 if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) {
3287 BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode); 3288 bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
3288 return; 3289 return;
3289 } 3290 }
3290 3291
@@ -3320,7 +3321,8 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb)
3320 break; 3321 break;
3321 3322
3322 default: 3323 default:
3323 BT_ERR("Unknown type %d conn %p", conn->type, conn); 3324 bt_dev_err(hdev, "unknown type %d conn %p",
3325 conn->type, conn);
3324 break; 3326 break;
3325 } 3327 }
3326 } 3328 }
@@ -4479,7 +4481,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
4479 if (!conn) { 4481 if (!conn) {
4480 conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr, ev->role); 4482 conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr, ev->role);
4481 if (!conn) { 4483 if (!conn) {
4482 BT_ERR("No memory for new connection"); 4484 bt_dev_err(hdev, "no memory for new connection");
4483 goto unlock; 4485 goto unlock;
4484 } 4486 }
4485 4487
@@ -4749,8 +4751,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
4749 case LE_ADV_SCAN_RSP: 4751 case LE_ADV_SCAN_RSP:
4750 break; 4752 break;
4751 default: 4753 default:
4752 BT_ERR_RATELIMITED("Unknown advertising packet type: 0x%02x", 4754 bt_dev_err_ratelimited(hdev, "unknown advertising packet "
4753 type); 4755 "type: 0x%02x", type);
4754 return; 4756 return;
4755 } 4757 }
4756 4758
@@ -4769,8 +4771,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
4769 4771
4770 /* Adjust for actual length */ 4772 /* Adjust for actual length */
4771 if (len != real_len) { 4773 if (len != real_len) {
4772 BT_ERR_RATELIMITED("%s advertising data length corrected", 4774 bt_dev_err_ratelimited(hdev, "advertising data len corrected");
4773 hdev->name);
4774 len = real_len; 4775 len = real_len;
4775 } 4776 }
4776 4777
@@ -5192,7 +5193,7 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
5192 return false; 5193 return false;
5193 5194
5194 if (skb->len < sizeof(*hdr)) { 5195 if (skb->len < sizeof(*hdr)) {
5195 BT_ERR("Too short HCI event"); 5196 bt_dev_err(hdev, "too short HCI event");
5196 return false; 5197 return false;
5197 } 5198 }
5198 5199
@@ -5206,12 +5207,13 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
5206 } 5207 }
5207 5208
5208 if (hdr->evt != HCI_EV_CMD_COMPLETE) { 5209 if (hdr->evt != HCI_EV_CMD_COMPLETE) {
5209 BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); 5210 bt_dev_err(hdev, "last event is not cmd complete (0x%2.2x)",
5211 hdr->evt);
5210 return false; 5212 return false;
5211 } 5213 }
5212 5214
5213 if (skb->len < sizeof(*ev)) { 5215 if (skb->len < sizeof(*ev)) {
5214 BT_ERR("Too short cmd_complete event"); 5216 bt_dev_err(hdev, "too short cmd_complete event");
5215 return false; 5217 return false;
5216 } 5218 }
5217 5219
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index b73ac149de34..abc0f3224dd1 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -41,6 +41,11 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
41 req->err = 0; 41 req->err = 0;
42} 42}
43 43
44void hci_req_purge(struct hci_request *req)
45{
46 skb_queue_purge(&req->cmd_q);
47}
48
44static int req_run(struct hci_request *req, hci_req_complete_t complete, 49static int req_run(struct hci_request *req, hci_req_complete_t complete,
45 hci_req_complete_skb_t complete_skb) 50 hci_req_complete_skb_t complete_skb)
46{ 51{
@@ -331,8 +336,8 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
331 336
332 skb = hci_prepare_cmd(hdev, opcode, plen, param); 337 skb = hci_prepare_cmd(hdev, opcode, plen, param);
333 if (!skb) { 338 if (!skb) {
334 BT_ERR("%s no memory for command (opcode 0x%4.4x)", 339 bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)",
335 hdev->name, opcode); 340 opcode);
336 req->err = -ENOMEM; 341 req->err = -ENOMEM;
337 return; 342 return;
338 } 343 }
@@ -1421,7 +1426,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
1421 1426
1422 err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); 1427 err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
1423 if (err < 0) { 1428 if (err < 0) {
1424 BT_ERR("%s failed to generate new RPA", hdev->name); 1429 bt_dev_err(hdev, "failed to generate new RPA");
1425 return err; 1430 return err;
1426 } 1431 }
1427 1432
@@ -1783,7 +1788,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
1783 1788
1784 err = hci_req_run(&req, abort_conn_complete); 1789 err = hci_req_run(&req, abort_conn_complete);
1785 if (err && err != -ENODATA) { 1790 if (err && err != -ENODATA) {
1786 BT_ERR("Failed to run HCI request: err %d", err); 1791 bt_dev_err(conn->hdev, "failed to run HCI request: err %d", err);
1787 return err; 1792 return err;
1788 } 1793 }
1789 1794
@@ -1867,7 +1872,8 @@ static void le_scan_disable_work(struct work_struct *work)
1867 1872
1868 hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status); 1873 hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status);
1869 if (status) { 1874 if (status) {
1870 BT_ERR("Failed to disable LE scan: status 0x%02x", status); 1875 bt_dev_err(hdev, "failed to disable LE scan: status 0x%02x",
1876 status);
1871 return; 1877 return;
1872 } 1878 }
1873 1879
@@ -1898,7 +1904,7 @@ static void le_scan_disable_work(struct work_struct *work)
1898 hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN, 1904 hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN,
1899 HCI_CMD_TIMEOUT, &status); 1905 HCI_CMD_TIMEOUT, &status);
1900 if (status) { 1906 if (status) {
1901 BT_ERR("Inquiry failed: status 0x%02x", status); 1907 bt_dev_err(hdev, "inquiry failed: status 0x%02x", status);
1902 goto discov_stopped; 1908 goto discov_stopped;
1903 } 1909 }
1904 1910
@@ -1940,7 +1946,8 @@ static void le_scan_restart_work(struct work_struct *work)
1940 1946
1941 hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status); 1947 hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
1942 if (status) { 1948 if (status) {
1943 BT_ERR("Failed to restart LE scan: status %d", status); 1949 bt_dev_err(hdev, "failed to restart LE scan: status %d",
1950 status);
1944 return; 1951 return;
1945 } 1952 }
1946 1953
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index dde77bd59f91..702beb140d9f 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -36,6 +36,7 @@ struct hci_request {
36}; 36};
37 37
38void hci_req_init(struct hci_request *req, struct hci_dev *hdev); 38void hci_req_init(struct hci_request *req, struct hci_dev *hdev);
39void hci_req_purge(struct hci_request *req);
39int hci_req_run(struct hci_request *req, hci_req_complete_t complete); 40int hci_req_run(struct hci_request *req, hci_req_complete_t complete);
40int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete); 41int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete);
41void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, 42void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 65d734c165bd..923e9a271872 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -251,15 +251,13 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
251} 251}
252 252
253/* Send frame to sockets with specific channel */ 253/* Send frame to sockets with specific channel */
254void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, 254static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
255 int flag, struct sock *skip_sk) 255 int flag, struct sock *skip_sk)
256{ 256{
257 struct sock *sk; 257 struct sock *sk;
258 258
259 BT_DBG("channel %u len %d", channel, skb->len); 259 BT_DBG("channel %u len %d", channel, skb->len);
260 260
261 read_lock(&hci_sk_list.lock);
262
263 sk_for_each(sk, &hci_sk_list.head) { 261 sk_for_each(sk, &hci_sk_list.head) {
264 struct sk_buff *nskb; 262 struct sk_buff *nskb;
265 263
@@ -285,6 +283,13 @@ void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
285 kfree_skb(nskb); 283 kfree_skb(nskb);
286 } 284 }
287 285
286}
287
288void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
289 int flag, struct sock *skip_sk)
290{
291 read_lock(&hci_sk_list.lock);
292 __hci_send_to_channel(channel, skb, flag, skip_sk);
288 read_unlock(&hci_sk_list.lock); 293 read_unlock(&hci_sk_list.lock);
289} 294}
290 295
@@ -388,8 +393,8 @@ void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event,
388 hdr->index = index; 393 hdr->index = index;
389 hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); 394 hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
390 395
391 hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, 396 __hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
392 HCI_SOCK_TRUSTED, NULL); 397 HCI_SOCK_TRUSTED, NULL);
393 kfree_skb(skb); 398 kfree_skb(skb);
394 } 399 }
395 400
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index d5c7c89ec4d6..9874844a95a9 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -51,7 +51,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
51 dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); 51 dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
52 52
53 if (device_add(&conn->dev) < 0) { 53 if (device_add(&conn->dev) < 0) {
54 BT_ERR("Failed to register connection device"); 54 bt_dev_err(hdev, "failed to register connection device");
55 return; 55 return;
56 } 56 }
57 57
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 8112893037bd..f2cec70d520c 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -398,9 +398,9 @@ static int hidp_raw_request(struct hid_device *hid, unsigned char reportnum,
398 } 398 }
399} 399}
400 400
401static void hidp_idle_timeout(unsigned long arg) 401static void hidp_idle_timeout(struct timer_list *t)
402{ 402{
403 struct hidp_session *session = (struct hidp_session *) arg; 403 struct hidp_session *session = from_timer(session, t, timer);
404 404
405 /* The HIDP user-space API only contains calls to add and remove 405 /* The HIDP user-space API only contains calls to add and remove
406 * devices. There is no way to forward events of any kind. Therefore, 406 * devices. There is no way to forward events of any kind. Therefore,
@@ -944,8 +944,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
944 944
945 /* device management */ 945 /* device management */
946 INIT_WORK(&session->dev_init, hidp_session_dev_work); 946 INIT_WORK(&session->dev_init, hidp_session_dev_work);
947 setup_timer(&session->timer, hidp_idle_timeout, 947 timer_setup(&session->timer, hidp_idle_timeout, 0);
948 (unsigned long)session);
949 948
950 /* session data */ 949 /* session data */
951 mutex_init(&session->report_mutex); 950 mutex_init(&session->report_mutex);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 1fba2a03f8ae..6e9fc86d8daf 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2159,8 +2159,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
2159 2159
2160 key_count = __le16_to_cpu(cp->key_count); 2160 key_count = __le16_to_cpu(cp->key_count);
2161 if (key_count > max_key_count) { 2161 if (key_count > max_key_count) {
2162 BT_ERR("load_link_keys: too big key_count value %u", 2162 bt_dev_err(hdev, "load_link_keys: too big key_count value %u",
2163 key_count); 2163 key_count);
2164 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, 2164 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
2165 MGMT_STATUS_INVALID_PARAMS); 2165 MGMT_STATUS_INVALID_PARAMS);
2166 } 2166 }
@@ -2168,8 +2168,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
2168 expected_len = sizeof(*cp) + key_count * 2168 expected_len = sizeof(*cp) + key_count *
2169 sizeof(struct mgmt_link_key_info); 2169 sizeof(struct mgmt_link_key_info);
2170 if (expected_len != len) { 2170 if (expected_len != len) {
2171 BT_ERR("load_link_keys: expected %u bytes, got %u bytes", 2171 bt_dev_err(hdev, "load_link_keys: expected %u bytes, got %u bytes",
2172 expected_len, len); 2172 expected_len, len);
2173 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, 2173 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
2174 MGMT_STATUS_INVALID_PARAMS); 2174 MGMT_STATUS_INVALID_PARAMS);
2175 } 2175 }
@@ -2561,7 +2561,7 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data,
2561 2561
2562 memcpy(&ncp.addr, &cp->addr, sizeof(ncp.addr)); 2562 memcpy(&ncp.addr, &cp->addr, sizeof(ncp.addr));
2563 2563
2564 BT_ERR("PIN code is not 16 bytes long"); 2564 bt_dev_err(hdev, "PIN code is not 16 bytes long");
2565 2565
2566 err = send_pin_code_neg_reply(sk, hdev, &ncp); 2566 err = send_pin_code_neg_reply(sk, hdev, &ncp);
2567 if (err >= 0) 2567 if (err >= 0)
@@ -3391,7 +3391,8 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev,
3391 MGMT_OP_ADD_REMOTE_OOB_DATA, 3391 MGMT_OP_ADD_REMOTE_OOB_DATA,
3392 status, &cp->addr, sizeof(cp->addr)); 3392 status, &cp->addr, sizeof(cp->addr));
3393 } else { 3393 } else {
3394 BT_ERR("add_remote_oob_data: invalid length of %u bytes", len); 3394 bt_dev_err(hdev, "add_remote_oob_data: invalid len of %u bytes",
3395 len);
3395 err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, 3396 err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA,
3396 MGMT_STATUS_INVALID_PARAMS); 3397 MGMT_STATUS_INVALID_PARAMS);
3397 } 3398 }
@@ -3604,8 +3605,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
3604 3605
3605 uuid_count = __le16_to_cpu(cp->uuid_count); 3606 uuid_count = __le16_to_cpu(cp->uuid_count);
3606 if (uuid_count > max_uuid_count) { 3607 if (uuid_count > max_uuid_count) {
3607 BT_ERR("service_discovery: too big uuid_count value %u", 3608 bt_dev_err(hdev, "service_discovery: too big uuid_count value %u",
3608 uuid_count); 3609 uuid_count);
3609 err = mgmt_cmd_complete(sk, hdev->id, 3610 err = mgmt_cmd_complete(sk, hdev->id,
3610 MGMT_OP_START_SERVICE_DISCOVERY, 3611 MGMT_OP_START_SERVICE_DISCOVERY,
3611 MGMT_STATUS_INVALID_PARAMS, &cp->type, 3612 MGMT_STATUS_INVALID_PARAMS, &cp->type,
@@ -3615,8 +3616,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
3615 3616
3616 expected_len = sizeof(*cp) + uuid_count * 16; 3617 expected_len = sizeof(*cp) + uuid_count * 16;
3617 if (expected_len != len) { 3618 if (expected_len != len) {
3618 BT_ERR("service_discovery: expected %u bytes, got %u bytes", 3619 bt_dev_err(hdev, "service_discovery: expected %u bytes, got %u bytes",
3619 expected_len, len); 3620 expected_len, len);
3620 err = mgmt_cmd_complete(sk, hdev->id, 3621 err = mgmt_cmd_complete(sk, hdev->id,
3621 MGMT_OP_START_SERVICE_DISCOVERY, 3622 MGMT_OP_START_SERVICE_DISCOVERY,
3622 MGMT_STATUS_INVALID_PARAMS, &cp->type, 3623 MGMT_STATUS_INVALID_PARAMS, &cp->type,
@@ -3943,7 +3944,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
3943 err = hci_req_run(&req, enable_advertising_instance); 3944 err = hci_req_run(&req, enable_advertising_instance);
3944 3945
3945 if (err) 3946 if (err)
3946 BT_ERR("Failed to re-configure advertising"); 3947 bt_dev_err(hdev, "failed to re-configure advertising");
3947 3948
3948unlock: 3949unlock:
3949 hci_dev_unlock(hdev); 3950 hci_dev_unlock(hdev);
@@ -4664,15 +4665,16 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
4664 4665
4665 irk_count = __le16_to_cpu(cp->irk_count); 4666 irk_count = __le16_to_cpu(cp->irk_count);
4666 if (irk_count > max_irk_count) { 4667 if (irk_count > max_irk_count) {
4667 BT_ERR("load_irks: too big irk_count value %u", irk_count); 4668 bt_dev_err(hdev, "load_irks: too big irk_count value %u",
4669 irk_count);
4668 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, 4670 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
4669 MGMT_STATUS_INVALID_PARAMS); 4671 MGMT_STATUS_INVALID_PARAMS);
4670 } 4672 }
4671 4673
4672 expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info); 4674 expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info);
4673 if (expected_len != len) { 4675 if (expected_len != len) {
4674 BT_ERR("load_irks: expected %u bytes, got %u bytes", 4676 bt_dev_err(hdev, "load_irks: expected %u bytes, got %u bytes",
4675 expected_len, len); 4677 expected_len, len);
4676 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, 4678 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
4677 MGMT_STATUS_INVALID_PARAMS); 4679 MGMT_STATUS_INVALID_PARAMS);
4678 } 4680 }
@@ -4745,7 +4747,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
4745 4747
4746 key_count = __le16_to_cpu(cp->key_count); 4748 key_count = __le16_to_cpu(cp->key_count);
4747 if (key_count > max_key_count) { 4749 if (key_count > max_key_count) {
4748 BT_ERR("load_ltks: too big key_count value %u", key_count); 4750 bt_dev_err(hdev, "load_ltks: too big key_count value %u",
4751 key_count);
4749 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 4752 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
4750 MGMT_STATUS_INVALID_PARAMS); 4753 MGMT_STATUS_INVALID_PARAMS);
4751 } 4754 }
@@ -4753,8 +4756,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
4753 expected_len = sizeof(*cp) + key_count * 4756 expected_len = sizeof(*cp) + key_count *
4754 sizeof(struct mgmt_ltk_info); 4757 sizeof(struct mgmt_ltk_info);
4755 if (expected_len != len) { 4758 if (expected_len != len) {
4756 BT_ERR("load_keys: expected %u bytes, got %u bytes", 4759 bt_dev_err(hdev, "load_keys: expected %u bytes, got %u bytes",
4757 expected_len, len); 4760 expected_len, len);
4758 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 4761 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
4759 MGMT_STATUS_INVALID_PARAMS); 4762 MGMT_STATUS_INVALID_PARAMS);
4760 } 4763 }
@@ -4873,14 +4876,15 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status,
4873 } 4876 }
4874 4877
4875 if (!cp) { 4878 if (!cp) {
4876 BT_ERR("invalid sent_cmd in conn_info response"); 4879 bt_dev_err(hdev, "invalid sent_cmd in conn_info response");
4877 goto unlock; 4880 goto unlock;
4878 } 4881 }
4879 4882
4880 handle = __le16_to_cpu(cp->handle); 4883 handle = __le16_to_cpu(cp->handle);
4881 conn = hci_conn_hash_lookup_handle(hdev, handle); 4884 conn = hci_conn_hash_lookup_handle(hdev, handle);
4882 if (!conn) { 4885 if (!conn) {
4883 BT_ERR("unknown handle (%d) in conn_info response", handle); 4886 bt_dev_err(hdev, "unknown handle (%d) in conn_info response",
4887 handle);
4884 goto unlock; 4888 goto unlock;
4885 } 4889 }
4886 4890
@@ -5477,8 +5481,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
5477 5481
5478 param_count = __le16_to_cpu(cp->param_count); 5482 param_count = __le16_to_cpu(cp->param_count);
5479 if (param_count > max_param_count) { 5483 if (param_count > max_param_count) {
5480 BT_ERR("load_conn_param: too big param_count value %u", 5484 bt_dev_err(hdev, "load_conn_param: too big param_count value %u",
5481 param_count); 5485 param_count);
5482 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, 5486 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
5483 MGMT_STATUS_INVALID_PARAMS); 5487 MGMT_STATUS_INVALID_PARAMS);
5484 } 5488 }
@@ -5486,8 +5490,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
5486 expected_len = sizeof(*cp) + param_count * 5490 expected_len = sizeof(*cp) + param_count *
5487 sizeof(struct mgmt_conn_param); 5491 sizeof(struct mgmt_conn_param);
5488 if (expected_len != len) { 5492 if (expected_len != len) {
5489 BT_ERR("load_conn_param: expected %u bytes, got %u bytes", 5493 bt_dev_err(hdev, "load_conn_param: expected %u bytes, got %u bytes",
5490 expected_len, len); 5494 expected_len, len);
5491 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, 5495 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
5492 MGMT_STATUS_INVALID_PARAMS); 5496 MGMT_STATUS_INVALID_PARAMS);
5493 } 5497 }
@@ -5512,7 +5516,7 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
5512 } else if (param->addr.type == BDADDR_LE_RANDOM) { 5516 } else if (param->addr.type == BDADDR_LE_RANDOM) {
5513 addr_type = ADDR_LE_DEV_RANDOM; 5517 addr_type = ADDR_LE_DEV_RANDOM;
5514 } else { 5518 } else {
5515 BT_ERR("Ignoring invalid connection parameters"); 5519 bt_dev_err(hdev, "ignoring invalid connection parameters");
5516 continue; 5520 continue;
5517 } 5521 }
5518 5522
@@ -5525,14 +5529,14 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
5525 min, max, latency, timeout); 5529 min, max, latency, timeout);
5526 5530
5527 if (hci_check_conn_params(min, max, latency, timeout) < 0) { 5531 if (hci_check_conn_params(min, max, latency, timeout) < 0) {
5528 BT_ERR("Ignoring invalid connection parameters"); 5532 bt_dev_err(hdev, "ignoring invalid connection parameters");
5529 continue; 5533 continue;
5530 } 5534 }
5531 5535
5532 hci_param = hci_conn_params_add(hdev, &param->addr.bdaddr, 5536 hci_param = hci_conn_params_add(hdev, &param->addr.bdaddr,
5533 addr_type); 5537 addr_type);
5534 if (!hci_param) { 5538 if (!hci_param) {
5535 BT_ERR("Failed to add connection parameters"); 5539 bt_dev_err(hdev, "failed to add connection parameters");
5536 continue; 5540 continue;
5537 } 5541 }
5538 5542
@@ -6383,6 +6387,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
6383 if (skb_queue_empty(&req.cmd_q) || 6387 if (skb_queue_empty(&req.cmd_q) ||
6384 !hdev_is_powered(hdev) || 6388 !hdev_is_powered(hdev) ||
6385 hci_dev_test_flag(hdev, HCI_ADVERTISING)) { 6389 hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
6390 hci_req_purge(&req);
6386 rp.instance = cp->instance; 6391 rp.instance = cp->instance;
6387 err = mgmt_cmd_complete(sk, hdev->id, 6392 err = mgmt_cmd_complete(sk, hdev->id,
6388 MGMT_OP_REMOVE_ADVERTISING, 6393 MGMT_OP_REMOVE_ADVERTISING,
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 4a0b41d75c84..b98225d65e87 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -233,9 +233,9 @@ static int rfcomm_check_security(struct rfcomm_dlc *d)
233 d->out); 233 d->out);
234} 234}
235 235
236static void rfcomm_session_timeout(unsigned long arg) 236static void rfcomm_session_timeout(struct timer_list *t)
237{ 237{
238 struct rfcomm_session *s = (void *) arg; 238 struct rfcomm_session *s = from_timer(s, t, timer);
239 239
240 BT_DBG("session %p state %ld", s, s->state); 240 BT_DBG("session %p state %ld", s, s->state);
241 241
@@ -258,9 +258,9 @@ static void rfcomm_session_clear_timer(struct rfcomm_session *s)
258} 258}
259 259
260/* ---- RFCOMM DLCs ---- */ 260/* ---- RFCOMM DLCs ---- */
261static void rfcomm_dlc_timeout(unsigned long arg) 261static void rfcomm_dlc_timeout(struct timer_list *t)
262{ 262{
263 struct rfcomm_dlc *d = (void *) arg; 263 struct rfcomm_dlc *d = from_timer(d, t, timer);
264 264
265 BT_DBG("dlc %p state %ld", d, d->state); 265 BT_DBG("dlc %p state %ld", d, d->state);
266 266
@@ -307,7 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
307 if (!d) 307 if (!d)
308 return NULL; 308 return NULL;
309 309
310 setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d); 310 timer_setup(&d->timer, rfcomm_dlc_timeout, 0);
311 311
312 skb_queue_head_init(&d->tx_queue); 312 skb_queue_head_init(&d->tx_queue);
313 mutex_init(&d->lock); 313 mutex_init(&d->lock);
@@ -650,7 +650,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state)
650 650
651 BT_DBG("session %p sock %p", s, sock); 651 BT_DBG("session %p sock %p", s, sock);
652 652
653 setup_timer(&s->timer, rfcomm_session_timeout, (unsigned long) s); 653 timer_setup(&s->timer, rfcomm_session_timeout, 0);
654 654
655 INIT_LIST_HEAD(&s->dlcs); 655 INIT_LIST_HEAD(&s->dlcs);
656 s->state = state; 656 s->state = state;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 795e920a3281..08df57665e1f 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -73,9 +73,9 @@ struct sco_pinfo {
73#define SCO_CONN_TIMEOUT (HZ * 40) 73#define SCO_CONN_TIMEOUT (HZ * 40)
74#define SCO_DISCONN_TIMEOUT (HZ * 2) 74#define SCO_DISCONN_TIMEOUT (HZ * 2)
75 75
76static void sco_sock_timeout(unsigned long arg) 76static void sco_sock_timeout(struct timer_list *t)
77{ 77{
78 struct sock *sk = (struct sock *)arg; 78 struct sock *sk = from_timer(sk, t, sk_timer);
79 79
80 BT_DBG("sock %p state %d", sk, sk->sk_state); 80 BT_DBG("sock %p state %d", sk, sk->sk_state);
81 81
@@ -487,7 +487,7 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
487 487
488 sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT; 488 sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
489 489
490 setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk); 490 timer_setup(&sk->sk_timer, sco_sock_timeout, 0);
491 491
492 bt_sock_link(&sco_sk_list, sk); 492 bt_sock_link(&sco_sk_list, sk);
493 return sk; 493 return sk;
diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c
index 34a1227f4391..03e3c89c3046 100644
--- a/net/bluetooth/selftest.c
+++ b/net/bluetooth/selftest.c
@@ -138,12 +138,12 @@ static const u8 dhkey_3[32] __initconst = {
138 0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70, 138 0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70,
139}; 139};
140 140
141static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32], 141static int __init test_ecdh_sample(struct crypto_kpp *tfm, const u8 priv_a[32],
142 const u8 pub_a[64], const u8 pub_b[64], 142 const u8 priv_b[32], const u8 pub_a[64],
143 const u8 dhkey[32]) 143 const u8 pub_b[64], const u8 dhkey[32])
144{ 144{
145 u8 *tmp, *dhkey_a, *dhkey_b; 145 u8 *tmp, *dhkey_a, *dhkey_b;
146 int ret = 0; 146 int ret;
147 147
148 tmp = kmalloc(64, GFP_KERNEL); 148 tmp = kmalloc(64, GFP_KERNEL);
149 if (!tmp) 149 if (!tmp)
@@ -152,17 +152,30 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
152 dhkey_a = &tmp[0]; 152 dhkey_a = &tmp[0];
153 dhkey_b = &tmp[32]; 153 dhkey_b = &tmp[32];
154 154
155 compute_ecdh_secret(pub_b, priv_a, dhkey_a); 155 ret = set_ecdh_privkey(tfm, priv_a);
156 compute_ecdh_secret(pub_a, priv_b, dhkey_b); 156 if (ret)
157 goto out;
158
159 ret = compute_ecdh_secret(tfm, pub_b, dhkey_a);
160 if (ret)
161 goto out;
157 162
158 if (memcmp(dhkey_a, dhkey, 32)) { 163 if (memcmp(dhkey_a, dhkey, 32)) {
159 ret = -EINVAL; 164 ret = -EINVAL;
160 goto out; 165 goto out;
161 } 166 }
162 167
168 ret = set_ecdh_privkey(tfm, priv_b);
169 if (ret)
170 goto out;
171
172 ret = compute_ecdh_secret(tfm, pub_a, dhkey_b);
173 if (ret)
174 goto out;
175
163 if (memcmp(dhkey_b, dhkey, 32)) 176 if (memcmp(dhkey_b, dhkey, 32))
164 ret = -EINVAL; 177 ret = -EINVAL;
165 178 /* fall through*/
166out: 179out:
167 kfree(tmp); 180 kfree(tmp);
168 return ret; 181 return ret;
@@ -185,30 +198,43 @@ static const struct file_operations test_ecdh_fops = {
185 198
186static int __init test_ecdh(void) 199static int __init test_ecdh(void)
187{ 200{
201 struct crypto_kpp *tfm;
188 ktime_t calltime, delta, rettime; 202 ktime_t calltime, delta, rettime;
189 unsigned long long duration; 203 unsigned long long duration = 0;
190 int err; 204 int err;
191 205
192 calltime = ktime_get(); 206 calltime = ktime_get();
193 207
194 err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1); 208 tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
209 if (IS_ERR(tfm)) {
210 BT_ERR("Unable to create ECDH crypto context");
211 err = PTR_ERR(tfm);
212 goto done;
213 }
214
215 err = test_ecdh_sample(tfm, priv_a_1, priv_b_1, pub_a_1, pub_b_1,
216 dhkey_1);
195 if (err) { 217 if (err) {
196 BT_ERR("ECDH sample 1 failed"); 218 BT_ERR("ECDH sample 1 failed");
197 goto done; 219 goto done;
198 } 220 }
199 221
200 err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2); 222 err = test_ecdh_sample(tfm, priv_a_2, priv_b_2, pub_a_2, pub_b_2,
223 dhkey_2);
201 if (err) { 224 if (err) {
202 BT_ERR("ECDH sample 2 failed"); 225 BT_ERR("ECDH sample 2 failed");
203 goto done; 226 goto done;
204 } 227 }
205 228
206 err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3); 229 err = test_ecdh_sample(tfm, priv_a_3, priv_a_3, pub_a_3, pub_a_3,
230 dhkey_3);
207 if (err) { 231 if (err) {
208 BT_ERR("ECDH sample 3 failed"); 232 BT_ERR("ECDH sample 3 failed");
209 goto done; 233 goto done;
210 } 234 }
211 235
236 crypto_free_kpp(tfm);
237
212 rettime = ktime_get(); 238 rettime = ktime_get();
213 delta = ktime_sub(rettime, calltime); 239 delta = ktime_sub(rettime, calltime);
214 duration = (unsigned long long) ktime_to_ns(delta) >> 10; 240 duration = (unsigned long long) ktime_to_ns(delta) >> 10;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a0ef89772c36..01117ae84f1d 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -26,6 +26,7 @@
26#include <crypto/algapi.h> 26#include <crypto/algapi.h>
27#include <crypto/b128ops.h> 27#include <crypto/b128ops.h>
28#include <crypto/hash.h> 28#include <crypto/hash.h>
29#include <crypto/kpp.h>
29 30
30#include <net/bluetooth/bluetooth.h> 31#include <net/bluetooth/bluetooth.h>
31#include <net/bluetooth/hci_core.h> 32#include <net/bluetooth/hci_core.h>
@@ -83,7 +84,6 @@ enum {
83struct smp_dev { 84struct smp_dev {
84 /* Secure Connections OOB data */ 85 /* Secure Connections OOB data */
85 u8 local_pk[64]; 86 u8 local_pk[64];
86 u8 local_sk[32];
87 u8 local_rand[16]; 87 u8 local_rand[16];
88 bool debug_key; 88 bool debug_key;
89 89
@@ -92,6 +92,7 @@ struct smp_dev {
92 92
93 struct crypto_cipher *tfm_aes; 93 struct crypto_cipher *tfm_aes;
94 struct crypto_shash *tfm_cmac; 94 struct crypto_shash *tfm_cmac;
95 struct crypto_kpp *tfm_ecdh;
95}; 96};
96 97
97struct smp_chan { 98struct smp_chan {
@@ -124,13 +125,13 @@ struct smp_chan {
124 125
125 /* Secure Connections variables */ 126 /* Secure Connections variables */
126 u8 local_pk[64]; 127 u8 local_pk[64];
127 u8 local_sk[32];
128 u8 remote_pk[64]; 128 u8 remote_pk[64];
129 u8 dhkey[32]; 129 u8 dhkey[32];
130 u8 mackey[16]; 130 u8 mackey[16];
131 131
132 struct crypto_cipher *tfm_aes; 132 struct crypto_cipher *tfm_aes;
133 struct crypto_shash *tfm_cmac; 133 struct crypto_shash *tfm_cmac;
134 struct crypto_kpp *tfm_ecdh;
134}; 135};
135 136
136/* These debug key values are defined in the SMP section of the core 137/* These debug key values are defined in the SMP section of the core
@@ -565,22 +566,22 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
565 566
566 if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { 567 if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
567 BT_DBG("Using debug keys"); 568 BT_DBG("Using debug keys");
569 err = set_ecdh_privkey(smp->tfm_ecdh, debug_sk);
570 if (err)
571 return err;
568 memcpy(smp->local_pk, debug_pk, 64); 572 memcpy(smp->local_pk, debug_pk, 64);
569 memcpy(smp->local_sk, debug_sk, 32);
570 smp->debug_key = true; 573 smp->debug_key = true;
571 } else { 574 } else {
572 while (true) { 575 while (true) {
573 /* Seed private key with random number */ 576 /* Generate key pair for Secure Connections */
574 get_random_bytes(smp->local_sk, 32); 577 err = generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk);
575 578 if (err)
576 /* Generate local key pair for Secure Connections */ 579 return err;
577 if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
578 return -EIO;
579 580
580 /* This is unlikely, but we need to check that 581 /* This is unlikely, but we need to check that
581 * we didn't accidentially generate a debug key. 582 * we didn't accidentially generate a debug key.
582 */ 583 */
583 if (crypto_memneq(smp->local_sk, debug_sk, 32)) 584 if (crypto_memneq(smp->local_pk, debug_pk, 64))
584 break; 585 break;
585 } 586 }
586 smp->debug_key = false; 587 smp->debug_key = false;
@@ -588,7 +589,6 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
588 589
589 SMP_DBG("OOB Public Key X: %32phN", smp->local_pk); 590 SMP_DBG("OOB Public Key X: %32phN", smp->local_pk);
590 SMP_DBG("OOB Public Key Y: %32phN", smp->local_pk + 32); 591 SMP_DBG("OOB Public Key Y: %32phN", smp->local_pk + 32);
591 SMP_DBG("OOB Private Key: %32phN", smp->local_sk);
592 592
593 get_random_bytes(smp->local_rand, 16); 593 get_random_bytes(smp->local_rand, 16);
594 594
@@ -771,6 +771,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
771 771
772 crypto_free_cipher(smp->tfm_aes); 772 crypto_free_cipher(smp->tfm_aes);
773 crypto_free_shash(smp->tfm_cmac); 773 crypto_free_shash(smp->tfm_cmac);
774 crypto_free_kpp(smp->tfm_ecdh);
774 775
775 /* Ensure that we don't leave any debug key around if debug key 776 /* Ensure that we don't leave any debug key around if debug key
776 * support hasn't been explicitly enabled. 777 * support hasn't been explicitly enabled.
@@ -995,7 +996,8 @@ static u8 smp_random(struct smp_chan *smp)
995 return SMP_UNSPECIFIED; 996 return SMP_UNSPECIFIED;
996 997
997 if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) { 998 if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) {
998 BT_ERR("Pairing failed (confirmation values mismatch)"); 999 bt_dev_err(hcon->hdev, "pairing failed "
1000 "(confirmation values mismatch)");
999 return SMP_CONFIRM_FAILED; 1001 return SMP_CONFIRM_FAILED;
1000 } 1002 }
1001 1003
@@ -1209,7 +1211,7 @@ static void sc_generate_ltk(struct smp_chan *smp)
1209 1211
1210 key = hci_find_link_key(hdev, &hcon->dst); 1212 key = hci_find_link_key(hdev, &hcon->dst);
1211 if (!key) { 1213 if (!key) {
1212 BT_ERR("%s No Link Key found to generate LTK", hdev->name); 1214 bt_dev_err(hdev, "no Link Key found to generate LTK");
1213 return; 1215 return;
1214 } 1216 }
1215 1217
@@ -1391,16 +1393,19 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
1391 smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 1393 smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
1392 if (IS_ERR(smp->tfm_aes)) { 1394 if (IS_ERR(smp->tfm_aes)) {
1393 BT_ERR("Unable to create AES crypto context"); 1395 BT_ERR("Unable to create AES crypto context");
1394 kzfree(smp); 1396 goto zfree_smp;
1395 return NULL;
1396 } 1397 }
1397 1398
1398 smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0); 1399 smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
1399 if (IS_ERR(smp->tfm_cmac)) { 1400 if (IS_ERR(smp->tfm_cmac)) {
1400 BT_ERR("Unable to create CMAC crypto context"); 1401 BT_ERR("Unable to create CMAC crypto context");
1401 crypto_free_cipher(smp->tfm_aes); 1402 goto free_cipher;
1402 kzfree(smp); 1403 }
1403 return NULL; 1404
1405 smp->tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
1406 if (IS_ERR(smp->tfm_ecdh)) {
1407 BT_ERR("Unable to create ECDH crypto context");
1408 goto free_shash;
1404 } 1409 }
1405 1410
1406 smp->conn = conn; 1411 smp->conn = conn;
@@ -1413,6 +1418,14 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
1413 hci_conn_hold(conn->hcon); 1418 hci_conn_hold(conn->hcon);
1414 1419
1415 return smp; 1420 return smp;
1421
1422free_shash:
1423 crypto_free_shash(smp->tfm_cmac);
1424free_cipher:
1425 crypto_free_cipher(smp->tfm_aes);
1426zfree_smp:
1427 kzfree(smp);
1428 return NULL;
1416} 1429}
1417 1430
1418static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16]) 1431static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16])
@@ -1883,7 +1896,6 @@ static u8 sc_send_public_key(struct smp_chan *smp)
1883 smp_dev = chan->data; 1896 smp_dev = chan->data;
1884 1897
1885 memcpy(smp->local_pk, smp_dev->local_pk, 64); 1898 memcpy(smp->local_pk, smp_dev->local_pk, 64);
1886 memcpy(smp->local_sk, smp_dev->local_sk, 32);
1887 memcpy(smp->lr, smp_dev->local_rand, 16); 1899 memcpy(smp->lr, smp_dev->local_rand, 16);
1888 1900
1889 if (smp_dev->debug_key) 1901 if (smp_dev->debug_key)
@@ -1894,22 +1906,20 @@ static u8 sc_send_public_key(struct smp_chan *smp)
1894 1906
1895 if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { 1907 if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
1896 BT_DBG("Using debug keys"); 1908 BT_DBG("Using debug keys");
1909 if (set_ecdh_privkey(smp->tfm_ecdh, debug_sk))
1910 return SMP_UNSPECIFIED;
1897 memcpy(smp->local_pk, debug_pk, 64); 1911 memcpy(smp->local_pk, debug_pk, 64);
1898 memcpy(smp->local_sk, debug_sk, 32);
1899 set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); 1912 set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags);
1900 } else { 1913 } else {
1901 while (true) { 1914 while (true) {
1902 /* Seed private key with random number */ 1915 /* Generate key pair for Secure Connections */
1903 get_random_bytes(smp->local_sk, 32); 1916 if (generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk))
1904
1905 /* Generate local key pair for Secure Connections */
1906 if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
1907 return SMP_UNSPECIFIED; 1917 return SMP_UNSPECIFIED;
1908 1918
1909 /* This is unlikely, but we need to check that 1919 /* This is unlikely, but we need to check that
1910 * we didn't accidentially generate a debug key. 1920 * we didn't accidentially generate a debug key.
1911 */ 1921 */
1912 if (crypto_memneq(smp->local_sk, debug_sk, 32)) 1922 if (crypto_memneq(smp->local_pk, debug_pk, 64))
1913 break; 1923 break;
1914 } 1924 }
1915 } 1925 }
@@ -1917,7 +1927,6 @@ static u8 sc_send_public_key(struct smp_chan *smp)
1917done: 1927done:
1918 SMP_DBG("Local Public Key X: %32phN", smp->local_pk); 1928 SMP_DBG("Local Public Key X: %32phN", smp->local_pk);
1919 SMP_DBG("Local Public Key Y: %32phN", smp->local_pk + 32); 1929 SMP_DBG("Local Public Key Y: %32phN", smp->local_pk + 32);
1920 SMP_DBG("Local Private Key: %32phN", smp->local_sk);
1921 1930
1922 smp_send_cmd(smp->conn, SMP_CMD_PUBLIC_KEY, 64, smp->local_pk); 1931 smp_send_cmd(smp->conn, SMP_CMD_PUBLIC_KEY, 64, smp->local_pk);
1923 1932
@@ -2059,11 +2068,11 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
2059 return SMP_UNSPECIFIED; 2068 return SMP_UNSPECIFIED;
2060 2069
2061 if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { 2070 if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) {
2062 BT_ERR("Refusing SMP SC -> legacy fallback in SC-only mode"); 2071 bt_dev_err(hdev, "refusing legacy fallback in SC-only mode");
2063 return SMP_UNSPECIFIED; 2072 return SMP_UNSPECIFIED;
2064 } 2073 }
2065 2074
2066 BT_ERR("Trying to fall back to legacy SMP"); 2075 bt_dev_err(hdev, "trying to fall back to legacy SMP");
2067 2076
2068 req = (void *) &smp->preq[1]; 2077 req = (void *) &smp->preq[1];
2069 rsp = (void *) &smp->prsp[1]; 2078 rsp = (void *) &smp->prsp[1];
@@ -2074,7 +2083,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
2074 auth = req->auth_req & AUTH_REQ_MASK(hdev); 2083 auth = req->auth_req & AUTH_REQ_MASK(hdev);
2075 2084
2076 if (tk_request(conn, 0, auth, rsp->io_capability, req->io_capability)) { 2085 if (tk_request(conn, 0, auth, rsp->io_capability, req->io_capability)) {
2077 BT_ERR("Failed to fall back to legacy SMP"); 2086 bt_dev_err(hdev, "failed to fall back to legacy SMP");
2078 return SMP_UNSPECIFIED; 2087 return SMP_UNSPECIFIED;
2079 } 2088 }
2080 2089
@@ -2347,7 +2356,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
2347 2356
2348 chan = conn->smp; 2357 chan = conn->smp;
2349 if (!chan) { 2358 if (!chan) {
2350 BT_ERR("SMP security requested but not available"); 2359 bt_dev_err(hcon->hdev, "security requested but not available");
2351 return 1; 2360 return 1;
2352 } 2361 }
2353 2362
@@ -2540,7 +2549,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
2540 */ 2549 */
2541 if (!bacmp(&info->bdaddr, BDADDR_ANY) || 2550 if (!bacmp(&info->bdaddr, BDADDR_ANY) ||
2542 !hci_is_identity_address(&info->bdaddr, info->addr_type)) { 2551 !hci_is_identity_address(&info->bdaddr, info->addr_type)) {
2543 BT_ERR("Ignoring IRK with no identity address"); 2552 bt_dev_err(hcon->hdev, "ignoring IRK with no identity address");
2544 goto distribute; 2553 goto distribute;
2545 } 2554 }
2546 2555
@@ -2645,6 +2654,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
2645 struct l2cap_chan *chan = conn->smp; 2654 struct l2cap_chan *chan = conn->smp;
2646 struct smp_chan *smp = chan->data; 2655 struct smp_chan *smp = chan->data;
2647 struct hci_dev *hdev = hcon->hdev; 2656 struct hci_dev *hdev = hcon->hdev;
2657 struct crypto_kpp *tfm_ecdh;
2648 struct smp_cmd_pairing_confirm cfm; 2658 struct smp_cmd_pairing_confirm cfm;
2649 int err; 2659 int err;
2650 2660
@@ -2677,7 +2687,18 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
2677 SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk); 2687 SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk);
2678 SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32); 2688 SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32);
2679 2689
2680 if (!compute_ecdh_secret(smp->remote_pk, smp->local_sk, smp->dhkey)) 2690 /* Compute the shared secret on the same crypto tfm on which the private
2691 * key was set/generated.
2692 */
2693 if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) {
2694 struct smp_dev *smp_dev = chan->data;
2695
2696 tfm_ecdh = smp_dev->tfm_ecdh;
2697 } else {
2698 tfm_ecdh = smp->tfm_ecdh;
2699 }
2700
2701 if (compute_ecdh_secret(tfm_ecdh, smp->remote_pk, smp->dhkey))
2681 return SMP_UNSPECIFIED; 2702 return SMP_UNSPECIFIED;
2682 2703
2683 SMP_DBG("DHKey %32phN", smp->dhkey); 2704 SMP_DBG("DHKey %32phN", smp->dhkey);
@@ -2933,8 +2954,8 @@ done:
2933 return err; 2954 return err;
2934 2955
2935drop: 2956drop:
2936 BT_ERR("%s unexpected SMP command 0x%02x from %pMR", hcon->hdev->name, 2957 bt_dev_err(hcon->hdev, "unexpected SMP command 0x%02x from %pMR",
2937 code, &hcon->dst); 2958 code, &hcon->dst);
2938 kfree_skb(skb); 2959 kfree_skb(skb);
2939 return 0; 2960 return 0;
2940} 2961}
@@ -3001,8 +3022,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
3001 3022
3002 smp = smp_chan_create(conn); 3023 smp = smp_chan_create(conn);
3003 if (!smp) { 3024 if (!smp) {
3004 BT_ERR("%s unable to create SMP context for BR/EDR", 3025 bt_dev_err(hdev, "unable to create SMP context for BR/EDR");
3005 hdev->name);
3006 return; 3026 return;
3007 } 3027 }
3008 3028
@@ -3169,6 +3189,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
3169 struct smp_dev *smp; 3189 struct smp_dev *smp;
3170 struct crypto_cipher *tfm_aes; 3190 struct crypto_cipher *tfm_aes;
3171 struct crypto_shash *tfm_cmac; 3191 struct crypto_shash *tfm_cmac;
3192 struct crypto_kpp *tfm_ecdh;
3172 3193
3173 if (cid == L2CAP_CID_SMP_BREDR) { 3194 if (cid == L2CAP_CID_SMP_BREDR) {
3174 smp = NULL; 3195 smp = NULL;
@@ -3194,8 +3215,18 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
3194 return ERR_CAST(tfm_cmac); 3215 return ERR_CAST(tfm_cmac);
3195 } 3216 }
3196 3217
3218 tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
3219 if (IS_ERR(tfm_ecdh)) {
3220 BT_ERR("Unable to create ECDH crypto context");
3221 crypto_free_shash(tfm_cmac);
3222 crypto_free_cipher(tfm_aes);
3223 kzfree(smp);
3224 return ERR_CAST(tfm_ecdh);
3225 }
3226
3197 smp->tfm_aes = tfm_aes; 3227 smp->tfm_aes = tfm_aes;
3198 smp->tfm_cmac = tfm_cmac; 3228 smp->tfm_cmac = tfm_cmac;
3229 smp->tfm_ecdh = tfm_ecdh;
3199 smp->min_key_size = SMP_MIN_ENC_KEY_SIZE; 3230 smp->min_key_size = SMP_MIN_ENC_KEY_SIZE;
3200 smp->max_key_size = SMP_MAX_ENC_KEY_SIZE; 3231 smp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
3201 3232
@@ -3205,6 +3236,7 @@ create_chan:
3205 if (smp) { 3236 if (smp) {
3206 crypto_free_cipher(smp->tfm_aes); 3237 crypto_free_cipher(smp->tfm_aes);
3207 crypto_free_shash(smp->tfm_cmac); 3238 crypto_free_shash(smp->tfm_cmac);
3239 crypto_free_kpp(smp->tfm_ecdh);
3208 kzfree(smp); 3240 kzfree(smp);
3209 } 3241 }
3210 return ERR_PTR(-ENOMEM); 3242 return ERR_PTR(-ENOMEM);
@@ -3252,6 +3284,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
3252 chan->data = NULL; 3284 chan->data = NULL;
3253 crypto_free_cipher(smp->tfm_aes); 3285 crypto_free_cipher(smp->tfm_aes);
3254 crypto_free_shash(smp->tfm_cmac); 3286 crypto_free_shash(smp->tfm_cmac);
3287 crypto_free_kpp(smp->tfm_ecdh);
3255 kzfree(smp); 3288 kzfree(smp);
3256 } 3289 }
3257 3290
@@ -3490,25 +3523,18 @@ void smp_unregister(struct hci_dev *hdev)
3490 3523
3491#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) 3524#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP)
3492 3525
3493static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) 3526static int __init test_debug_key(struct crypto_kpp *tfm_ecdh)
3494{ 3527{
3495 int i; 3528 u8 pk[64];
3496 3529 int err;
3497 for (i = 0; i < ndigits; i++)
3498 out[i] = __swab64(in[ndigits - 1 - i]);
3499}
3500
3501static int __init test_debug_key(void)
3502{
3503 u8 pk[64], sk[32];
3504
3505 swap_digits((u64 *)debug_sk, (u64 *)sk, 4);
3506 3530
3507 if (!generate_ecdh_keys(pk, sk)) 3531 err = set_ecdh_privkey(tfm_ecdh, debug_sk);
3508 return -EINVAL; 3532 if (err)
3533 return err;
3509 3534
3510 if (crypto_memneq(sk, debug_sk, 32)) 3535 err = generate_ecdh_public_key(tfm_ecdh, pk);
3511 return -EINVAL; 3536 if (err)
3537 return err;
3512 3538
3513 if (crypto_memneq(pk, debug_pk, 64)) 3539 if (crypto_memneq(pk, debug_pk, 64))
3514 return -EINVAL; 3540 return -EINVAL;
@@ -3763,7 +3789,8 @@ static const struct file_operations test_smp_fops = {
3763}; 3789};
3764 3790
3765static int __init run_selftests(struct crypto_cipher *tfm_aes, 3791static int __init run_selftests(struct crypto_cipher *tfm_aes,
3766 struct crypto_shash *tfm_cmac) 3792 struct crypto_shash *tfm_cmac,
3793 struct crypto_kpp *tfm_ecdh)
3767{ 3794{
3768 ktime_t calltime, delta, rettime; 3795 ktime_t calltime, delta, rettime;
3769 unsigned long long duration; 3796 unsigned long long duration;
@@ -3771,7 +3798,7 @@ static int __init run_selftests(struct crypto_cipher *tfm_aes,
3771 3798
3772 calltime = ktime_get(); 3799 calltime = ktime_get();
3773 3800
3774 err = test_debug_key(); 3801 err = test_debug_key(tfm_ecdh);
3775 if (err) { 3802 if (err) {
3776 BT_ERR("debug_key test failed"); 3803 BT_ERR("debug_key test failed");
3777 goto done; 3804 goto done;
@@ -3848,6 +3875,7 @@ int __init bt_selftest_smp(void)
3848{ 3875{
3849 struct crypto_cipher *tfm_aes; 3876 struct crypto_cipher *tfm_aes;
3850 struct crypto_shash *tfm_cmac; 3877 struct crypto_shash *tfm_cmac;
3878 struct crypto_kpp *tfm_ecdh;
3851 int err; 3879 int err;
3852 3880
3853 tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 3881 tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
@@ -3863,10 +3891,19 @@ int __init bt_selftest_smp(void)
3863 return PTR_ERR(tfm_cmac); 3891 return PTR_ERR(tfm_cmac);
3864 } 3892 }
3865 3893
3866 err = run_selftests(tfm_aes, tfm_cmac); 3894 tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
3895 if (IS_ERR(tfm_ecdh)) {
3896 BT_ERR("Unable to create ECDH crypto context");
3897 crypto_free_shash(tfm_cmac);
3898 crypto_free_cipher(tfm_aes);
3899 return PTR_ERR(tfm_ecdh);
3900 }
3901
3902 err = run_selftests(tfm_aes, tfm_cmac, tfm_ecdh);
3867 3903
3868 crypto_free_shash(tfm_cmac); 3904 crypto_free_shash(tfm_cmac);
3869 crypto_free_cipher(tfm_aes); 3905 crypto_free_cipher(tfm_aes);
3906 crypto_free_kpp(tfm_ecdh);
3870 3907
3871 return err; 3908 return err;
3872} 3909}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 6be41a44d688..a86e6687026e 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -133,7 +133,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
133 if (is_l2) 133 if (is_l2)
134 __skb_push(skb, ETH_HLEN); 134 __skb_push(skb, ETH_HLEN);
135 if (is_direct_pkt_access) 135 if (is_direct_pkt_access)
136 bpf_compute_data_end(skb); 136 bpf_compute_data_pointers(skb);
137 retval = bpf_test_run(prog, skb, repeat, &duration); 137 retval = bpf_test_run(prog, skb, repeat, &duration);
138 if (!is_l2) 138 if (!is_l2)
139 __skb_push(skb, ETH_HLEN); 139 __skb_push(skb, ETH_HLEN);
@@ -162,6 +162,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
162 162
163 xdp.data_hard_start = data; 163 xdp.data_hard_start = data;
164 xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN; 164 xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN;
165 xdp.data_meta = xdp.data;
165 xdp.data_end = xdp.data + size; 166 xdp.data_end = xdp.data + size;
166 167
167 retval = bpf_test_run(prog, &xdp, repeat, &duration); 168 retval = bpf_test_run(prog, &xdp, repeat, &duration);
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index f760e62a672a..ac9ef337f0fa 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_BRIDGE) += bridge.o
8bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ 8bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
9 br_ioctl.o br_stp.o br_stp_bpdu.o \ 9 br_ioctl.o br_stp.o br_stp_bpdu.o \
10 br_stp_if.o br_stp_timer.o br_netlink.o \ 10 br_stp_if.o br_stp_timer.o br_netlink.o \
11 br_netlink_tunnel.o 11 br_netlink_tunnel.o br_arp_nd_proxy.o
12 12
13bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o 13bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
14 14
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1407d1ba7577..6bf06e756df2 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -112,7 +112,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
112 /* Events that may cause spanning tree to refresh */ 112 /* Events that may cause spanning tree to refresh */
113 if (event == NETDEV_CHANGEADDR || event == NETDEV_UP || 113 if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
114 event == NETDEV_CHANGE || event == NETDEV_DOWN) 114 event == NETDEV_CHANGE || event == NETDEV_DOWN)
115 br_ifinfo_notify(RTM_NEWLINK, p); 115 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
116 116
117 return NOTIFY_DONE; 117 return NOTIFY_DONE;
118} 118}
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
new file mode 100644
index 000000000000..2cf7716254be
--- /dev/null
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -0,0 +1,469 @@
1/*
2 * Handle bridge arp/nd proxy/suppress
3 *
4 * Copyright (C) 2017 Cumulus Networks
5 * Copyright (c) 2017 Roopa Prabhu <roopa@cumulusnetworks.com>
6 *
7 * Authors:
8 * Roopa Prabhu <roopa@cumulusnetworks.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/kernel.h>
17#include <linux/netdevice.h>
18#include <linux/etherdevice.h>
19#include <linux/neighbour.h>
20#include <net/arp.h>
21#include <linux/if_vlan.h>
22#include <linux/inetdevice.h>
23#include <net/addrconf.h>
24#if IS_ENABLED(CONFIG_IPV6)
25#include <net/ip6_checksum.h>
26#endif
27
28#include "br_private.h"
29
30void br_recalculate_neigh_suppress_enabled(struct net_bridge *br)
31{
32 struct net_bridge_port *p;
33 bool neigh_suppress = false;
34
35 list_for_each_entry(p, &br->port_list, list) {
36 if (p->flags & BR_NEIGH_SUPPRESS) {
37 neigh_suppress = true;
38 break;
39 }
40 }
41
42 br->neigh_suppress_enabled = neigh_suppress;
43}
44
45#if IS_ENABLED(CONFIG_INET)
46static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p,
47 struct net_device *dev, __be32 dest_ip, __be32 src_ip,
48 const unsigned char *dest_hw,
49 const unsigned char *src_hw,
50 const unsigned char *target_hw,
51 __be16 vlan_proto, u16 vlan_tci)
52{
53 struct net_bridge_vlan_group *vg;
54 struct sk_buff *skb;
55 u16 pvid;
56
57 netdev_dbg(dev, "arp send dev %s dst %pI4 dst_hw %pM src %pI4 src_hw %pM\n",
58 dev->name, &dest_ip, dest_hw, &src_ip, src_hw);
59
60 if (!vlan_tci) {
61 arp_send(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip,
62 dest_hw, src_hw, target_hw);
63 return;
64 }
65
66 skb = arp_create(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip,
67 dest_hw, src_hw, target_hw);
68 if (!skb)
69 return;
70
71 if (p)
72 vg = nbp_vlan_group_rcu(p);
73 else
74 vg = br_vlan_group_rcu(br);
75 pvid = br_get_pvid(vg);
76 if (pvid == (vlan_tci & VLAN_VID_MASK))
77 vlan_tci = 0;
78
79 if (vlan_tci)
80 __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
81
82 if (p) {
83 arp_xmit(skb);
84 } else {
85 skb_reset_mac_header(skb);
86 __skb_pull(skb, skb_network_offset(skb));
87 skb->ip_summed = CHECKSUM_UNNECESSARY;
88 skb->pkt_type = PACKET_HOST;
89
90 netif_rx_ni(skb);
91 }
92}
93
94static int br_chk_addr_ip(struct net_device *dev, void *data)
95{
96 __be32 ip = *(__be32 *)data;
97 struct in_device *in_dev;
98 __be32 addr = 0;
99
100 in_dev = __in_dev_get_rcu(dev);
101 if (in_dev)
102 addr = inet_confirm_addr(dev_net(dev), in_dev, 0, ip,
103 RT_SCOPE_HOST);
104
105 if (addr == ip)
106 return 1;
107
108 return 0;
109}
110
111static bool br_is_local_ip(struct net_device *dev, __be32 ip)
112{
113 if (br_chk_addr_ip(dev, &ip))
114 return true;
115
116 /* check if ip is configured on upper dev */
117 if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip))
118 return true;
119
120 return false;
121}
122
123void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
124 u16 vid, struct net_bridge_port *p)
125{
126 struct net_device *dev = br->dev;
127 struct net_device *vlandev = dev;
128 struct neighbour *n;
129 struct arphdr *parp;
130 u8 *arpptr, *sha;
131 __be32 sip, tip;
132
133 BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
134
135 if ((dev->flags & IFF_NOARP) ||
136 !pskb_may_pull(skb, arp_hdr_len(dev)))
137 return;
138
139 parp = arp_hdr(skb);
140
141 if (parp->ar_pro != htons(ETH_P_IP) ||
142 parp->ar_hln != dev->addr_len ||
143 parp->ar_pln != 4)
144 return;
145
146 arpptr = (u8 *)parp + sizeof(struct arphdr);
147 sha = arpptr;
148 arpptr += dev->addr_len; /* sha */
149 memcpy(&sip, arpptr, sizeof(sip));
150 arpptr += sizeof(sip);
151 arpptr += dev->addr_len; /* tha */
152 memcpy(&tip, arpptr, sizeof(tip));
153
154 if (ipv4_is_loopback(tip) ||
155 ipv4_is_multicast(tip))
156 return;
157
158 if (br->neigh_suppress_enabled) {
159 if (p && (p->flags & BR_NEIGH_SUPPRESS))
160 return;
161 if (ipv4_is_zeronet(sip) || sip == tip) {
162 /* prevent flooding to neigh suppress ports */
163 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
164 return;
165 }
166 }
167
168 if (parp->ar_op != htons(ARPOP_REQUEST))
169 return;
170
171 if (vid != 0) {
172 vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto,
173 vid);
174 if (!vlandev)
175 return;
176 }
177
178 if (br->neigh_suppress_enabled && br_is_local_ip(vlandev, tip)) {
179 /* its our local ip, so don't proxy reply
180 * and don't forward to neigh suppress ports
181 */
182 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
183 return;
184 }
185
186 n = neigh_lookup(&arp_tbl, &tip, vlandev);
187 if (n) {
188 struct net_bridge_fdb_entry *f;
189
190 if (!(n->nud_state & NUD_VALID)) {
191 neigh_release(n);
192 return;
193 }
194
195 f = br_fdb_find_rcu(br, n->ha, vid);
196 if (f) {
197 bool replied = false;
198
199 if ((p && (p->flags & BR_PROXYARP)) ||
200 (f->dst && (f->dst->flags & (BR_PROXYARP_WIFI |
201 BR_NEIGH_SUPPRESS)))) {
202 if (!vid)
203 br_arp_send(br, p, skb->dev, sip, tip,
204 sha, n->ha, sha, 0, 0);
205 else
206 br_arp_send(br, p, skb->dev, sip, tip,
207 sha, n->ha, sha,
208 skb->vlan_proto,
209 skb_vlan_tag_get(skb));
210 replied = true;
211 }
212
213 /* If we have replied or as long as we know the
214 * mac, indicate to arp replied
215 */
216 if (replied || br->neigh_suppress_enabled)
217 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
218 }
219
220 neigh_release(n);
221 }
222}
223#endif
224
225#if IS_ENABLED(CONFIG_IPV6)
226struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *msg)
227{
228 struct nd_msg *m;
229
230 m = skb_header_pointer(skb, skb_network_offset(skb) +
231 sizeof(struct ipv6hdr), sizeof(*msg), msg);
232 if (!m)
233 return NULL;
234
235 if (m->icmph.icmp6_code != 0 ||
236 (m->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION &&
237 m->icmph.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT))
238 return NULL;
239
240 return m;
241}
242
243static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
244 struct sk_buff *request, struct neighbour *n,
245 __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns)
246{
247 struct net_device *dev = request->dev;
248 struct net_bridge_vlan_group *vg;
249 struct sk_buff *reply;
250 struct nd_msg *na;
251 struct ipv6hdr *pip6;
252 int na_olen = 8; /* opt hdr + ETH_ALEN for target */
253 int ns_olen;
254 int i, len;
255 u8 *daddr;
256 u16 pvid;
257
258 if (!dev)
259 return;
260
261 len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
262 sizeof(*na) + na_olen + dev->needed_tailroom;
263
264 reply = alloc_skb(len, GFP_ATOMIC);
265 if (!reply)
266 return;
267
268 reply->protocol = htons(ETH_P_IPV6);
269 reply->dev = dev;
270 skb_reserve(reply, LL_RESERVED_SPACE(dev));
271 skb_push(reply, sizeof(struct ethhdr));
272 skb_set_mac_header(reply, 0);
273
274 daddr = eth_hdr(request)->h_source;
275
276 /* Do we need option processing ? */
277 ns_olen = request->len - (skb_network_offset(request) +
278 sizeof(struct ipv6hdr)) - sizeof(*ns);
279 for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) {
280 if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
281 daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
282 break;
283 }
284 }
285
286 /* Ethernet header */
287 ether_addr_copy(eth_hdr(reply)->h_dest, daddr);
288 ether_addr_copy(eth_hdr(reply)->h_source, n->ha);
289 eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
290 reply->protocol = htons(ETH_P_IPV6);
291
292 skb_pull(reply, sizeof(struct ethhdr));
293 skb_set_network_header(reply, 0);
294 skb_put(reply, sizeof(struct ipv6hdr));
295
296 /* IPv6 header */
297 pip6 = ipv6_hdr(reply);
298 memset(pip6, 0, sizeof(struct ipv6hdr));
299 pip6->version = 6;
300 pip6->priority = ipv6_hdr(request)->priority;
301 pip6->nexthdr = IPPROTO_ICMPV6;
302 pip6->hop_limit = 255;
303 pip6->daddr = ipv6_hdr(request)->saddr;
304 pip6->saddr = *(struct in6_addr *)n->primary_key;
305
306 skb_pull(reply, sizeof(struct ipv6hdr));
307 skb_set_transport_header(reply, 0);
308
309 na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen);
310
311 /* Neighbor Advertisement */
312 memset(na, 0, sizeof(*na) + na_olen);
313 na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
314 na->icmph.icmp6_router = 0; /* XXX: should be 1 ? */
315 na->icmph.icmp6_override = 1;
316 na->icmph.icmp6_solicited = 1;
317 na->target = ns->target;
318 ether_addr_copy(&na->opt[2], n->ha);
319 na->opt[0] = ND_OPT_TARGET_LL_ADDR;
320 na->opt[1] = na_olen >> 3;
321
322 na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
323 &pip6->daddr,
324 sizeof(*na) + na_olen,
325 IPPROTO_ICMPV6,
326 csum_partial(na, sizeof(*na) + na_olen, 0));
327
328 pip6->payload_len = htons(sizeof(*na) + na_olen);
329
330 skb_push(reply, sizeof(struct ipv6hdr));
331 skb_push(reply, sizeof(struct ethhdr));
332
333 reply->ip_summed = CHECKSUM_UNNECESSARY;
334
335 if (p)
336 vg = nbp_vlan_group_rcu(p);
337 else
338 vg = br_vlan_group_rcu(br);
339 pvid = br_get_pvid(vg);
340 if (pvid == (vlan_tci & VLAN_VID_MASK))
341 vlan_tci = 0;
342
343 if (vlan_tci)
344 __vlan_hwaccel_put_tag(reply, vlan_proto, vlan_tci);
345
346 netdev_dbg(dev, "nd send dev %s dst %pI6 dst_hw %pM src %pI6 src_hw %pM\n",
347 dev->name, &pip6->daddr, daddr, &pip6->saddr, n->ha);
348
349 if (p) {
350 dev_queue_xmit(reply);
351 } else {
352 skb_reset_mac_header(reply);
353 __skb_pull(reply, skb_network_offset(reply));
354 reply->ip_summed = CHECKSUM_UNNECESSARY;
355 reply->pkt_type = PACKET_HOST;
356
357 netif_rx_ni(reply);
358 }
359}
360
361static int br_chk_addr_ip6(struct net_device *dev, void *data)
362{
363 struct in6_addr *addr = (struct in6_addr *)data;
364
365 if (ipv6_chk_addr(dev_net(dev), addr, dev, 0))
366 return 1;
367
368 return 0;
369}
370
371static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr)
372
373{
374 if (br_chk_addr_ip6(dev, addr))
375 return true;
376
377 /* check if ip is configured on upper dev */
378 if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr))
379 return true;
380
381 return false;
382}
383
384void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
385 u16 vid, struct net_bridge_port *p, struct nd_msg *msg)
386{
387 struct net_device *dev = br->dev;
388 struct net_device *vlandev = NULL;
389 struct in6_addr *saddr, *daddr;
390 struct ipv6hdr *iphdr;
391 struct neighbour *n;
392
393 BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
394
395 if (p && (p->flags & BR_NEIGH_SUPPRESS))
396 return;
397
398 if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT &&
399 !msg->icmph.icmp6_solicited) {
400 /* prevent flooding to neigh suppress ports */
401 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
402 return;
403 }
404
405 if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
406 return;
407
408 iphdr = ipv6_hdr(skb);
409 saddr = &iphdr->saddr;
410 daddr = &iphdr->daddr;
411
412 if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) {
413 /* prevent flooding to neigh suppress ports */
414 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
415 return;
416 }
417
418 if (vid != 0) {
419 /* build neigh table lookup on the vlan device */
420 vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto,
421 vid);
422 if (!vlandev)
423 return;
424 } else {
425 vlandev = dev;
426 }
427
428 if (br_is_local_ip6(vlandev, &msg->target)) {
429 /* its our own ip, so don't proxy reply
430 * and don't forward to arp suppress ports
431 */
432 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
433 return;
434 }
435
436 n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, vlandev);
437 if (n) {
438 struct net_bridge_fdb_entry *f;
439
440 if (!(n->nud_state & NUD_VALID)) {
441 neigh_release(n);
442 return;
443 }
444
445 f = br_fdb_find_rcu(br, n->ha, vid);
446 if (f) {
447 bool replied = false;
448
449 if (f->dst && (f->dst->flags & BR_NEIGH_SUPPRESS)) {
450 if (vid != 0)
451 br_nd_send(br, p, skb, n,
452 skb->vlan_proto,
453 skb_vlan_tag_get(skb), msg);
454 else
455 br_nd_send(br, p, skb, n, 0, 0, msg);
456 replied = true;
457 }
458
459 /* If we have replied or as long as we know the
460 * mac, indicate to NEIGH_SUPPRESS ports that we
461 * have replied
462 */
463 if (replied || br->neigh_suppress_enabled)
464 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
465 }
466 neigh_release(n);
467 }
468}
469#endif
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index f6b6a92f1c48..af5b8c87f590 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
39 struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); 39 struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
40 const struct nf_br_ops *nf_ops; 40 const struct nf_br_ops *nf_ops;
41 const unsigned char *dest; 41 const unsigned char *dest;
42 struct ethhdr *eth;
42 u16 vid = 0; 43 u16 vid = 0;
43 44
44 rcu_read_lock(); 45 rcu_read_lock();
@@ -57,11 +58,30 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
57 BR_INPUT_SKB_CB(skb)->brdev = dev; 58 BR_INPUT_SKB_CB(skb)->brdev = dev;
58 59
59 skb_reset_mac_header(skb); 60 skb_reset_mac_header(skb);
61 eth = eth_hdr(skb);
60 skb_pull(skb, ETH_HLEN); 62 skb_pull(skb, ETH_HLEN);
61 63
62 if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid)) 64 if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
63 goto out; 65 goto out;
64 66
67 if (IS_ENABLED(CONFIG_INET) &&
68 (eth->h_proto == htons(ETH_P_ARP) ||
69 eth->h_proto == htons(ETH_P_RARP)) &&
70 br->neigh_suppress_enabled) {
71 br_do_proxy_suppress_arp(skb, br, vid, NULL);
72 } else if (IS_ENABLED(CONFIG_IPV6) &&
73 skb->protocol == htons(ETH_P_IPV6) &&
74 br->neigh_suppress_enabled &&
75 pskb_may_pull(skb, sizeof(struct ipv6hdr) +
76 sizeof(struct nd_msg)) &&
77 ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
78 struct nd_msg *msg, _msg;
79
80 msg = br_is_nd_neigh_msg(skb, &_msg);
81 if (msg)
82 br_do_suppress_nd(skb, br, vid, NULL, msg);
83 }
84
65 dest = eth_hdr(skb)->h_dest; 85 dest = eth_hdr(skb)->h_dest;
66 if (is_broadcast_ether_addr(dest)) { 86 if (is_broadcast_ether_addr(dest)) {
67 br_flood(br, skb, BR_PKT_BROADCAST, false, true); 87 br_flood(br, skb, BR_PKT_BROADCAST, false, true);
@@ -320,12 +340,13 @@ void br_netpoll_disable(struct net_bridge_port *p)
320 340
321#endif 341#endif
322 342
323static int br_add_slave(struct net_device *dev, struct net_device *slave_dev) 343static int br_add_slave(struct net_device *dev, struct net_device *slave_dev,
344 struct netlink_ext_ack *extack)
324 345
325{ 346{
326 struct net_bridge *br = netdev_priv(dev); 347 struct net_bridge *br = netdev_priv(dev);
327 348
328 return br_add_if(br, slave_dev); 349 return br_add_if(br, slave_dev, extack);
329} 350}
330 351
331static int br_del_slave(struct net_device *dev, struct net_device *slave_dev) 352static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
@@ -400,7 +421,7 @@ void br_dev_setup(struct net_device *dev)
400 br->bridge_id.prio[0] = 0x80; 421 br->bridge_id.prio[0] = 0x80;
401 br->bridge_id.prio[1] = 0x00; 422 br->bridge_id.prio[1] = 0x00;
402 423
403 ether_addr_copy(br->group_addr, eth_reserved_addr_base); 424 ether_addr_copy(br->group_addr, eth_stp_addr);
404 425
405 br->stp_enabled = BR_NO_STP; 426 br->stp_enabled = BR_NO_STP;
406 br->group_fwd_mask = BR_GROUPFWD_DEFAULT; 427 br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 48fb17417fac..b4eed113d2ec 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -204,7 +204,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
204 /* Do not flood to ports that enable proxy ARP */ 204 /* Do not flood to ports that enable proxy ARP */
205 if (p->flags & BR_PROXYARP) 205 if (p->flags & BR_PROXYARP)
206 continue; 206 continue;
207 if ((p->flags & BR_PROXYARP_WIFI) && 207 if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) &&
208 BR_INPUT_SKB_CB(skb)->proxyarp_replied) 208 BR_INPUT_SKB_CB(skb)->proxyarp_replied)
209 continue; 209 continue;
210 210
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3aef22931ab..9ba4ed65c52b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -271,7 +271,7 @@ static void del_nbp(struct net_bridge_port *p)
271 br_stp_disable_port(p); 271 br_stp_disable_port(p);
272 spin_unlock_bh(&br->lock); 272 spin_unlock_bh(&br->lock);
273 273
274 br_ifinfo_notify(RTM_DELLINK, p); 274 br_ifinfo_notify(RTM_DELLINK, NULL, p);
275 275
276 list_del_rcu(&p->list); 276 list_del_rcu(&p->list);
277 if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom) 277 if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
@@ -310,6 +310,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
310 del_nbp(p); 310 del_nbp(p);
311 } 311 }
312 312
313 br_recalculate_neigh_suppress_enabled(br);
314
313 br_fdb_delete_by_port(br, NULL, 0, 1); 315 br_fdb_delete_by_port(br, NULL, 0, 1);
314 316
315 cancel_delayed_work_sync(&br->gc_work); 317 cancel_delayed_work_sync(&br->gc_work);
@@ -480,7 +482,8 @@ netdev_features_t br_features_recompute(struct net_bridge *br,
480} 482}
481 483
482/* called with RTNL */ 484/* called with RTNL */
483int br_add_if(struct net_bridge *br, struct net_device *dev) 485int br_add_if(struct net_bridge *br, struct net_device *dev,
486 struct netlink_ext_ack *extack)
484{ 487{
485 struct net_bridge_port *p; 488 struct net_bridge_port *p;
486 int err = 0; 489 int err = 0;
@@ -500,16 +503,22 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
500 return -EINVAL; 503 return -EINVAL;
501 504
502 /* No bridging of bridges */ 505 /* No bridging of bridges */
503 if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) 506 if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
507 NL_SET_ERR_MSG(extack,
508 "Can not enslave a bridge to a bridge");
504 return -ELOOP; 509 return -ELOOP;
510 }
505 511
506 /* Device is already being bridged */ 512 /* Device is already being bridged */
507 if (br_port_exists(dev)) 513 if (br_port_exists(dev))
508 return -EBUSY; 514 return -EBUSY;
509 515
510 /* No bridging devices that dislike that (e.g. wireless) */ 516 /* No bridging devices that dislike that (e.g. wireless) */
511 if (dev->priv_flags & IFF_DONT_BRIDGE) 517 if (dev->priv_flags & IFF_DONT_BRIDGE) {
518 NL_SET_ERR_MSG(extack,
519 "Device does not allow enslaving to a bridge");
512 return -EOPNOTSUPP; 520 return -EOPNOTSUPP;
521 }
513 522
514 p = new_nbp(br, dev); 523 p = new_nbp(br, dev);
515 if (IS_ERR(p)) 524 if (IS_ERR(p))
@@ -540,7 +549,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
540 549
541 dev->priv_flags |= IFF_BRIDGE_PORT; 550 dev->priv_flags |= IFF_BRIDGE_PORT;
542 551
543 err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL); 552 err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack);
544 if (err) 553 if (err)
545 goto err5; 554 goto err5;
546 555
@@ -580,7 +589,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
580 br_stp_enable_port(p); 589 br_stp_enable_port(p);
581 spin_unlock_bh(&br->lock); 590 spin_unlock_bh(&br->lock);
582 591
583 br_ifinfo_notify(RTM_NEWLINK, p); 592 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
584 593
585 if (changed_addr) 594 if (changed_addr)
586 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); 595 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
@@ -653,4 +662,7 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
653 662
654 if (mask & BR_AUTO_MASK) 663 if (mask & BR_AUTO_MASK)
655 nbp_update_port_count(br); 664 nbp_update_port_count(br);
665
666 if (mask & BR_NEIGH_SUPPRESS)
667 br_recalculate_neigh_suppress_enabled(br);
656} 668}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 7637f58c1226..7f98a7d25866 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -71,62 +71,6 @@ static int br_pass_frame_up(struct sk_buff *skb)
71 br_netif_receive_skb); 71 br_netif_receive_skb);
72} 72}
73 73
74static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
75 u16 vid, struct net_bridge_port *p)
76{
77 struct net_device *dev = br->dev;
78 struct neighbour *n;
79 struct arphdr *parp;
80 u8 *arpptr, *sha;
81 __be32 sip, tip;
82
83 BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
84
85 if ((dev->flags & IFF_NOARP) ||
86 !pskb_may_pull(skb, arp_hdr_len(dev)))
87 return;
88
89 parp = arp_hdr(skb);
90
91 if (parp->ar_pro != htons(ETH_P_IP) ||
92 parp->ar_op != htons(ARPOP_REQUEST) ||
93 parp->ar_hln != dev->addr_len ||
94 parp->ar_pln != 4)
95 return;
96
97 arpptr = (u8 *)parp + sizeof(struct arphdr);
98 sha = arpptr;
99 arpptr += dev->addr_len; /* sha */
100 memcpy(&sip, arpptr, sizeof(sip));
101 arpptr += sizeof(sip);
102 arpptr += dev->addr_len; /* tha */
103 memcpy(&tip, arpptr, sizeof(tip));
104
105 if (ipv4_is_loopback(tip) ||
106 ipv4_is_multicast(tip))
107 return;
108
109 n = neigh_lookup(&arp_tbl, &tip, dev);
110 if (n) {
111 struct net_bridge_fdb_entry *f;
112
113 if (!(n->nud_state & NUD_VALID)) {
114 neigh_release(n);
115 return;
116 }
117
118 f = br_fdb_find_rcu(br, n->ha, vid);
119 if (f && ((p->flags & BR_PROXYARP) ||
120 (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) {
121 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip,
122 sha, n->ha, sha);
123 BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
124 }
125
126 neigh_release(n);
127 }
128}
129
130/* note: already called with rcu_read_lock */ 74/* note: already called with rcu_read_lock */
131int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 75int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
132{ 76{
@@ -171,15 +115,29 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
171 115
172 BR_INPUT_SKB_CB(skb)->brdev = br->dev; 116 BR_INPUT_SKB_CB(skb)->brdev = br->dev;
173 117
174 if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP)) 118 if (IS_ENABLED(CONFIG_INET) &&
175 br_do_proxy_arp(skb, br, vid, p); 119 (skb->protocol == htons(ETH_P_ARP) ||
120 skb->protocol == htons(ETH_P_RARP))) {
121 br_do_proxy_suppress_arp(skb, br, vid, p);
122 } else if (IS_ENABLED(CONFIG_IPV6) &&
123 skb->protocol == htons(ETH_P_IPV6) &&
124 br->neigh_suppress_enabled &&
125 pskb_may_pull(skb, sizeof(struct ipv6hdr) +
126 sizeof(struct nd_msg)) &&
127 ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
128 struct nd_msg *msg, _msg;
129
130 msg = br_is_nd_neigh_msg(skb, &_msg);
131 if (msg)
132 br_do_suppress_nd(skb, br, vid, p, msg);
133 }
176 134
177 switch (pkt_type) { 135 switch (pkt_type) {
178 case BR_PKT_MULTICAST: 136 case BR_PKT_MULTICAST:
179 mdst = br_mdb_get(br, skb, vid); 137 mdst = br_mdb_get(br, skb, vid);
180 if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && 138 if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
181 br_multicast_querier_exists(br, eth_hdr(skb))) { 139 br_multicast_querier_exists(br, eth_hdr(skb))) {
182 if ((mdst && mdst->mglist) || 140 if ((mdst && mdst->host_joined) ||
183 br_multicast_is_router(br)) { 141 br_multicast_is_router(br)) {
184 local_rcv = true; 142 local_rcv = true;
185 br->dev->stats.multicast++; 143 br->dev->stats.multicast++;
@@ -289,6 +247,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
289 * 247 *
290 * Others reserved for future standardization 248 * Others reserved for future standardization
291 */ 249 */
250 fwd_mask |= p->group_fwd_mask;
292 switch (dest[5]) { 251 switch (dest[5]) {
293 case 0x00: /* Bridge Group Address */ 252 case 0x00: /* Bridge Group Address */
294 /* If STP is turned off, 253 /* If STP is turned off,
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 7970f8540cbb..73b957fd639d 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -98,7 +98,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
98 return -EINVAL; 98 return -EINVAL;
99 99
100 if (isadd) 100 if (isadd)
101 ret = br_add_if(br, dev); 101 ret = br_add_if(br, dev, NULL);
102 else 102 else
103 ret = br_del_if(br, dev); 103 ret = br_del_if(br, dev);
104 104
@@ -293,7 +293,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
293 293
294 if (!ret) { 294 if (!ret) {
295 if (p) 295 if (p)
296 br_ifinfo_notify(RTM_NEWLINK, p); 296 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
297 else 297 else
298 netdev_state_change(br->dev); 298 netdev_state_change(br->dev);
299 } 299 }
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 31ddff22563e..b0f4c734900b 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -292,6 +292,46 @@ err:
292 kfree(priv); 292 kfree(priv);
293} 293}
294 294
295static void br_mdb_switchdev_host_port(struct net_device *dev,
296 struct net_device *lower_dev,
297 struct br_mdb_entry *entry, int type)
298{
299 struct switchdev_obj_port_mdb mdb = {
300 .obj = {
301 .id = SWITCHDEV_OBJ_ID_HOST_MDB,
302 .flags = SWITCHDEV_F_DEFER,
303 },
304 .vid = entry->vid,
305 };
306
307 if (entry->addr.proto == htons(ETH_P_IP))
308 ip_eth_mc_map(entry->addr.u.ip4, mdb.addr);
309#if IS_ENABLED(CONFIG_IPV6)
310 else
311 ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr);
312#endif
313
314 mdb.obj.orig_dev = dev;
315 switch (type) {
316 case RTM_NEWMDB:
317 switchdev_port_obj_add(lower_dev, &mdb.obj);
318 break;
319 case RTM_DELMDB:
320 switchdev_port_obj_del(lower_dev, &mdb.obj);
321 break;
322 }
323}
324
325static void br_mdb_switchdev_host(struct net_device *dev,
326 struct br_mdb_entry *entry, int type)
327{
328 struct net_device *lower_dev;
329 struct list_head *iter;
330
331 netdev_for_each_lower_dev(dev, lower_dev, iter)
332 br_mdb_switchdev_host_port(dev, lower_dev, entry, type);
333}
334
295static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, 335static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
296 struct br_mdb_entry *entry, int type) 336 struct br_mdb_entry *entry, int type)
297{ 337{
@@ -317,7 +357,7 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
317#endif 357#endif
318 358
319 mdb.obj.orig_dev = port_dev; 359 mdb.obj.orig_dev = port_dev;
320 if (port_dev && type == RTM_NEWMDB) { 360 if (p && port_dev && type == RTM_NEWMDB) {
321 complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); 361 complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
322 if (complete_info) { 362 if (complete_info) {
323 complete_info->port = p; 363 complete_info->port = p;
@@ -327,10 +367,13 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
327 if (switchdev_port_obj_add(port_dev, &mdb.obj)) 367 if (switchdev_port_obj_add(port_dev, &mdb.obj))
328 kfree(complete_info); 368 kfree(complete_info);
329 } 369 }
330 } else if (port_dev && type == RTM_DELMDB) { 370 } else if (p && port_dev && type == RTM_DELMDB) {
331 switchdev_port_obj_del(port_dev, &mdb.obj); 371 switchdev_port_obj_del(port_dev, &mdb.obj);
332 } 372 }
333 373
374 if (!p)
375 br_mdb_switchdev_host(dev, entry, type);
376
334 skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); 377 skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
335 if (!skb) 378 if (!skb)
336 goto errout; 379 goto errout;
@@ -353,7 +396,10 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
353 struct br_mdb_entry entry; 396 struct br_mdb_entry entry;
354 397
355 memset(&entry, 0, sizeof(entry)); 398 memset(&entry, 0, sizeof(entry));
356 entry.ifindex = port->dev->ifindex; 399 if (port)
400 entry.ifindex = port->dev->ifindex;
401 else
402 entry.ifindex = dev->ifindex;
357 entry.addr.proto = group->proto; 403 entry.addr.proto = group->proto;
358 entry.addr.u.ip4 = group->u.ip4; 404 entry.addr.u.ip4 = group->u.ip4;
359#if IS_ENABLED(CONFIG_IPV6) 405#if IS_ENABLED(CONFIG_IPV6)
@@ -655,7 +701,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
655 call_rcu_bh(&p->rcu, br_multicast_free_pg); 701 call_rcu_bh(&p->rcu, br_multicast_free_pg);
656 err = 0; 702 err = 0;
657 703
658 if (!mp->ports && !mp->mglist && 704 if (!mp->ports && !mp->host_joined &&
659 netif_running(br->dev)) 705 netif_running(br->dev))
660 mod_timer(&mp->timer, jiffies); 706 mod_timer(&mp->timer, jiffies);
661 break; 707 break;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 8dc5c8d69bcd..cb4729539b82 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -239,9 +239,9 @@ static void br_multicast_free_group(struct rcu_head *head)
239 kfree(mp); 239 kfree(mp);
240} 240}
241 241
242static void br_multicast_group_expired(unsigned long data) 242static void br_multicast_group_expired(struct timer_list *t)
243{ 243{
244 struct net_bridge_mdb_entry *mp = (void *)data; 244 struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer);
245 struct net_bridge *br = mp->br; 245 struct net_bridge *br = mp->br;
246 struct net_bridge_mdb_htable *mdb; 246 struct net_bridge_mdb_htable *mdb;
247 247
@@ -249,7 +249,8 @@ static void br_multicast_group_expired(unsigned long data)
249 if (!netif_running(br->dev) || timer_pending(&mp->timer)) 249 if (!netif_running(br->dev) || timer_pending(&mp->timer))
250 goto out; 250 goto out;
251 251
252 mp->mglist = false; 252 mp->host_joined = false;
253 br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
253 254
254 if (mp->ports) 255 if (mp->ports)
255 goto out; 256 goto out;
@@ -292,7 +293,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
292 p->flags); 293 p->flags);
293 call_rcu_bh(&p->rcu, br_multicast_free_pg); 294 call_rcu_bh(&p->rcu, br_multicast_free_pg);
294 295
295 if (!mp->ports && !mp->mglist && 296 if (!mp->ports && !mp->host_joined &&
296 netif_running(br->dev)) 297 netif_running(br->dev))
297 mod_timer(&mp->timer, jiffies); 298 mod_timer(&mp->timer, jiffies);
298 299
@@ -302,9 +303,9 @@ static void br_multicast_del_pg(struct net_bridge *br,
302 WARN_ON(1); 303 WARN_ON(1);
303} 304}
304 305
305static void br_multicast_port_group_expired(unsigned long data) 306static void br_multicast_port_group_expired(struct timer_list *t)
306{ 307{
307 struct net_bridge_port_group *pg = (void *)data; 308 struct net_bridge_port_group *pg = from_timer(pg, t, timer);
308 struct net_bridge *br = pg->port->br; 309 struct net_bridge *br = pg->port->br;
309 310
310 spin_lock(&br->multicast_lock); 311 spin_lock(&br->multicast_lock);
@@ -701,8 +702,7 @@ rehash:
701 702
702 mp->br = br; 703 mp->br = br;
703 mp->addr = *group; 704 mp->addr = *group;
704 setup_timer(&mp->timer, br_multicast_group_expired, 705 timer_setup(&mp->timer, br_multicast_group_expired, 0);
705 (unsigned long)mp);
706 706
707 hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); 707 hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]);
708 mdb->size++; 708 mdb->size++;
@@ -729,8 +729,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
729 p->flags = flags; 729 p->flags = flags;
730 rcu_assign_pointer(p->next, next); 730 rcu_assign_pointer(p->next, next);
731 hlist_add_head(&p->mglist, &port->mglist); 731 hlist_add_head(&p->mglist, &port->mglist);
732 setup_timer(&p->timer, br_multicast_port_group_expired, 732 timer_setup(&p->timer, br_multicast_port_group_expired, 0);
733 (unsigned long)p);
734 733
735 if (src) 734 if (src)
736 memcpy(p->eth_addr, src, ETH_ALEN); 735 memcpy(p->eth_addr, src, ETH_ALEN);
@@ -775,7 +774,10 @@ static int br_multicast_add_group(struct net_bridge *br,
775 goto err; 774 goto err;
776 775
777 if (!port) { 776 if (!port) {
778 mp->mglist = true; 777 if (!mp->host_joined) {
778 mp->host_joined = true;
779 br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0);
780 }
779 mod_timer(&mp->timer, now + br->multicast_membership_interval); 781 mod_timer(&mp->timer, now + br->multicast_membership_interval);
780 goto out; 782 goto out;
781 } 783 }
@@ -843,9 +845,10 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
843} 845}
844#endif 846#endif
845 847
846static void br_multicast_router_expired(unsigned long data) 848static void br_multicast_router_expired(struct timer_list *t)
847{ 849{
848 struct net_bridge_port *port = (void *)data; 850 struct net_bridge_port *port =
851 from_timer(port, t, multicast_router_timer);
849 struct net_bridge *br = port->br; 852 struct net_bridge *br = port->br;
850 853
851 spin_lock(&br->multicast_lock); 854 spin_lock(&br->multicast_lock);
@@ -859,8 +862,32 @@ out:
859 spin_unlock(&br->multicast_lock); 862 spin_unlock(&br->multicast_lock);
860} 863}
861 864
862static void br_multicast_local_router_expired(unsigned long data) 865static void br_mc_router_state_change(struct net_bridge *p,
866 bool is_mc_router)
867{
868 struct switchdev_attr attr = {
869 .orig_dev = p->dev,
870 .id = SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
871 .flags = SWITCHDEV_F_DEFER,
872 .u.mrouter = is_mc_router,
873 };
874
875 switchdev_port_attr_set(p->dev, &attr);
876}
877
878static void br_multicast_local_router_expired(struct timer_list *t)
863{ 879{
880 struct net_bridge *br = from_timer(br, t, multicast_router_timer);
881
882 spin_lock(&br->multicast_lock);
883 if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
884 br->multicast_router == MDB_RTR_TYPE_PERM ||
885 timer_pending(&br->multicast_router_timer))
886 goto out;
887
888 br_mc_router_state_change(br, false);
889out:
890 spin_unlock(&br->multicast_lock);
864} 891}
865 892
866static void br_multicast_querier_expired(struct net_bridge *br, 893static void br_multicast_querier_expired(struct net_bridge *br,
@@ -876,17 +903,17 @@ out:
876 spin_unlock(&br->multicast_lock); 903 spin_unlock(&br->multicast_lock);
877} 904}
878 905
879static void br_ip4_multicast_querier_expired(unsigned long data) 906static void br_ip4_multicast_querier_expired(struct timer_list *t)
880{ 907{
881 struct net_bridge *br = (void *)data; 908 struct net_bridge *br = from_timer(br, t, ip4_other_query.timer);
882 909
883 br_multicast_querier_expired(br, &br->ip4_own_query); 910 br_multicast_querier_expired(br, &br->ip4_own_query);
884} 911}
885 912
886#if IS_ENABLED(CONFIG_IPV6) 913#if IS_ENABLED(CONFIG_IPV6)
887static void br_ip6_multicast_querier_expired(unsigned long data) 914static void br_ip6_multicast_querier_expired(struct timer_list *t)
888{ 915{
889 struct net_bridge *br = (void *)data; 916 struct net_bridge *br = from_timer(br, t, ip6_other_query.timer);
890 917
891 br_multicast_querier_expired(br, &br->ip6_own_query); 918 br_multicast_querier_expired(br, &br->ip6_own_query);
892} 919}
@@ -987,17 +1014,17 @@ out:
987 spin_unlock(&br->multicast_lock); 1014 spin_unlock(&br->multicast_lock);
988} 1015}
989 1016
990static void br_ip4_multicast_port_query_expired(unsigned long data) 1017static void br_ip4_multicast_port_query_expired(struct timer_list *t)
991{ 1018{
992 struct net_bridge_port *port = (void *)data; 1019 struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer);
993 1020
994 br_multicast_port_query_expired(port, &port->ip4_own_query); 1021 br_multicast_port_query_expired(port, &port->ip4_own_query);
995} 1022}
996 1023
997#if IS_ENABLED(CONFIG_IPV6) 1024#if IS_ENABLED(CONFIG_IPV6)
998static void br_ip6_multicast_port_query_expired(unsigned long data) 1025static void br_ip6_multicast_port_query_expired(struct timer_list *t)
999{ 1026{
1000 struct net_bridge_port *port = (void *)data; 1027 struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer);
1001 1028
1002 br_multicast_port_query_expired(port, &port->ip6_own_query); 1029 br_multicast_port_query_expired(port, &port->ip6_own_query);
1003} 1030}
@@ -1019,13 +1046,13 @@ int br_multicast_add_port(struct net_bridge_port *port)
1019{ 1046{
1020 port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; 1047 port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
1021 1048
1022 setup_timer(&port->multicast_router_timer, br_multicast_router_expired, 1049 timer_setup(&port->multicast_router_timer,
1023 (unsigned long)port); 1050 br_multicast_router_expired, 0);
1024 setup_timer(&port->ip4_own_query.timer, 1051 timer_setup(&port->ip4_own_query.timer,
1025 br_ip4_multicast_port_query_expired, (unsigned long)port); 1052 br_ip4_multicast_port_query_expired, 0);
1026#if IS_ENABLED(CONFIG_IPV6) 1053#if IS_ENABLED(CONFIG_IPV6)
1027 setup_timer(&port->ip6_own_query.timer, 1054 timer_setup(&port->ip6_own_query.timer,
1028 br_ip6_multicast_port_query_expired, (unsigned long)port); 1055 br_ip6_multicast_port_query_expired, 0);
1029#endif 1056#endif
1030 br_mc_disabled_update(port->dev, port->br->multicast_disabled); 1057 br_mc_disabled_update(port->dev, port->br->multicast_disabled);
1031 1058
@@ -1364,9 +1391,12 @@ static void br_multicast_mark_router(struct net_bridge *br,
1364 unsigned long now = jiffies; 1391 unsigned long now = jiffies;
1365 1392
1366 if (!port) { 1393 if (!port) {
1367 if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) 1394 if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
1395 if (!timer_pending(&br->multicast_router_timer))
1396 br_mc_router_state_change(br, true);
1368 mod_timer(&br->multicast_router_timer, 1397 mod_timer(&br->multicast_router_timer,
1369 now + br->multicast_querier_interval); 1398 now + br->multicast_querier_interval);
1399 }
1370 return; 1400 return;
1371 } 1401 }
1372 1402
@@ -1451,7 +1481,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1451 1481
1452 max_delay *= br->multicast_last_member_count; 1482 max_delay *= br->multicast_last_member_count;
1453 1483
1454 if (mp->mglist && 1484 if (mp->host_joined &&
1455 (timer_pending(&mp->timer) ? 1485 (timer_pending(&mp->timer) ?
1456 time_after(mp->timer.expires, now + max_delay) : 1486 time_after(mp->timer.expires, now + max_delay) :
1457 try_to_del_timer_sync(&mp->timer) >= 0)) 1487 try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1535,7 +1565,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1535 goto out; 1565 goto out;
1536 1566
1537 max_delay *= br->multicast_last_member_count; 1567 max_delay *= br->multicast_last_member_count;
1538 if (mp->mglist && 1568 if (mp->host_joined &&
1539 (timer_pending(&mp->timer) ? 1569 (timer_pending(&mp->timer) ?
1540 time_after(mp->timer.expires, now + max_delay) : 1570 time_after(mp->timer.expires, now + max_delay) :
1541 try_to_del_timer_sync(&mp->timer) >= 0)) 1571 try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1596,7 +1626,7 @@ br_multicast_leave_group(struct net_bridge *br,
1596 br_mdb_notify(br->dev, port, group, RTM_DELMDB, 1626 br_mdb_notify(br->dev, port, group, RTM_DELMDB,
1597 p->flags); 1627 p->flags);
1598 1628
1599 if (!mp->ports && !mp->mglist && 1629 if (!mp->ports && !mp->host_joined &&
1600 netif_running(br->dev)) 1630 netif_running(br->dev))
1601 mod_timer(&mp->timer, jiffies); 1631 mod_timer(&mp->timer, jiffies);
1602 } 1632 }
@@ -1636,7 +1666,7 @@ br_multicast_leave_group(struct net_bridge *br,
1636 br->multicast_last_member_interval; 1666 br->multicast_last_member_interval;
1637 1667
1638 if (!port) { 1668 if (!port) {
1639 if (mp->mglist && 1669 if (mp->host_joined &&
1640 (timer_pending(&mp->timer) ? 1670 (timer_pending(&mp->timer) ?
1641 time_after(mp->timer.expires, time) : 1671 time_after(mp->timer.expires, time) :
1642 try_to_del_timer_sync(&mp->timer) >= 0)) { 1672 try_to_del_timer_sync(&mp->timer) >= 0)) {
@@ -1906,17 +1936,17 @@ static void br_multicast_query_expired(struct net_bridge *br,
1906 spin_unlock(&br->multicast_lock); 1936 spin_unlock(&br->multicast_lock);
1907} 1937}
1908 1938
1909static void br_ip4_multicast_query_expired(unsigned long data) 1939static void br_ip4_multicast_query_expired(struct timer_list *t)
1910{ 1940{
1911 struct net_bridge *br = (void *)data; 1941 struct net_bridge *br = from_timer(br, t, ip4_own_query.timer);
1912 1942
1913 br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier); 1943 br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
1914} 1944}
1915 1945
1916#if IS_ENABLED(CONFIG_IPV6) 1946#if IS_ENABLED(CONFIG_IPV6)
1917static void br_ip6_multicast_query_expired(unsigned long data) 1947static void br_ip6_multicast_query_expired(struct timer_list *t)
1918{ 1948{
1919 struct net_bridge *br = (void *)data; 1949 struct net_bridge *br = from_timer(br, t, ip6_own_query.timer);
1920 1950
1921 br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier); 1951 br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
1922} 1952}
@@ -1951,17 +1981,17 @@ void br_multicast_init(struct net_bridge *br)
1951 br->has_ipv6_addr = 1; 1981 br->has_ipv6_addr = 1;
1952 1982
1953 spin_lock_init(&br->multicast_lock); 1983 spin_lock_init(&br->multicast_lock);
1954 setup_timer(&br->multicast_router_timer, 1984 timer_setup(&br->multicast_router_timer,
1955 br_multicast_local_router_expired, 0); 1985 br_multicast_local_router_expired, 0);
1956 setup_timer(&br->ip4_other_query.timer, 1986 timer_setup(&br->ip4_other_query.timer,
1957 br_ip4_multicast_querier_expired, (unsigned long)br); 1987 br_ip4_multicast_querier_expired, 0);
1958 setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired, 1988 timer_setup(&br->ip4_own_query.timer,
1959 (unsigned long)br); 1989 br_ip4_multicast_query_expired, 0);
1960#if IS_ENABLED(CONFIG_IPV6) 1990#if IS_ENABLED(CONFIG_IPV6)
1961 setup_timer(&br->ip6_other_query.timer, 1991 timer_setup(&br->ip6_other_query.timer,
1962 br_ip6_multicast_querier_expired, (unsigned long)br); 1992 br_ip6_multicast_querier_expired, 0);
1963 setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired, 1993 timer_setup(&br->ip6_own_query.timer,
1964 (unsigned long)br); 1994 br_ip6_multicast_query_expired, 0);
1965#endif 1995#endif
1966} 1996}
1967 1997
@@ -2042,9 +2072,14 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
2042 switch (val) { 2072 switch (val) {
2043 case MDB_RTR_TYPE_DISABLED: 2073 case MDB_RTR_TYPE_DISABLED:
2044 case MDB_RTR_TYPE_PERM: 2074 case MDB_RTR_TYPE_PERM:
2075 br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
2045 del_timer(&br->multicast_router_timer); 2076 del_timer(&br->multicast_router_timer);
2046 /* fall through */ 2077 br->multicast_router = val;
2078 err = 0;
2079 break;
2047 case MDB_RTR_TYPE_TEMP_QUERY: 2080 case MDB_RTR_TYPE_TEMP_QUERY:
2081 if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
2082 br_mc_router_state_change(br, false);
2048 br->multicast_router = val; 2083 br->multicast_router = val;
2049 err = 0; 2084 err = 0;
2050 break; 2085 break;
@@ -2184,6 +2219,18 @@ bool br_multicast_enabled(const struct net_device *dev)
2184} 2219}
2185EXPORT_SYMBOL_GPL(br_multicast_enabled); 2220EXPORT_SYMBOL_GPL(br_multicast_enabled);
2186 2221
2222bool br_multicast_router(const struct net_device *dev)
2223{
2224 struct net_bridge *br = netdev_priv(dev);
2225 bool is_router;
2226
2227 spin_lock_bh(&br->multicast_lock);
2228 is_router = br_multicast_is_router(br);
2229 spin_unlock_bh(&br->multicast_lock);
2230 return is_router;
2231}
2232EXPORT_SYMBOL_GPL(br_multicast_router);
2233
2187int br_multicast_set_querier(struct net_bridge *br, unsigned long val) 2234int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
2188{ 2235{
2189 unsigned long max_delay; 2236 unsigned long max_delay;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index de2152730809..d0ef0a8e8831 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -138,6 +138,7 @@ static inline size_t br_port_info_size(void)
138 + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ 138 + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */
139 + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ 139 + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
140 + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */ 140 + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
141 + nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */
141 + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */ 142 + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
142 + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */ 143 + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
143 + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */ 144 + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
@@ -152,6 +153,7 @@ static inline size_t br_port_info_size(void)
152#ifdef CONFIG_BRIDGE_IGMP_SNOOPING 153#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
153 + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */ 154 + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */
154#endif 155#endif
156 + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */
155 + 0; 157 + 0;
156} 158}
157 159
@@ -208,7 +210,10 @@ static int br_port_fill_attrs(struct sk_buff *skb,
208 p->topology_change_ack) || 210 p->topology_change_ack) ||
209 nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) || 211 nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) ||
210 nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags & 212 nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags &
211 BR_VLAN_TUNNEL))) 213 BR_VLAN_TUNNEL)) ||
214 nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) ||
215 nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS,
216 !!(p->flags & BR_NEIGH_SUPPRESS)))
212 return -EMSGSIZE; 217 return -EMSGSIZE;
213 218
214 timerval = br_timer_value(&p->message_age_timer); 219 timerval = br_timer_value(&p->message_age_timer);
@@ -356,14 +361,14 @@ nla_put_failure:
356 * Contains port and master info as well as carrier and bridge state. 361 * Contains port and master info as well as carrier and bridge state.
357 */ 362 */
358static int br_fill_ifinfo(struct sk_buff *skb, 363static int br_fill_ifinfo(struct sk_buff *skb,
359 struct net_bridge_port *port, 364 const struct net_bridge_port *port,
360 u32 pid, u32 seq, int event, unsigned int flags, 365 u32 pid, u32 seq, int event, unsigned int flags,
361 u32 filter_mask, const struct net_device *dev) 366 u32 filter_mask, const struct net_device *dev)
362{ 367{
368 u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
363 struct net_bridge *br; 369 struct net_bridge *br;
364 struct ifinfomsg *hdr; 370 struct ifinfomsg *hdr;
365 struct nlmsghdr *nlh; 371 struct nlmsghdr *nlh;
366 u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
367 372
368 if (port) 373 if (port)
369 br = port->br; 374 br = port->br;
@@ -449,28 +454,36 @@ nla_put_failure:
449 return -EMSGSIZE; 454 return -EMSGSIZE;
450} 455}
451 456
452/* 457/* Notify listeners of a change in bridge or port information */
453 * Notify listeners of a change in port information 458void br_ifinfo_notify(int event, const struct net_bridge *br,
454 */ 459 const struct net_bridge_port *port)
455void br_ifinfo_notify(int event, struct net_bridge_port *port)
456{ 460{
457 struct net *net; 461 u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED;
462 struct net_device *dev;
458 struct sk_buff *skb; 463 struct sk_buff *skb;
459 int err = -ENOBUFS; 464 int err = -ENOBUFS;
460 u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; 465 struct net *net;
466 u16 port_no = 0;
461 467
462 if (!port) 468 if (WARN_ON(!port && !br))
463 return; 469 return;
464 470
465 net = dev_net(port->dev); 471 if (port) {
466 br_debug(port->br, "port %u(%s) event %d\n", 472 dev = port->dev;
467 (unsigned int)port->port_no, port->dev->name, event); 473 br = port->br;
474 port_no = port->port_no;
475 } else {
476 dev = br->dev;
477 }
478
479 net = dev_net(dev);
480 br_debug(br, "port %u(%s) event %d\n", port_no, dev->name, event);
468 481
469 skb = nlmsg_new(br_nlmsg_size(port->dev, filter), GFP_ATOMIC); 482 skb = nlmsg_new(br_nlmsg_size(dev, filter), GFP_ATOMIC);
470 if (skb == NULL) 483 if (skb == NULL)
471 goto errout; 484 goto errout;
472 485
473 err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, port->dev); 486 err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, dev);
474 if (err < 0) { 487 if (err < 0) {
475 /* -EMSGSIZE implies BUG in br_nlmsg_size() */ 488 /* -EMSGSIZE implies BUG in br_nlmsg_size() */
476 WARN_ON(err == -EMSGSIZE); 489 WARN_ON(err == -EMSGSIZE);
@@ -483,7 +496,6 @@ errout:
483 rtnl_set_sk_err(net, RTNLGRP_LINK, err); 496 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
484} 497}
485 498
486
487/* 499/*
488 * Dump information about all ports, in response to GETLINK 500 * Dump information about all ports, in response to GETLINK
489 */ 501 */
@@ -501,8 +513,9 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
501} 513}
502 514
503static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, 515static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
504 int cmd, struct bridge_vlan_info *vinfo) 516 int cmd, struct bridge_vlan_info *vinfo, bool *changed)
505{ 517{
518 bool curr_change;
506 int err = 0; 519 int err = 0;
507 520
508 switch (cmd) { 521 switch (cmd) {
@@ -511,22 +524,27 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
511 /* if the MASTER flag is set this will act on the global 524 /* if the MASTER flag is set this will act on the global
512 * per-VLAN entry as well 525 * per-VLAN entry as well
513 */ 526 */
514 err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); 527 err = nbp_vlan_add(p, vinfo->vid, vinfo->flags,
515 if (err) 528 &curr_change);
516 break;
517 } else { 529 } else {
518 vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY; 530 vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY;
519 err = br_vlan_add(br, vinfo->vid, vinfo->flags); 531 err = br_vlan_add(br, vinfo->vid, vinfo->flags,
532 &curr_change);
520 } 533 }
534 if (curr_change)
535 *changed = true;
521 break; 536 break;
522 537
523 case RTM_DELLINK: 538 case RTM_DELLINK:
524 if (p) { 539 if (p) {
525 nbp_vlan_delete(p, vinfo->vid); 540 if (!nbp_vlan_delete(p, vinfo->vid))
526 if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) 541 *changed = true;
527 br_vlan_delete(p->br, vinfo->vid); 542
528 } else { 543 if ((vinfo->flags & BRIDGE_VLAN_INFO_MASTER) &&
529 br_vlan_delete(br, vinfo->vid); 544 !br_vlan_delete(p->br, vinfo->vid))
545 *changed = true;
546 } else if (!br_vlan_delete(br, vinfo->vid)) {
547 *changed = true;
530 } 548 }
531 break; 549 break;
532 } 550 }
@@ -537,7 +555,8 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
537static int br_process_vlan_info(struct net_bridge *br, 555static int br_process_vlan_info(struct net_bridge *br,
538 struct net_bridge_port *p, int cmd, 556 struct net_bridge_port *p, int cmd,
539 struct bridge_vlan_info *vinfo_curr, 557 struct bridge_vlan_info *vinfo_curr,
540 struct bridge_vlan_info **vinfo_last) 558 struct bridge_vlan_info **vinfo_last,
559 bool *changed)
541{ 560{
542 if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK) 561 if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK)
543 return -EINVAL; 562 return -EINVAL;
@@ -567,7 +586,7 @@ static int br_process_vlan_info(struct net_bridge *br,
567 sizeof(struct bridge_vlan_info)); 586 sizeof(struct bridge_vlan_info));
568 for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) { 587 for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) {
569 tmp_vinfo.vid = v; 588 tmp_vinfo.vid = v;
570 err = br_vlan_info(br, p, cmd, &tmp_vinfo); 589 err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed);
571 if (err) 590 if (err)
572 break; 591 break;
573 } 592 }
@@ -576,13 +595,13 @@ static int br_process_vlan_info(struct net_bridge *br,
576 return err; 595 return err;
577 } 596 }
578 597
579 return br_vlan_info(br, p, cmd, vinfo_curr); 598 return br_vlan_info(br, p, cmd, vinfo_curr, changed);
580} 599}
581 600
582static int br_afspec(struct net_bridge *br, 601static int br_afspec(struct net_bridge *br,
583 struct net_bridge_port *p, 602 struct net_bridge_port *p,
584 struct nlattr *af_spec, 603 struct nlattr *af_spec,
585 int cmd) 604 int cmd, bool *changed)
586{ 605{
587 struct bridge_vlan_info *vinfo_curr = NULL; 606 struct bridge_vlan_info *vinfo_curr = NULL;
588 struct bridge_vlan_info *vinfo_last = NULL; 607 struct bridge_vlan_info *vinfo_last = NULL;
@@ -602,7 +621,8 @@ static int br_afspec(struct net_bridge *br,
602 return err; 621 return err;
603 err = br_process_vlan_tunnel_info(br, p, cmd, 622 err = br_process_vlan_tunnel_info(br, p, cmd,
604 &tinfo_curr, 623 &tinfo_curr,
605 &tinfo_last); 624 &tinfo_last,
625 changed);
606 if (err) 626 if (err)
607 return err; 627 return err;
608 break; 628 break;
@@ -611,7 +631,7 @@ static int br_afspec(struct net_bridge *br,
611 return -EINVAL; 631 return -EINVAL;
612 vinfo_curr = nla_data(attr); 632 vinfo_curr = nla_data(attr);
613 err = br_process_vlan_info(br, p, cmd, vinfo_curr, 633 err = br_process_vlan_info(br, p, cmd, vinfo_curr,
614 &vinfo_last); 634 &vinfo_last, changed);
615 if (err) 635 if (err)
616 return err; 636 return err;
617 break; 637 break;
@@ -637,6 +657,9 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
637 [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 }, 657 [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 },
638 [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 }, 658 [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
639 [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 }, 659 [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
660 [IFLA_BRPORT_VLAN_TUNNEL] = { .type = NLA_U8 },
661 [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
662 [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
640}; 663};
641 664
642/* Change the state of the port and notify spanning tree */ 665/* Change the state of the port and notify spanning tree */
@@ -773,6 +796,20 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
773 return err; 796 return err;
774 } 797 }
775#endif 798#endif
799
800 if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) {
801 u16 fwd_mask = nla_get_u16(tb[IFLA_BRPORT_GROUP_FWD_MASK]);
802
803 if (fwd_mask & BR_GROUPFWD_MACPAUSE)
804 return -EINVAL;
805 p->group_fwd_mask = fwd_mask;
806 }
807
808 err = br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS,
809 BR_NEIGH_SUPPRESS);
810 if (err)
811 return err;
812
776 br_port_flags_change(p, old_flags ^ p->flags); 813 br_port_flags_change(p, old_flags ^ p->flags);
777 return 0; 814 return 0;
778} 815}
@@ -780,10 +817,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
780/* Change state and parameters on port. */ 817/* Change state and parameters on port. */
781int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) 818int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
782{ 819{
820 struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
821 struct nlattr *tb[IFLA_BRPORT_MAX + 1];
822 struct net_bridge_port *p;
783 struct nlattr *protinfo; 823 struct nlattr *protinfo;
784 struct nlattr *afspec; 824 struct nlattr *afspec;
785 struct net_bridge_port *p; 825 bool changed = false;
786 struct nlattr *tb[IFLA_BRPORT_MAX + 1];
787 int err = 0; 826 int err = 0;
788 827
789 protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); 828 protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
@@ -819,15 +858,14 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
819 } 858 }
820 if (err) 859 if (err)
821 goto out; 860 goto out;
861 changed = true;
822 } 862 }
823 863
824 if (afspec) { 864 if (afspec)
825 err = br_afspec((struct net_bridge *)netdev_priv(dev), p, 865 err = br_afspec(br, p, afspec, RTM_SETLINK, &changed);
826 afspec, RTM_SETLINK);
827 }
828 866
829 if (err == 0) 867 if (changed)
830 br_ifinfo_notify(RTM_NEWLINK, p); 868 br_ifinfo_notify(RTM_NEWLINK, br, p);
831out: 869out:
832 return err; 870 return err;
833} 871}
@@ -835,8 +873,10 @@ out:
835/* Delete port information */ 873/* Delete port information */
836int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) 874int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
837{ 875{
838 struct nlattr *afspec; 876 struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
839 struct net_bridge_port *p; 877 struct net_bridge_port *p;
878 struct nlattr *afspec;
879 bool changed = false;
840 int err = 0; 880 int err = 0;
841 881
842 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); 882 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
@@ -848,13 +888,12 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
848 if (!p && !(dev->priv_flags & IFF_EBRIDGE)) 888 if (!p && !(dev->priv_flags & IFF_EBRIDGE))
849 return -EINVAL; 889 return -EINVAL;
850 890
851 err = br_afspec((struct net_bridge *)netdev_priv(dev), p, 891 err = br_afspec(br, p, afspec, RTM_DELLINK, &changed);
852 afspec, RTM_DELLINK); 892 if (changed)
853 if (err == 0)
854 /* Send RTM_NEWLINK because userspace 893 /* Send RTM_NEWLINK because userspace
855 * expects RTM_NEWLINK for vlan dels 894 * expects RTM_NEWLINK for vlan dels
856 */ 895 */
857 br_ifinfo_notify(RTM_NEWLINK, p); 896 br_ifinfo_notify(RTM_NEWLINK, br, p);
858 897
859 return err; 898 return err;
860} 899}
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index 3712c7f0e00c..da8cb99fd259 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -198,7 +198,7 @@ static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX +
198}; 198};
199 199
200static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd, 200static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd,
201 u16 vid, u32 tun_id) 201 u16 vid, u32 tun_id, bool *changed)
202{ 202{
203 int err = 0; 203 int err = 0;
204 204
@@ -208,9 +208,12 @@ static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd,
208 switch (cmd) { 208 switch (cmd) {
209 case RTM_SETLINK: 209 case RTM_SETLINK:
210 err = nbp_vlan_tunnel_info_add(p, vid, tun_id); 210 err = nbp_vlan_tunnel_info_add(p, vid, tun_id);
211 if (!err)
212 *changed = true;
211 break; 213 break;
212 case RTM_DELLINK: 214 case RTM_DELLINK:
213 nbp_vlan_tunnel_info_delete(p, vid); 215 if (!nbp_vlan_tunnel_info_delete(p, vid))
216 *changed = true;
214 break; 217 break;
215 } 218 }
216 219
@@ -254,7 +257,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr,
254int br_process_vlan_tunnel_info(struct net_bridge *br, 257int br_process_vlan_tunnel_info(struct net_bridge *br,
255 struct net_bridge_port *p, int cmd, 258 struct net_bridge_port *p, int cmd,
256 struct vtunnel_info *tinfo_curr, 259 struct vtunnel_info *tinfo_curr,
257 struct vtunnel_info *tinfo_last) 260 struct vtunnel_info *tinfo_last,
261 bool *changed)
258{ 262{
259 int err; 263 int err;
260 264
@@ -272,7 +276,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
272 return -EINVAL; 276 return -EINVAL;
273 t = tinfo_last->tunid; 277 t = tinfo_last->tunid;
274 for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) { 278 for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) {
275 err = br_vlan_tunnel_info(p, cmd, v, t); 279 err = br_vlan_tunnel_info(p, cmd, v, t, changed);
276 if (err) 280 if (err)
277 return err; 281 return err;
278 t++; 282 t++;
@@ -283,7 +287,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
283 if (tinfo_last->flags) 287 if (tinfo_last->flags)
284 return -EINVAL; 288 return -EINVAL;
285 err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid, 289 err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid,
286 tinfo_curr->tunid); 290 tinfo_curr->tunid, changed);
287 if (err) 291 if (err)
288 return err; 292 return err;
289 memset(tinfo_last, 0, sizeof(struct vtunnel_info)); 293 memset(tinfo_last, 0, sizeof(struct vtunnel_info));
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index e870cfc85b14..1312b8d20ec3 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -36,7 +36,14 @@
36/* Control of forwarding link local multicast */ 36/* Control of forwarding link local multicast */
37#define BR_GROUPFWD_DEFAULT 0 37#define BR_GROUPFWD_DEFAULT 0
38/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */ 38/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */
39#define BR_GROUPFWD_RESTRICTED 0x0007u 39enum {
40 BR_GROUPFWD_STP = BIT(0),
41 BR_GROUPFWD_MACPAUSE = BIT(1),
42 BR_GROUPFWD_LACP = BIT(2),
43};
44
45#define BR_GROUPFWD_RESTRICTED (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | \
46 BR_GROUPFWD_LACP)
40/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ 47/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */
41#define BR_GROUPFWD_8021AD 0xB801u 48#define BR_GROUPFWD_8021AD 0xB801u
42 49
@@ -202,7 +209,7 @@ struct net_bridge_mdb_entry
202 struct rcu_head rcu; 209 struct rcu_head rcu;
203 struct timer_list timer; 210 struct timer_list timer;
204 struct br_ip addr; 211 struct br_ip addr;
205 bool mglist; 212 bool host_joined;
206}; 213};
207 214
208struct net_bridge_mdb_htable 215struct net_bridge_mdb_htable
@@ -268,6 +275,7 @@ struct net_bridge_port {
268#ifdef CONFIG_NET_SWITCHDEV 275#ifdef CONFIG_NET_SWITCHDEV
269 int offload_fwd_mark; 276 int offload_fwd_mark;
270#endif 277#endif
278 u16 group_fwd_mask;
271}; 279};
272 280
273#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) 281#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
@@ -396,6 +404,7 @@ struct net_bridge {
396#ifdef CONFIG_NET_SWITCHDEV 404#ifdef CONFIG_NET_SWITCHDEV
397 int offload_fwd_mark; 405 int offload_fwd_mark;
398#endif 406#endif
407 bool neigh_suppress_enabled;
399}; 408};
400 409
401struct br_input_skb_cb { 410struct br_input_skb_cb {
@@ -558,7 +567,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
558void br_port_carrier_check(struct net_bridge_port *p); 567void br_port_carrier_check(struct net_bridge_port *p);
559int br_add_bridge(struct net *net, const char *name); 568int br_add_bridge(struct net *net, const char *name);
560int br_del_bridge(struct net *net, const char *name); 569int br_del_bridge(struct net *net, const char *name);
561int br_add_if(struct net_bridge *br, struct net_device *dev); 570int br_add_if(struct net_bridge *br, struct net_device *dev,
571 struct netlink_ext_ack *extack);
562int br_del_if(struct net_bridge *br, struct net_device *dev); 572int br_del_if(struct net_bridge *br, struct net_device *dev);
563int br_min_mtu(const struct net_bridge *br); 573int br_min_mtu(const struct net_bridge *br);
564netdev_features_t br_features_recompute(struct net_bridge *br, 574netdev_features_t br_features_recompute(struct net_bridge *br,
@@ -793,7 +803,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
793 const struct net_bridge_port *port, 803 const struct net_bridge_port *port,
794 struct net_bridge_vlan_group *vg, 804 struct net_bridge_vlan_group *vg,
795 struct sk_buff *skb); 805 struct sk_buff *skb);
796int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); 806int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
807 bool *changed);
797int br_vlan_delete(struct net_bridge *br, u16 vid); 808int br_vlan_delete(struct net_bridge *br, u16 vid);
798void br_vlan_flush(struct net_bridge *br); 809void br_vlan_flush(struct net_bridge *br);
799struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid); 810struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid);
@@ -806,7 +817,8 @@ int br_vlan_set_stats(struct net_bridge *br, unsigned long val);
806int br_vlan_init(struct net_bridge *br); 817int br_vlan_init(struct net_bridge *br);
807int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); 818int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
808int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid); 819int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
809int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); 820int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
821 bool *changed);
810int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); 822int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
811void nbp_vlan_flush(struct net_bridge_port *port); 823void nbp_vlan_flush(struct net_bridge_port *port);
812int nbp_vlan_init(struct net_bridge_port *port); 824int nbp_vlan_init(struct net_bridge_port *port);
@@ -893,8 +905,10 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
893 return skb; 905 return skb;
894} 906}
895 907
896static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) 908static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
909 bool *changed)
897{ 910{
911 *changed = false;
898 return -EOPNOTSUPP; 912 return -EOPNOTSUPP;
899} 913}
900 914
@@ -916,8 +930,10 @@ static inline int br_vlan_init(struct net_bridge *br)
916 return 0; 930 return 0;
917} 931}
918 932
919static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) 933static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
934 bool *changed)
920{ 935{
936 *changed = false;
921 return -EOPNOTSUPP; 937 return -EOPNOTSUPP;
922} 938}
923 939
@@ -1055,7 +1071,8 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr)
1055extern struct rtnl_link_ops br_link_ops; 1071extern struct rtnl_link_ops br_link_ops;
1056int br_netlink_init(void); 1072int br_netlink_init(void);
1057void br_netlink_fini(void); 1073void br_netlink_fini(void);
1058void br_ifinfo_notify(int event, struct net_bridge_port *port); 1074void br_ifinfo_notify(int event, const struct net_bridge *br,
1075 const struct net_bridge_port *port);
1059int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); 1076int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
1060int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); 1077int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
1061int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, 1078int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev,
@@ -1130,4 +1147,11 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
1130} 1147}
1131#endif /* CONFIG_NET_SWITCHDEV */ 1148#endif /* CONFIG_NET_SWITCHDEV */
1132 1149
1150/* br_arp_nd_proxy.c */
1151void br_recalculate_neigh_suppress_enabled(struct net_bridge *br);
1152void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
1153 u16 vid, struct net_bridge_port *p);
1154void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
1155 u16 vid, struct net_bridge_port *p, struct nd_msg *msg);
1156struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m);
1133#endif 1157#endif
diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h
index 4a447a378ab3..a259471bfd78 100644
--- a/net/bridge/br_private_tunnel.h
+++ b/net/bridge/br_private_tunnel.h
@@ -26,7 +26,8 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
26 struct net_bridge_port *p, 26 struct net_bridge_port *p,
27 int cmd, 27 int cmd,
28 struct vtunnel_info *tinfo_curr, 28 struct vtunnel_info *tinfo_curr,
29 struct vtunnel_info *tinfo_last); 29 struct vtunnel_info *tinfo_last,
30 bool *changed);
30int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg); 31int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg);
31int br_fill_vlan_tunnel_info(struct sk_buff *skb, 32int br_fill_vlan_tunnel_info(struct sk_buff *skb,
32 struct net_bridge_vlan_group *vg); 33 struct net_bridge_vlan_group *vg);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 8f56c2d1f1a7..b6941961a876 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -123,7 +123,7 @@ static void br_root_port_block(const struct net_bridge *br,
123 (unsigned int) p->port_no, p->dev->name); 123 (unsigned int) p->port_no, p->dev->name);
124 124
125 br_set_state(p, BR_STATE_LISTENING); 125 br_set_state(p, BR_STATE_LISTENING);
126 br_ifinfo_notify(RTM_NEWLINK, p); 126 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
127 127
128 if (br->forward_delay > 0) 128 if (br->forward_delay > 0)
129 mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); 129 mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
@@ -403,7 +403,7 @@ static void br_make_blocking(struct net_bridge_port *p)
403 br_topology_change_detection(p->br); 403 br_topology_change_detection(p->br);
404 404
405 br_set_state(p, BR_STATE_BLOCKING); 405 br_set_state(p, BR_STATE_BLOCKING);
406 br_ifinfo_notify(RTM_NEWLINK, p); 406 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
407 407
408 del_timer(&p->forward_delay_timer); 408 del_timer(&p->forward_delay_timer);
409 } 409 }
@@ -426,7 +426,7 @@ static void br_make_forwarding(struct net_bridge_port *p)
426 else 426 else
427 br_set_state(p, BR_STATE_LEARNING); 427 br_set_state(p, BR_STATE_LEARNING);
428 428
429 br_ifinfo_notify(RTM_NEWLINK, p); 429 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
430 430
431 if (br->forward_delay != 0) 431 if (br->forward_delay != 0)
432 mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); 432 mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 89110319ef0f..808e2b914015 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -96,7 +96,7 @@ void br_stp_enable_port(struct net_bridge_port *p)
96{ 96{
97 br_init_port(p); 97 br_init_port(p);
98 br_port_state_selection(p->br); 98 br_port_state_selection(p->br);
99 br_ifinfo_notify(RTM_NEWLINK, p); 99 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
100} 100}
101 101
102/* called under bridge lock */ 102/* called under bridge lock */
@@ -111,7 +111,7 @@ void br_stp_disable_port(struct net_bridge_port *p)
111 p->topology_change_ack = 0; 111 p->topology_change_ack = 0;
112 p->config_pending = 0; 112 p->config_pending = 0;
113 113
114 br_ifinfo_notify(RTM_NEWLINK, p); 114 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
115 115
116 del_timer(&p->message_age_timer); 116 del_timer(&p->message_age_timer);
117 del_timer(&p->forward_delay_timer); 117 del_timer(&p->forward_delay_timer);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 60b6fe277a8b..e7739de5f0e1 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -31,9 +31,9 @@ static int br_is_designated_for_some_port(const struct net_bridge *br)
31 return 0; 31 return 0;
32} 32}
33 33
34static void br_hello_timer_expired(unsigned long arg) 34static void br_hello_timer_expired(struct timer_list *t)
35{ 35{
36 struct net_bridge *br = (struct net_bridge *)arg; 36 struct net_bridge *br = from_timer(br, t, hello_timer);
37 37
38 br_debug(br, "hello timer expired\n"); 38 br_debug(br, "hello timer expired\n");
39 spin_lock(&br->lock); 39 spin_lock(&br->lock);
@@ -47,9 +47,9 @@ static void br_hello_timer_expired(unsigned long arg)
47 spin_unlock(&br->lock); 47 spin_unlock(&br->lock);
48} 48}
49 49
50static void br_message_age_timer_expired(unsigned long arg) 50static void br_message_age_timer_expired(struct timer_list *t)
51{ 51{
52 struct net_bridge_port *p = (struct net_bridge_port *) arg; 52 struct net_bridge_port *p = from_timer(p, t, message_age_timer);
53 struct net_bridge *br = p->br; 53 struct net_bridge *br = p->br;
54 const bridge_id *id = &p->designated_bridge; 54 const bridge_id *id = &p->designated_bridge;
55 int was_root; 55 int was_root;
@@ -80,9 +80,9 @@ static void br_message_age_timer_expired(unsigned long arg)
80 spin_unlock(&br->lock); 80 spin_unlock(&br->lock);
81} 81}
82 82
83static void br_forward_delay_timer_expired(unsigned long arg) 83static void br_forward_delay_timer_expired(struct timer_list *t)
84{ 84{
85 struct net_bridge_port *p = (struct net_bridge_port *) arg; 85 struct net_bridge_port *p = from_timer(p, t, forward_delay_timer);
86 struct net_bridge *br = p->br; 86 struct net_bridge *br = p->br;
87 87
88 br_debug(br, "port %u(%s) forward delay timer\n", 88 br_debug(br, "port %u(%s) forward delay timer\n",
@@ -99,14 +99,14 @@ static void br_forward_delay_timer_expired(unsigned long arg)
99 netif_carrier_on(br->dev); 99 netif_carrier_on(br->dev);
100 } 100 }
101 rcu_read_lock(); 101 rcu_read_lock();
102 br_ifinfo_notify(RTM_NEWLINK, p); 102 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
103 rcu_read_unlock(); 103 rcu_read_unlock();
104 spin_unlock(&br->lock); 104 spin_unlock(&br->lock);
105} 105}
106 106
107static void br_tcn_timer_expired(unsigned long arg) 107static void br_tcn_timer_expired(struct timer_list *t)
108{ 108{
109 struct net_bridge *br = (struct net_bridge *) arg; 109 struct net_bridge *br = from_timer(br, t, tcn_timer);
110 110
111 br_debug(br, "tcn timer expired\n"); 111 br_debug(br, "tcn timer expired\n");
112 spin_lock(&br->lock); 112 spin_lock(&br->lock);
@@ -118,9 +118,9 @@ static void br_tcn_timer_expired(unsigned long arg)
118 spin_unlock(&br->lock); 118 spin_unlock(&br->lock);
119} 119}
120 120
121static void br_topology_change_timer_expired(unsigned long arg) 121static void br_topology_change_timer_expired(struct timer_list *t)
122{ 122{
123 struct net_bridge *br = (struct net_bridge *) arg; 123 struct net_bridge *br = from_timer(br, t, topology_change_timer);
124 124
125 br_debug(br, "topo change timer expired\n"); 125 br_debug(br, "topo change timer expired\n");
126 spin_lock(&br->lock); 126 spin_lock(&br->lock);
@@ -129,9 +129,9 @@ static void br_topology_change_timer_expired(unsigned long arg)
129 spin_unlock(&br->lock); 129 spin_unlock(&br->lock);
130} 130}
131 131
132static void br_hold_timer_expired(unsigned long arg) 132static void br_hold_timer_expired(struct timer_list *t)
133{ 133{
134 struct net_bridge_port *p = (struct net_bridge_port *) arg; 134 struct net_bridge_port *p = from_timer(p, t, hold_timer);
135 135
136 br_debug(p->br, "port %u(%s) hold timer expired\n", 136 br_debug(p->br, "port %u(%s) hold timer expired\n",
137 (unsigned int) p->port_no, p->dev->name); 137 (unsigned int) p->port_no, p->dev->name);
@@ -144,27 +144,17 @@ static void br_hold_timer_expired(unsigned long arg)
144 144
145void br_stp_timer_init(struct net_bridge *br) 145void br_stp_timer_init(struct net_bridge *br)
146{ 146{
147 setup_timer(&br->hello_timer, br_hello_timer_expired, 147 timer_setup(&br->hello_timer, br_hello_timer_expired, 0);
148 (unsigned long) br); 148 timer_setup(&br->tcn_timer, br_tcn_timer_expired, 0);
149 149 timer_setup(&br->topology_change_timer,
150 setup_timer(&br->tcn_timer, br_tcn_timer_expired, 150 br_topology_change_timer_expired, 0);
151 (unsigned long) br);
152
153 setup_timer(&br->topology_change_timer,
154 br_topology_change_timer_expired,
155 (unsigned long) br);
156} 151}
157 152
158void br_stp_port_timer_init(struct net_bridge_port *p) 153void br_stp_port_timer_init(struct net_bridge_port *p)
159{ 154{
160 setup_timer(&p->message_age_timer, br_message_age_timer_expired, 155 timer_setup(&p->message_age_timer, br_message_age_timer_expired, 0);
161 (unsigned long) p); 156 timer_setup(&p->forward_delay_timer, br_forward_delay_timer_expired, 0);
162 157 timer_setup(&p->hold_timer, br_hold_timer_expired, 0);
163 setup_timer(&p->forward_delay_timer, br_forward_delay_timer_expired,
164 (unsigned long) p);
165
166 setup_timer(&p->hold_timer, br_hold_timer_expired,
167 (unsigned long) p);
168} 158}
169 159
170/* Report ticks left (in USER_HZ) used for API */ 160/* Report ticks left (in USER_HZ) used for API */
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 5d5d413a6cf8..0254c35b2bf0 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -165,6 +165,23 @@ static int store_flush(struct net_bridge_port *p, unsigned long v)
165} 165}
166static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); 166static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
167 167
168static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf)
169{
170 return sprintf(buf, "%#x\n", p->group_fwd_mask);
171}
172
173static int store_group_fwd_mask(struct net_bridge_port *p,
174 unsigned long v)
175{
176 if (v & BR_GROUPFWD_MACPAUSE)
177 return -EINVAL;
178 p->group_fwd_mask = v;
179
180 return 0;
181}
182static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
183 store_group_fwd_mask);
184
168BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); 185BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
169BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); 186BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD);
170BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); 187BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
@@ -174,6 +191,7 @@ BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
174BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); 191BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
175BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD); 192BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
176BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD); 193BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD);
194BRPORT_ATTR_FLAG(neigh_suppress, BR_NEIGH_SUPPRESS);
177 195
178#ifdef CONFIG_BRIDGE_IGMP_SNOOPING 196#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
179static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) 197static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -223,6 +241,8 @@ static const struct brport_attribute *brport_attrs[] = {
223 &brport_attr_proxyarp_wifi, 241 &brport_attr_proxyarp_wifi,
224 &brport_attr_multicast_flood, 242 &brport_attr_multicast_flood,
225 &brport_attr_broadcast_flood, 243 &brport_attr_broadcast_flood,
244 &brport_attr_group_fwd_mask,
245 &brport_attr_neigh_suppress,
226 NULL 246 NULL
227}; 247};
228 248
@@ -260,7 +280,7 @@ static ssize_t brport_store(struct kobject *kobj,
260 ret = brport_attr->store(p, val); 280 ret = brport_attr->store(p, val);
261 spin_unlock_bh(&p->br->lock); 281 spin_unlock_bh(&p->br->lock);
262 if (!ret) { 282 if (!ret) {
263 br_ifinfo_notify(RTM_NEWLINK, p); 283 br_ifinfo_notify(RTM_NEWLINK, NULL, p);
264 ret = count; 284 ret = count;
265 } 285 }
266 } 286 }
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 233a30040c91..51935270c651 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -32,27 +32,34 @@ static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid)
32 return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params); 32 return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params);
33} 33}
34 34
35static void __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid) 35static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid)
36{ 36{
37 if (vg->pvid == vid) 37 if (vg->pvid == vid)
38 return; 38 return false;
39 39
40 smp_wmb(); 40 smp_wmb();
41 vg->pvid = vid; 41 vg->pvid = vid;
42
43 return true;
42} 44}
43 45
44static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid) 46static bool __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
45{ 47{
46 if (vg->pvid != vid) 48 if (vg->pvid != vid)
47 return; 49 return false;
48 50
49 smp_wmb(); 51 smp_wmb();
50 vg->pvid = 0; 52 vg->pvid = 0;
53
54 return true;
51} 55}
52 56
53static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) 57/* return true if anything changed, false otherwise */
58static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
54{ 59{
55 struct net_bridge_vlan_group *vg; 60 struct net_bridge_vlan_group *vg;
61 u16 old_flags = v->flags;
62 bool ret;
56 63
57 if (br_vlan_is_master(v)) 64 if (br_vlan_is_master(v))
58 vg = br_vlan_group(v->br); 65 vg = br_vlan_group(v->br);
@@ -60,14 +67,16 @@ static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
60 vg = nbp_vlan_group(v->port); 67 vg = nbp_vlan_group(v->port);
61 68
62 if (flags & BRIDGE_VLAN_INFO_PVID) 69 if (flags & BRIDGE_VLAN_INFO_PVID)
63 __vlan_add_pvid(vg, v->vid); 70 ret = __vlan_add_pvid(vg, v->vid);
64 else 71 else
65 __vlan_delete_pvid(vg, v->vid); 72 ret = __vlan_delete_pvid(vg, v->vid);
66 73
67 if (flags & BRIDGE_VLAN_INFO_UNTAGGED) 74 if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
68 v->flags |= BRIDGE_VLAN_INFO_UNTAGGED; 75 v->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
69 else 76 else
70 v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED; 77 v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED;
78
79 return ret || !!(old_flags ^ v->flags);
71} 80}
72 81
73static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, 82static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
@@ -151,8 +160,10 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid
151 vg = br_vlan_group(br); 160 vg = br_vlan_group(br);
152 masterv = br_vlan_find(vg, vid); 161 masterv = br_vlan_find(vg, vid);
153 if (!masterv) { 162 if (!masterv) {
163 bool changed;
164
154 /* missing global ctx, create it now */ 165 /* missing global ctx, create it now */
155 if (br_vlan_add(br, vid, 0)) 166 if (br_vlan_add(br, vid, 0, &changed))
156 return NULL; 167 return NULL;
157 masterv = br_vlan_find(vg, vid); 168 masterv = br_vlan_find(vg, vid);
158 if (WARN_ON(!masterv)) 169 if (WARN_ON(!masterv))
@@ -232,8 +243,11 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
232 243
233 /* need to work on the master vlan too */ 244 /* need to work on the master vlan too */
234 if (flags & BRIDGE_VLAN_INFO_MASTER) { 245 if (flags & BRIDGE_VLAN_INFO_MASTER) {
235 err = br_vlan_add(br, v->vid, flags | 246 bool changed;
236 BRIDGE_VLAN_INFO_BRENTRY); 247
248 err = br_vlan_add(br, v->vid,
249 flags | BRIDGE_VLAN_INFO_BRENTRY,
250 &changed);
237 if (err) 251 if (err)
238 goto out_filt; 252 goto out_filt;
239 } 253 }
@@ -550,8 +564,9 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
550 564
551/* Must be protected by RTNL. 565/* Must be protected by RTNL.
552 * Must be called with vid in range from 1 to 4094 inclusive. 566 * Must be called with vid in range from 1 to 4094 inclusive.
567 * changed must be true only if the vlan was created or updated
553 */ 568 */
554int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) 569int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
555{ 570{
556 struct net_bridge_vlan_group *vg; 571 struct net_bridge_vlan_group *vg;
557 struct net_bridge_vlan *vlan; 572 struct net_bridge_vlan *vlan;
@@ -559,6 +574,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
559 574
560 ASSERT_RTNL(); 575 ASSERT_RTNL();
561 576
577 *changed = false;
562 vg = br_vlan_group(br); 578 vg = br_vlan_group(br);
563 vlan = br_vlan_find(vg, vid); 579 vlan = br_vlan_find(vg, vid);
564 if (vlan) { 580 if (vlan) {
@@ -576,8 +592,11 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
576 refcount_inc(&vlan->refcnt); 592 refcount_inc(&vlan->refcnt);
577 vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY; 593 vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
578 vg->num_vlans++; 594 vg->num_vlans++;
595 *changed = true;
579 } 596 }
580 __vlan_add_flags(vlan, flags); 597 if (__vlan_add_flags(vlan, flags))
598 *changed = true;
599
581 return 0; 600 return 0;
582 } 601 }
583 602
@@ -600,6 +619,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
600 if (ret) { 619 if (ret) {
601 free_percpu(vlan->stats); 620 free_percpu(vlan->stats);
602 kfree(vlan); 621 kfree(vlan);
622 } else {
623 *changed = true;
603 } 624 }
604 625
605 return ret; 626 return ret;
@@ -824,9 +845,10 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
824 const struct net_bridge_vlan *pvent; 845 const struct net_bridge_vlan *pvent;
825 struct net_bridge_vlan_group *vg; 846 struct net_bridge_vlan_group *vg;
826 struct net_bridge_port *p; 847 struct net_bridge_port *p;
848 unsigned long *changed;
849 bool vlchange;
827 u16 old_pvid; 850 u16 old_pvid;
828 int err = 0; 851 int err = 0;
829 unsigned long *changed;
830 852
831 if (!pvid) { 853 if (!pvid) {
832 br_vlan_disable_default_pvid(br); 854 br_vlan_disable_default_pvid(br);
@@ -850,7 +872,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
850 err = br_vlan_add(br, pvid, 872 err = br_vlan_add(br, pvid,
851 BRIDGE_VLAN_INFO_PVID | 873 BRIDGE_VLAN_INFO_PVID |
852 BRIDGE_VLAN_INFO_UNTAGGED | 874 BRIDGE_VLAN_INFO_UNTAGGED |
853 BRIDGE_VLAN_INFO_BRENTRY); 875 BRIDGE_VLAN_INFO_BRENTRY,
876 &vlchange);
854 if (err) 877 if (err)
855 goto out; 878 goto out;
856 br_vlan_delete(br, old_pvid); 879 br_vlan_delete(br, old_pvid);
@@ -869,7 +892,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
869 892
870 err = nbp_vlan_add(p, pvid, 893 err = nbp_vlan_add(p, pvid,
871 BRIDGE_VLAN_INFO_PVID | 894 BRIDGE_VLAN_INFO_PVID |
872 BRIDGE_VLAN_INFO_UNTAGGED); 895 BRIDGE_VLAN_INFO_UNTAGGED,
896 &vlchange);
873 if (err) 897 if (err)
874 goto err_port; 898 goto err_port;
875 nbp_vlan_delete(p, old_pvid); 899 nbp_vlan_delete(p, old_pvid);
@@ -890,7 +914,8 @@ err_port:
890 if (old_pvid) 914 if (old_pvid)
891 nbp_vlan_add(p, old_pvid, 915 nbp_vlan_add(p, old_pvid,
892 BRIDGE_VLAN_INFO_PVID | 916 BRIDGE_VLAN_INFO_PVID |
893 BRIDGE_VLAN_INFO_UNTAGGED); 917 BRIDGE_VLAN_INFO_UNTAGGED,
918 &vlchange);
894 nbp_vlan_delete(p, pvid); 919 nbp_vlan_delete(p, pvid);
895 } 920 }
896 921
@@ -899,7 +924,8 @@ err_port:
899 br_vlan_add(br, old_pvid, 924 br_vlan_add(br, old_pvid,
900 BRIDGE_VLAN_INFO_PVID | 925 BRIDGE_VLAN_INFO_PVID |
901 BRIDGE_VLAN_INFO_UNTAGGED | 926 BRIDGE_VLAN_INFO_UNTAGGED |
902 BRIDGE_VLAN_INFO_BRENTRY); 927 BRIDGE_VLAN_INFO_BRENTRY,
928 &vlchange);
903 br_vlan_delete(br, pvid); 929 br_vlan_delete(br, pvid);
904 } 930 }
905 goto out; 931 goto out;
@@ -931,6 +957,7 @@ int br_vlan_init(struct net_bridge *br)
931{ 957{
932 struct net_bridge_vlan_group *vg; 958 struct net_bridge_vlan_group *vg;
933 int ret = -ENOMEM; 959 int ret = -ENOMEM;
960 bool changed;
934 961
935 vg = kzalloc(sizeof(*vg), GFP_KERNEL); 962 vg = kzalloc(sizeof(*vg), GFP_KERNEL);
936 if (!vg) 963 if (!vg)
@@ -947,7 +974,7 @@ int br_vlan_init(struct net_bridge *br)
947 rcu_assign_pointer(br->vlgrp, vg); 974 rcu_assign_pointer(br->vlgrp, vg);
948 ret = br_vlan_add(br, 1, 975 ret = br_vlan_add(br, 1,
949 BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | 976 BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED |
950 BRIDGE_VLAN_INFO_BRENTRY); 977 BRIDGE_VLAN_INFO_BRENTRY, &changed);
951 if (ret) 978 if (ret)
952 goto err_vlan_add; 979 goto err_vlan_add;
953 980
@@ -992,9 +1019,12 @@ int nbp_vlan_init(struct net_bridge_port *p)
992 INIT_LIST_HEAD(&vg->vlan_list); 1019 INIT_LIST_HEAD(&vg->vlan_list);
993 rcu_assign_pointer(p->vlgrp, vg); 1020 rcu_assign_pointer(p->vlgrp, vg);
994 if (p->br->default_pvid) { 1021 if (p->br->default_pvid) {
1022 bool changed;
1023
995 ret = nbp_vlan_add(p, p->br->default_pvid, 1024 ret = nbp_vlan_add(p, p->br->default_pvid,
996 BRIDGE_VLAN_INFO_PVID | 1025 BRIDGE_VLAN_INFO_PVID |
997 BRIDGE_VLAN_INFO_UNTAGGED); 1026 BRIDGE_VLAN_INFO_UNTAGGED,
1027 &changed);
998 if (ret) 1028 if (ret)
999 goto err_vlan_add; 1029 goto err_vlan_add;
1000 } 1030 }
@@ -1016,8 +1046,10 @@ err_vlan_enabled:
1016 1046
1017/* Must be protected by RTNL. 1047/* Must be protected by RTNL.
1018 * Must be called with vid in range from 1 to 4094 inclusive. 1048 * Must be called with vid in range from 1 to 4094 inclusive.
1049 * changed must be true only if the vlan was created or updated
1019 */ 1050 */
1020int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) 1051int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
1052 bool *changed)
1021{ 1053{
1022 struct switchdev_obj_port_vlan v = { 1054 struct switchdev_obj_port_vlan v = {
1023 .obj.orig_dev = port->dev, 1055 .obj.orig_dev = port->dev,
@@ -1031,13 +1063,15 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
1031 1063
1032 ASSERT_RTNL(); 1064 ASSERT_RTNL();
1033 1065
1066 *changed = false;
1034 vlan = br_vlan_find(nbp_vlan_group(port), vid); 1067 vlan = br_vlan_find(nbp_vlan_group(port), vid);
1035 if (vlan) { 1068 if (vlan) {
1036 /* Pass the flags to the hardware bridge */ 1069 /* Pass the flags to the hardware bridge */
1037 ret = switchdev_port_obj_add(port->dev, &v.obj); 1070 ret = switchdev_port_obj_add(port->dev, &v.obj);
1038 if (ret && ret != -EOPNOTSUPP) 1071 if (ret && ret != -EOPNOTSUPP)
1039 return ret; 1072 return ret;
1040 __vlan_add_flags(vlan, flags); 1073 *changed = __vlan_add_flags(vlan, flags);
1074
1041 return 0; 1075 return 0;
1042 } 1076 }
1043 1077
@@ -1050,6 +1084,8 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
1050 ret = __vlan_add(vlan, flags); 1084 ret = __vlan_add(vlan, flags);
1051 if (ret) 1085 if (ret)
1052 kfree(vlan); 1086 kfree(vlan);
1087 else
1088 *changed = true;
1053 1089
1054 return ret; 1090 return ret;
1055} 1091}
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 3b3dcf719e07..37817d25b63d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2112,9 +2112,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
2112 for (i = 0, j = 1 ; j < 4 ; j++, i++) { 2112 for (i = 0, j = 1 ; j < 4 ; j++, i++) {
2113 struct compat_ebt_entry_mwt *match32; 2113 struct compat_ebt_entry_mwt *match32;
2114 unsigned int size; 2114 unsigned int size;
2115 char *buf = buf_start; 2115 char *buf = buf_start + offsets[i];
2116 2116
2117 buf = buf_start + offsets[i];
2118 if (offsets[i] > offsets[j]) 2117 if (offsets[i] > offsets[j])
2119 return -EINVAL; 2118 return -EINVAL;
2120 2119
diff --git a/net/can/af_can.c b/net/can/af_can.c
index ecd5c703d11e..003b2d6d655f 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -887,8 +887,8 @@ static int can_pernet_init(struct net *net)
887 if (IS_ENABLED(CONFIG_PROC_FS)) { 887 if (IS_ENABLED(CONFIG_PROC_FS)) {
888 /* the statistics are updated every second (timer triggered) */ 888 /* the statistics are updated every second (timer triggered) */
889 if (stats_timer) { 889 if (stats_timer) {
890 setup_timer(&net->can.can_stattimer, can_stat_update, 890 timer_setup(&net->can.can_stattimer, can_stat_update,
891 (unsigned long)net); 891 0);
892 mod_timer(&net->can.can_stattimer, 892 mod_timer(&net->can.can_stattimer,
893 round_jiffies(jiffies + HZ)); 893 round_jiffies(jiffies + HZ));
894 } 894 }
diff --git a/net/can/af_can.h b/net/can/af_can.h
index d0ef45bb2a72..eca6463c6213 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -113,6 +113,6 @@ struct s_pstats {
113/* function prototypes for the CAN networklayer procfs (proc.c) */ 113/* function prototypes for the CAN networklayer procfs (proc.c) */
114void can_init_proc(struct net *net); 114void can_init_proc(struct net *net);
115void can_remove_proc(struct net *net); 115void can_remove_proc(struct net *net);
116void can_stat_update(unsigned long data); 116void can_stat_update(struct timer_list *t);
117 117
118#endif /* AF_CAN_H */ 118#endif /* AF_CAN_H */
diff --git a/net/can/proc.c b/net/can/proc.c
index 83045f00c63c..0c59f876fe6f 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -115,9 +115,9 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
115 return rate; 115 return rate;
116} 116}
117 117
118void can_stat_update(unsigned long data) 118void can_stat_update(struct timer_list *t)
119{ 119{
120 struct net *net = (struct net *)data; 120 struct net *net = from_timer(net, t, can.can_stattimer);
121 struct s_stats *can_stats = net->can.can_stats; 121 struct s_stats *can_stats = net->can.can_stats;
122 unsigned long j = jiffies; /* snapshot */ 122 unsigned long j = jiffies; /* snapshot */
123 123
diff --git a/net/ceph/ceph_hash.c b/net/ceph/ceph_hash.c
index 67bb1f11e613..9a5850f264ed 100644
--- a/net/ceph/ceph_hash.c
+++ b/net/ceph/ceph_hash.c
@@ -47,28 +47,38 @@ unsigned int ceph_str_hash_rjenkins(const char *str, unsigned int length)
47 47
48 /* handle the last 11 bytes */ 48 /* handle the last 11 bytes */
49 c = c + length; 49 c = c + length;
50 switch (len) { /* all the case statements fall through */ 50 switch (len) {
51 case 11: 51 case 11:
52 c = c + ((__u32)k[10] << 24); 52 c = c + ((__u32)k[10] << 24);
53 /* fall through */
53 case 10: 54 case 10:
54 c = c + ((__u32)k[9] << 16); 55 c = c + ((__u32)k[9] << 16);
56 /* fall through */
55 case 9: 57 case 9:
56 c = c + ((__u32)k[8] << 8); 58 c = c + ((__u32)k[8] << 8);
57 /* the first byte of c is reserved for the length */ 59 /* the first byte of c is reserved for the length */
60 /* fall through */
58 case 8: 61 case 8:
59 b = b + ((__u32)k[7] << 24); 62 b = b + ((__u32)k[7] << 24);
63 /* fall through */
60 case 7: 64 case 7:
61 b = b + ((__u32)k[6] << 16); 65 b = b + ((__u32)k[6] << 16);
66 /* fall through */
62 case 6: 67 case 6:
63 b = b + ((__u32)k[5] << 8); 68 b = b + ((__u32)k[5] << 8);
69 /* fall through */
64 case 5: 70 case 5:
65 b = b + k[4]; 71 b = b + k[4];
72 /* fall through */
66 case 4: 73 case 4:
67 a = a + ((__u32)k[3] << 24); 74 a = a + ((__u32)k[3] << 24);
75 /* fall through */
68 case 3: 76 case 3:
69 a = a + ((__u32)k[2] << 16); 77 a = a + ((__u32)k[2] << 16);
78 /* fall through */
70 case 2: 79 case 2:
71 a = a + ((__u32)k[1] << 8); 80 a = a + ((__u32)k[1] << 8);
81 /* fall through */
72 case 1: 82 case 1:
73 a = a + k[0]; 83 a = a + k[0];
74 /* case 0: nothing left to add */ 84 /* case 0: nothing left to add */
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 489610ac1cdd..bf9d079cbafd 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -37,7 +37,9 @@ static int set_secret(struct ceph_crypto_key *key, void *buf)
37 return -ENOTSUPP; 37 return -ENOTSUPP;
38 } 38 }
39 39
40 WARN_ON(!key->len); 40 if (!key->len)
41 return -EINVAL;
42
41 key->key = kmemdup(buf, key->len, GFP_NOIO); 43 key->key = kmemdup(buf, key->len, GFP_NOIO);
42 if (!key->key) { 44 if (!key->key) {
43 ret = -ENOMEM; 45 ret = -ENOMEM;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index ad93342c90d7..8a4d3758030b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -430,6 +430,7 @@ static void ceph_sock_state_change(struct sock *sk)
430 switch (sk->sk_state) { 430 switch (sk->sk_state) {
431 case TCP_CLOSE: 431 case TCP_CLOSE:
432 dout("%s TCP_CLOSE\n", __func__); 432 dout("%s TCP_CLOSE\n", __func__);
433 /* fall through */
433 case TCP_CLOSE_WAIT: 434 case TCP_CLOSE_WAIT:
434 dout("%s TCP_CLOSE_WAIT\n", __func__); 435 dout("%s TCP_CLOSE_WAIT\n", __func__);
435 con_sock_state_closing(con); 436 con_sock_state_closing(con);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 9ae1bab8c05d..1547107f4854 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1279,9 +1279,10 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
1279 1279
1280 /* 1280 /*
1281 * Older OSDs don't set reply tid even if the orignal 1281 * Older OSDs don't set reply tid even if the orignal
1282 * request had a non-zero tid. Workaround this weirdness 1282 * request had a non-zero tid. Work around this weirdness
1283 * by falling through to the allocate case. 1283 * by allocating a new message.
1284 */ 1284 */
1285 /* fall through */
1285 case CEPH_MSG_MON_MAP: 1286 case CEPH_MSG_MON_MAP:
1286 case CEPH_MSG_MDS_MAP: 1287 case CEPH_MSG_MDS_MAP:
1287 case CEPH_MSG_OSD_MAP: 1288 case CEPH_MSG_OSD_MAP:
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index ee43bc13221c..a3d0adc828e6 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -25,9 +25,9 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
25 return ERR_PTR(-ENOMEM); 25 return ERR_PTR(-ENOMEM);
26 26
27 while (got < num_pages) { 27 while (got < num_pages) {
28 rc = get_user_pages_unlocked( 28 rc = get_user_pages_fast(
29 (unsigned long)data + ((unsigned long)got * PAGE_SIZE), 29 (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
30 num_pages - got, pages + got, write_page ? FOLL_WRITE : 0); 30 num_pages - got, write_page, pages + got);
31 if (rc < 0) 31 if (rc < 0)
32 break; 32 break;
33 BUG_ON(rc == 0); 33 BUG_ON(rc == 0);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 3964c108b169..522873ed120b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -189,7 +189,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
189 } 189 }
190 if (!skb->len) { 190 if (!skb->len) {
191 skb = skb_set_peeked(skb); 191 skb = skb_set_peeked(skb);
192 if (unlikely(IS_ERR(skb))) { 192 if (IS_ERR(skb)) {
193 *err = PTR_ERR(skb); 193 *err = PTR_ERR(skb);
194 return NULL; 194 return NULL;
195 } 195 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 61559ca3980b..8ee29f4f5fa9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
145#include <linux/crash_dump.h> 145#include <linux/crash_dump.h>
146#include <linux/sctp.h> 146#include <linux/sctp.h>
147#include <net/udp_tunnel.h> 147#include <net/udp_tunnel.h>
148#include <linux/net_namespace.h>
148 149
149#include "net-sysfs.h" 150#include "net-sysfs.h"
150 151
@@ -162,7 +163,6 @@ static struct list_head offload_base __read_mostly;
162 163
163static int netif_rx_internal(struct sk_buff *skb); 164static int netif_rx_internal(struct sk_buff *skb);
164static int call_netdevice_notifiers_info(unsigned long val, 165static int call_netdevice_notifiers_info(unsigned long val,
165 struct net_device *dev,
166 struct netdev_notifier_info *info); 166 struct netdev_notifier_info *info);
167static struct napi_struct *napi_by_id(unsigned int napi_id); 167static struct napi_struct *napi_by_id(unsigned int napi_id);
168 168
@@ -188,6 +188,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id);
188DEFINE_RWLOCK(dev_base_lock); 188DEFINE_RWLOCK(dev_base_lock);
189EXPORT_SYMBOL(dev_base_lock); 189EXPORT_SYMBOL(dev_base_lock);
190 190
191static DEFINE_MUTEX(ifalias_mutex);
192
191/* protects napi_hash addition/deletion and napi_gen_id */ 193/* protects napi_hash addition/deletion and napi_gen_id */
192static DEFINE_SPINLOCK(napi_hash_lock); 194static DEFINE_SPINLOCK(napi_hash_lock);
193 195
@@ -1062,7 +1064,10 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1062 unsigned long *inuse; 1064 unsigned long *inuse;
1063 struct net_device *d; 1065 struct net_device *d;
1064 1066
1065 p = strnchr(name, IFNAMSIZ-1, '%'); 1067 if (!dev_valid_name(name))
1068 return -EINVAL;
1069
1070 p = strchr(name, '%');
1066 if (p) { 1071 if (p) {
1067 /* 1072 /*
1068 * Verify the string as this thing may have come from 1073 * Verify the string as this thing may have come from
@@ -1093,8 +1098,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1093 free_page((unsigned long) inuse); 1098 free_page((unsigned long) inuse);
1094 } 1099 }
1095 1100
1096 if (buf != name) 1101 snprintf(buf, IFNAMSIZ, name, i);
1097 snprintf(buf, IFNAMSIZ, name, i);
1098 if (!__dev_get_by_name(net, buf)) 1102 if (!__dev_get_by_name(net, buf))
1099 return i; 1103 return i;
1100 1104
@@ -1102,7 +1106,21 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1102 * when the name is long and there isn't enough space left 1106 * when the name is long and there isn't enough space left
1103 * for the digits, or if all bits are used. 1107 * for the digits, or if all bits are used.
1104 */ 1108 */
1105 return -ENFILE; 1109 return p ? -ENFILE : -EEXIST;
1110}
1111
1112static int dev_alloc_name_ns(struct net *net,
1113 struct net_device *dev,
1114 const char *name)
1115{
1116 char buf[IFNAMSIZ];
1117 int ret;
1118
1119 BUG_ON(!net);
1120 ret = __dev_alloc_name(net, name, buf);
1121 if (ret >= 0)
1122 strlcpy(dev->name, buf, IFNAMSIZ);
1123 return ret;
1106} 1124}
1107 1125
1108/** 1126/**
@@ -1121,48 +1139,14 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1121 1139
1122int dev_alloc_name(struct net_device *dev, const char *name) 1140int dev_alloc_name(struct net_device *dev, const char *name)
1123{ 1141{
1124 char buf[IFNAMSIZ]; 1142 return dev_alloc_name_ns(dev_net(dev), dev, name);
1125 struct net *net;
1126 int ret;
1127
1128 BUG_ON(!dev_net(dev));
1129 net = dev_net(dev);
1130 ret = __dev_alloc_name(net, name, buf);
1131 if (ret >= 0)
1132 strlcpy(dev->name, buf, IFNAMSIZ);
1133 return ret;
1134} 1143}
1135EXPORT_SYMBOL(dev_alloc_name); 1144EXPORT_SYMBOL(dev_alloc_name);
1136 1145
1137static int dev_alloc_name_ns(struct net *net,
1138 struct net_device *dev,
1139 const char *name)
1140{
1141 char buf[IFNAMSIZ];
1142 int ret;
1143
1144 ret = __dev_alloc_name(net, name, buf);
1145 if (ret >= 0)
1146 strlcpy(dev->name, buf, IFNAMSIZ);
1147 return ret;
1148}
1149
1150int dev_get_valid_name(struct net *net, struct net_device *dev, 1146int dev_get_valid_name(struct net *net, struct net_device *dev,
1151 const char *name) 1147 const char *name)
1152{ 1148{
1153 BUG_ON(!net); 1149 return dev_alloc_name_ns(net, dev, name);
1154
1155 if (!dev_valid_name(name))
1156 return -EINVAL;
1157
1158 if (strchr(name, '%'))
1159 return dev_alloc_name_ns(net, dev, name);
1160 else if (__dev_get_by_name(net, name))
1161 return -EEXIST;
1162 else if (dev->name != name)
1163 strlcpy(dev->name, name, IFNAMSIZ);
1164
1165 return 0;
1166} 1150}
1167EXPORT_SYMBOL(dev_get_valid_name); 1151EXPORT_SYMBOL(dev_get_valid_name);
1168 1152
@@ -1265,29 +1249,53 @@ rollback:
1265 */ 1249 */
1266int dev_set_alias(struct net_device *dev, const char *alias, size_t len) 1250int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1267{ 1251{
1268 char *new_ifalias; 1252 struct dev_ifalias *new_alias = NULL;
1269
1270 ASSERT_RTNL();
1271 1253
1272 if (len >= IFALIASZ) 1254 if (len >= IFALIASZ)
1273 return -EINVAL; 1255 return -EINVAL;
1274 1256
1275 if (!len) { 1257 if (len) {
1276 kfree(dev->ifalias); 1258 new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
1277 dev->ifalias = NULL; 1259 if (!new_alias)
1278 return 0; 1260 return -ENOMEM;
1261
1262 memcpy(new_alias->ifalias, alias, len);
1263 new_alias->ifalias[len] = 0;
1279 } 1264 }
1280 1265
1281 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); 1266 mutex_lock(&ifalias_mutex);
1282 if (!new_ifalias) 1267 rcu_swap_protected(dev->ifalias, new_alias,
1283 return -ENOMEM; 1268 mutex_is_locked(&ifalias_mutex));
1284 dev->ifalias = new_ifalias; 1269 mutex_unlock(&ifalias_mutex);
1285 memcpy(dev->ifalias, alias, len); 1270
1286 dev->ifalias[len] = 0; 1271 if (new_alias)
1272 kfree_rcu(new_alias, rcuhead);
1287 1273
1288 return len; 1274 return len;
1289} 1275}
1290 1276
1277/**
1278 * dev_get_alias - get ifalias of a device
1279 * @dev: device
1280 * @name: buffer to store name of ifalias
1281 * @len: size of buffer
1282 *
1283 * get ifalias for a device. Caller must make sure dev cannot go
1284 * away, e.g. rcu read lock or own a reference count to device.
1285 */
1286int dev_get_alias(const struct net_device *dev, char *name, size_t len)
1287{
1288 const struct dev_ifalias *alias;
1289 int ret = 0;
1290
1291 rcu_read_lock();
1292 alias = rcu_dereference(dev->ifalias);
1293 if (alias)
1294 ret = snprintf(name, len, "%s", alias->ifalias);
1295 rcu_read_unlock();
1296
1297 return ret;
1298}
1291 1299
1292/** 1300/**
1293 * netdev_features_change - device changes features 1301 * netdev_features_change - device changes features
@@ -1312,10 +1320,11 @@ EXPORT_SYMBOL(netdev_features_change);
1312void netdev_state_change(struct net_device *dev) 1320void netdev_state_change(struct net_device *dev)
1313{ 1321{
1314 if (dev->flags & IFF_UP) { 1322 if (dev->flags & IFF_UP) {
1315 struct netdev_notifier_change_info change_info; 1323 struct netdev_notifier_change_info change_info = {
1324 .info.dev = dev,
1325 };
1316 1326
1317 change_info.flags_changed = 0; 1327 call_netdevice_notifiers_info(NETDEV_CHANGE,
1318 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
1319 &change_info.info); 1328 &change_info.info);
1320 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); 1329 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1321 } 1330 }
@@ -1536,9 +1545,10 @@ EXPORT_SYMBOL(dev_disable_lro);
1536static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, 1545static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1537 struct net_device *dev) 1546 struct net_device *dev)
1538{ 1547{
1539 struct netdev_notifier_info info; 1548 struct netdev_notifier_info info = {
1549 .dev = dev,
1550 };
1540 1551
1541 netdev_notifier_info_init(&info, dev);
1542 return nb->notifier_call(nb, val, &info); 1552 return nb->notifier_call(nb, val, &info);
1543} 1553}
1544 1554
@@ -1663,11 +1673,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
1663 */ 1673 */
1664 1674
1665static int call_netdevice_notifiers_info(unsigned long val, 1675static int call_netdevice_notifiers_info(unsigned long val,
1666 struct net_device *dev,
1667 struct netdev_notifier_info *info) 1676 struct netdev_notifier_info *info)
1668{ 1677{
1669 ASSERT_RTNL(); 1678 ASSERT_RTNL();
1670 netdev_notifier_info_init(info, dev);
1671 return raw_notifier_call_chain(&netdev_chain, val, info); 1679 return raw_notifier_call_chain(&netdev_chain, val, info);
1672} 1680}
1673 1681
@@ -1682,9 +1690,11 @@ static int call_netdevice_notifiers_info(unsigned long val,
1682 1690
1683int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1691int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1684{ 1692{
1685 struct netdev_notifier_info info; 1693 struct netdev_notifier_info info = {
1694 .dev = dev,
1695 };
1686 1696
1687 return call_netdevice_notifiers_info(val, dev, &info); 1697 return call_netdevice_notifiers_info(val, &info);
1688} 1698}
1689EXPORT_SYMBOL(call_netdevice_notifiers); 1699EXPORT_SYMBOL(call_netdevice_notifiers);
1690 1700
@@ -2012,6 +2022,7 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
2012 2022
2013 return 0; 2023 return 0;
2014} 2024}
2025EXPORT_SYMBOL(netdev_txq_to_tc);
2015 2026
2016#ifdef CONFIG_XPS 2027#ifdef CONFIG_XPS
2017static DEFINE_MUTEX(xps_map_mutex); 2028static DEFINE_MUTEX(xps_map_mutex);
@@ -3245,22 +3256,22 @@ EXPORT_SYMBOL(dev_loopback_xmit);
3245static struct sk_buff * 3256static struct sk_buff *
3246sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) 3257sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
3247{ 3258{
3248 struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list); 3259 struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
3249 struct tcf_result cl_res; 3260 struct tcf_result cl_res;
3250 3261
3251 if (!cl) 3262 if (!miniq)
3252 return skb; 3263 return skb;
3253 3264
3254 /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ 3265 /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
3255 qdisc_bstats_cpu_update(cl->q, skb); 3266 mini_qdisc_bstats_cpu_update(miniq, skb);
3256 3267
3257 switch (tcf_classify(skb, cl, &cl_res, false)) { 3268 switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
3258 case TC_ACT_OK: 3269 case TC_ACT_OK:
3259 case TC_ACT_RECLASSIFY: 3270 case TC_ACT_RECLASSIFY:
3260 skb->tc_index = TC_H_MIN(cl_res.classid); 3271 skb->tc_index = TC_H_MIN(cl_res.classid);
3261 break; 3272 break;
3262 case TC_ACT_SHOT: 3273 case TC_ACT_SHOT:
3263 qdisc_qstats_cpu_drop(cl->q); 3274 mini_qdisc_qstats_cpu_drop(miniq);
3264 *ret = NET_XMIT_DROP; 3275 *ret = NET_XMIT_DROP;
3265 kfree_skb(skb); 3276 kfree_skb(skb);
3266 return NULL; 3277 return NULL;
@@ -3864,8 +3875,8 @@ drop:
3864static u32 netif_receive_generic_xdp(struct sk_buff *skb, 3875static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3865 struct bpf_prog *xdp_prog) 3876 struct bpf_prog *xdp_prog)
3866{ 3877{
3878 u32 metalen, act = XDP_DROP;
3867 struct xdp_buff xdp; 3879 struct xdp_buff xdp;
3868 u32 act = XDP_DROP;
3869 void *orig_data; 3880 void *orig_data;
3870 int hlen, off; 3881 int hlen, off;
3871 u32 mac_len; 3882 u32 mac_len;
@@ -3876,8 +3887,25 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3876 if (skb_cloned(skb)) 3887 if (skb_cloned(skb))
3877 return XDP_PASS; 3888 return XDP_PASS;
3878 3889
3879 if (skb_linearize(skb)) 3890 /* XDP packets must be linear and must have sufficient headroom
3880 goto do_drop; 3891 * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
3892 * native XDP provides, thus we need to do it here as well.
3893 */
3894 if (skb_is_nonlinear(skb) ||
3895 skb_headroom(skb) < XDP_PACKET_HEADROOM) {
3896 int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
3897 int troom = skb->tail + skb->data_len - skb->end;
3898
3899 /* In case we have to go down the path and also linearize,
3900 * then lets do the pskb_expand_head() work just once here.
3901 */
3902 if (pskb_expand_head(skb,
3903 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
3904 troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
3905 goto do_drop;
3906 if (troom > 0 && __skb_linearize(skb))
3907 goto do_drop;
3908 }
3881 3909
3882 /* The XDP program wants to see the packet starting at the MAC 3910 /* The XDP program wants to see the packet starting at the MAC
3883 * header. 3911 * header.
@@ -3885,6 +3913,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3885 mac_len = skb->data - skb_mac_header(skb); 3913 mac_len = skb->data - skb_mac_header(skb);
3886 hlen = skb_headlen(skb) + mac_len; 3914 hlen = skb_headlen(skb) + mac_len;
3887 xdp.data = skb->data - mac_len; 3915 xdp.data = skb->data - mac_len;
3916 xdp.data_meta = xdp.data;
3888 xdp.data_end = xdp.data + hlen; 3917 xdp.data_end = xdp.data + hlen;
3889 xdp.data_hard_start = skb->data - skb_headroom(skb); 3918 xdp.data_hard_start = skb->data - skb_headroom(skb);
3890 orig_data = xdp.data; 3919 orig_data = xdp.data;
@@ -3902,10 +3931,12 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3902 case XDP_REDIRECT: 3931 case XDP_REDIRECT:
3903 case XDP_TX: 3932 case XDP_TX:
3904 __skb_push(skb, mac_len); 3933 __skb_push(skb, mac_len);
3905 /* fall through */ 3934 break;
3906 case XDP_PASS: 3935 case XDP_PASS:
3936 metalen = xdp.data - xdp.data_meta;
3937 if (metalen)
3938 skb_metadata_set(skb, metalen);
3907 break; 3939 break;
3908
3909 default: 3940 default:
3910 bpf_warn_invalid_xdp_action(act); 3941 bpf_warn_invalid_xdp_action(act);
3911 /* fall through */ 3942 /* fall through */
@@ -4140,7 +4171,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
4140 struct net_device *orig_dev) 4171 struct net_device *orig_dev)
4141{ 4172{
4142#ifdef CONFIG_NET_CLS_ACT 4173#ifdef CONFIG_NET_CLS_ACT
4143 struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); 4174 struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
4144 struct tcf_result cl_res; 4175 struct tcf_result cl_res;
4145 4176
4146 /* If there's at least one ingress present somewhere (so 4177 /* If there's at least one ingress present somewhere (so
@@ -4148,8 +4179,9 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
4148 * that are not configured with an ingress qdisc will bail 4179 * that are not configured with an ingress qdisc will bail
4149 * out here. 4180 * out here.
4150 */ 4181 */
4151 if (!cl) 4182 if (!miniq)
4152 return skb; 4183 return skb;
4184
4153 if (*pt_prev) { 4185 if (*pt_prev) {
4154 *ret = deliver_skb(skb, *pt_prev, orig_dev); 4186 *ret = deliver_skb(skb, *pt_prev, orig_dev);
4155 *pt_prev = NULL; 4187 *pt_prev = NULL;
@@ -4157,15 +4189,15 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
4157 4189
4158 qdisc_skb_cb(skb)->pkt_len = skb->len; 4190 qdisc_skb_cb(skb)->pkt_len = skb->len;
4159 skb->tc_at_ingress = 1; 4191 skb->tc_at_ingress = 1;
4160 qdisc_bstats_cpu_update(cl->q, skb); 4192 mini_qdisc_bstats_cpu_update(miniq, skb);
4161 4193
4162 switch (tcf_classify(skb, cl, &cl_res, false)) { 4194 switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
4163 case TC_ACT_OK: 4195 case TC_ACT_OK:
4164 case TC_ACT_RECLASSIFY: 4196 case TC_ACT_RECLASSIFY:
4165 skb->tc_index = TC_H_MIN(cl_res.classid); 4197 skb->tc_index = TC_H_MIN(cl_res.classid);
4166 break; 4198 break;
4167 case TC_ACT_SHOT: 4199 case TC_ACT_SHOT:
4168 qdisc_qstats_cpu_drop(cl->q); 4200 mini_qdisc_qstats_cpu_drop(miniq);
4169 kfree_skb(skb); 4201 kfree_skb(skb);
4170 return NULL; 4202 return NULL;
4171 case TC_ACT_STOLEN: 4203 case TC_ACT_STOLEN:
@@ -4443,6 +4475,33 @@ out:
4443 return ret; 4475 return ret;
4444} 4476}
4445 4477
4478/**
4479 * netif_receive_skb_core - special purpose version of netif_receive_skb
4480 * @skb: buffer to process
4481 *
4482 * More direct receive version of netif_receive_skb(). It should
4483 * only be used by callers that have a need to skip RPS and Generic XDP.
4484 * Caller must also take care of handling if (page_is_)pfmemalloc.
4485 *
4486 * This function may only be called from softirq context and interrupts
4487 * should be enabled.
4488 *
4489 * Return values (usually ignored):
4490 * NET_RX_SUCCESS: no congestion
4491 * NET_RX_DROP: packet was dropped
4492 */
4493int netif_receive_skb_core(struct sk_buff *skb)
4494{
4495 int ret;
4496
4497 rcu_read_lock();
4498 ret = __netif_receive_skb_core(skb, false);
4499 rcu_read_unlock();
4500
4501 return ret;
4502}
4503EXPORT_SYMBOL(netif_receive_skb_core);
4504
4446static int __netif_receive_skb(struct sk_buff *skb) 4505static int __netif_receive_skb(struct sk_buff *skb)
4447{ 4506{
4448 int ret; 4507 int ret;
@@ -4468,7 +4527,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
4468 return ret; 4527 return ret;
4469} 4528}
4470 4529
4471static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) 4530static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
4472{ 4531{
4473 struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); 4532 struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
4474 struct bpf_prog *new = xdp->prog; 4533 struct bpf_prog *new = xdp->prog;
@@ -4695,6 +4754,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
4695 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; 4754 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
4696 diffs |= p->vlan_tci ^ skb->vlan_tci; 4755 diffs |= p->vlan_tci ^ skb->vlan_tci;
4697 diffs |= skb_metadata_dst_cmp(p, skb); 4756 diffs |= skb_metadata_dst_cmp(p, skb);
4757 diffs |= skb_metadata_differs(p, skb);
4698 if (maclen == ETH_HLEN) 4758 if (maclen == ETH_HLEN)
4699 diffs |= compare_ether_header(skb_mac_header(p), 4759 diffs |= compare_ether_header(skb_mac_header(p),
4700 skb_mac_header(skb)); 4760 skb_mac_header(skb));
@@ -6228,9 +6288,19 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
6228 6288
6229static int __netdev_upper_dev_link(struct net_device *dev, 6289static int __netdev_upper_dev_link(struct net_device *dev,
6230 struct net_device *upper_dev, bool master, 6290 struct net_device *upper_dev, bool master,
6231 void *upper_priv, void *upper_info) 6291 void *upper_priv, void *upper_info,
6232{ 6292 struct netlink_ext_ack *extack)
6233 struct netdev_notifier_changeupper_info changeupper_info; 6293{
6294 struct netdev_notifier_changeupper_info changeupper_info = {
6295 .info = {
6296 .dev = dev,
6297 .extack = extack,
6298 },
6299 .upper_dev = upper_dev,
6300 .master = master,
6301 .linking = true,
6302 .upper_info = upper_info,
6303 };
6234 int ret = 0; 6304 int ret = 0;
6235 6305
6236 ASSERT_RTNL(); 6306 ASSERT_RTNL();
@@ -6248,12 +6318,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
6248 if (master && netdev_master_upper_dev_get(dev)) 6318 if (master && netdev_master_upper_dev_get(dev))
6249 return -EBUSY; 6319 return -EBUSY;
6250 6320
6251 changeupper_info.upper_dev = upper_dev; 6321 ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
6252 changeupper_info.master = master;
6253 changeupper_info.linking = true;
6254 changeupper_info.upper_info = upper_info;
6255
6256 ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
6257 &changeupper_info.info); 6322 &changeupper_info.info);
6258 ret = notifier_to_errno(ret); 6323 ret = notifier_to_errno(ret);
6259 if (ret) 6324 if (ret)
@@ -6264,7 +6329,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
6264 if (ret) 6329 if (ret)
6265 return ret; 6330 return ret;
6266 6331
6267 ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, 6332 ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
6268 &changeupper_info.info); 6333 &changeupper_info.info);
6269 ret = notifier_to_errno(ret); 6334 ret = notifier_to_errno(ret);
6270 if (ret) 6335 if (ret)
@@ -6289,9 +6354,11 @@ rollback:
6289 * returns zero. 6354 * returns zero.
6290 */ 6355 */
6291int netdev_upper_dev_link(struct net_device *dev, 6356int netdev_upper_dev_link(struct net_device *dev,
6292 struct net_device *upper_dev) 6357 struct net_device *upper_dev,
6358 struct netlink_ext_ack *extack)
6293{ 6359{
6294 return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL); 6360 return __netdev_upper_dev_link(dev, upper_dev, false,
6361 NULL, NULL, extack);
6295} 6362}
6296EXPORT_SYMBOL(netdev_upper_dev_link); 6363EXPORT_SYMBOL(netdev_upper_dev_link);
6297 6364
@@ -6310,10 +6377,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
6310 */ 6377 */
6311int netdev_master_upper_dev_link(struct net_device *dev, 6378int netdev_master_upper_dev_link(struct net_device *dev,
6312 struct net_device *upper_dev, 6379 struct net_device *upper_dev,
6313 void *upper_priv, void *upper_info) 6380 void *upper_priv, void *upper_info,
6381 struct netlink_ext_ack *extack)
6314{ 6382{
6315 return __netdev_upper_dev_link(dev, upper_dev, true, 6383 return __netdev_upper_dev_link(dev, upper_dev, true,
6316 upper_priv, upper_info); 6384 upper_priv, upper_info, extack);
6317} 6385}
6318EXPORT_SYMBOL(netdev_master_upper_dev_link); 6386EXPORT_SYMBOL(netdev_master_upper_dev_link);
6319 6387
@@ -6328,20 +6396,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
6328void netdev_upper_dev_unlink(struct net_device *dev, 6396void netdev_upper_dev_unlink(struct net_device *dev,
6329 struct net_device *upper_dev) 6397 struct net_device *upper_dev)
6330{ 6398{
6331 struct netdev_notifier_changeupper_info changeupper_info; 6399 struct netdev_notifier_changeupper_info changeupper_info = {
6400 .info = {
6401 .dev = dev,
6402 },
6403 .upper_dev = upper_dev,
6404 .linking = false,
6405 };
6332 6406
6333 ASSERT_RTNL(); 6407 ASSERT_RTNL();
6334 6408
6335 changeupper_info.upper_dev = upper_dev;
6336 changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; 6409 changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
6337 changeupper_info.linking = false;
6338 6410
6339 call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, 6411 call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
6340 &changeupper_info.info); 6412 &changeupper_info.info);
6341 6413
6342 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); 6414 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
6343 6415
6344 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, 6416 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
6345 &changeupper_info.info); 6417 &changeupper_info.info);
6346} 6418}
6347EXPORT_SYMBOL(netdev_upper_dev_unlink); 6419EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -6357,11 +6429,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink);
6357void netdev_bonding_info_change(struct net_device *dev, 6429void netdev_bonding_info_change(struct net_device *dev,
6358 struct netdev_bonding_info *bonding_info) 6430 struct netdev_bonding_info *bonding_info)
6359{ 6431{
6360 struct netdev_notifier_bonding_info info; 6432 struct netdev_notifier_bonding_info info = {
6433 .info.dev = dev,
6434 };
6361 6435
6362 memcpy(&info.bonding_info, bonding_info, 6436 memcpy(&info.bonding_info, bonding_info,
6363 sizeof(struct netdev_bonding_info)); 6437 sizeof(struct netdev_bonding_info));
6364 call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev, 6438 call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
6365 &info.info); 6439 &info.info);
6366} 6440}
6367EXPORT_SYMBOL(netdev_bonding_info_change); 6441EXPORT_SYMBOL(netdev_bonding_info_change);
@@ -6487,11 +6561,13 @@ EXPORT_SYMBOL(dev_get_nest_level);
6487void netdev_lower_state_changed(struct net_device *lower_dev, 6561void netdev_lower_state_changed(struct net_device *lower_dev,
6488 void *lower_state_info) 6562 void *lower_state_info)
6489{ 6563{
6490 struct netdev_notifier_changelowerstate_info changelowerstate_info; 6564 struct netdev_notifier_changelowerstate_info changelowerstate_info = {
6565 .info.dev = lower_dev,
6566 };
6491 6567
6492 ASSERT_RTNL(); 6568 ASSERT_RTNL();
6493 changelowerstate_info.lower_state_info = lower_state_info; 6569 changelowerstate_info.lower_state_info = lower_state_info;
6494 call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev, 6570 call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
6495 &changelowerstate_info.info); 6571 &changelowerstate_info.info);
6496} 6572}
6497EXPORT_SYMBOL(netdev_lower_state_changed); 6573EXPORT_SYMBOL(netdev_lower_state_changed);
@@ -6782,11 +6858,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
6782 6858
6783 if (dev->flags & IFF_UP && 6859 if (dev->flags & IFF_UP &&
6784 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { 6860 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
6785 struct netdev_notifier_change_info change_info; 6861 struct netdev_notifier_change_info change_info = {
6862 .info = {
6863 .dev = dev,
6864 },
6865 .flags_changed = changes,
6866 };
6786 6867
6787 change_info.flags_changed = changes; 6868 call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
6788 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
6789 &change_info.info);
6790 } 6869 }
6791} 6870}
6792 6871
@@ -6993,26 +7072,26 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
6993} 7072}
6994EXPORT_SYMBOL(dev_change_proto_down); 7073EXPORT_SYMBOL(dev_change_proto_down);
6995 7074
6996u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id) 7075u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
6997{ 7076{
6998 struct netdev_xdp xdp; 7077 struct netdev_bpf xdp;
6999 7078
7000 memset(&xdp, 0, sizeof(xdp)); 7079 memset(&xdp, 0, sizeof(xdp));
7001 xdp.command = XDP_QUERY_PROG; 7080 xdp.command = XDP_QUERY_PROG;
7002 7081
7003 /* Query must always succeed. */ 7082 /* Query must always succeed. */
7004 WARN_ON(xdp_op(dev, &xdp) < 0); 7083 WARN_ON(bpf_op(dev, &xdp) < 0);
7005 if (prog_id) 7084 if (prog_id)
7006 *prog_id = xdp.prog_id; 7085 *prog_id = xdp.prog_id;
7007 7086
7008 return xdp.prog_attached; 7087 return xdp.prog_attached;
7009} 7088}
7010 7089
7011static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, 7090static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
7012 struct netlink_ext_ack *extack, u32 flags, 7091 struct netlink_ext_ack *extack, u32 flags,
7013 struct bpf_prog *prog) 7092 struct bpf_prog *prog)
7014{ 7093{
7015 struct netdev_xdp xdp; 7094 struct netdev_bpf xdp;
7016 7095
7017 memset(&xdp, 0, sizeof(xdp)); 7096 memset(&xdp, 0, sizeof(xdp));
7018 if (flags & XDP_FLAGS_HW_MODE) 7097 if (flags & XDP_FLAGS_HW_MODE)
@@ -7023,7 +7102,7 @@ static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
7023 xdp.flags = flags; 7102 xdp.flags = flags;
7024 xdp.prog = prog; 7103 xdp.prog = prog;
7025 7104
7026 return xdp_op(dev, &xdp); 7105 return bpf_op(dev, &xdp);
7027} 7106}
7028 7107
7029/** 7108/**
@@ -7040,32 +7119,36 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
7040{ 7119{
7041 const struct net_device_ops *ops = dev->netdev_ops; 7120 const struct net_device_ops *ops = dev->netdev_ops;
7042 struct bpf_prog *prog = NULL; 7121 struct bpf_prog *prog = NULL;
7043 xdp_op_t xdp_op, xdp_chk; 7122 bpf_op_t bpf_op, bpf_chk;
7044 int err; 7123 int err;
7045 7124
7046 ASSERT_RTNL(); 7125 ASSERT_RTNL();
7047 7126
7048 xdp_op = xdp_chk = ops->ndo_xdp; 7127 bpf_op = bpf_chk = ops->ndo_bpf;
7049 if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) 7128 if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
7050 return -EOPNOTSUPP; 7129 return -EOPNOTSUPP;
7051 if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) 7130 if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
7052 xdp_op = generic_xdp_install; 7131 bpf_op = generic_xdp_install;
7053 if (xdp_op == xdp_chk) 7132 if (bpf_op == bpf_chk)
7054 xdp_chk = generic_xdp_install; 7133 bpf_chk = generic_xdp_install;
7055 7134
7056 if (fd >= 0) { 7135 if (fd >= 0) {
7057 if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL)) 7136 if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
7058 return -EEXIST; 7137 return -EEXIST;
7059 if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && 7138 if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
7060 __dev_xdp_attached(dev, xdp_op, NULL)) 7139 __dev_xdp_attached(dev, bpf_op, NULL))
7061 return -EBUSY; 7140 return -EBUSY;
7062 7141
7063 prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); 7142 if (bpf_op == ops->ndo_bpf)
7143 prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
7144 dev);
7145 else
7146 prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
7064 if (IS_ERR(prog)) 7147 if (IS_ERR(prog))
7065 return PTR_ERR(prog); 7148 return PTR_ERR(prog);
7066 } 7149 }
7067 7150
7068 err = dev_xdp_install(dev, xdp_op, extack, flags, prog); 7151 err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
7069 if (err < 0 && prog) 7152 if (err < 0 && prog)
7070 bpf_prog_put(prog); 7153 bpf_prog_put(prog);
7071 7154
@@ -7157,7 +7240,7 @@ static void rollback_registered_many(struct list_head *head)
7157 if (!dev->rtnl_link_ops || 7240 if (!dev->rtnl_link_ops ||
7158 dev->rtnl_link_state == RTNL_LINK_INITIALIZED) 7241 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
7159 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, 7242 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
7160 GFP_KERNEL); 7243 GFP_KERNEL, NULL);
7161 7244
7162 /* 7245 /*
7163 * Flush the unicast and multicast chains 7246 * Flush the unicast and multicast chains
@@ -7994,7 +8077,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
7994 unsigned int txqs, unsigned int rxqs) 8077 unsigned int txqs, unsigned int rxqs)
7995{ 8078{
7996 struct net_device *dev; 8079 struct net_device *dev;
7997 size_t alloc_size; 8080 unsigned int alloc_size;
7998 struct net_device *p; 8081 struct net_device *p;
7999 8082
8000 BUG_ON(strlen(name) >= sizeof(dev->name)); 8083 BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -8244,7 +8327,7 @@ EXPORT_SYMBOL(unregister_netdev);
8244 8327
8245int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) 8328int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
8246{ 8329{
8247 int err; 8330 int err, new_nsid;
8248 8331
8249 ASSERT_RTNL(); 8332 ASSERT_RTNL();
8250 8333
@@ -8300,7 +8383,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
8300 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 8383 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
8301 rcu_barrier(); 8384 rcu_barrier();
8302 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 8385 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
8303 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); 8386 if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
8387 new_nsid = peernet2id_alloc(dev_net(dev), net);
8388 else
8389 new_nsid = peernet2id(dev_net(dev), net);
8390 rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
8304 8391
8305 /* 8392 /*
8306 * Flush the unicast and multicast chains 8393 * Flush the unicast and multicast chains
@@ -8562,6 +8649,8 @@ static void __net_exit netdev_exit(struct net *net)
8562{ 8649{
8563 kfree(net->dev_name_head); 8650 kfree(net->dev_name_head);
8564 kfree(net->dev_index_head); 8651 kfree(net->dev_index_head);
8652 if (net != &init_net)
8653 WARN_ON_ONCE(!list_empty(&net->dev_base_head));
8565} 8654}
8566 8655
8567static struct pernet_operations __net_initdata netdev_net_ops = { 8656static struct pernet_operations __net_initdata netdev_net_ops = {
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 70ccda233bd1..c7785efeea57 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -144,9 +144,9 @@ static void send_dm_alert(struct work_struct *work)
144 * in the event that more drops will arrive during the 144 * in the event that more drops will arrive during the
145 * hysteresis period. 145 * hysteresis period.
146 */ 146 */
147static void sched_send_work(unsigned long _data) 147static void sched_send_work(struct timer_list *t)
148{ 148{
149 struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data; 149 struct per_cpu_dm_data *data = from_timer(data, t, send_timer);
150 150
151 schedule_work(&data->dm_alert_work); 151 schedule_work(&data->dm_alert_work);
152} 152}
@@ -412,8 +412,7 @@ static int __init init_net_drop_monitor(void)
412 for_each_possible_cpu(cpu) { 412 for_each_possible_cpu(cpu) {
413 data = &per_cpu(dm_cpu_data, cpu); 413 data = &per_cpu(dm_cpu_data, cpu);
414 INIT_WORK(&data->dm_alert_work, send_dm_alert); 414 INIT_WORK(&data->dm_alert_work, send_dm_alert);
415 setup_timer(&data->send_timer, sched_send_work, 415 timer_setup(&data->send_timer, sched_send_work, 0);
416 (unsigned long)data);
417 spin_lock_init(&data->lock); 416 spin_lock_init(&data->lock);
418 reset_per_cpu_data(data); 417 reset_per_cpu_data(data);
419 } 418 }
diff --git a/net/core/dst.c b/net/core/dst.c
index a6c47da7d0f8..662a2d4a3d19 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -322,3 +322,19 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
322 return md_dst; 322 return md_dst;
323} 323}
324EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu); 324EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
325
326void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
327{
328#ifdef CONFIG_DST_CACHE
329 int cpu;
330
331 for_each_possible_cpu(cpu) {
332 struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
333
334 if (one_md_dst->type == METADATA_IP_TUNNEL)
335 dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
336 }
337#endif
338 free_percpu(md_dst);
339}
340EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 9a9a3d77e327..f8fcf450a36e 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -403,6 +403,22 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
403 return 0; 403 return 0;
404} 404}
405 405
406/* Given two link masks, AND them together and save the result in dst. */
407void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst,
408 struct ethtool_link_ksettings *src)
409{
410 unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS);
411 unsigned int idx = 0;
412
413 for (; idx < size; idx++) {
414 dst->link_modes.supported[idx] &=
415 src->link_modes.supported[idx];
416 dst->link_modes.advertising[idx] &=
417 src->link_modes.advertising[idx];
418 }
419}
420EXPORT_SYMBOL(ethtool_intersect_link_masks);
421
406void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, 422void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst,
407 u32 legacy_u32) 423 u32 legacy_u32)
408{ 424{
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 4fc202dbdfb6..0c048bdeb016 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -34,12 +34,14 @@ static unsigned int fib_seq_sum(void)
34 34
35 rtnl_lock(); 35 rtnl_lock();
36 for_each_net(net) { 36 for_each_net(net) {
37 list_for_each_entry(ops, &net->fib_notifier_ops, list) { 37 rcu_read_lock();
38 list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
38 if (!try_module_get(ops->owner)) 39 if (!try_module_get(ops->owner))
39 continue; 40 continue;
40 fib_seq += ops->fib_seq_read(net); 41 fib_seq += ops->fib_seq_read(net);
41 module_put(ops->owner); 42 module_put(ops->owner);
42 } 43 }
44 rcu_read_unlock();
43 } 45 }
44 rtnl_unlock(); 46 rtnl_unlock();
45 47
@@ -161,8 +163,14 @@ static int __net_init fib_notifier_net_init(struct net *net)
161 return 0; 163 return 0;
162} 164}
163 165
166static void __net_exit fib_notifier_net_exit(struct net *net)
167{
168 WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops));
169}
170
164static struct pernet_operations fib_notifier_net_ops = { 171static struct pernet_operations fib_notifier_net_ops = {
165 .init = fib_notifier_net_init, 172 .init = fib_notifier_net_init,
173 .exit = fib_notifier_net_exit,
166}; 174};
167 175
168static int __init fib_notifier_init(void) 176static int __init fib_notifier_init(void)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 9a6d97c1d810..98e1066c3d55 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -314,10 +314,12 @@ static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
314static int call_fib_rule_notifiers(struct net *net, 314static int call_fib_rule_notifiers(struct net *net,
315 enum fib_event_type event_type, 315 enum fib_event_type event_type,
316 struct fib_rule *rule, 316 struct fib_rule *rule,
317 struct fib_rules_ops *ops) 317 struct fib_rules_ops *ops,
318 struct netlink_ext_ack *extack)
318{ 319{
319 struct fib_rule_notifier_info info = { 320 struct fib_rule_notifier_info info = {
320 .info.family = ops->family, 321 .info.family = ops->family,
322 .info.extack = extack,
321 .rule = rule, 323 .rule = rule,
322 }; 324 };
323 325
@@ -609,7 +611,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
609 if (rule->tun_id) 611 if (rule->tun_id)
610 ip_tunnel_need_metadata(); 612 ip_tunnel_need_metadata();
611 613
612 call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops); 614 call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack);
613 notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); 615 notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
614 flush_route_cache(ops); 616 flush_route_cache(ops);
615 rules_ops_put(ops); 617 rules_ops_put(ops);
@@ -749,7 +751,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
749 } 751 }
750 } 752 }
751 753
752 call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops); 754 call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
755 NULL);
753 notify_rule_change(RTM_DELRULE, rule, ops, nlh, 756 notify_rule_change(RTM_DELRULE, rule, ops, nlh,
754 NETLINK_CB(skb).portid); 757 NETLINK_CB(skb).portid);
755 fib_rule_put(rule); 758 fib_rule_put(rule);
@@ -1019,8 +1022,14 @@ static int __net_init fib_rules_net_init(struct net *net)
1019 return 0; 1022 return 0;
1020} 1023}
1021 1024
1025static void __net_exit fib_rules_net_exit(struct net *net)
1026{
1027 WARN_ON_ONCE(!list_empty(&net->rules_ops));
1028}
1029
1022static struct pernet_operations fib_rules_net_ops = { 1030static struct pernet_operations fib_rules_net_ops = {
1023 .init = fib_rules_net_init, 1031 .init = fib_rules_net_init,
1032 .exit = fib_rules_net_exit,
1024}; 1033};
1025 1034
1026static int __init fib_rules_init(void) 1035static int __init fib_rules_init(void)
diff --git a/net/core/filter.c b/net/core/filter.c
index 6ae94f825f72..1afa17935954 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -43,6 +43,7 @@
43#include <linux/timer.h> 43#include <linux/timer.h>
44#include <linux/uaccess.h> 44#include <linux/uaccess.h>
45#include <asm/unaligned.h> 45#include <asm/unaligned.h>
46#include <asm/cmpxchg.h>
46#include <linux/filter.h> 47#include <linux/filter.h>
47#include <linux/ratelimit.h> 48#include <linux/ratelimit.h>
48#include <linux/seccomp.h> 49#include <linux/seccomp.h>
@@ -1406,7 +1407,7 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
1406{ 1407{
1407 int err = __bpf_try_make_writable(skb, write_len); 1408 int err = __bpf_try_make_writable(skb, write_len);
1408 1409
1409 bpf_compute_data_end(skb); 1410 bpf_compute_data_pointers(skb);
1410 return err; 1411 return err;
1411} 1412}
1412 1413
@@ -1968,7 +1969,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
1968 ret = skb_vlan_push(skb, vlan_proto, vlan_tci); 1969 ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
1969 bpf_pull_mac_rcsum(skb); 1970 bpf_pull_mac_rcsum(skb);
1970 1971
1971 bpf_compute_data_end(skb); 1972 bpf_compute_data_pointers(skb);
1972 return ret; 1973 return ret;
1973} 1974}
1974 1975
@@ -1990,7 +1991,7 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
1990 ret = skb_vlan_pop(skb); 1991 ret = skb_vlan_pop(skb);
1991 bpf_pull_mac_rcsum(skb); 1992 bpf_pull_mac_rcsum(skb);
1992 1993
1993 bpf_compute_data_end(skb); 1994 bpf_compute_data_pointers(skb);
1994 return ret; 1995 return ret;
1995} 1996}
1996 1997
@@ -2184,7 +2185,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
2184 * need to be verified first. 2185 * need to be verified first.
2185 */ 2186 */
2186 ret = bpf_skb_proto_xlat(skb, proto); 2187 ret = bpf_skb_proto_xlat(skb, proto);
2187 bpf_compute_data_end(skb); 2188 bpf_compute_data_pointers(skb);
2188 return ret; 2189 return ret;
2189} 2190}
2190 2191
@@ -2309,7 +2310,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
2309 ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) : 2310 ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
2310 bpf_skb_net_grow(skb, len_diff_abs); 2311 bpf_skb_net_grow(skb, len_diff_abs);
2311 2312
2312 bpf_compute_data_end(skb); 2313 bpf_compute_data_pointers(skb);
2313 return ret; 2314 return ret;
2314} 2315}
2315 2316
@@ -2400,7 +2401,7 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
2400 skb_gso_reset(skb); 2401 skb_gso_reset(skb);
2401 } 2402 }
2402 2403
2403 bpf_compute_data_end(skb); 2404 bpf_compute_data_pointers(skb);
2404 return ret; 2405 return ret;
2405} 2406}
2406 2407
@@ -2440,7 +2441,7 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
2440 skb_reset_mac_header(skb); 2441 skb_reset_mac_header(skb);
2441 } 2442 }
2442 2443
2443 bpf_compute_data_end(skb); 2444 bpf_compute_data_pointers(skb);
2444 return 0; 2445 return 0;
2445} 2446}
2446 2447
@@ -2453,14 +2454,26 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
2453 .arg3_type = ARG_ANYTHING, 2454 .arg3_type = ARG_ANYTHING,
2454}; 2455};
2455 2456
2457static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
2458{
2459 return xdp_data_meta_unsupported(xdp) ? 0 :
2460 xdp->data - xdp->data_meta;
2461}
2462
2456BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) 2463BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
2457{ 2464{
2465 unsigned long metalen = xdp_get_metalen(xdp);
2466 void *data_start = xdp->data_hard_start + metalen;
2458 void *data = xdp->data + offset; 2467 void *data = xdp->data + offset;
2459 2468
2460 if (unlikely(data < xdp->data_hard_start || 2469 if (unlikely(data < data_start ||
2461 data > xdp->data_end - ETH_HLEN)) 2470 data > xdp->data_end - ETH_HLEN))
2462 return -EINVAL; 2471 return -EINVAL;
2463 2472
2473 if (metalen)
2474 memmove(xdp->data_meta + offset,
2475 xdp->data_meta, metalen);
2476 xdp->data_meta += offset;
2464 xdp->data = data; 2477 xdp->data = data;
2465 2478
2466 return 0; 2479 return 0;
@@ -2474,6 +2487,33 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
2474 .arg2_type = ARG_ANYTHING, 2487 .arg2_type = ARG_ANYTHING,
2475}; 2488};
2476 2489
2490BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
2491{
2492 void *meta = xdp->data_meta + offset;
2493 unsigned long metalen = xdp->data - meta;
2494
2495 if (xdp_data_meta_unsupported(xdp))
2496 return -ENOTSUPP;
2497 if (unlikely(meta < xdp->data_hard_start ||
2498 meta > xdp->data))
2499 return -EINVAL;
2500 if (unlikely((metalen & (sizeof(__u32) - 1)) ||
2501 (metalen > 32)))
2502 return -EACCES;
2503
2504 xdp->data_meta = meta;
2505
2506 return 0;
2507}
2508
2509static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
2510 .func = bpf_xdp_adjust_meta,
2511 .gpl_only = false,
2512 .ret_type = RET_INTEGER,
2513 .arg1_type = ARG_PTR_TO_CTX,
2514 .arg2_type = ARG_ANYTHING,
2515};
2516
2477static int __bpf_tx_xdp(struct net_device *dev, 2517static int __bpf_tx_xdp(struct net_device *dev,
2478 struct bpf_map *map, 2518 struct bpf_map *map,
2479 struct xdp_buff *xdp, 2519 struct xdp_buff *xdp,
@@ -2488,10 +2528,36 @@ static int __bpf_tx_xdp(struct net_device *dev,
2488 err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); 2528 err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
2489 if (err) 2529 if (err)
2490 return err; 2530 return err;
2491 if (map) 2531 dev->netdev_ops->ndo_xdp_flush(dev);
2532 return 0;
2533}
2534
2535static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
2536 struct bpf_map *map,
2537 struct xdp_buff *xdp,
2538 u32 index)
2539{
2540 int err;
2541
2542 if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
2543 struct net_device *dev = fwd;
2544
2545 if (!dev->netdev_ops->ndo_xdp_xmit)
2546 return -EOPNOTSUPP;
2547
2548 err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
2549 if (err)
2550 return err;
2492 __dev_map_insert_ctx(map, index); 2551 __dev_map_insert_ctx(map, index);
2493 else 2552
2494 dev->netdev_ops->ndo_xdp_flush(dev); 2553 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
2554 struct bpf_cpu_map_entry *rcpu = fwd;
2555
2556 err = cpu_map_enqueue(rcpu, xdp, dev_rx);
2557 if (err)
2558 return err;
2559 __cpu_map_insert_ctx(map, index);
2560 }
2495 return 0; 2561 return 0;
2496} 2562}
2497 2563
@@ -2501,11 +2567,33 @@ void xdp_do_flush_map(void)
2501 struct bpf_map *map = ri->map_to_flush; 2567 struct bpf_map *map = ri->map_to_flush;
2502 2568
2503 ri->map_to_flush = NULL; 2569 ri->map_to_flush = NULL;
2504 if (map) 2570 if (map) {
2505 __dev_map_flush(map); 2571 switch (map->map_type) {
2572 case BPF_MAP_TYPE_DEVMAP:
2573 __dev_map_flush(map);
2574 break;
2575 case BPF_MAP_TYPE_CPUMAP:
2576 __cpu_map_flush(map);
2577 break;
2578 default:
2579 break;
2580 }
2581 }
2506} 2582}
2507EXPORT_SYMBOL_GPL(xdp_do_flush_map); 2583EXPORT_SYMBOL_GPL(xdp_do_flush_map);
2508 2584
2585static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
2586{
2587 switch (map->map_type) {
2588 case BPF_MAP_TYPE_DEVMAP:
2589 return __dev_map_lookup_elem(map, index);
2590 case BPF_MAP_TYPE_CPUMAP:
2591 return __cpu_map_lookup_elem(map, index);
2592 default:
2593 return NULL;
2594 }
2595}
2596
2509static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog, 2597static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
2510 unsigned long aux) 2598 unsigned long aux)
2511{ 2599{
@@ -2518,8 +2606,8 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
2518 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2606 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2519 unsigned long map_owner = ri->map_owner; 2607 unsigned long map_owner = ri->map_owner;
2520 struct bpf_map *map = ri->map; 2608 struct bpf_map *map = ri->map;
2521 struct net_device *fwd = NULL;
2522 u32 index = ri->ifindex; 2609 u32 index = ri->ifindex;
2610 void *fwd = NULL;
2523 int err; 2611 int err;
2524 2612
2525 ri->ifindex = 0; 2613 ri->ifindex = 0;
@@ -2532,7 +2620,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
2532 goto err; 2620 goto err;
2533 } 2621 }
2534 2622
2535 fwd = __dev_map_lookup_elem(map, index); 2623 fwd = __xdp_map_lookup_elem(map, index);
2536 if (!fwd) { 2624 if (!fwd) {
2537 err = -EINVAL; 2625 err = -EINVAL;
2538 goto err; 2626 goto err;
@@ -2540,7 +2628,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
2540 if (ri->map_to_flush && ri->map_to_flush != map) 2628 if (ri->map_to_flush && ri->map_to_flush != map)
2541 xdp_do_flush_map(); 2629 xdp_do_flush_map();
2542 2630
2543 err = __bpf_tx_xdp(fwd, map, xdp, index); 2631 err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
2544 if (unlikely(err)) 2632 if (unlikely(err))
2545 goto err; 2633 goto err;
2546 2634
@@ -2582,54 +2670,88 @@ err:
2582} 2670}
2583EXPORT_SYMBOL_GPL(xdp_do_redirect); 2671EXPORT_SYMBOL_GPL(xdp_do_redirect);
2584 2672
2585int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, 2673static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
2586 struct bpf_prog *xdp_prog) 2674{
2675 unsigned int len;
2676
2677 if (unlikely(!(fwd->flags & IFF_UP)))
2678 return -ENETDOWN;
2679
2680 len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
2681 if (skb->len > len)
2682 return -EMSGSIZE;
2683
2684 return 0;
2685}
2686
2687int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb,
2688 struct bpf_prog *xdp_prog)
2587{ 2689{
2588 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2690 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2589 unsigned long map_owner = ri->map_owner; 2691 unsigned long map_owner = ri->map_owner;
2590 struct bpf_map *map = ri->map; 2692 struct bpf_map *map = ri->map;
2591 struct net_device *fwd = NULL; 2693 struct net_device *fwd = NULL;
2592 u32 index = ri->ifindex; 2694 u32 index = ri->ifindex;
2593 unsigned int len;
2594 int err = 0; 2695 int err = 0;
2595 2696
2596 ri->ifindex = 0; 2697 ri->ifindex = 0;
2597 ri->map = NULL; 2698 ri->map = NULL;
2598 ri->map_owner = 0; 2699 ri->map_owner = 0;
2599 2700
2600 if (map) { 2701 if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
2601 if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { 2702 err = -EFAULT;
2602 err = -EFAULT; 2703 map = NULL;
2603 map = NULL; 2704 goto err;
2604 goto err;
2605 }
2606 fwd = __dev_map_lookup_elem(map, index);
2607 } else {
2608 fwd = dev_get_by_index_rcu(dev_net(dev), index);
2609 } 2705 }
2706 fwd = __xdp_map_lookup_elem(map, index);
2610 if (unlikely(!fwd)) { 2707 if (unlikely(!fwd)) {
2611 err = -EINVAL; 2708 err = -EINVAL;
2612 goto err; 2709 goto err;
2613 } 2710 }
2614 2711
2615 if (unlikely(!(fwd->flags & IFF_UP))) { 2712 if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
2616 err = -ENETDOWN; 2713 if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
2714 goto err;
2715 skb->dev = fwd;
2716 } else {
2717 /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
2718 err = -EBADRQC;
2617 goto err; 2719 goto err;
2618 } 2720 }
2619 2721
2620 len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; 2722 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
2621 if (skb->len > len) { 2723 return 0;
2622 err = -EMSGSIZE; 2724err:
2725 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
2726 return err;
2727}
2728
2729int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
2730 struct bpf_prog *xdp_prog)
2731{
2732 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2733 u32 index = ri->ifindex;
2734 struct net_device *fwd;
2735 int err = 0;
2736
2737 if (ri->map)
2738 return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
2739
2740 ri->ifindex = 0;
2741 fwd = dev_get_by_index_rcu(dev_net(dev), index);
2742 if (unlikely(!fwd)) {
2743 err = -EINVAL;
2623 goto err; 2744 goto err;
2624 } 2745 }
2625 2746
2747 if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
2748 goto err;
2749
2626 skb->dev = fwd; 2750 skb->dev = fwd;
2627 map ? _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index) 2751 _trace_xdp_redirect(dev, xdp_prog, index);
2628 : _trace_xdp_redirect(dev, xdp_prog, index);
2629 return 0; 2752 return 0;
2630err: 2753err:
2631 map ? _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err) 2754 _trace_xdp_redirect_err(dev, xdp_prog, index, err);
2632 : _trace_xdp_redirect_err(dev, xdp_prog, index, err);
2633 return err; 2755 return err;
2634} 2756}
2635EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); 2757EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
@@ -2698,7 +2820,8 @@ bool bpf_helper_changes_pkt_data(void *func)
2698 func == bpf_clone_redirect || 2820 func == bpf_clone_redirect ||
2699 func == bpf_l3_csum_replace || 2821 func == bpf_l3_csum_replace ||
2700 func == bpf_l4_csum_replace || 2822 func == bpf_l4_csum_replace ||
2701 func == bpf_xdp_adjust_head) 2823 func == bpf_xdp_adjust_head ||
2824 func == bpf_xdp_adjust_meta)
2702 return true; 2825 return true;
2703 2826
2704 return false; 2827 return false;
@@ -2949,14 +3072,15 @@ static const struct bpf_func_proto *
2949bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) 3072bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
2950{ 3073{
2951 if (!md_dst) { 3074 if (!md_dst) {
2952 /* Race is not possible, since it's called from verifier 3075 struct metadata_dst __percpu *tmp;
2953 * that is holding verifier mutex. 3076
2954 */ 3077 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
2955 md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, 3078 METADATA_IP_TUNNEL,
2956 METADATA_IP_TUNNEL, 3079 GFP_KERNEL);
2957 GFP_KERNEL); 3080 if (!tmp)
2958 if (!md_dst)
2959 return NULL; 3081 return NULL;
3082 if (cmpxchg(&md_dst, NULL, tmp))
3083 metadata_dst_free_percpu(tmp);
2960 } 3084 }
2961 3085
2962 switch (which) { 3086 switch (which) {
@@ -3151,7 +3275,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3151 3275
3152static const struct bpf_func_proto bpf_setsockopt_proto = { 3276static const struct bpf_func_proto bpf_setsockopt_proto = {
3153 .func = bpf_setsockopt, 3277 .func = bpf_setsockopt,
3154 .gpl_only = true, 3278 .gpl_only = false,
3155 .ret_type = RET_INTEGER, 3279 .ret_type = RET_INTEGER,
3156 .arg1_type = ARG_PTR_TO_CTX, 3280 .arg1_type = ARG_PTR_TO_CTX,
3157 .arg2_type = ARG_ANYTHING, 3281 .arg2_type = ARG_ANYTHING,
@@ -3160,6 +3284,47 @@ static const struct bpf_func_proto bpf_setsockopt_proto = {
3160 .arg5_type = ARG_CONST_SIZE, 3284 .arg5_type = ARG_CONST_SIZE,
3161}; 3285};
3162 3286
3287BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3288 int, level, int, optname, char *, optval, int, optlen)
3289{
3290 struct sock *sk = bpf_sock->sk;
3291
3292 if (!sk_fullsock(sk))
3293 goto err_clear;
3294
3295#ifdef CONFIG_INET
3296 if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
3297 if (optname == TCP_CONGESTION) {
3298 struct inet_connection_sock *icsk = inet_csk(sk);
3299
3300 if (!icsk->icsk_ca_ops || optlen <= 1)
3301 goto err_clear;
3302 strncpy(optval, icsk->icsk_ca_ops->name, optlen);
3303 optval[optlen - 1] = 0;
3304 } else {
3305 goto err_clear;
3306 }
3307 } else {
3308 goto err_clear;
3309 }
3310 return 0;
3311#endif
3312err_clear:
3313 memset(optval, 0, optlen);
3314 return -EINVAL;
3315}
3316
3317static const struct bpf_func_proto bpf_getsockopt_proto = {
3318 .func = bpf_getsockopt,
3319 .gpl_only = false,
3320 .ret_type = RET_INTEGER,
3321 .arg1_type = ARG_PTR_TO_CTX,
3322 .arg2_type = ARG_ANYTHING,
3323 .arg3_type = ARG_ANYTHING,
3324 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
3325 .arg5_type = ARG_CONST_SIZE,
3326};
3327
3163static const struct bpf_func_proto * 3328static const struct bpf_func_proto *
3164bpf_base_func_proto(enum bpf_func_id func_id) 3329bpf_base_func_proto(enum bpf_func_id func_id)
3165{ 3330{
@@ -3294,6 +3459,8 @@ xdp_func_proto(enum bpf_func_id func_id)
3294 return &bpf_get_smp_processor_id_proto; 3459 return &bpf_get_smp_processor_id_proto;
3295 case BPF_FUNC_xdp_adjust_head: 3460 case BPF_FUNC_xdp_adjust_head:
3296 return &bpf_xdp_adjust_head_proto; 3461 return &bpf_xdp_adjust_head_proto;
3462 case BPF_FUNC_xdp_adjust_meta:
3463 return &bpf_xdp_adjust_meta_proto;
3297 case BPF_FUNC_redirect: 3464 case BPF_FUNC_redirect:
3298 return &bpf_xdp_redirect_proto; 3465 return &bpf_xdp_redirect_proto;
3299 case BPF_FUNC_redirect_map: 3466 case BPF_FUNC_redirect_map:
@@ -3336,6 +3503,8 @@ static const struct bpf_func_proto *
3336 switch (func_id) { 3503 switch (func_id) {
3337 case BPF_FUNC_setsockopt: 3504 case BPF_FUNC_setsockopt:
3338 return &bpf_setsockopt_proto; 3505 return &bpf_setsockopt_proto;
3506 case BPF_FUNC_getsockopt:
3507 return &bpf_getsockopt_proto;
3339 case BPF_FUNC_sock_map_update: 3508 case BPF_FUNC_sock_map_update:
3340 return &bpf_sock_map_update_proto; 3509 return &bpf_sock_map_update_proto;
3341 default: 3510 default:
@@ -3424,6 +3593,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
3424 case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): 3593 case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
3425 case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): 3594 case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
3426 case bpf_ctx_range(struct __sk_buff, data): 3595 case bpf_ctx_range(struct __sk_buff, data):
3596 case bpf_ctx_range(struct __sk_buff, data_meta):
3427 case bpf_ctx_range(struct __sk_buff, data_end): 3597 case bpf_ctx_range(struct __sk_buff, data_end):
3428 if (size != size_default) 3598 if (size != size_default)
3429 return false; 3599 return false;
@@ -3450,6 +3620,7 @@ static bool sk_filter_is_valid_access(int off, int size,
3450 switch (off) { 3620 switch (off) {
3451 case bpf_ctx_range(struct __sk_buff, tc_classid): 3621 case bpf_ctx_range(struct __sk_buff, tc_classid):
3452 case bpf_ctx_range(struct __sk_buff, data): 3622 case bpf_ctx_range(struct __sk_buff, data):
3623 case bpf_ctx_range(struct __sk_buff, data_meta):
3453 case bpf_ctx_range(struct __sk_buff, data_end): 3624 case bpf_ctx_range(struct __sk_buff, data_end):
3454 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 3625 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
3455 return false; 3626 return false;
@@ -3474,6 +3645,7 @@ static bool lwt_is_valid_access(int off, int size,
3474 switch (off) { 3645 switch (off) {
3475 case bpf_ctx_range(struct __sk_buff, tc_classid): 3646 case bpf_ctx_range(struct __sk_buff, tc_classid):
3476 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 3647 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
3648 case bpf_ctx_range(struct __sk_buff, data_meta):
3477 return false; 3649 return false;
3478 } 3650 }
3479 3651
@@ -3592,6 +3764,9 @@ static bool tc_cls_act_is_valid_access(int off, int size,
3592 case bpf_ctx_range(struct __sk_buff, data): 3764 case bpf_ctx_range(struct __sk_buff, data):
3593 info->reg_type = PTR_TO_PACKET; 3765 info->reg_type = PTR_TO_PACKET;
3594 break; 3766 break;
3767 case bpf_ctx_range(struct __sk_buff, data_meta):
3768 info->reg_type = PTR_TO_PACKET_META;
3769 break;
3595 case bpf_ctx_range(struct __sk_buff, data_end): 3770 case bpf_ctx_range(struct __sk_buff, data_end):
3596 info->reg_type = PTR_TO_PACKET_END; 3771 info->reg_type = PTR_TO_PACKET_END;
3597 break; 3772 break;
@@ -3625,6 +3800,9 @@ static bool xdp_is_valid_access(int off, int size,
3625 case offsetof(struct xdp_md, data): 3800 case offsetof(struct xdp_md, data):
3626 info->reg_type = PTR_TO_PACKET; 3801 info->reg_type = PTR_TO_PACKET;
3627 break; 3802 break;
3803 case offsetof(struct xdp_md, data_meta):
3804 info->reg_type = PTR_TO_PACKET_META;
3805 break;
3628 case offsetof(struct xdp_md, data_end): 3806 case offsetof(struct xdp_md, data_end):
3629 info->reg_type = PTR_TO_PACKET_END; 3807 info->reg_type = PTR_TO_PACKET_END;
3630 break; 3808 break;
@@ -3683,6 +3861,12 @@ static bool sk_skb_is_valid_access(int off, int size,
3683 enum bpf_access_type type, 3861 enum bpf_access_type type,
3684 struct bpf_insn_access_aux *info) 3862 struct bpf_insn_access_aux *info)
3685{ 3863{
3864 switch (off) {
3865 case bpf_ctx_range(struct __sk_buff, tc_classid):
3866 case bpf_ctx_range(struct __sk_buff, data_meta):
3867 return false;
3868 }
3869
3686 if (type == BPF_WRITE) { 3870 if (type == BPF_WRITE) {
3687 switch (off) { 3871 switch (off) {
3688 case bpf_ctx_range(struct __sk_buff, tc_index): 3872 case bpf_ctx_range(struct __sk_buff, tc_index):
@@ -3695,7 +3879,6 @@ static bool sk_skb_is_valid_access(int off, int size,
3695 3879
3696 switch (off) { 3880 switch (off) {
3697 case bpf_ctx_range(struct __sk_buff, mark): 3881 case bpf_ctx_range(struct __sk_buff, mark):
3698 case bpf_ctx_range(struct __sk_buff, tc_classid):
3699 return false; 3882 return false;
3700 case bpf_ctx_range(struct __sk_buff, data): 3883 case bpf_ctx_range(struct __sk_buff, data):
3701 info->reg_type = PTR_TO_PACKET; 3884 info->reg_type = PTR_TO_PACKET;
@@ -3853,6 +4036,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
3853 offsetof(struct sk_buff, data)); 4036 offsetof(struct sk_buff, data));
3854 break; 4037 break;
3855 4038
4039 case offsetof(struct __sk_buff, data_meta):
4040 off = si->off;
4041 off -= offsetof(struct __sk_buff, data_meta);
4042 off += offsetof(struct sk_buff, cb);
4043 off += offsetof(struct bpf_skb_data_end, data_meta);
4044 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
4045 si->src_reg, off);
4046 break;
4047
3856 case offsetof(struct __sk_buff, data_end): 4048 case offsetof(struct __sk_buff, data_end):
3857 off = si->off; 4049 off = si->off;
3858 off -= offsetof(struct __sk_buff, data_end); 4050 off -= offsetof(struct __sk_buff, data_end);
@@ -4101,6 +4293,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
4101 si->dst_reg, si->src_reg, 4293 si->dst_reg, si->src_reg,
4102 offsetof(struct xdp_buff, data)); 4294 offsetof(struct xdp_buff, data));
4103 break; 4295 break;
4296 case offsetof(struct xdp_md, data_meta):
4297 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
4298 si->dst_reg, si->src_reg,
4299 offsetof(struct xdp_buff, data_meta));
4300 break;
4104 case offsetof(struct xdp_md, data_end): 4301 case offsetof(struct xdp_md, data_end):
4105 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), 4302 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
4106 si->dst_reg, si->src_reg, 4303 si->dst_reg, si->src_reg,
@@ -4269,68 +4466,95 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
4269 return insn - insn_buf; 4466 return insn - insn_buf;
4270} 4467}
4271 4468
4272const struct bpf_verifier_ops sk_filter_prog_ops = { 4469const struct bpf_verifier_ops sk_filter_verifier_ops = {
4273 .get_func_proto = sk_filter_func_proto, 4470 .get_func_proto = sk_filter_func_proto,
4274 .is_valid_access = sk_filter_is_valid_access, 4471 .is_valid_access = sk_filter_is_valid_access,
4275 .convert_ctx_access = bpf_convert_ctx_access, 4472 .convert_ctx_access = bpf_convert_ctx_access,
4276}; 4473};
4277 4474
4278const struct bpf_verifier_ops tc_cls_act_prog_ops = { 4475const struct bpf_prog_ops sk_filter_prog_ops = {
4476};
4477
4478const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
4279 .get_func_proto = tc_cls_act_func_proto, 4479 .get_func_proto = tc_cls_act_func_proto,
4280 .is_valid_access = tc_cls_act_is_valid_access, 4480 .is_valid_access = tc_cls_act_is_valid_access,
4281 .convert_ctx_access = tc_cls_act_convert_ctx_access, 4481 .convert_ctx_access = tc_cls_act_convert_ctx_access,
4282 .gen_prologue = tc_cls_act_prologue, 4482 .gen_prologue = tc_cls_act_prologue,
4483};
4484
4485const struct bpf_prog_ops tc_cls_act_prog_ops = {
4283 .test_run = bpf_prog_test_run_skb, 4486 .test_run = bpf_prog_test_run_skb,
4284}; 4487};
4285 4488
4286const struct bpf_verifier_ops xdp_prog_ops = { 4489const struct bpf_verifier_ops xdp_verifier_ops = {
4287 .get_func_proto = xdp_func_proto, 4490 .get_func_proto = xdp_func_proto,
4288 .is_valid_access = xdp_is_valid_access, 4491 .is_valid_access = xdp_is_valid_access,
4289 .convert_ctx_access = xdp_convert_ctx_access, 4492 .convert_ctx_access = xdp_convert_ctx_access,
4493};
4494
4495const struct bpf_prog_ops xdp_prog_ops = {
4290 .test_run = bpf_prog_test_run_xdp, 4496 .test_run = bpf_prog_test_run_xdp,
4291}; 4497};
4292 4498
4293const struct bpf_verifier_ops cg_skb_prog_ops = { 4499const struct bpf_verifier_ops cg_skb_verifier_ops = {
4294 .get_func_proto = sk_filter_func_proto, 4500 .get_func_proto = sk_filter_func_proto,
4295 .is_valid_access = sk_filter_is_valid_access, 4501 .is_valid_access = sk_filter_is_valid_access,
4296 .convert_ctx_access = bpf_convert_ctx_access, 4502 .convert_ctx_access = bpf_convert_ctx_access,
4503};
4504
4505const struct bpf_prog_ops cg_skb_prog_ops = {
4297 .test_run = bpf_prog_test_run_skb, 4506 .test_run = bpf_prog_test_run_skb,
4298}; 4507};
4299 4508
4300const struct bpf_verifier_ops lwt_inout_prog_ops = { 4509const struct bpf_verifier_ops lwt_inout_verifier_ops = {
4301 .get_func_proto = lwt_inout_func_proto, 4510 .get_func_proto = lwt_inout_func_proto,
4302 .is_valid_access = lwt_is_valid_access, 4511 .is_valid_access = lwt_is_valid_access,
4303 .convert_ctx_access = bpf_convert_ctx_access, 4512 .convert_ctx_access = bpf_convert_ctx_access,
4513};
4514
4515const struct bpf_prog_ops lwt_inout_prog_ops = {
4304 .test_run = bpf_prog_test_run_skb, 4516 .test_run = bpf_prog_test_run_skb,
4305}; 4517};
4306 4518
4307const struct bpf_verifier_ops lwt_xmit_prog_ops = { 4519const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
4308 .get_func_proto = lwt_xmit_func_proto, 4520 .get_func_proto = lwt_xmit_func_proto,
4309 .is_valid_access = lwt_is_valid_access, 4521 .is_valid_access = lwt_is_valid_access,
4310 .convert_ctx_access = bpf_convert_ctx_access, 4522 .convert_ctx_access = bpf_convert_ctx_access,
4311 .gen_prologue = tc_cls_act_prologue, 4523 .gen_prologue = tc_cls_act_prologue,
4524};
4525
4526const struct bpf_prog_ops lwt_xmit_prog_ops = {
4312 .test_run = bpf_prog_test_run_skb, 4527 .test_run = bpf_prog_test_run_skb,
4313}; 4528};
4314 4529
4315const struct bpf_verifier_ops cg_sock_prog_ops = { 4530const struct bpf_verifier_ops cg_sock_verifier_ops = {
4316 .get_func_proto = sock_filter_func_proto, 4531 .get_func_proto = sock_filter_func_proto,
4317 .is_valid_access = sock_filter_is_valid_access, 4532 .is_valid_access = sock_filter_is_valid_access,
4318 .convert_ctx_access = sock_filter_convert_ctx_access, 4533 .convert_ctx_access = sock_filter_convert_ctx_access,
4319}; 4534};
4320 4535
4321const struct bpf_verifier_ops sock_ops_prog_ops = { 4536const struct bpf_prog_ops cg_sock_prog_ops = {
4537};
4538
4539const struct bpf_verifier_ops sock_ops_verifier_ops = {
4322 .get_func_proto = sock_ops_func_proto, 4540 .get_func_proto = sock_ops_func_proto,
4323 .is_valid_access = sock_ops_is_valid_access, 4541 .is_valid_access = sock_ops_is_valid_access,
4324 .convert_ctx_access = sock_ops_convert_ctx_access, 4542 .convert_ctx_access = sock_ops_convert_ctx_access,
4325}; 4543};
4326 4544
4327const struct bpf_verifier_ops sk_skb_prog_ops = { 4545const struct bpf_prog_ops sock_ops_prog_ops = {
4546};
4547
4548const struct bpf_verifier_ops sk_skb_verifier_ops = {
4328 .get_func_proto = sk_skb_func_proto, 4549 .get_func_proto = sk_skb_func_proto,
4329 .is_valid_access = sk_skb_is_valid_access, 4550 .is_valid_access = sk_skb_is_valid_access,
4330 .convert_ctx_access = sk_skb_convert_ctx_access, 4551 .convert_ctx_access = sk_skb_convert_ctx_access,
4331 .gen_prologue = sk_skb_prologue, 4552 .gen_prologue = sk_skb_prologue,
4332}; 4553};
4333 4554
4555const struct bpf_prog_ops sk_skb_prog_ops = {
4556};
4557
4334int sk_detach_filter(struct sock *sk) 4558int sk_detach_filter(struct sock *sk)
4335{ 4559{
4336 int ret = -ENOENT; 4560 int ret = -ENOENT;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 0a977373d003..15ce30063765 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -5,10 +5,12 @@
5#include <linux/ipv6.h> 5#include <linux/ipv6.h>
6#include <linux/if_vlan.h> 6#include <linux/if_vlan.h>
7#include <net/dsa.h> 7#include <net/dsa.h>
8#include <net/dst_metadata.h>
8#include <net/ip.h> 9#include <net/ip.h>
9#include <net/ipv6.h> 10#include <net/ipv6.h>
10#include <net/gre.h> 11#include <net/gre.h>
11#include <net/pptp.h> 12#include <net/pptp.h>
13#include <net/tipc.h>
12#include <linux/igmp.h> 14#include <linux/igmp.h>
13#include <linux/icmp.h> 15#include <linux/icmp.h>
14#include <linux/sctp.h> 16#include <linux/sctp.h>
@@ -115,6 +117,102 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
115} 117}
116EXPORT_SYMBOL(__skb_flow_get_ports); 118EXPORT_SYMBOL(__skb_flow_get_ports);
117 119
120static void
121skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
122 struct flow_dissector *flow_dissector,
123 void *target_container)
124{
125 struct flow_dissector_key_control *ctrl;
126
127 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL))
128 return;
129
130 ctrl = skb_flow_dissector_target(flow_dissector,
131 FLOW_DISSECTOR_KEY_ENC_CONTROL,
132 target_container);
133 ctrl->addr_type = type;
134}
135
136static void
137__skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
138 struct flow_dissector *flow_dissector,
139 void *target_container)
140{
141 struct ip_tunnel_info *info;
142 struct ip_tunnel_key *key;
143
144 /* A quick check to see if there might be something to do. */
145 if (!dissector_uses_key(flow_dissector,
146 FLOW_DISSECTOR_KEY_ENC_KEYID) &&
147 !dissector_uses_key(flow_dissector,
148 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) &&
149 !dissector_uses_key(flow_dissector,
150 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) &&
151 !dissector_uses_key(flow_dissector,
152 FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
153 !dissector_uses_key(flow_dissector,
154 FLOW_DISSECTOR_KEY_ENC_PORTS))
155 return;
156
157 info = skb_tunnel_info(skb);
158 if (!info)
159 return;
160
161 key = &info->key;
162
163 switch (ip_tunnel_info_af(info)) {
164 case AF_INET:
165 skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
166 flow_dissector,
167 target_container);
168 if (dissector_uses_key(flow_dissector,
169 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
170 struct flow_dissector_key_ipv4_addrs *ipv4;
171
172 ipv4 = skb_flow_dissector_target(flow_dissector,
173 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
174 target_container);
175 ipv4->src = key->u.ipv4.src;
176 ipv4->dst = key->u.ipv4.dst;
177 }
178 break;
179 case AF_INET6:
180 skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS,
181 flow_dissector,
182 target_container);
183 if (dissector_uses_key(flow_dissector,
184 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
185 struct flow_dissector_key_ipv6_addrs *ipv6;
186
187 ipv6 = skb_flow_dissector_target(flow_dissector,
188 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
189 target_container);
190 ipv6->src = key->u.ipv6.src;
191 ipv6->dst = key->u.ipv6.dst;
192 }
193 break;
194 }
195
196 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
197 struct flow_dissector_key_keyid *keyid;
198
199 keyid = skb_flow_dissector_target(flow_dissector,
200 FLOW_DISSECTOR_KEY_ENC_KEYID,
201 target_container);
202 keyid->keyid = tunnel_id_to_key32(key->tun_id);
203 }
204
205 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
206 struct flow_dissector_key_ports *tp;
207
208 tp = skb_flow_dissector_target(flow_dissector,
209 FLOW_DISSECTOR_KEY_ENC_PORTS,
210 target_container);
211 tp->src = key->tp_src;
212 tp->dst = key->tp_dst;
213 }
214}
215
118static enum flow_dissect_ret 216static enum flow_dissect_ret
119__skb_flow_dissect_mpls(const struct sk_buff *skb, 217__skb_flow_dissect_mpls(const struct sk_buff *skb,
120 struct flow_dissector *flow_dissector, 218 struct flow_dissector *flow_dissector,
@@ -478,6 +576,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
478 FLOW_DISSECTOR_KEY_BASIC, 576 FLOW_DISSECTOR_KEY_BASIC,
479 target_container); 577 target_container);
480 578
579 __skb_flow_dissect_tunnel_info(skb, flow_dissector,
580 target_container);
581
481 if (dissector_uses_key(flow_dissector, 582 if (dissector_uses_key(flow_dissector,
482 FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 583 FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
483 struct ethhdr *eth = eth_hdr(skb); 584 struct ethhdr *eth = eth_hdr(skb);
@@ -672,23 +773,22 @@ proto_again:
672 break; 773 break;
673 } 774 }
674 case htons(ETH_P_TIPC): { 775 case htons(ETH_P_TIPC): {
675 struct { 776 struct tipc_basic_hdr *hdr, _hdr;
676 __be32 pre[3]; 777
677 __be32 srcnode; 778 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
678 } *hdr, _hdr; 779 data, hlen, &_hdr);
679 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
680 if (!hdr) { 780 if (!hdr) {
681 fdret = FLOW_DISSECT_RET_OUT_BAD; 781 fdret = FLOW_DISSECT_RET_OUT_BAD;
682 break; 782 break;
683 } 783 }
684 784
685 if (dissector_uses_key(flow_dissector, 785 if (dissector_uses_key(flow_dissector,
686 FLOW_DISSECTOR_KEY_TIPC_ADDRS)) { 786 FLOW_DISSECTOR_KEY_TIPC)) {
687 key_addrs = skb_flow_dissector_target(flow_dissector, 787 key_addrs = skb_flow_dissector_target(flow_dissector,
688 FLOW_DISSECTOR_KEY_TIPC_ADDRS, 788 FLOW_DISSECTOR_KEY_TIPC,
689 target_container); 789 target_container);
690 key_addrs->tipcaddrs.srcnode = hdr->srcnode; 790 key_addrs->tipckey.key = tipc_hdr_rps_key(hdr);
691 key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS; 791 key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC;
692 } 792 }
693 fdret = FLOW_DISSECT_RET_OUT_GOOD; 793 fdret = FLOW_DISSECT_RET_OUT_GOOD;
694 break; 794 break;
@@ -924,8 +1024,8 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
924 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 1024 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
925 diff -= sizeof(flow->addrs.v6addrs); 1025 diff -= sizeof(flow->addrs.v6addrs);
926 break; 1026 break;
927 case FLOW_DISSECTOR_KEY_TIPC_ADDRS: 1027 case FLOW_DISSECTOR_KEY_TIPC:
928 diff -= sizeof(flow->addrs.tipcaddrs); 1028 diff -= sizeof(flow->addrs.tipckey);
929 break; 1029 break;
930 } 1030 }
931 return (sizeof(*flow) - diff) / sizeof(u32); 1031 return (sizeof(*flow) - diff) / sizeof(u32);
@@ -939,8 +1039,8 @@ __be32 flow_get_u32_src(const struct flow_keys *flow)
939 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 1039 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
940 return (__force __be32)ipv6_addr_hash( 1040 return (__force __be32)ipv6_addr_hash(
941 &flow->addrs.v6addrs.src); 1041 &flow->addrs.v6addrs.src);
942 case FLOW_DISSECTOR_KEY_TIPC_ADDRS: 1042 case FLOW_DISSECTOR_KEY_TIPC:
943 return flow->addrs.tipcaddrs.srcnode; 1043 return flow->addrs.tipckey.key;
944 default: 1044 default:
945 return 0; 1045 return 0;
946 } 1046 }
@@ -1221,8 +1321,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
1221 .offset = offsetof(struct flow_keys, addrs.v6addrs), 1321 .offset = offsetof(struct flow_keys, addrs.v6addrs),
1222 }, 1322 },
1223 { 1323 {
1224 .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS, 1324 .key_id = FLOW_DISSECTOR_KEY_TIPC,
1225 .offset = offsetof(struct flow_keys, addrs.tipcaddrs), 1325 .offset = offsetof(struct flow_keys, addrs.tipckey),
1226 }, 1326 },
1227 { 1327 {
1228 .key_id = FLOW_DISSECTOR_KEY_PORTS, 1328 .key_id = FLOW_DISSECTOR_KEY_PORTS,
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 7c1ffd6f9501..9834cfa21b21 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -76,9 +76,9 @@ static void est_fetch_counters(struct net_rate_estimator *e,
76 76
77} 77}
78 78
79static void est_timer(unsigned long arg) 79static void est_timer(struct timer_list *t)
80{ 80{
81 struct net_rate_estimator *est = (struct net_rate_estimator *)arg; 81 struct net_rate_estimator *est = from_timer(est, t, timer);
82 struct gnet_stats_basic_packed b; 82 struct gnet_stats_basic_packed b;
83 u64 rate, brate; 83 u64 rate, brate;
84 84
@@ -170,7 +170,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
170 } 170 }
171 171
172 est->next_jiffies = jiffies + ((HZ/4) << intvl_log); 172 est->next_jiffies = jiffies + ((HZ/4) << intvl_log);
173 setup_timer(&est->timer, est_timer, (unsigned long)est); 173 timer_setup(&est->timer, est_timer, 0);
174 mod_timer(&est->timer, est->next_jiffies); 174 mod_timer(&est->timer, est->next_jiffies);
175 175
176 rcu_assign_pointer(*rate_est, est); 176 rcu_assign_pointer(*rate_est, est);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 1307731ddfe4..e7e626fb87bb 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -51,7 +51,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
51 */ 51 */
52 preempt_disable(); 52 preempt_disable();
53 rcu_read_lock(); 53 rcu_read_lock();
54 bpf_compute_data_end(skb); 54 bpf_compute_data_pointers(skb);
55 ret = bpf_prog_run_save_cb(lwt->prog, skb); 55 ret = bpf_prog_run_save_cb(lwt->prog, skb);
56 rcu_read_unlock(); 56 rcu_read_unlock();
57 57
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 16a1a4c4eb57..d1f5fe986edd 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -51,7 +51,7 @@ do { \
51 51
52#define PNEIGH_HASHMASK 0xF 52#define PNEIGH_HASHMASK 0xF
53 53
54static void neigh_timer_handler(unsigned long arg); 54static void neigh_timer_handler(struct timer_list *t);
55static void __neigh_notify(struct neighbour *n, int type, int flags, 55static void __neigh_notify(struct neighbour *n, int type, int flags,
56 u32 pid); 56 u32 pid);
57static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid); 57static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
@@ -331,7 +331,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
331 n->output = neigh_blackhole; 331 n->output = neigh_blackhole;
332 seqlock_init(&n->hh.hh_lock); 332 seqlock_init(&n->hh.hh_lock);
333 n->parms = neigh_parms_clone(&tbl->parms); 333 n->parms = neigh_parms_clone(&tbl->parms);
334 setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n); 334 timer_setup(&n->timer, neigh_timer_handler, 0);
335 335
336 NEIGH_CACHE_STAT_INC(tbl, allocs); 336 NEIGH_CACHE_STAT_INC(tbl, allocs);
337 n->tbl = tbl; 337 n->tbl = tbl;
@@ -457,7 +457,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
457 const void *pkey) 457 const void *pkey)
458{ 458{
459 struct neighbour *n; 459 struct neighbour *n;
460 int key_len = tbl->key_len; 460 unsigned int key_len = tbl->key_len;
461 u32 hash_val; 461 u32 hash_val;
462 struct neigh_hash_table *nht; 462 struct neigh_hash_table *nht;
463 463
@@ -488,7 +488,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
488 struct net_device *dev, bool want_ref) 488 struct net_device *dev, bool want_ref)
489{ 489{
490 u32 hash_val; 490 u32 hash_val;
491 int key_len = tbl->key_len; 491 unsigned int key_len = tbl->key_len;
492 int error; 492 int error;
493 struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev); 493 struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
494 struct neigh_hash_table *nht; 494 struct neigh_hash_table *nht;
@@ -572,7 +572,7 @@ out_neigh_release:
572} 572}
573EXPORT_SYMBOL(__neigh_create); 573EXPORT_SYMBOL(__neigh_create);
574 574
575static u32 pneigh_hash(const void *pkey, int key_len) 575static u32 pneigh_hash(const void *pkey, unsigned int key_len)
576{ 576{
577 u32 hash_val = *(u32 *)(pkey + key_len - 4); 577 u32 hash_val = *(u32 *)(pkey + key_len - 4);
578 hash_val ^= (hash_val >> 16); 578 hash_val ^= (hash_val >> 16);
@@ -585,7 +585,7 @@ static u32 pneigh_hash(const void *pkey, int key_len)
585static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, 585static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
586 struct net *net, 586 struct net *net,
587 const void *pkey, 587 const void *pkey,
588 int key_len, 588 unsigned int key_len,
589 struct net_device *dev) 589 struct net_device *dev)
590{ 590{
591 while (n) { 591 while (n) {
@@ -601,7 +601,7 @@ static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
601struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, 601struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
602 struct net *net, const void *pkey, struct net_device *dev) 602 struct net *net, const void *pkey, struct net_device *dev)
603{ 603{
604 int key_len = tbl->key_len; 604 unsigned int key_len = tbl->key_len;
605 u32 hash_val = pneigh_hash(pkey, key_len); 605 u32 hash_val = pneigh_hash(pkey, key_len);
606 606
607 return __pneigh_lookup_1(tbl->phash_buckets[hash_val], 607 return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
@@ -614,7 +614,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
614 struct net_device *dev, int creat) 614 struct net_device *dev, int creat)
615{ 615{
616 struct pneigh_entry *n; 616 struct pneigh_entry *n;
617 int key_len = tbl->key_len; 617 unsigned int key_len = tbl->key_len;
618 u32 hash_val = pneigh_hash(pkey, key_len); 618 u32 hash_val = pneigh_hash(pkey, key_len);
619 619
620 read_lock_bh(&tbl->lock); 620 read_lock_bh(&tbl->lock);
@@ -659,7 +659,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
659 struct net_device *dev) 659 struct net_device *dev)
660{ 660{
661 struct pneigh_entry *n, **np; 661 struct pneigh_entry *n, **np;
662 int key_len = tbl->key_len; 662 unsigned int key_len = tbl->key_len;
663 u32 hash_val = pneigh_hash(pkey, key_len); 663 u32 hash_val = pneigh_hash(pkey, key_len);
664 664
665 write_lock_bh(&tbl->lock); 665 write_lock_bh(&tbl->lock);
@@ -903,10 +903,10 @@ static void neigh_probe(struct neighbour *neigh)
903 903
904/* Called when a timer expires for a neighbour entry. */ 904/* Called when a timer expires for a neighbour entry. */
905 905
906static void neigh_timer_handler(unsigned long arg) 906static void neigh_timer_handler(struct timer_list *t)
907{ 907{
908 unsigned long now, next; 908 unsigned long now, next;
909 struct neighbour *neigh = (struct neighbour *)arg; 909 struct neighbour *neigh = from_timer(neigh, t, timer);
910 unsigned int state; 910 unsigned int state;
911 int notify = 0; 911 int notify = 0;
912 912
@@ -1391,9 +1391,9 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1391} 1391}
1392EXPORT_SYMBOL(neigh_direct_output); 1392EXPORT_SYMBOL(neigh_direct_output);
1393 1393
1394static void neigh_proxy_process(unsigned long arg) 1394static void neigh_proxy_process(struct timer_list *t)
1395{ 1395{
1396 struct neigh_table *tbl = (struct neigh_table *)arg; 1396 struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1397 long sched_next = 0; 1397 long sched_next = 0;
1398 unsigned long now = jiffies; 1398 unsigned long now = jiffies;
1399 struct sk_buff *skb, *n; 1399 struct sk_buff *skb, *n;
@@ -1573,7 +1573,7 @@ void neigh_table_init(int index, struct neigh_table *tbl)
1573 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); 1573 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1574 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, 1574 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1575 tbl->parms.reachable_time); 1575 tbl->parms.reachable_time);
1576 setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); 1576 timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1577 skb_queue_head_init_class(&tbl->proxy_queue, 1577 skb_queue_head_init_class(&tbl->proxy_queue,
1578 &neigh_table_proxy_queue_class); 1578 &neigh_table_proxy_queue_class);
1579 1579
@@ -1662,7 +1662,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1662 if (tbl == NULL) 1662 if (tbl == NULL)
1663 return -EAFNOSUPPORT; 1663 return -EAFNOSUPPORT;
1664 1664
1665 if (nla_len(dst_attr) < tbl->key_len) 1665 if (nla_len(dst_attr) < (int)tbl->key_len)
1666 goto out; 1666 goto out;
1667 1667
1668 if (ndm->ndm_flags & NTF_PROXY) { 1668 if (ndm->ndm_flags & NTF_PROXY) {
@@ -1730,7 +1730,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1730 if (tbl == NULL) 1730 if (tbl == NULL)
1731 return -EAFNOSUPPORT; 1731 return -EAFNOSUPPORT;
1732 1732
1733 if (nla_len(tb[NDA_DST]) < tbl->key_len) 1733 if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
1734 goto out; 1734 goto out;
1735 dst = nla_data(tb[NDA_DST]); 1735 dst = nla_data(tb[NDA_DST]);
1736 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; 1736 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 927a6dcbad96..799b75268291 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -382,7 +382,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
382 struct net_device *netdev = to_net_dev(dev); 382 struct net_device *netdev = to_net_dev(dev);
383 struct net *net = dev_net(netdev); 383 struct net *net = dev_net(netdev);
384 size_t count = len; 384 size_t count = len;
385 ssize_t ret; 385 ssize_t ret = 0;
386 386
387 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 387 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
388 return -EPERM; 388 return -EPERM;
@@ -393,23 +393,30 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
393 393
394 if (!rtnl_trylock()) 394 if (!rtnl_trylock())
395 return restart_syscall(); 395 return restart_syscall();
396 ret = dev_set_alias(netdev, buf, count); 396
397 if (dev_isalive(netdev)) {
398 ret = dev_set_alias(netdev, buf, count);
399 if (ret < 0)
400 goto err;
401 ret = len;
402 netdev_state_change(netdev);
403 }
404err:
397 rtnl_unlock(); 405 rtnl_unlock();
398 406
399 return ret < 0 ? ret : len; 407 return ret;
400} 408}
401 409
402static ssize_t ifalias_show(struct device *dev, 410static ssize_t ifalias_show(struct device *dev,
403 struct device_attribute *attr, char *buf) 411 struct device_attribute *attr, char *buf)
404{ 412{
405 const struct net_device *netdev = to_net_dev(dev); 413 const struct net_device *netdev = to_net_dev(dev);
414 char tmp[IFALIASZ];
406 ssize_t ret = 0; 415 ssize_t ret = 0;
407 416
408 if (!rtnl_trylock()) 417 ret = dev_get_alias(netdev, tmp, sizeof(tmp));
409 return restart_syscall(); 418 if (ret > 0)
410 if (netdev->ifalias) 419 ret = sprintf(buf, "%s\n", tmp);
411 ret = sprintf(buf, "%s\n", netdev->ifalias);
412 rtnl_unlock();
413 return ret; 420 return ret;
414} 421}
415static DEVICE_ATTR_RW(ifalias); 422static DEVICE_ATTR_RW(ifalias);
@@ -1488,7 +1495,10 @@ static void netdev_release(struct device *d)
1488 1495
1489 BUG_ON(dev->reg_state != NETREG_RELEASED); 1496 BUG_ON(dev->reg_state != NETREG_RELEASED);
1490 1497
1491 kfree(dev->ifalias); 1498 /* no need to wait for rcu grace period:
1499 * device is dead and about to be freed.
1500 */
1501 kfree(rcu_access_pointer(dev->ifalias));
1492 netdev_freemem(dev); 1502 netdev_freemem(dev);
1493} 1503}
1494 1504
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 71f209542364..380934580fa1 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -32,6 +32,7 @@
32#include <trace/events/napi.h> 32#include <trace/events/napi.h>
33#include <trace/events/sock.h> 33#include <trace/events/sock.h>
34#include <trace/events/udp.h> 34#include <trace/events/udp.h>
35#include <trace/events/tcp.h>
35#include <trace/events/fib.h> 36#include <trace/events/fib.h>
36#include <trace/events/qdisc.h> 37#include <trace/events/qdisc.h>
37#if IS_ENABLED(CONFIG_IPV6) 38#if IS_ENABLED(CONFIG_IPV6)
@@ -49,3 +50,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
49EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); 50EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
50 51
51EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); 52EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
53
54EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6cfdc7c84c48..b797832565d3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -234,6 +234,7 @@ int peernet2id_alloc(struct net *net, struct net *peer)
234 rtnl_net_notifyid(net, RTM_NEWNSID, id); 234 rtnl_net_notifyid(net, RTM_NEWNSID, id);
235 return id; 235 return id;
236} 236}
237EXPORT_SYMBOL_GPL(peernet2id_alloc);
237 238
238/* This function returns, if assigned, the id of a peer netns. */ 239/* This function returns, if assigned, the id of a peer netns. */
239int peernet2id(struct net *net, struct net *peer) 240int peernet2id(struct net *net, struct net *peer)
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 3b2034f6d49d..f95a15086225 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2165,7 +2165,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
2165 + pkt_dev->pkt_overhead; 2165 + pkt_dev->pkt_overhead;
2166 } 2166 }
2167 2167
2168 for (i = 0; i < IN6_ADDR_HSIZE; i++) 2168 for (i = 0; i < sizeof(struct in6_addr); i++)
2169 if (pkt_dev->cur_in6_saddr.s6_addr[i]) { 2169 if (pkt_dev->cur_in6_saddr.s6_addr[i]) {
2170 set = 1; 2170 set = 1;
2171 break; 2171 break;
@@ -2711,7 +2711,7 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
2711static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, 2711static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
2712 int datalen) 2712 int datalen)
2713{ 2713{
2714 struct timeval timestamp; 2714 struct timespec64 timestamp;
2715 struct pktgen_hdr *pgh; 2715 struct pktgen_hdr *pgh;
2716 2716
2717 pgh = skb_put(skb, sizeof(*pgh)); 2717 pgh = skb_put(skb, sizeof(*pgh));
@@ -2773,9 +2773,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
2773 pgh->tv_sec = 0; 2773 pgh->tv_sec = 0;
2774 pgh->tv_usec = 0; 2774 pgh->tv_usec = 0;
2775 } else { 2775 } else {
2776 do_gettimeofday(&timestamp); 2776 /*
2777 * pgh->tv_sec wraps in y2106 when interpreted as unsigned
2778 * as done by wireshark, or y2038 when interpreted as signed.
2779 * This is probably harmless, but if anyone wants to improve
2780 * it, we could introduce a variant that puts 64-bit nanoseconds
2781 * into the respective header bytes.
2782 * This would also be slightly faster to read.
2783 */
2784 ktime_get_real_ts64(&timestamp);
2777 pgh->tv_sec = htonl(timestamp.tv_sec); 2785 pgh->tv_sec = htonl(timestamp.tv_sec);
2778 pgh->tv_usec = htonl(timestamp.tv_usec); 2786 pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC);
2779 } 2787 }
2780} 2788}
2781 2789
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5ace48926b19..dabba2a91fc8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -453,7 +453,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
453{ 453{
454 const struct rtnl_af_ops *ops; 454 const struct rtnl_af_ops *ops;
455 455
456 list_for_each_entry(ops, &rtnl_af_ops, list) { 456 list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
457 if (ops->family == family) 457 if (ops->family == family)
458 return ops; 458 return ops;
459 } 459 }
@@ -470,32 +470,22 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
470void rtnl_af_register(struct rtnl_af_ops *ops) 470void rtnl_af_register(struct rtnl_af_ops *ops)
471{ 471{
472 rtnl_lock(); 472 rtnl_lock();
473 list_add_tail(&ops->list, &rtnl_af_ops); 473 list_add_tail_rcu(&ops->list, &rtnl_af_ops);
474 rtnl_unlock(); 474 rtnl_unlock();
475} 475}
476EXPORT_SYMBOL_GPL(rtnl_af_register); 476EXPORT_SYMBOL_GPL(rtnl_af_register);
477 477
478/** 478/**
479 * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
480 * @ops: struct rtnl_af_ops * to unregister
481 *
482 * The caller must hold the rtnl_mutex.
483 */
484void __rtnl_af_unregister(struct rtnl_af_ops *ops)
485{
486 list_del(&ops->list);
487}
488EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
489
490/**
491 * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. 479 * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
492 * @ops: struct rtnl_af_ops * to unregister 480 * @ops: struct rtnl_af_ops * to unregister
493 */ 481 */
494void rtnl_af_unregister(struct rtnl_af_ops *ops) 482void rtnl_af_unregister(struct rtnl_af_ops *ops)
495{ 483{
496 rtnl_lock(); 484 rtnl_lock();
497 __rtnl_af_unregister(ops); 485 list_del_rcu(&ops->list);
498 rtnl_unlock(); 486 rtnl_unlock();
487
488 synchronize_rcu();
499} 489}
500EXPORT_SYMBOL_GPL(rtnl_af_unregister); 490EXPORT_SYMBOL_GPL(rtnl_af_unregister);
501 491
@@ -508,13 +498,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
508 /* IFLA_AF_SPEC */ 498 /* IFLA_AF_SPEC */
509 size = nla_total_size(sizeof(struct nlattr)); 499 size = nla_total_size(sizeof(struct nlattr));
510 500
511 list_for_each_entry(af_ops, &rtnl_af_ops, list) { 501 rcu_read_lock();
502 list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
512 if (af_ops->get_link_af_size) { 503 if (af_ops->get_link_af_size) {
513 /* AF_* + nested data */ 504 /* AF_* + nested data */
514 size += nla_total_size(sizeof(struct nlattr)) + 505 size += nla_total_size(sizeof(struct nlattr)) +
515 af_ops->get_link_af_size(dev, ext_filter_mask); 506 af_ops->get_link_af_size(dev, ext_filter_mask);
516 } 507 }
517 } 508 }
509 rcu_read_unlock();
518 510
519 return size; 511 return size;
520} 512}
@@ -522,11 +514,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
522static bool rtnl_have_link_slave_info(const struct net_device *dev) 514static bool rtnl_have_link_slave_info(const struct net_device *dev)
523{ 515{
524 struct net_device *master_dev; 516 struct net_device *master_dev;
517 bool ret = false;
525 518
526 master_dev = netdev_master_upper_dev_get((struct net_device *) dev); 519 rcu_read_lock();
520
521 master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
527 if (master_dev && master_dev->rtnl_link_ops) 522 if (master_dev && master_dev->rtnl_link_ops)
528 return true; 523 ret = true;
529 return false; 524 rcu_read_unlock();
525 return ret;
530} 526}
531 527
532static int rtnl_link_slave_info_fill(struct sk_buff *skb, 528static int rtnl_link_slave_info_fill(struct sk_buff *skb,
@@ -923,8 +919,10 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
923 + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ 919 + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
924 + rtnl_xdp_size() /* IFLA_XDP */ 920 + rtnl_xdp_size() /* IFLA_XDP */
925 + nla_total_size(4) /* IFLA_EVENT */ 921 + nla_total_size(4) /* IFLA_EVENT */
926 + nla_total_size(1); /* IFLA_PROTO_DOWN */ 922 + nla_total_size(4) /* IFLA_NEW_NETNSID */
927 923 + nla_total_size(1) /* IFLA_PROTO_DOWN */
924 + nla_total_size(4) /* IFLA_IF_NETNSID */
925 + 0;
928} 926}
929 927
930static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) 928static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1211,6 +1209,36 @@ nla_put_vfinfo_failure:
1211 return -EMSGSIZE; 1209 return -EMSGSIZE;
1212} 1210}
1213 1211
1212static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
1213 struct net_device *dev,
1214 u32 ext_filter_mask)
1215{
1216 struct nlattr *vfinfo;
1217 int i, num_vfs;
1218
1219 if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0))
1220 return 0;
1221
1222 num_vfs = dev_num_vf(dev->dev.parent);
1223 if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs))
1224 return -EMSGSIZE;
1225
1226 if (!dev->netdev_ops->ndo_get_vf_config)
1227 return 0;
1228
1229 vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
1230 if (!vfinfo)
1231 return -EMSGSIZE;
1232
1233 for (i = 0; i < num_vfs; i++) {
1234 if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
1235 return -EMSGSIZE;
1236 }
1237
1238 nla_nest_end(skb, vfinfo);
1239 return 0;
1240}
1241
1214static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) 1242static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
1215{ 1243{
1216 struct rtnl_link_ifmap map; 1244 struct rtnl_link_ifmap map;
@@ -1242,10 +1270,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
1242 *prog_id = generic_xdp_prog->aux->id; 1270 *prog_id = generic_xdp_prog->aux->id;
1243 return XDP_ATTACHED_SKB; 1271 return XDP_ATTACHED_SKB;
1244 } 1272 }
1245 if (!ops->ndo_xdp) 1273 if (!ops->ndo_bpf)
1246 return XDP_ATTACHED_NONE; 1274 return XDP_ATTACHED_NONE;
1247 1275
1248 return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id); 1276 return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id);
1249} 1277}
1250 1278
1251static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) 1279static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1307,16 +1335,108 @@ static u32 rtnl_get_event(unsigned long event)
1307 return rtnl_event_type; 1335 return rtnl_event_type;
1308} 1336}
1309 1337
1310static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, 1338static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
1339{
1340 const struct net_device *upper_dev;
1341 int ret = 0;
1342
1343 rcu_read_lock();
1344
1345 upper_dev = netdev_master_upper_dev_get_rcu(dev);
1346 if (upper_dev)
1347 ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex);
1348
1349 rcu_read_unlock();
1350 return ret;
1351}
1352
1353static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev)
1354{
1355 int ifindex = dev_get_iflink(dev);
1356
1357 if (dev->ifindex == ifindex)
1358 return 0;
1359
1360 return nla_put_u32(skb, IFLA_LINK, ifindex);
1361}
1362
1363static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
1364 struct net_device *dev)
1365{
1366 char buf[IFALIASZ];
1367 int ret;
1368
1369 ret = dev_get_alias(dev, buf, sizeof(buf));
1370 return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0;
1371}
1372
1373static int rtnl_fill_link_netnsid(struct sk_buff *skb,
1374 const struct net_device *dev,
1375 struct net *src_net)
1376{
1377 if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) {
1378 struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
1379
1380 if (!net_eq(dev_net(dev), link_net)) {
1381 int id = peernet2id_alloc(src_net, link_net);
1382
1383 if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
1384 return -EMSGSIZE;
1385 }
1386 }
1387
1388 return 0;
1389}
1390
1391static int rtnl_fill_link_af(struct sk_buff *skb,
1392 const struct net_device *dev,
1393 u32 ext_filter_mask)
1394{
1395 const struct rtnl_af_ops *af_ops;
1396 struct nlattr *af_spec;
1397
1398 af_spec = nla_nest_start(skb, IFLA_AF_SPEC);
1399 if (!af_spec)
1400 return -EMSGSIZE;
1401
1402 list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
1403 struct nlattr *af;
1404 int err;
1405
1406 if (!af_ops->fill_link_af)
1407 continue;
1408
1409 af = nla_nest_start(skb, af_ops->family);
1410 if (!af)
1411 return -EMSGSIZE;
1412
1413 err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
1414 /*
1415 * Caller may return ENODATA to indicate that there
1416 * was no data to be dumped. This is not an error, it
1417 * means we should trim the attribute header and
1418 * continue.
1419 */
1420 if (err == -ENODATA)
1421 nla_nest_cancel(skb, af);
1422 else if (err < 0)
1423 return -EMSGSIZE;
1424
1425 nla_nest_end(skb, af);
1426 }
1427
1428 nla_nest_end(skb, af_spec);
1429 return 0;
1430}
1431
1432static int rtnl_fill_ifinfo(struct sk_buff *skb,
1433 struct net_device *dev, struct net *src_net,
1311 int type, u32 pid, u32 seq, u32 change, 1434 int type, u32 pid, u32 seq, u32 change,
1312 unsigned int flags, u32 ext_filter_mask, 1435 unsigned int flags, u32 ext_filter_mask,
1313 u32 event) 1436 u32 event, int *new_nsid, int tgt_netnsid)
1314{ 1437{
1315 struct ifinfomsg *ifm; 1438 struct ifinfomsg *ifm;
1316 struct nlmsghdr *nlh; 1439 struct nlmsghdr *nlh;
1317 struct nlattr *af_spec;
1318 struct rtnl_af_ops *af_ops;
1319 struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
1320 1440
1321 ASSERT_RTNL(); 1441 ASSERT_RTNL();
1322 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); 1442 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -1331,6 +1451,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1331 ifm->ifi_flags = dev_get_flags(dev); 1451 ifm->ifi_flags = dev_get_flags(dev);
1332 ifm->ifi_change = change; 1452 ifm->ifi_change = change;
1333 1453
1454 if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid))
1455 goto nla_put_failure;
1456
1334 if (nla_put_string(skb, IFLA_IFNAME, dev->name) || 1457 if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
1335 nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || 1458 nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
1336 nla_put_u8(skb, IFLA_OPERSTATE, 1459 nla_put_u8(skb, IFLA_OPERSTATE,
@@ -1345,15 +1468,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1345#ifdef CONFIG_RPS 1468#ifdef CONFIG_RPS
1346 nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || 1469 nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
1347#endif 1470#endif
1348 (dev->ifindex != dev_get_iflink(dev) && 1471 nla_put_iflink(skb, dev) ||
1349 nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || 1472 put_master_ifindex(skb, dev) ||
1350 (upper_dev &&
1351 nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
1352 nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || 1473 nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
1353 (dev->qdisc && 1474 (dev->qdisc &&
1354 nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || 1475 nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
1355 (dev->ifalias && 1476 nla_put_ifalias(skb, dev) ||
1356 nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
1357 nla_put_u32(skb, IFLA_CARRIER_CHANGES, 1477 nla_put_u32(skb, IFLA_CARRIER_CHANGES,
1358 atomic_read(&dev->carrier_changes)) || 1478 atomic_read(&dev->carrier_changes)) ||
1359 nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) 1479 nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
@@ -1385,27 +1505,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1385 if (rtnl_fill_stats(skb, dev)) 1505 if (rtnl_fill_stats(skb, dev))
1386 goto nla_put_failure; 1506 goto nla_put_failure;
1387 1507
1388 if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) && 1508 if (rtnl_fill_vf(skb, dev, ext_filter_mask))
1389 nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
1390 goto nla_put_failure; 1509 goto nla_put_failure;
1391 1510
1392 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
1393 ext_filter_mask & RTEXT_FILTER_VF) {
1394 int i;
1395 struct nlattr *vfinfo;
1396 int num_vfs = dev_num_vf(dev->dev.parent);
1397
1398 vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
1399 if (!vfinfo)
1400 goto nla_put_failure;
1401 for (i = 0; i < num_vfs; i++) {
1402 if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
1403 goto nla_put_failure;
1404 }
1405
1406 nla_nest_end(skb, vfinfo);
1407 }
1408
1409 if (rtnl_port_fill(skb, dev, ext_filter_mask)) 1511 if (rtnl_port_fill(skb, dev, ext_filter_mask))
1410 goto nla_put_failure; 1512 goto nla_put_failure;
1411 1513
@@ -1417,51 +1519,23 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1417 goto nla_put_failure; 1519 goto nla_put_failure;
1418 } 1520 }
1419 1521
1420 if (dev->rtnl_link_ops && 1522 if (rtnl_fill_link_netnsid(skb, dev, src_net))
1421 dev->rtnl_link_ops->get_link_net) {
1422 struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
1423
1424 if (!net_eq(dev_net(dev), link_net)) {
1425 int id = peernet2id_alloc(dev_net(dev), link_net);
1426
1427 if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
1428 goto nla_put_failure;
1429 }
1430 }
1431
1432 if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
1433 goto nla_put_failure; 1523 goto nla_put_failure;
1434 1524
1435 list_for_each_entry(af_ops, &rtnl_af_ops, list) { 1525 if (new_nsid &&
1436 if (af_ops->fill_link_af) { 1526 nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
1437 struct nlattr *af; 1527 goto nla_put_failure;
1438 int err;
1439
1440 if (!(af = nla_nest_start(skb, af_ops->family)))
1441 goto nla_put_failure;
1442
1443 err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
1444
1445 /*
1446 * Caller may return ENODATA to indicate that there
1447 * was no data to be dumped. This is not an error, it
1448 * means we should trim the attribute header and
1449 * continue.
1450 */
1451 if (err == -ENODATA)
1452 nla_nest_cancel(skb, af);
1453 else if (err < 0)
1454 goto nla_put_failure;
1455
1456 nla_nest_end(skb, af);
1457 }
1458 }
1459 1528
1460 nla_nest_end(skb, af_spec); 1529 rcu_read_lock();
1530 if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
1531 goto nla_put_failure_rcu;
1532 rcu_read_unlock();
1461 1533
1462 nlmsg_end(skb, nlh); 1534 nlmsg_end(skb, nlh);
1463 return 0; 1535 return 0;
1464 1536
1537nla_put_failure_rcu:
1538 rcu_read_unlock();
1465nla_put_failure: 1539nla_put_failure:
1466 nlmsg_cancel(skb, nlh); 1540 nlmsg_cancel(skb, nlh);
1467 return -EMSGSIZE; 1541 return -EMSGSIZE;
@@ -1503,6 +1577,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
1503 [IFLA_XDP] = { .type = NLA_NESTED }, 1577 [IFLA_XDP] = { .type = NLA_NESTED },
1504 [IFLA_EVENT] = { .type = NLA_U32 }, 1578 [IFLA_EVENT] = { .type = NLA_U32 },
1505 [IFLA_GROUP] = { .type = NLA_U32 }, 1579 [IFLA_GROUP] = { .type = NLA_U32 },
1580 [IFLA_IF_NETNSID] = { .type = NLA_S32 },
1506}; 1581};
1507 1582
1508static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { 1583static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1606,9 +1681,28 @@ static bool link_dump_filtered(struct net_device *dev,
1606 return false; 1681 return false;
1607} 1682}
1608 1683
1684static struct net *get_target_net(struct sk_buff *skb, int netnsid)
1685{
1686 struct net *net;
1687
1688 net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
1689 if (!net)
1690 return ERR_PTR(-EINVAL);
1691
1692 /* For now, the caller is required to have CAP_NET_ADMIN in
1693 * the user namespace owning the target net ns.
1694 */
1695 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
1696 put_net(net);
1697 return ERR_PTR(-EACCES);
1698 }
1699 return net;
1700}
1701
1609static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 1702static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1610{ 1703{
1611 struct net *net = sock_net(skb->sk); 1704 struct net *net = sock_net(skb->sk);
1705 struct net *tgt_net = net;
1612 int h, s_h; 1706 int h, s_h;
1613 int idx = 0, s_idx; 1707 int idx = 0, s_idx;
1614 struct net_device *dev; 1708 struct net_device *dev;
@@ -1618,6 +1712,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1618 const struct rtnl_link_ops *kind_ops = NULL; 1712 const struct rtnl_link_ops *kind_ops = NULL;
1619 unsigned int flags = NLM_F_MULTI; 1713 unsigned int flags = NLM_F_MULTI;
1620 int master_idx = 0; 1714 int master_idx = 0;
1715 int netnsid = -1;
1621 int err; 1716 int err;
1622 int hdrlen; 1717 int hdrlen;
1623 1718
@@ -1636,6 +1731,15 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1636 1731
1637 if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, 1732 if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
1638 ifla_policy, NULL) >= 0) { 1733 ifla_policy, NULL) >= 0) {
1734 if (tb[IFLA_IF_NETNSID]) {
1735 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
1736 tgt_net = get_target_net(skb, netnsid);
1737 if (IS_ERR(tgt_net)) {
1738 tgt_net = net;
1739 netnsid = -1;
1740 }
1741 }
1742
1639 if (tb[IFLA_EXT_MASK]) 1743 if (tb[IFLA_EXT_MASK])
1640 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); 1744 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
1641 1745
@@ -1651,17 +1755,19 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1651 1755
1652 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1756 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1653 idx = 0; 1757 idx = 0;
1654 head = &net->dev_index_head[h]; 1758 head = &tgt_net->dev_index_head[h];
1655 hlist_for_each_entry(dev, head, index_hlist) { 1759 hlist_for_each_entry(dev, head, index_hlist) {
1656 if (link_dump_filtered(dev, master_idx, kind_ops)) 1760 if (link_dump_filtered(dev, master_idx, kind_ops))
1657 goto cont; 1761 goto cont;
1658 if (idx < s_idx) 1762 if (idx < s_idx)
1659 goto cont; 1763 goto cont;
1660 err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, 1764 err = rtnl_fill_ifinfo(skb, dev, net,
1765 RTM_NEWLINK,
1661 NETLINK_CB(cb->skb).portid, 1766 NETLINK_CB(cb->skb).portid,
1662 cb->nlh->nlmsg_seq, 0, 1767 cb->nlh->nlmsg_seq, 0,
1663 flags, 1768 flags,
1664 ext_filter_mask, 0); 1769 ext_filter_mask, 0, NULL,
1770 netnsid);
1665 1771
1666 if (err < 0) { 1772 if (err < 0) {
1667 if (likely(skb->len)) 1773 if (likely(skb->len))
@@ -1680,6 +1786,8 @@ out_err:
1680 cb->args[0] = h; 1786 cb->args[0] = h;
1681 cb->seq = net->dev_base_seq; 1787 cb->seq = net->dev_base_seq;
1682 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 1788 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1789 if (netnsid >= 0)
1790 put_net(tgt_net);
1683 1791
1684 return err; 1792 return err;
1685} 1793}
@@ -1726,17 +1834,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
1726 nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { 1834 nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
1727 const struct rtnl_af_ops *af_ops; 1835 const struct rtnl_af_ops *af_ops;
1728 1836
1729 if (!(af_ops = rtnl_af_lookup(nla_type(af)))) 1837 rcu_read_lock();
1838 af_ops = rtnl_af_lookup(nla_type(af));
1839 if (!af_ops) {
1840 rcu_read_unlock();
1730 return -EAFNOSUPPORT; 1841 return -EAFNOSUPPORT;
1842 }
1731 1843
1732 if (!af_ops->set_link_af) 1844 if (!af_ops->set_link_af) {
1845 rcu_read_unlock();
1733 return -EOPNOTSUPP; 1846 return -EOPNOTSUPP;
1847 }
1734 1848
1735 if (af_ops->validate_link_af) { 1849 if (af_ops->validate_link_af) {
1736 err = af_ops->validate_link_af(dev, af); 1850 err = af_ops->validate_link_af(dev, af);
1737 if (err < 0) 1851 if (err < 0) {
1852 rcu_read_unlock();
1738 return err; 1853 return err;
1854 }
1739 } 1855 }
1856
1857 rcu_read_unlock();
1740 } 1858 }
1741 } 1859 }
1742 1860
@@ -1912,7 +2030,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
1912 return err; 2030 return err;
1913} 2031}
1914 2032
1915static int do_set_master(struct net_device *dev, int ifindex) 2033static int do_set_master(struct net_device *dev, int ifindex,
2034 struct netlink_ext_ack *extack)
1916{ 2035{
1917 struct net_device *upper_dev = netdev_master_upper_dev_get(dev); 2036 struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
1918 const struct net_device_ops *ops; 2037 const struct net_device_ops *ops;
@@ -1937,7 +2056,7 @@ static int do_set_master(struct net_device *dev, int ifindex)
1937 return -EINVAL; 2056 return -EINVAL;
1938 ops = upper_dev->netdev_ops; 2057 ops = upper_dev->netdev_ops;
1939 if (ops->ndo_add_slave) { 2058 if (ops->ndo_add_slave) {
1940 err = ops->ndo_add_slave(upper_dev, dev); 2059 err = ops->ndo_add_slave(upper_dev, dev, extack);
1941 if (err) 2060 if (err)
1942 return err; 2061 return err;
1943 } else { 2062 } else {
@@ -2070,7 +2189,7 @@ static int do_setlink(const struct sk_buff *skb,
2070 } 2189 }
2071 2190
2072 if (tb[IFLA_MASTER]) { 2191 if (tb[IFLA_MASTER]) {
2073 err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); 2192 err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
2074 if (err) 2193 if (err)
2075 goto errout; 2194 goto errout;
2076 status |= DO_SETLINK_MODIFIED; 2195 status |= DO_SETLINK_MODIFIED;
@@ -2193,13 +2312,17 @@ static int do_setlink(const struct sk_buff *skb,
2193 nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { 2312 nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
2194 const struct rtnl_af_ops *af_ops; 2313 const struct rtnl_af_ops *af_ops;
2195 2314
2196 if (!(af_ops = rtnl_af_lookup(nla_type(af)))) 2315 rcu_read_lock();
2197 BUG(); 2316
2317 BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
2198 2318
2199 err = af_ops->set_link_af(dev, af); 2319 err = af_ops->set_link_af(dev, af);
2200 if (err < 0) 2320 if (err < 0) {
2321 rcu_read_unlock();
2201 goto errout; 2322 goto errout;
2323 }
2202 2324
2325 rcu_read_unlock();
2203 status |= DO_SETLINK_NOTIFY; 2326 status |= DO_SETLINK_NOTIFY;
2204 } 2327 }
2205 } 2328 }
@@ -2277,6 +2400,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
2277 if (err < 0) 2400 if (err < 0)
2278 goto errout; 2401 goto errout;
2279 2402
2403 if (tb[IFLA_IF_NETNSID])
2404 return -EOPNOTSUPP;
2405
2280 if (tb[IFLA_IFNAME]) 2406 if (tb[IFLA_IFNAME])
2281 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); 2407 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
2282 else 2408 else
@@ -2371,6 +2497,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
2371 if (err < 0) 2497 if (err < 0)
2372 return err; 2498 return err;
2373 2499
2500 if (tb[IFLA_IF_NETNSID])
2501 return -EOPNOTSUPP;
2502
2374 if (tb[IFLA_IFNAME]) 2503 if (tb[IFLA_IFNAME])
2375 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); 2504 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
2376 2505
@@ -2502,6 +2631,9 @@ replay:
2502 if (err < 0) 2631 if (err < 0)
2503 return err; 2632 return err;
2504 2633
2634 if (tb[IFLA_IF_NETNSID])
2635 return -EOPNOTSUPP;
2636
2505 if (tb[IFLA_IFNAME]) 2637 if (tb[IFLA_IFNAME])
2506 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); 2638 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
2507 else 2639 else
@@ -2579,12 +2711,6 @@ replay:
2579 return err; 2711 return err;
2580 slave_data = slave_attr; 2712 slave_data = slave_attr;
2581 } 2713 }
2582 if (m_ops->slave_validate) {
2583 err = m_ops->slave_validate(tb, slave_data,
2584 extack);
2585 if (err < 0)
2586 return err;
2587 }
2588 } 2714 }
2589 2715
2590 if (dev) { 2716 if (dev) {
@@ -2714,7 +2840,8 @@ replay:
2714 goto out_unregister; 2840 goto out_unregister;
2715 } 2841 }
2716 if (tb[IFLA_MASTER]) { 2842 if (tb[IFLA_MASTER]) {
2717 err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); 2843 err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]),
2844 extack);
2718 if (err) 2845 if (err)
2719 goto out_unregister; 2846 goto out_unregister;
2720 } 2847 }
@@ -2740,11 +2867,13 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
2740 struct netlink_ext_ack *extack) 2867 struct netlink_ext_ack *extack)
2741{ 2868{
2742 struct net *net = sock_net(skb->sk); 2869 struct net *net = sock_net(skb->sk);
2870 struct net *tgt_net = net;
2743 struct ifinfomsg *ifm; 2871 struct ifinfomsg *ifm;
2744 char ifname[IFNAMSIZ]; 2872 char ifname[IFNAMSIZ];
2745 struct nlattr *tb[IFLA_MAX+1]; 2873 struct nlattr *tb[IFLA_MAX+1];
2746 struct net_device *dev = NULL; 2874 struct net_device *dev = NULL;
2747 struct sk_buff *nskb; 2875 struct sk_buff *nskb;
2876 int netnsid = -1;
2748 int err; 2877 int err;
2749 u32 ext_filter_mask = 0; 2878 u32 ext_filter_mask = 0;
2750 2879
@@ -2752,35 +2881,50 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
2752 if (err < 0) 2881 if (err < 0)
2753 return err; 2882 return err;
2754 2883
2884 if (tb[IFLA_IF_NETNSID]) {
2885 netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
2886 tgt_net = get_target_net(skb, netnsid);
2887 if (IS_ERR(tgt_net))
2888 return PTR_ERR(tgt_net);
2889 }
2890
2755 if (tb[IFLA_IFNAME]) 2891 if (tb[IFLA_IFNAME])
2756 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); 2892 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
2757 2893
2758 if (tb[IFLA_EXT_MASK]) 2894 if (tb[IFLA_EXT_MASK])
2759 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); 2895 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
2760 2896
2897 err = -EINVAL;
2761 ifm = nlmsg_data(nlh); 2898 ifm = nlmsg_data(nlh);
2762 if (ifm->ifi_index > 0) 2899 if (ifm->ifi_index > 0)
2763 dev = __dev_get_by_index(net, ifm->ifi_index); 2900 dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
2764 else if (tb[IFLA_IFNAME]) 2901 else if (tb[IFLA_IFNAME])
2765 dev = __dev_get_by_name(net, ifname); 2902 dev = __dev_get_by_name(tgt_net, ifname);
2766 else 2903 else
2767 return -EINVAL; 2904 goto out;
2768 2905
2906 err = -ENODEV;
2769 if (dev == NULL) 2907 if (dev == NULL)
2770 return -ENODEV; 2908 goto out;
2771 2909
2910 err = -ENOBUFS;
2772 nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); 2911 nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
2773 if (nskb == NULL) 2912 if (nskb == NULL)
2774 return -ENOBUFS; 2913 goto out;
2775 2914
2776 err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, 2915 err = rtnl_fill_ifinfo(nskb, dev, net,
2777 nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0); 2916 RTM_NEWLINK, NETLINK_CB(skb).portid,
2917 nlh->nlmsg_seq, 0, 0, ext_filter_mask,
2918 0, NULL, netnsid);
2778 if (err < 0) { 2919 if (err < 0) {
2779 /* -EMSGSIZE implies BUG in if_nlmsg_size */ 2920 /* -EMSGSIZE implies BUG in if_nlmsg_size */
2780 WARN_ON(err == -EMSGSIZE); 2921 WARN_ON(err == -EMSGSIZE);
2781 kfree_skb(nskb); 2922 kfree_skb(nskb);
2782 } else 2923 } else
2783 err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); 2924 err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
2925out:
2926 if (netnsid >= 0)
2927 put_net(tgt_net);
2784 2928
2785 return err; 2929 return err;
2786} 2930}
@@ -2859,7 +3003,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
2859 3003
2860struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, 3004struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
2861 unsigned int change, 3005 unsigned int change,
2862 u32 event, gfp_t flags) 3006 u32 event, gfp_t flags, int *new_nsid)
2863{ 3007{
2864 struct net *net = dev_net(dev); 3008 struct net *net = dev_net(dev);
2865 struct sk_buff *skb; 3009 struct sk_buff *skb;
@@ -2870,7 +3014,9 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
2870 if (skb == NULL) 3014 if (skb == NULL)
2871 goto errout; 3015 goto errout;
2872 3016
2873 err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event); 3017 err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
3018 type, 0, 0, change, 0, 0, event,
3019 new_nsid, -1);
2874 if (err < 0) { 3020 if (err < 0) {
2875 /* -EMSGSIZE implies BUG in if_nlmsg_size() */ 3021 /* -EMSGSIZE implies BUG in if_nlmsg_size() */
2876 WARN_ON(err == -EMSGSIZE); 3022 WARN_ON(err == -EMSGSIZE);
@@ -2893,14 +3039,14 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
2893 3039
2894static void rtmsg_ifinfo_event(int type, struct net_device *dev, 3040static void rtmsg_ifinfo_event(int type, struct net_device *dev,
2895 unsigned int change, u32 event, 3041 unsigned int change, u32 event,
2896 gfp_t flags) 3042 gfp_t flags, int *new_nsid)
2897{ 3043{
2898 struct sk_buff *skb; 3044 struct sk_buff *skb;
2899 3045
2900 if (dev->reg_state != NETREG_REGISTERED) 3046 if (dev->reg_state != NETREG_REGISTERED)
2901 return; 3047 return;
2902 3048
2903 skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags); 3049 skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid);
2904 if (skb) 3050 if (skb)
2905 rtmsg_ifinfo_send(skb, dev, flags); 3051 rtmsg_ifinfo_send(skb, dev, flags);
2906} 3052}
@@ -2908,9 +3054,15 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
2908void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, 3054void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
2909 gfp_t flags) 3055 gfp_t flags)
2910{ 3056{
2911 rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags); 3057 rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL);
3058}
3059
3060void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
3061 gfp_t flags, int *new_nsid)
3062{
3063 rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
3064 new_nsid);
2912} 3065}
2913EXPORT_SYMBOL(rtmsg_ifinfo);
2914 3066
2915static int nlmsg_populate_fdb_fill(struct sk_buff *skb, 3067static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
2916 struct net_device *dev, 3068 struct net_device *dev,
@@ -3017,21 +3169,21 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
3017} 3169}
3018EXPORT_SYMBOL(ndo_dflt_fdb_add); 3170EXPORT_SYMBOL(ndo_dflt_fdb_add);
3019 3171
3020static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid) 3172static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid,
3173 struct netlink_ext_ack *extack)
3021{ 3174{
3022 u16 vid = 0; 3175 u16 vid = 0;
3023 3176
3024 if (vlan_attr) { 3177 if (vlan_attr) {
3025 if (nla_len(vlan_attr) != sizeof(u16)) { 3178 if (nla_len(vlan_attr) != sizeof(u16)) {
3026 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n"); 3179 NL_SET_ERR_MSG(extack, "invalid vlan attribute size");
3027 return -EINVAL; 3180 return -EINVAL;
3028 } 3181 }
3029 3182
3030 vid = nla_get_u16(vlan_attr); 3183 vid = nla_get_u16(vlan_attr);
3031 3184
3032 if (!vid || vid >= VLAN_VID_MASK) { 3185 if (!vid || vid >= VLAN_VID_MASK) {
3033 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n", 3186 NL_SET_ERR_MSG(extack, "invalid vlan id");
3034 vid);
3035 return -EINVAL; 3187 return -EINVAL;
3036 } 3188 }
3037 } 3189 }
@@ -3056,24 +3208,24 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
3056 3208
3057 ndm = nlmsg_data(nlh); 3209 ndm = nlmsg_data(nlh);
3058 if (ndm->ndm_ifindex == 0) { 3210 if (ndm->ndm_ifindex == 0) {
3059 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n"); 3211 NL_SET_ERR_MSG(extack, "invalid ifindex");
3060 return -EINVAL; 3212 return -EINVAL;
3061 } 3213 }
3062 3214
3063 dev = __dev_get_by_index(net, ndm->ndm_ifindex); 3215 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
3064 if (dev == NULL) { 3216 if (dev == NULL) {
3065 pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n"); 3217 NL_SET_ERR_MSG(extack, "unknown ifindex");
3066 return -ENODEV; 3218 return -ENODEV;
3067 } 3219 }
3068 3220
3069 if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { 3221 if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
3070 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n"); 3222 NL_SET_ERR_MSG(extack, "invalid address");
3071 return -EINVAL; 3223 return -EINVAL;
3072 } 3224 }
3073 3225
3074 addr = nla_data(tb[NDA_LLADDR]); 3226 addr = nla_data(tb[NDA_LLADDR]);
3075 3227
3076 err = fdb_vid_parse(tb[NDA_VLAN], &vid); 3228 err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
3077 if (err) 3229 if (err)
3078 return err; 3230 return err;
3079 3231
@@ -3160,24 +3312,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
3160 3312
3161 ndm = nlmsg_data(nlh); 3313 ndm = nlmsg_data(nlh);
3162 if (ndm->ndm_ifindex == 0) { 3314 if (ndm->ndm_ifindex == 0) {
3163 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n"); 3315 NL_SET_ERR_MSG(extack, "invalid ifindex");
3164 return -EINVAL; 3316 return -EINVAL;
3165 } 3317 }
3166 3318
3167 dev = __dev_get_by_index(net, ndm->ndm_ifindex); 3319 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
3168 if (dev == NULL) { 3320 if (dev == NULL) {
3169 pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n"); 3321 NL_SET_ERR_MSG(extack, "unknown ifindex");
3170 return -ENODEV; 3322 return -ENODEV;
3171 } 3323 }
3172 3324
3173 if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { 3325 if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
3174 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); 3326 NL_SET_ERR_MSG(extack, "invalid address");
3175 return -EINVAL; 3327 return -EINVAL;
3176 } 3328 }
3177 3329
3178 addr = nla_data(tb[NDA_LLADDR]); 3330 addr = nla_data(tb[NDA_LLADDR]);
3179 3331
3180 err = fdb_vid_parse(tb[NDA_VLAN], &vid); 3332 err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
3181 if (err) 3333 if (err)
3182 return err; 3334 return err;
3183 3335
@@ -3617,7 +3769,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
3617 3769
3618 dev = __dev_get_by_index(net, ifm->ifi_index); 3770 dev = __dev_get_by_index(net, ifm->ifi_index);
3619 if (!dev) { 3771 if (!dev) {
3620 pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); 3772 NL_SET_ERR_MSG(extack, "unknown ifindex");
3621 return -ENODEV; 3773 return -ENODEV;
3622 } 3774 }
3623 3775
@@ -3692,7 +3844,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
3692 3844
3693 dev = __dev_get_by_index(net, ifm->ifi_index); 3845 dev = __dev_get_by_index(net, ifm->ifi_index);
3694 if (!dev) { 3846 if (!dev) {
3695 pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); 3847 NL_SET_ERR_MSG(extack, "unknown ifindex");
3696 return -ENODEV; 3848 return -ENODEV;
3697 } 3849 }
3698 3850
@@ -3943,25 +4095,30 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
3943 if (!attr) 4095 if (!attr)
3944 goto nla_put_failure; 4096 goto nla_put_failure;
3945 4097
3946 list_for_each_entry(af_ops, &rtnl_af_ops, list) { 4098 rcu_read_lock();
4099 list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
3947 if (af_ops->fill_stats_af) { 4100 if (af_ops->fill_stats_af) {
3948 struct nlattr *af; 4101 struct nlattr *af;
3949 int err; 4102 int err;
3950 4103
3951 af = nla_nest_start(skb, af_ops->family); 4104 af = nla_nest_start(skb, af_ops->family);
3952 if (!af) 4105 if (!af) {
4106 rcu_read_unlock();
3953 goto nla_put_failure; 4107 goto nla_put_failure;
3954 4108 }
3955 err = af_ops->fill_stats_af(skb, dev); 4109 err = af_ops->fill_stats_af(skb, dev);
3956 4110
3957 if (err == -ENODATA) 4111 if (err == -ENODATA) {
3958 nla_nest_cancel(skb, af); 4112 nla_nest_cancel(skb, af);
3959 else if (err < 0) 4113 } else if (err < 0) {
4114 rcu_read_unlock();
3960 goto nla_put_failure; 4115 goto nla_put_failure;
4116 }
3961 4117
3962 nla_nest_end(skb, af); 4118 nla_nest_end(skb, af);
3963 } 4119 }
3964 } 4120 }
4121 rcu_read_unlock();
3965 4122
3966 nla_nest_end(skb, attr); 4123 nla_nest_end(skb, attr);
3967 4124
@@ -4030,7 +4187,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
4030 /* for IFLA_STATS_AF_SPEC */ 4187 /* for IFLA_STATS_AF_SPEC */
4031 size += nla_total_size(0); 4188 size += nla_total_size(0);
4032 4189
4033 list_for_each_entry(af_ops, &rtnl_af_ops, list) { 4190 rcu_read_lock();
4191 list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
4034 if (af_ops->get_stats_af_size) { 4192 if (af_ops->get_stats_af_size) {
4035 size += nla_total_size( 4193 size += nla_total_size(
4036 af_ops->get_stats_af_size(dev)); 4194 af_ops->get_stats_af_size(dev));
@@ -4039,6 +4197,7 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
4039 size += nla_total_size(0); 4197 size += nla_total_size(0);
4040 } 4198 }
4041 } 4199 }
4200 rcu_read_unlock();
4042 } 4201 }
4043 4202
4044 return size; 4203 return size;
@@ -4292,9 +4451,10 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
4292 case NETDEV_CHANGEUPPER: 4451 case NETDEV_CHANGEUPPER:
4293 case NETDEV_RESEND_IGMP: 4452 case NETDEV_RESEND_IGMP:
4294 case NETDEV_CHANGEINFODATA: 4453 case NETDEV_CHANGEINFODATA:
4454 case NETDEV_CHANGELOWERSTATE:
4295 case NETDEV_CHANGE_TX_QUEUE_LEN: 4455 case NETDEV_CHANGE_TX_QUEUE_LEN:
4296 rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), 4456 rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
4297 GFP_KERNEL); 4457 GFP_KERNEL, NULL);
4298 break; 4458 break;
4299 default: 4459 default:
4300 break; 4460 break;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e140ba49b30a..6b0ff396fa9d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -41,7 +41,6 @@
41#include <linux/module.h> 41#include <linux/module.h>
42#include <linux/types.h> 42#include <linux/types.h>
43#include <linux/kernel.h> 43#include <linux/kernel.h>
44#include <linux/kmemcheck.h>
45#include <linux/mm.h> 44#include <linux/mm.h>
46#include <linux/interrupt.h> 45#include <linux/interrupt.h>
47#include <linux/in.h> 46#include <linux/in.h>
@@ -234,14 +233,12 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
234 shinfo = skb_shinfo(skb); 233 shinfo = skb_shinfo(skb);
235 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); 234 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
236 atomic_set(&shinfo->dataref, 1); 235 atomic_set(&shinfo->dataref, 1);
237 kmemcheck_annotate_variable(shinfo->destructor_arg);
238 236
239 if (flags & SKB_ALLOC_FCLONE) { 237 if (flags & SKB_ALLOC_FCLONE) {
240 struct sk_buff_fclones *fclones; 238 struct sk_buff_fclones *fclones;
241 239
242 fclones = container_of(skb, struct sk_buff_fclones, skb1); 240 fclones = container_of(skb, struct sk_buff_fclones, skb1);
243 241
244 kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
245 skb->fclone = SKB_FCLONE_ORIG; 242 skb->fclone = SKB_FCLONE_ORIG;
246 refcount_set(&fclones->fclone_ref, 1); 243 refcount_set(&fclones->fclone_ref, 1);
247 244
@@ -301,7 +298,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
301 shinfo = skb_shinfo(skb); 298 shinfo = skb_shinfo(skb);
302 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); 299 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
303 atomic_set(&shinfo->dataref, 1); 300 atomic_set(&shinfo->dataref, 1);
304 kmemcheck_annotate_variable(shinfo->destructor_arg);
305 301
306 return skb; 302 return skb;
307} 303}
@@ -357,7 +353,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
357 */ 353 */
358void *netdev_alloc_frag(unsigned int fragsz) 354void *netdev_alloc_frag(unsigned int fragsz)
359{ 355{
360 return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); 356 return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
361} 357}
362EXPORT_SYMBOL(netdev_alloc_frag); 358EXPORT_SYMBOL(netdev_alloc_frag);
363 359
@@ -370,7 +366,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
370 366
371void *napi_alloc_frag(unsigned int fragsz) 367void *napi_alloc_frag(unsigned int fragsz)
372{ 368{
373 return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); 369 return __napi_alloc_frag(fragsz, GFP_ATOMIC);
374} 370}
375EXPORT_SYMBOL(napi_alloc_frag); 371EXPORT_SYMBOL(napi_alloc_frag);
376 372
@@ -1283,7 +1279,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
1283 if (!n) 1279 if (!n)
1284 return NULL; 1280 return NULL;
1285 1281
1286 kmemcheck_annotate_bitfield(n, flags1);
1287 n->fclone = SKB_FCLONE_UNAVAILABLE; 1282 n->fclone = SKB_FCLONE_UNAVAILABLE;
1288 } 1283 }
1289 1284
@@ -1354,8 +1349,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
1354 /* Set the tail pointer and length */ 1349 /* Set the tail pointer and length */
1355 skb_put(n, skb->len); 1350 skb_put(n, skb->len);
1356 1351
1357 if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) 1352 BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
1358 BUG();
1359 1353
1360 copy_skb_header(n, skb); 1354 copy_skb_header(n, skb);
1361 return n; 1355 return n;
@@ -1453,8 +1447,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
1453 1447
1454 BUG_ON(nhead < 0); 1448 BUG_ON(nhead < 0);
1455 1449
1456 if (skb_shared(skb)) 1450 BUG_ON(skb_shared(skb));
1457 BUG();
1458 1451
1459 size = SKB_DATA_ALIGN(size); 1452 size = SKB_DATA_ALIGN(size);
1460 1453
@@ -1513,6 +1506,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
1513 skb->nohdr = 0; 1506 skb->nohdr = 0;
1514 atomic_set(&skb_shinfo(skb)->dataref, 1); 1507 atomic_set(&skb_shinfo(skb)->dataref, 1);
1515 1508
1509 skb_metadata_clear(skb);
1510
1516 /* It is not generally safe to change skb->truesize. 1511 /* It is not generally safe to change skb->truesize.
1517 * For the moment, we really care of rx path, or 1512 * For the moment, we really care of rx path, or
1518 * when skb is orphaned (not attached to a socket). 1513 * when skb is orphaned (not attached to a socket).
@@ -1597,9 +1592,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
1597 head_copy_off = newheadroom - head_copy_len; 1592 head_copy_off = newheadroom - head_copy_len;
1598 1593
1599 /* Copy the linear header and data. */ 1594 /* Copy the linear header and data. */
1600 if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, 1595 BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
1601 skb->len + head_copy_len)) 1596 skb->len + head_copy_len));
1602 BUG();
1603 1597
1604 copy_skb_header(n, skb); 1598 copy_skb_header(n, skb);
1605 1599
@@ -1880,8 +1874,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
1880 return NULL; 1874 return NULL;
1881 } 1875 }
1882 1876
1883 if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta)) 1877 BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
1884 BUG(); 1878 skb_tail_pointer(skb), delta));
1885 1879
1886 /* Optimization: no fragments, no reasons to preestimate 1880 /* Optimization: no fragments, no reasons to preestimate
1887 * size of pulled pages. Superb. 1881 * size of pulled pages. Superb.
@@ -2852,12 +2846,15 @@ EXPORT_SYMBOL(skb_queue_purge);
2852 */ 2846 */
2853void skb_rbtree_purge(struct rb_root *root) 2847void skb_rbtree_purge(struct rb_root *root)
2854{ 2848{
2855 struct sk_buff *skb, *next; 2849 struct rb_node *p = rb_first(root);
2856 2850
2857 rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode) 2851 while (p) {
2858 kfree_skb(skb); 2852 struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
2859 2853
2860 *root = RB_ROOT; 2854 p = rb_next(p);
2855 rb_erase(&skb->rbnode, root);
2856 kfree_skb(skb);
2857 }
2861} 2858}
2862 2859
2863/** 2860/**
@@ -4766,6 +4763,7 @@ EXPORT_SYMBOL(kfree_skb_partial);
4766bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, 4763bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
4767 bool *fragstolen, int *delta_truesize) 4764 bool *fragstolen, int *delta_truesize)
4768{ 4765{
4766 struct skb_shared_info *to_shinfo, *from_shinfo;
4769 int i, delta, len = from->len; 4767 int i, delta, len = from->len;
4770 4768
4771 *fragstolen = false; 4769 *fragstolen = false;
@@ -4780,7 +4778,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
4780 return true; 4778 return true;
4781 } 4779 }
4782 4780
4783 if (skb_has_frag_list(to) || skb_has_frag_list(from)) 4781 to_shinfo = skb_shinfo(to);
4782 from_shinfo = skb_shinfo(from);
4783 if (to_shinfo->frag_list || from_shinfo->frag_list)
4784 return false; 4784 return false;
4785 if (skb_zcopy(to) || skb_zcopy(from)) 4785 if (skb_zcopy(to) || skb_zcopy(from))
4786 return false; 4786 return false;
@@ -4789,8 +4789,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
4789 struct page *page; 4789 struct page *page;
4790 unsigned int offset; 4790 unsigned int offset;
4791 4791
4792 if (skb_shinfo(to)->nr_frags + 4792 if (to_shinfo->nr_frags +
4793 skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) 4793 from_shinfo->nr_frags >= MAX_SKB_FRAGS)
4794 return false; 4794 return false;
4795 4795
4796 if (skb_head_is_locked(from)) 4796 if (skb_head_is_locked(from))
@@ -4801,12 +4801,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
4801 page = virt_to_head_page(from->head); 4801 page = virt_to_head_page(from->head);
4802 offset = from->data - (unsigned char *)page_address(page); 4802 offset = from->data - (unsigned char *)page_address(page);
4803 4803
4804 skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, 4804 skb_fill_page_desc(to, to_shinfo->nr_frags,
4805 page, offset, skb_headlen(from)); 4805 page, offset, skb_headlen(from));
4806 *fragstolen = true; 4806 *fragstolen = true;
4807 } else { 4807 } else {
4808 if (skb_shinfo(to)->nr_frags + 4808 if (to_shinfo->nr_frags +
4809 skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) 4809 from_shinfo->nr_frags > MAX_SKB_FRAGS)
4810 return false; 4810 return false;
4811 4811
4812 delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from)); 4812 delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
@@ -4814,19 +4814,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
4814 4814
4815 WARN_ON_ONCE(delta < len); 4815 WARN_ON_ONCE(delta < len);
4816 4816
4817 memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, 4817 memcpy(to_shinfo->frags + to_shinfo->nr_frags,
4818 skb_shinfo(from)->frags, 4818 from_shinfo->frags,
4819 skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); 4819 from_shinfo->nr_frags * sizeof(skb_frag_t));
4820 skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; 4820 to_shinfo->nr_frags += from_shinfo->nr_frags;
4821 4821
4822 if (!skb_cloned(from)) 4822 if (!skb_cloned(from))
4823 skb_shinfo(from)->nr_frags = 0; 4823 from_shinfo->nr_frags = 0;
4824 4824
4825 /* if the skb is not cloned this does nothing 4825 /* if the skb is not cloned this does nothing
4826 * since we set nr_frags to 0. 4826 * since we set nr_frags to 0.
4827 */ 4827 */
4828 for (i = 0; i < skb_shinfo(from)->nr_frags; i++) 4828 for (i = 0; i < from_shinfo->nr_frags; i++)
4829 skb_frag_ref(from, i); 4829 __skb_frag_ref(&from_shinfo->frags[i]);
4830 4830
4831 to->truesize += delta; 4831 to->truesize += delta;
4832 to->len += len; 4832 to->len += len;
diff --git a/net/core/sock.c b/net/core/sock.c
index 415f441c63b9..c0b5b2f17412 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1469,8 +1469,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1469 sk = kmalloc(prot->obj_size, priority); 1469 sk = kmalloc(prot->obj_size, priority);
1470 1470
1471 if (sk != NULL) { 1471 if (sk != NULL) {
1472 kmemcheck_annotate_bitfield(sk, flags);
1473
1474 if (security_sk_alloc(sk, family, priority)) 1472 if (security_sk_alloc(sk, family, priority))
1475 goto out_free; 1473 goto out_free;
1476 1474
@@ -2346,16 +2344,18 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
2346 2344
2347 /* guarantee minimum buffer size under pressure */ 2345 /* guarantee minimum buffer size under pressure */
2348 if (kind == SK_MEM_RECV) { 2346 if (kind == SK_MEM_RECV) {
2349 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) 2347 if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
2350 return 1; 2348 return 1;
2351 2349
2352 } else { /* SK_MEM_SEND */ 2350 } else { /* SK_MEM_SEND */
2351 int wmem0 = sk_get_wmem0(sk, prot);
2352
2353 if (sk->sk_type == SOCK_STREAM) { 2353 if (sk->sk_type == SOCK_STREAM) {
2354 if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) 2354 if (sk->sk_wmem_queued < wmem0)
2355 return 1; 2355 return 1;
2356 } else if (refcount_read(&sk->sk_wmem_alloc) < 2356 } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
2357 prot->sysctl_wmem[0])
2358 return 1; 2357 return 1;
2358 }
2359 } 2359 }
2360 2360
2361 if (sk_has_memory_pressure(sk)) { 2361 if (sk_has_memory_pressure(sk)) {
@@ -2685,7 +2685,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
2685 sk_init_common(sk); 2685 sk_init_common(sk);
2686 sk->sk_send_head = NULL; 2686 sk->sk_send_head = NULL;
2687 2687
2688 init_timer(&sk->sk_timer); 2688 timer_setup(&sk->sk_timer, NULL, 0);
2689 2689
2690 sk->sk_allocation = GFP_KERNEL; 2690 sk->sk_allocation = GFP_KERNEL;
2691 sk->sk_rcvbuf = sysctl_rmem_default; 2691 sk->sk_rcvbuf = sysctl_rmem_default;
@@ -2744,6 +2744,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
2744 2744
2745 sk->sk_max_pacing_rate = ~0U; 2745 sk->sk_max_pacing_rate = ~0U;
2746 sk->sk_pacing_rate = ~0U; 2746 sk->sk_pacing_rate = ~0U;
2747 sk->sk_pacing_shift = 10;
2747 sk->sk_incoming_cpu = -1; 2748 sk->sk_incoming_cpu = -1;
2748 /* 2749 /*
2749 * Before updating sk_refcnt, we must commit prior changes to memory 2750 * Before updating sk_refcnt, we must commit prior changes to memory
@@ -3042,7 +3043,6 @@ struct prot_inuse {
3042 3043
3043static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); 3044static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
3044 3045
3045#ifdef CONFIG_NET_NS
3046void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) 3046void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
3047{ 3047{
3048 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); 3048 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
@@ -3086,27 +3086,6 @@ static __init int net_inuse_init(void)
3086} 3086}
3087 3087
3088core_initcall(net_inuse_init); 3088core_initcall(net_inuse_init);
3089#else
3090static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
3091
3092void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
3093{
3094 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
3095}
3096EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
3097
3098int sock_prot_inuse_get(struct net *net, struct proto *prot)
3099{
3100 int cpu, idx = prot->inuse_idx;
3101 int res = 0;
3102
3103 for_each_possible_cpu(cpu)
3104 res += per_cpu(prot_inuse, cpu).val[idx];
3105
3106 return res >= 0 ? res : 0;
3107}
3108EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
3109#endif
3110 3089
3111static void assign_proto_idx(struct proto *prot) 3090static void assign_proto_idx(struct proto *prot)
3112{ 3091{
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e1295d5f2c56..1c75cd1255f6 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -126,10 +126,10 @@ static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
126 DCCPF_SEQ_WMAX)); 126 DCCPF_SEQ_WMAX));
127} 127}
128 128
129static void ccid2_hc_tx_rto_expire(unsigned long data) 129static void ccid2_hc_tx_rto_expire(struct timer_list *t)
130{ 130{
131 struct sock *sk = (struct sock *)data; 131 struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer);
132 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 132 struct sock *sk = hc->sk;
133 const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); 133 const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
134 134
135 bh_lock_sock(sk); 135 bh_lock_sock(sk);
@@ -733,8 +733,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
733 hc->tx_rpdupack = -1; 733 hc->tx_rpdupack = -1;
734 hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32; 734 hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32;
735 hc->tx_cwnd_used = 0; 735 hc->tx_cwnd_used = 0;
736 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 736 hc->sk = sk;
737 (unsigned long)sk); 737 timer_setup(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 0);
738 INIT_LIST_HEAD(&hc->tx_av_chunks); 738 INIT_LIST_HEAD(&hc->tx_av_chunks);
739 return 0; 739 return 0;
740} 740}
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 6e50ef2898fb..1af0116dc6ce 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -85,6 +85,7 @@ struct ccid2_hc_tx_sock {
85 tx_rto; 85 tx_rto;
86 u64 tx_rtt_seq:48; 86 u64 tx_rtt_seq:48;
87 struct timer_list tx_rtotimer; 87 struct timer_list tx_rtotimer;
88 struct sock *sk;
88 89
89 /* Congestion Window validation (optional, RFC 2861) */ 90 /* Congestion Window validation (optional, RFC 2861) */
90 u32 tx_cwnd_used, 91 u32 tx_cwnd_used,
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 119c04317d48..8b5ba6dffac7 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -195,10 +195,10 @@ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hc,
195 } 195 }
196} 196}
197 197
198static void ccid3_hc_tx_no_feedback_timer(unsigned long data) 198static void ccid3_hc_tx_no_feedback_timer(struct timer_list *t)
199{ 199{
200 struct sock *sk = (struct sock *)data; 200 struct ccid3_hc_tx_sock *hc = from_timer(hc, t, tx_no_feedback_timer);
201 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 201 struct sock *sk = hc->sk;
202 unsigned long t_nfb = USEC_PER_SEC / 5; 202 unsigned long t_nfb = USEC_PER_SEC / 5;
203 203
204 bh_lock_sock(sk); 204 bh_lock_sock(sk);
@@ -505,8 +505,9 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
505 505
506 hc->tx_state = TFRC_SSTATE_NO_SENT; 506 hc->tx_state = TFRC_SSTATE_NO_SENT;
507 hc->tx_hist = NULL; 507 hc->tx_hist = NULL;
508 setup_timer(&hc->tx_no_feedback_timer, 508 hc->sk = sk;
509 ccid3_hc_tx_no_feedback_timer, (unsigned long)sk); 509 timer_setup(&hc->tx_no_feedback_timer,
510 ccid3_hc_tx_no_feedback_timer, 0);
510 return 0; 511 return 0;
511} 512}
512 513
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 1a9933c29672..813d91c6e1e2 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -106,6 +106,7 @@ struct ccid3_hc_tx_sock {
106 u8 tx_last_win_count; 106 u8 tx_last_win_count;
107 ktime_t tx_t_last_win_count; 107 ktime_t tx_t_last_win_count;
108 struct timer_list tx_no_feedback_timer; 108 struct timer_list tx_no_feedback_timer;
109 struct sock *sk;
109 ktime_t tx_t_ld; 110 ktime_t tx_t_ld;
110 ktime_t tx_t_nom; 111 ktime_t tx_t_nom;
111 struct tfrc_tx_hist_entry *tx_hist; 112 struct tfrc_tx_hist_entry *tx_hist;
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 08df7a3acb3d..876e18592d71 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -149,10 +149,8 @@ static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
149{ 149{
150 const u8 idx_a = tfrc_rx_hist_index(h, a), 150 const u8 idx_a = tfrc_rx_hist_index(h, a),
151 idx_b = tfrc_rx_hist_index(h, b); 151 idx_b = tfrc_rx_hist_index(h, b);
152 struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
153 152
154 h->ring[idx_a] = h->ring[idx_b]; 153 swap(h->ring[idx_a], h->ring[idx_b]);
155 h->ring[idx_b] = tmp;
156} 154}
157 155
158/* 156/*
diff --git a/net/dccp/input.c b/net/dccp/input.c
index fa6be9750bb4..d28d46bff6ab 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -534,6 +534,7 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
534 case DCCP_PKT_DATA: 534 case DCCP_PKT_DATA:
535 if (sk->sk_state == DCCP_RESPOND) 535 if (sk->sk_state == DCCP_RESPOND)
536 break; 536 break;
537 /* fall through */
537 case DCCP_PKT_DATAACK: 538 case DCCP_PKT_DATAACK:
538 case DCCP_PKT_ACK: 539 case DCCP_PKT_ACK:
539 /* 540 /*
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 51cdfc3bd8ca..4e40db017e19 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -227,8 +227,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
227 * Ack vectors are processed by the TX CCID if it is 227 * Ack vectors are processed by the TX CCID if it is
228 * interested. The RX CCID need not parse Ack Vectors, 228 * interested. The RX CCID need not parse Ack Vectors,
229 * since it is only interested in clearing old state. 229 * since it is only interested in clearing old state.
230 * Fall through.
231 */ 230 */
231 /* fall through */
232 case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: 232 case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
233 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, 233 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
234 pkt_type, opt, value, len)) 234 pkt_type, opt, value, len))
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3a2c34027758..b50a8732ff43 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -125,10 +125,11 @@ static void dccp_retransmit_timer(struct sock *sk)
125 __sk_dst_reset(sk); 125 __sk_dst_reset(sk);
126} 126}
127 127
128static void dccp_write_timer(unsigned long data) 128static void dccp_write_timer(struct timer_list *t)
129{ 129{
130 struct sock *sk = (struct sock *)data; 130 struct inet_connection_sock *icsk =
131 struct inet_connection_sock *icsk = inet_csk(sk); 131 from_timer(icsk, t, icsk_retransmit_timer);
132 struct sock *sk = &icsk->icsk_inet.sk;
132 int event = 0; 133 int event = 0;
133 134
134 bh_lock_sock(sk); 135 bh_lock_sock(sk);
@@ -161,19 +162,20 @@ out:
161 sock_put(sk); 162 sock_put(sk);
162} 163}
163 164
164static void dccp_keepalive_timer(unsigned long data) 165static void dccp_keepalive_timer(struct timer_list *t)
165{ 166{
166 struct sock *sk = (struct sock *)data; 167 struct sock *sk = from_timer(sk, t, sk_timer);
167 168
168 pr_err("dccp should not use a keepalive timer !\n"); 169 pr_err("dccp should not use a keepalive timer !\n");
169 sock_put(sk); 170 sock_put(sk);
170} 171}
171 172
172/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ 173/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
173static void dccp_delack_timer(unsigned long data) 174static void dccp_delack_timer(struct timer_list *t)
174{ 175{
175 struct sock *sk = (struct sock *)data; 176 struct inet_connection_sock *icsk =
176 struct inet_connection_sock *icsk = inet_csk(sk); 177 from_timer(icsk, t, icsk_delack_timer);
178 struct sock *sk = &icsk->icsk_inet.sk;
177 179
178 bh_lock_sock(sk); 180 bh_lock_sock(sk);
179 if (sock_owned_by_user(sk)) { 181 if (sock_owned_by_user(sk)) {
@@ -232,10 +234,13 @@ static void dccp_write_xmitlet(unsigned long data)
232 bh_unlock_sock(sk); 234 bh_unlock_sock(sk);
233} 235}
234 236
235static void dccp_write_xmit_timer(unsigned long data) 237static void dccp_write_xmit_timer(struct timer_list *t)
236{ 238{
237 dccp_write_xmitlet(data); 239 struct dccp_sock *dp = from_timer(dp, t, dccps_xmit_timer);
238 sock_put((struct sock *)data); 240 struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk;
241
242 dccp_write_xmitlet((unsigned long)sk);
243 sock_put(sk);
239} 244}
240 245
241void dccp_init_xmit_timers(struct sock *sk) 246void dccp_init_xmit_timers(struct sock *sk)
@@ -243,8 +248,7 @@ void dccp_init_xmit_timers(struct sock *sk)
243 struct dccp_sock *dp = dccp_sk(sk); 248 struct dccp_sock *dp = dccp_sk(sk);
244 249
245 tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk); 250 tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
246 setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, 251 timer_setup(&dp->dccps_xmit_timer, dccp_write_xmit_timer, 0);
247 (unsigned long)sk);
248 inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, 252 inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
249 &dccp_keepalive_timer); 253 &dccp_keepalive_timer);
250} 254}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 73a0399dc7a2..518cea17b811 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -533,10 +533,6 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
533 scp->keepalive = 10 * HZ; 533 scp->keepalive = 10 * HZ;
534 scp->keepalive_fxn = dn_keepalive; 534 scp->keepalive_fxn = dn_keepalive;
535 535
536 init_timer(&scp->delack_timer);
537 scp->delack_pending = 0;
538 scp->delack_fxn = dn_nsp_delayed_ack;
539
540 dn_start_slow_timer(sk); 536 dn_start_slow_timer(sk);
541out: 537out:
542 return sk; 538 return sk;
@@ -634,10 +630,12 @@ static void dn_destroy_sock(struct sock *sk)
634 goto disc_reject; 630 goto disc_reject;
635 case DN_RUN: 631 case DN_RUN:
636 scp->state = DN_DI; 632 scp->state = DN_DI;
633 /* fall through */
637 case DN_DI: 634 case DN_DI:
638 case DN_DR: 635 case DN_DR:
639disc_reject: 636disc_reject:
640 dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation); 637 dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation);
638 /* fall through */
641 case DN_NC: 639 case DN_NC:
642 case DN_NR: 640 case DN_NR:
643 case DN_RJ: 641 case DN_RJ:
@@ -651,6 +649,7 @@ disc_reject:
651 break; 649 break;
652 default: 650 default:
653 printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n"); 651 printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n");
652 /* fall through */
654 case DN_O: 653 case DN_O:
655 dn_stop_slow_timer(sk); 654 dn_stop_slow_timer(sk);
656 655
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index df042b6d80b8..9153247dad28 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1039,14 +1039,14 @@ static void dn_eth_down(struct net_device *dev)
1039 1039
1040static void dn_dev_set_timer(struct net_device *dev); 1040static void dn_dev_set_timer(struct net_device *dev);
1041 1041
1042static void dn_dev_timer_func(unsigned long arg) 1042static void dn_dev_timer_func(struct timer_list *t)
1043{ 1043{
1044 struct net_device *dev = (struct net_device *)arg; 1044 struct dn_dev *dn_db = from_timer(dn_db, t, timer);
1045 struct dn_dev *dn_db; 1045 struct net_device *dev;
1046 struct dn_ifaddr *ifa; 1046 struct dn_ifaddr *ifa;
1047 1047
1048 rcu_read_lock(); 1048 rcu_read_lock();
1049 dn_db = rcu_dereference(dev->dn_ptr); 1049 dev = dn_db->dev;
1050 if (dn_db->t3 <= dn_db->parms.t2) { 1050 if (dn_db->t3 <= dn_db->parms.t2) {
1051 if (dn_db->parms.timer3) { 1051 if (dn_db->parms.timer3) {
1052 for (ifa = rcu_dereference(dn_db->ifa_list); 1052 for (ifa = rcu_dereference(dn_db->ifa_list);
@@ -1071,8 +1071,6 @@ static void dn_dev_set_timer(struct net_device *dev)
1071 if (dn_db->parms.t2 > dn_db->parms.t3) 1071 if (dn_db->parms.t2 > dn_db->parms.t3)
1072 dn_db->parms.t2 = dn_db->parms.t3; 1072 dn_db->parms.t2 = dn_db->parms.t3;
1073 1073
1074 dn_db->timer.data = (unsigned long)dev;
1075 dn_db->timer.function = dn_dev_timer_func;
1076 dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ); 1074 dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ);
1077 1075
1078 add_timer(&dn_db->timer); 1076 add_timer(&dn_db->timer);
@@ -1101,7 +1099,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
1101 1099
1102 rcu_assign_pointer(dev->dn_ptr, dn_db); 1100 rcu_assign_pointer(dev->dn_ptr, dn_db);
1103 dn_db->dev = dev; 1101 dn_db->dev = dev;
1104 init_timer(&dn_db->timer); 1102 timer_setup(&dn_db->timer, dn_dev_timer_func, 0);
1105 1103
1106 dn_db->uptime = jiffies; 1104 dn_db->uptime = jiffies;
1107 1105
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 7ac086d5c0c0..1b2120645730 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -776,12 +776,8 @@ static int dn_nsp_rx_packet(struct net *net, struct sock *sk2,
776 * Swap src & dst and look up in the normal way. 776 * Swap src & dst and look up in the normal way.
777 */ 777 */
778 if (unlikely(cb->rt_flags & DN_RT_F_RTS)) { 778 if (unlikely(cb->rt_flags & DN_RT_F_RTS)) {
779 __le16 tmp = cb->dst_port; 779 swap(cb->dst_port, cb->src_port);
780 cb->dst_port = cb->src_port; 780 swap(cb->dst, cb->src);
781 cb->src_port = tmp;
782 tmp = cb->dst;
783 cb->dst = cb->src;
784 cb->src = tmp;
785 } 781 }
786 782
787 /* 783 /*
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 66f035e476ea..56a52a004c56 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -313,11 +313,8 @@ static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned c
313 ackcrs |= 0x8000; 313 ackcrs |= 0x8000;
314 314
315 /* If this is an "other data/ack" message, swap acknum and ackcrs */ 315 /* If this is an "other data/ack" message, swap acknum and ackcrs */
316 if (other) { 316 if (other)
317 unsigned short tmp = acknum; 317 swap(acknum, ackcrs);
318 acknum = ackcrs;
319 ackcrs = tmp;
320 }
321 318
322 /* Set "cross subchannel" bit in ackcrs */ 319 /* Set "cross subchannel" bit in ackcrs */
323 ackcrs |= 0x2000; 320 ackcrs |= 0x2000;
@@ -491,17 +488,6 @@ void dn_send_conn_ack (struct sock *sk)
491 dn_nsp_send(skb); 488 dn_nsp_send(skb);
492} 489}
493 490
494void dn_nsp_delayed_ack(struct sock *sk)
495{
496 struct dn_scp *scp = DN_SK(sk);
497
498 if (scp->ackxmt_oth != scp->numoth_rcv)
499 dn_nsp_send_oth_ack(sk);
500
501 if (scp->ackxmt_dat != scp->numdat_rcv)
502 dn_nsp_send_data_ack(sk);
503}
504
505static int dn_nsp_retrans_conn_conf(struct sock *sk) 491static int dn_nsp_retrans_conn_conf(struct sock *sk)
506{ 492{
507 struct dn_scp *scp = DN_SK(sk); 493 struct dn_scp *scp = DN_SK(sk);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 6538632fbd03..324cb9f2f551 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -125,7 +125,7 @@ static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
125 struct sk_buff *skb, 125 struct sk_buff *skb,
126 const void *daddr); 126 const void *daddr);
127static int dn_route_input(struct sk_buff *); 127static int dn_route_input(struct sk_buff *);
128static void dn_run_flush(unsigned long dummy); 128static void dn_run_flush(struct timer_list *unused);
129 129
130static struct dn_rt_hash_bucket *dn_rt_hash_table; 130static struct dn_rt_hash_bucket *dn_rt_hash_table;
131static unsigned int dn_rt_hash_mask; 131static unsigned int dn_rt_hash_mask;
@@ -183,7 +183,7 @@ static __inline__ unsigned int dn_hash(__le16 src, __le16 dst)
183 return dn_rt_hash_mask & (unsigned int)tmp; 183 return dn_rt_hash_mask & (unsigned int)tmp;
184} 184}
185 185
186static void dn_dst_check_expire(unsigned long dummy) 186static void dn_dst_check_expire(struct timer_list *unused)
187{ 187{
188 int i; 188 int i;
189 struct dn_route *rt; 189 struct dn_route *rt;
@@ -338,7 +338,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
338 dn_rt_hash_table[hash].chain); 338 dn_rt_hash_table[hash].chain);
339 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); 339 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
340 340
341 dst_use(&rth->dst, now); 341 dst_hold_and_use(&rth->dst, now);
342 spin_unlock_bh(&dn_rt_hash_table[hash].lock); 342 spin_unlock_bh(&dn_rt_hash_table[hash].lock);
343 343
344 dst_release_immediate(&rt->dst); 344 dst_release_immediate(&rt->dst);
@@ -351,13 +351,13 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
351 rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain); 351 rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
352 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); 352 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
353 353
354 dst_use(&rt->dst, now); 354 dst_hold_and_use(&rt->dst, now);
355 spin_unlock_bh(&dn_rt_hash_table[hash].lock); 355 spin_unlock_bh(&dn_rt_hash_table[hash].lock);
356 *rp = rt; 356 *rp = rt;
357 return 0; 357 return 0;
358} 358}
359 359
360static void dn_run_flush(unsigned long dummy) 360static void dn_run_flush(struct timer_list *unused)
361{ 361{
362 int i; 362 int i;
363 struct dn_route *rt, *next; 363 struct dn_route *rt, *next;
@@ -1258,7 +1258,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *
1258 (flp->flowidn_mark == rt->fld.flowidn_mark) && 1258 (flp->flowidn_mark == rt->fld.flowidn_mark) &&
1259 dn_is_output_route(rt) && 1259 dn_is_output_route(rt) &&
1260 (rt->fld.flowidn_oif == flp->flowidn_oif)) { 1260 (rt->fld.flowidn_oif == flp->flowidn_oif)) {
1261 dst_use(&rt->dst, jiffies); 1261 dst_hold_and_use(&rt->dst, jiffies);
1262 rcu_read_unlock_bh(); 1262 rcu_read_unlock_bh();
1263 *pprt = &rt->dst; 1263 *pprt = &rt->dst;
1264 return 0; 1264 return 0;
@@ -1535,7 +1535,7 @@ static int dn_route_input(struct sk_buff *skb)
1535 (rt->fld.flowidn_oif == 0) && 1535 (rt->fld.flowidn_oif == 0) &&
1536 (rt->fld.flowidn_mark == skb->mark) && 1536 (rt->fld.flowidn_mark == skb->mark) &&
1537 (rt->fld.flowidn_iif == cb->iif)) { 1537 (rt->fld.flowidn_iif == cb->iif)) {
1538 dst_use(&rt->dst, jiffies); 1538 dst_hold_and_use(&rt->dst, jiffies);
1539 rcu_read_unlock(); 1539 rcu_read_unlock();
1540 skb_dst_set(skb, (struct dst_entry *)rt); 1540 skb_dst_set(skb, (struct dst_entry *)rt);
1541 return 0; 1541 return 0;
@@ -1875,7 +1875,7 @@ void __init dn_route_init(void)
1875 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, 1875 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
1876 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1876 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1877 dst_entries_init(&dn_dst_ops); 1877 dst_entries_init(&dn_dst_ops);
1878 setup_timer(&dn_route_timer, dn_dst_check_expire, 0); 1878 timer_setup(&dn_route_timer, dn_dst_check_expire, 0);
1879 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; 1879 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
1880 add_timer(&dn_route_timer); 1880 add_timer(&dn_route_timer);
1881 1881
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 08667f68e601..f0710b5d037d 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -156,6 +156,7 @@ static void dn_rehash_zone(struct dn_zone *dz)
156 default: 156 default:
157 printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n", 157 printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n",
158 old_divisor); 158 old_divisor);
159 /* fall through */
159 case 256: 160 case 256:
160 new_divisor = 1024; 161 new_divisor = 1024;
161 new_hashmask = 0x3FF; 162 new_hashmask = 0x3FF;
diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
index f430daed24a0..aa4155875ca8 100644
--- a/net/decnet/dn_timer.c
+++ b/net/decnet/dn_timer.c
@@ -34,11 +34,11 @@
34 34
35#define SLOW_INTERVAL (HZ/2) 35#define SLOW_INTERVAL (HZ/2)
36 36
37static void dn_slow_timer(unsigned long arg); 37static void dn_slow_timer(struct timer_list *t);
38 38
39void dn_start_slow_timer(struct sock *sk) 39void dn_start_slow_timer(struct sock *sk)
40{ 40{
41 setup_timer(&sk->sk_timer, dn_slow_timer, (unsigned long)sk); 41 timer_setup(&sk->sk_timer, dn_slow_timer, 0);
42 sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); 42 sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
43} 43}
44 44
@@ -47,9 +47,9 @@ void dn_stop_slow_timer(struct sock *sk)
47 sk_stop_timer(sk, &sk->sk_timer); 47 sk_stop_timer(sk, &sk->sk_timer);
48} 48}
49 49
50static void dn_slow_timer(unsigned long arg) 50static void dn_slow_timer(struct timer_list *t)
51{ 51{
52 struct sock *sk = (struct sock *)arg; 52 struct sock *sk = from_timer(sk, t, sk_timer);
53 struct dn_scp *scp = DN_SK(sk); 53 struct dn_scp *scp = DN_SK(sk);
54 54
55 bh_lock_sock(sk); 55 bh_lock_sock(sk);
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index cc5f8f971689..03c3bdf25468 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -7,6 +7,7 @@ config HAVE_NET_DSA
7config NET_DSA 7config NET_DSA
8 tristate "Distributed Switch Architecture" 8 tristate "Distributed Switch Architecture"
9 depends on HAVE_NET_DSA && MAY_USE_DEVLINK 9 depends on HAVE_NET_DSA && MAY_USE_DEVLINK
10 depends on BRIDGE || BRIDGE=n
10 select NET_SWITCHDEV 11 select NET_SWITCHDEV
11 select PHYLIB 12 select PHYLIB
12 ---help--- 13 ---help---
@@ -19,6 +20,9 @@ if NET_DSA
19config NET_DSA_TAG_BRCM 20config NET_DSA_TAG_BRCM
20 bool 21 bool
21 22
23config NET_DSA_TAG_BRCM_PREPEND
24 bool
25
22config NET_DSA_TAG_DSA 26config NET_DSA_TAG_DSA
23 bool 27 bool
24 28
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 3d3c74193d06..0e13c1f95d13 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,10 +1,11 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2# the core 2# the core
3obj-$(CONFIG_NET_DSA) += dsa_core.o 3obj-$(CONFIG_NET_DSA) += dsa_core.o
4dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o 4dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o
5 5
6# tagging formats 6# tagging formats
7dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o 7dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
8dsa_core-$(CONFIG_NET_DSA_TAG_BRCM_PREPEND) += tag_brcm.o
8dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o 9dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
9dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o 10dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
10dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o 11dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 03c58b0eb082..6a9d0f50fbee 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -14,6 +14,7 @@
14#include <linux/platform_device.h> 14#include <linux/platform_device.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/notifier.h>
17#include <linux/of.h> 18#include <linux/of.h>
18#include <linux/of_mdio.h> 19#include <linux/of_mdio.h>
19#include <linux/of_platform.h> 20#include <linux/of_platform.h>
@@ -43,6 +44,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
43#ifdef CONFIG_NET_DSA_TAG_BRCM 44#ifdef CONFIG_NET_DSA_TAG_BRCM
44 [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, 45 [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
45#endif 46#endif
47#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
48 [DSA_TAG_PROTO_BRCM_PREPEND] = &brcm_prepend_netdev_ops,
49#endif
46#ifdef CONFIG_NET_DSA_TAG_DSA 50#ifdef CONFIG_NET_DSA_TAG_DSA
47 [DSA_TAG_PROTO_DSA] = &dsa_netdev_ops, 51 [DSA_TAG_PROTO_DSA] = &dsa_netdev_ops,
48#endif 52#endif
@@ -67,37 +71,6 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
67 [DSA_TAG_PROTO_NONE] = &none_ops, 71 [DSA_TAG_PROTO_NONE] = &none_ops,
68}; 72};
69 73
70int dsa_cpu_dsa_setup(struct dsa_port *port)
71{
72 struct device_node *port_dn = port->dn;
73 struct dsa_switch *ds = port->ds;
74 struct phy_device *phydev;
75 int ret, mode;
76
77 if (of_phy_is_fixed_link(port_dn)) {
78 ret = of_phy_register_fixed_link(port_dn);
79 if (ret) {
80 dev_err(ds->dev, "failed to register fixed PHY\n");
81 return ret;
82 }
83 phydev = of_phy_find_device(port_dn);
84
85 mode = of_get_phy_mode(port_dn);
86 if (mode < 0)
87 mode = PHY_INTERFACE_MODE_NA;
88 phydev->interface = mode;
89
90 genphy_config_init(phydev);
91 genphy_read_status(phydev);
92 if (ds->ops->adjust_link)
93 ds->ops->adjust_link(ds, port->index, phydev);
94
95 put_device(&phydev->mdio.dev);
96 }
97
98 return 0;
99}
100
101const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) 74const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
102{ 75{
103 const struct dsa_device_ops *ops; 76 const struct dsa_device_ops *ops;
@@ -112,42 +85,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
112 return ops; 85 return ops;
113} 86}
114 87
115int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp)
116{
117 struct dsa_switch *ds = cpu_dp->ds;
118 struct net_device *master;
119 struct ethtool_ops *cpu_ops;
120
121 master = cpu_dp->netdev;
122
123 cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL);
124 if (!cpu_ops)
125 return -ENOMEM;
126
127 memcpy(&cpu_dp->ethtool_ops, master->ethtool_ops,
128 sizeof(struct ethtool_ops));
129 cpu_dp->orig_ethtool_ops = master->ethtool_ops;
130 memcpy(cpu_ops, &cpu_dp->ethtool_ops,
131 sizeof(struct ethtool_ops));
132 dsa_cpu_port_ethtool_init(cpu_ops);
133 master->ethtool_ops = cpu_ops;
134
135 return 0;
136}
137
138void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp)
139{
140 cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops;
141}
142
143void dsa_cpu_dsa_destroy(struct dsa_port *port)
144{
145 struct device_node *port_dn = port->dn;
146
147 if (of_phy_is_fixed_link(port_dn))
148 of_phy_deregister_fixed_link(port_dn);
149}
150
151static int dev_is_class(struct device *dev, void *class) 88static int dev_is_class(struct device *dev, void *class)
152{ 89{
153 if (dev->class != NULL && !strcmp(dev->class->name, class)) 90 if (dev->class != NULL && !strcmp(dev->class->name, class))
@@ -188,12 +125,12 @@ EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
188static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, 125static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
189 struct packet_type *pt, struct net_device *unused) 126 struct packet_type *pt, struct net_device *unused)
190{ 127{
191 struct dsa_switch_tree *dst = dev->dsa_ptr; 128 struct dsa_port *cpu_dp = dev->dsa_ptr;
192 struct sk_buff *nskb = NULL; 129 struct sk_buff *nskb = NULL;
193 struct pcpu_sw_netstats *s; 130 struct pcpu_sw_netstats *s;
194 struct dsa_slave_priv *p; 131 struct dsa_slave_priv *p;
195 132
196 if (unlikely(dst == NULL)) { 133 if (unlikely(!cpu_dp)) {
197 kfree_skb(skb); 134 kfree_skb(skb);
198 return 0; 135 return 0;
199 } 136 }
@@ -202,7 +139,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
202 if (!skb) 139 if (!skb)
203 return 0; 140 return 0;
204 141
205 nskb = dst->rcv(skb, dev, pt); 142 nskb = cpu_dp->rcv(skb, dev, pt);
206 if (!nskb) { 143 if (!nskb) {
207 kfree_skb(skb); 144 kfree_skb(skb);
208 return 0; 145 return 0;
@@ -228,7 +165,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
228#ifdef CONFIG_PM_SLEEP 165#ifdef CONFIG_PM_SLEEP
229static bool dsa_is_port_initialized(struct dsa_switch *ds, int p) 166static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
230{ 167{
231 return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; 168 return dsa_is_user_port(ds, p) && ds->ports[p].slave;
232} 169}
233 170
234int dsa_switch_suspend(struct dsa_switch *ds) 171int dsa_switch_suspend(struct dsa_switch *ds)
@@ -240,7 +177,7 @@ int dsa_switch_suspend(struct dsa_switch *ds)
240 if (!dsa_is_port_initialized(ds, i)) 177 if (!dsa_is_port_initialized(ds, i))
241 continue; 178 continue;
242 179
243 ret = dsa_slave_suspend(ds->ports[i].netdev); 180 ret = dsa_slave_suspend(ds->ports[i].slave);
244 if (ret) 181 if (ret)
245 return ret; 182 return ret;
246 } 183 }
@@ -267,7 +204,7 @@ int dsa_switch_resume(struct dsa_switch *ds)
267 if (!dsa_is_port_initialized(ds, i)) 204 if (!dsa_is_port_initialized(ds, i))
268 continue; 205 continue;
269 206
270 ret = dsa_slave_resume(ds->ports[i].netdev); 207 ret = dsa_slave_resume(ds->ports[i].slave);
271 if (ret) 208 if (ret)
272 return ret; 209 return ret;
273 } 210 }
@@ -289,6 +226,28 @@ bool dsa_schedule_work(struct work_struct *work)
289 return queue_work(dsa_owq, work); 226 return queue_work(dsa_owq, work);
290} 227}
291 228
229static ATOMIC_NOTIFIER_HEAD(dsa_notif_chain);
230
231int register_dsa_notifier(struct notifier_block *nb)
232{
233 return atomic_notifier_chain_register(&dsa_notif_chain, nb);
234}
235EXPORT_SYMBOL_GPL(register_dsa_notifier);
236
237int unregister_dsa_notifier(struct notifier_block *nb)
238{
239 return atomic_notifier_chain_unregister(&dsa_notif_chain, nb);
240}
241EXPORT_SYMBOL_GPL(unregister_dsa_notifier);
242
243int call_dsa_notifiers(unsigned long val, struct net_device *dev,
244 struct dsa_notifier_info *info)
245{
246 info->dev = dev;
247 return atomic_notifier_call_chain(&dsa_notif_chain, val, info);
248}
249EXPORT_SYMBOL_GPL(call_dsa_notifiers);
250
292static int __init dsa_init_module(void) 251static int __init dsa_init_module(void)
293{ 252{
294 int rc; 253 int rc;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 045d8a176279..44e3fb7dec8c 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -21,293 +21,297 @@
21 21
22#include "dsa_priv.h" 22#include "dsa_priv.h"
23 23
24static LIST_HEAD(dsa_switch_trees); 24static LIST_HEAD(dsa_tree_list);
25static DEFINE_MUTEX(dsa2_mutex); 25static DEFINE_MUTEX(dsa2_mutex);
26 26
27static const struct devlink_ops dsa_devlink_ops = { 27static const struct devlink_ops dsa_devlink_ops = {
28}; 28};
29 29
30static struct dsa_switch_tree *dsa_get_dst(u32 tree) 30static struct dsa_switch_tree *dsa_tree_find(int index)
31{ 31{
32 struct dsa_switch_tree *dst; 32 struct dsa_switch_tree *dst;
33 33
34 list_for_each_entry(dst, &dsa_switch_trees, list) 34 list_for_each_entry(dst, &dsa_tree_list, list)
35 if (dst->tree == tree) { 35 if (dst->index == index)
36 kref_get(&dst->refcount);
37 return dst; 36 return dst;
38 } 37
39 return NULL; 38 return NULL;
40} 39}
41 40
42static void dsa_free_dst(struct kref *ref) 41static struct dsa_switch_tree *dsa_tree_alloc(int index)
43{ 42{
44 struct dsa_switch_tree *dst = container_of(ref, struct dsa_switch_tree, 43 struct dsa_switch_tree *dst;
45 refcount);
46 44
47 list_del(&dst->list); 45 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
48 kfree(dst); 46 if (!dst)
47 return NULL;
48
49 dst->index = index;
50
51 INIT_LIST_HEAD(&dst->list);
52 list_add_tail(&dsa_tree_list, &dst->list);
53
54 /* Initialize the reference counter to the number of switches, not 1 */
55 kref_init(&dst->refcount);
56 refcount_set(&dst->refcount.refcount, 0);
57
58 return dst;
49} 59}
50 60
51static void dsa_put_dst(struct dsa_switch_tree *dst) 61static void dsa_tree_free(struct dsa_switch_tree *dst)
52{ 62{
53 kref_put(&dst->refcount, dsa_free_dst); 63 list_del(&dst->list);
64 kfree(dst);
54} 65}
55 66
56static struct dsa_switch_tree *dsa_add_dst(u32 tree) 67static struct dsa_switch_tree *dsa_tree_touch(int index)
57{ 68{
58 struct dsa_switch_tree *dst; 69 struct dsa_switch_tree *dst;
59 70
60 dst = kzalloc(sizeof(*dst), GFP_KERNEL); 71 dst = dsa_tree_find(index);
61 if (!dst) 72 if (!dst)
62 return NULL; 73 dst = dsa_tree_alloc(index);
63 dst->tree = tree;
64 INIT_LIST_HEAD(&dst->list);
65 list_add_tail(&dsa_switch_trees, &dst->list);
66 kref_init(&dst->refcount);
67 74
68 return dst; 75 return dst;
69} 76}
70 77
71static void dsa_dst_add_ds(struct dsa_switch_tree *dst, 78static void dsa_tree_get(struct dsa_switch_tree *dst)
72 struct dsa_switch *ds, u32 index)
73{ 79{
74 kref_get(&dst->refcount); 80 kref_get(&dst->refcount);
75 dst->ds[index] = ds;
76} 81}
77 82
78static void dsa_dst_del_ds(struct dsa_switch_tree *dst, 83static void dsa_tree_release(struct kref *ref)
79 struct dsa_switch *ds, u32 index)
80{ 84{
81 dst->ds[index] = NULL; 85 struct dsa_switch_tree *dst;
82 kref_put(&dst->refcount, dsa_free_dst); 86
87 dst = container_of(ref, struct dsa_switch_tree, refcount);
88
89 dsa_tree_free(dst);
83} 90}
84 91
85/* For platform data configurations, we need to have a valid name argument to 92static void dsa_tree_put(struct dsa_switch_tree *dst)
86 * differentiate a disabled port from an enabled one
87 */
88static bool dsa_port_is_valid(struct dsa_port *port)
89{ 93{
90 return !!(port->dn || port->name); 94 kref_put(&dst->refcount, dsa_tree_release);
91} 95}
92 96
93static bool dsa_port_is_dsa(struct dsa_port *port) 97static bool dsa_port_is_dsa(struct dsa_port *port)
94{ 98{
95 if (port->name && !strcmp(port->name, "dsa")) 99 return port->type == DSA_PORT_TYPE_DSA;
96 return true;
97 else
98 return !!of_parse_phandle(port->dn, "link", 0);
99} 100}
100 101
101static bool dsa_port_is_cpu(struct dsa_port *port) 102static bool dsa_port_is_cpu(struct dsa_port *port)
102{ 103{
103 if (port->name && !strcmp(port->name, "cpu")) 104 return port->type == DSA_PORT_TYPE_CPU;
104 return true;
105 else
106 return !!of_parse_phandle(port->dn, "ethernet", 0);
107} 105}
108 106
109static bool dsa_ds_find_port_dn(struct dsa_switch *ds, 107static bool dsa_port_is_user(struct dsa_port *dp)
110 struct device_node *port)
111{ 108{
112 u32 index; 109 return dp->type == DSA_PORT_TYPE_USER;
113
114 for (index = 0; index < ds->num_ports; index++)
115 if (ds->ports[index].dn == port)
116 return true;
117 return false;
118} 110}
119 111
120static struct dsa_switch *dsa_dst_find_port_dn(struct dsa_switch_tree *dst, 112static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
121 struct device_node *port) 113 struct device_node *dn)
122{ 114{
123 struct dsa_switch *ds; 115 struct dsa_switch *ds;
124 u32 index; 116 struct dsa_port *dp;
117 int device, port;
125 118
126 for (index = 0; index < DSA_MAX_SWITCHES; index++) { 119 for (device = 0; device < DSA_MAX_SWITCHES; device++) {
127 ds = dst->ds[index]; 120 ds = dst->ds[device];
128 if (!ds) 121 if (!ds)
129 continue; 122 continue;
130 123
131 if (dsa_ds_find_port_dn(ds, port)) 124 for (port = 0; port < ds->num_ports; port++) {
132 return ds; 125 dp = &ds->ports[port];
126
127 if (dp->dn == dn)
128 return dp;
129 }
133 } 130 }
134 131
135 return NULL; 132 return NULL;
136} 133}
137 134
138static int dsa_port_complete(struct dsa_switch_tree *dst, 135static bool dsa_port_setup_routing_table(struct dsa_port *dp)
139 struct dsa_switch *src_ds,
140 struct dsa_port *port,
141 u32 src_port)
142{ 136{
143 struct device_node *link; 137 struct dsa_switch *ds = dp->ds;
144 int index; 138 struct dsa_switch_tree *dst = ds->dst;
145 struct dsa_switch *dst_ds; 139 struct device_node *dn = dp->dn;
146 140 struct of_phandle_iterator it;
147 for (index = 0;; index++) { 141 struct dsa_port *link_dp;
148 link = of_parse_phandle(port->dn, "link", index); 142 int err;
149 if (!link)
150 break;
151
152 dst_ds = dsa_dst_find_port_dn(dst, link);
153 of_node_put(link);
154 143
155 if (!dst_ds) 144 of_for_each_phandle(&it, err, dn, "link", NULL, 0) {
156 return 1; 145 link_dp = dsa_tree_find_port_by_node(dst, it.node);
146 if (!link_dp) {
147 of_node_put(it.node);
148 return false;
149 }
157 150
158 src_ds->rtable[dst_ds->index] = src_port; 151 ds->rtable[link_dp->ds->index] = dp->index;
159 } 152 }
160 153
161 return 0; 154 return true;
162} 155}
163 156
164/* A switch is complete if all the DSA ports phandles point to ports 157static bool dsa_switch_setup_routing_table(struct dsa_switch *ds)
165 * known in the tree. A return value of 1 means the tree is not
166 * complete. This is not an error condition. A value of 0 is
167 * success.
168 */
169static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds)
170{ 158{
171 struct dsa_port *port; 159 bool complete = true;
172 u32 index; 160 struct dsa_port *dp;
173 int err; 161 int i;
174
175 for (index = 0; index < ds->num_ports; index++) {
176 port = &ds->ports[index];
177 if (!dsa_port_is_valid(port))
178 continue;
179 162
180 if (!dsa_port_is_dsa(port)) 163 for (i = 0; i < DSA_MAX_SWITCHES; i++)
181 continue; 164 ds->rtable[i] = DSA_RTABLE_NONE;
182 165
183 err = dsa_port_complete(dst, ds, port, index); 166 for (i = 0; i < ds->num_ports; i++) {
184 if (err != 0) 167 dp = &ds->ports[i];
185 return err;
186 168
187 ds->dsa_port_mask |= BIT(index); 169 if (dsa_port_is_dsa(dp)) {
170 complete = dsa_port_setup_routing_table(dp);
171 if (!complete)
172 break;
173 }
188 } 174 }
189 175
190 return 0; 176 return complete;
191} 177}
192 178
193/* A tree is complete if all the DSA ports phandles point to ports 179static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
194 * known in the tree. A return value of 1 means the tree is not
195 * complete. This is not an error condition. A value of 0 is
196 * success.
197 */
198static int dsa_dst_complete(struct dsa_switch_tree *dst)
199{ 180{
200 struct dsa_switch *ds; 181 struct dsa_switch *ds;
201 u32 index; 182 bool complete = true;
202 int err; 183 int device;
203 184
204 for (index = 0; index < DSA_MAX_SWITCHES; index++) { 185 for (device = 0; device < DSA_MAX_SWITCHES; device++) {
205 ds = dst->ds[index]; 186 ds = dst->ds[device];
206 if (!ds) 187 if (!ds)
207 continue; 188 continue;
208 189
209 err = dsa_ds_complete(dst, ds); 190 complete = dsa_switch_setup_routing_table(ds);
210 if (err != 0) 191 if (!complete)
211 return err; 192 break;
212 } 193 }
213 194
214 return 0; 195 return complete;
215} 196}
216 197
217static int dsa_dsa_port_apply(struct dsa_port *port) 198static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
218{ 199{
219 struct dsa_switch *ds = port->ds; 200 struct dsa_switch *ds;
220 int err; 201 struct dsa_port *dp;
202 int device, port;
221 203
222 err = dsa_cpu_dsa_setup(port); 204 for (device = 0; device < DSA_MAX_SWITCHES; device++) {
223 if (err) { 205 ds = dst->ds[device];
224 dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n", 206 if (!ds)
225 port->index, err); 207 continue;
226 return err;
227 }
228 208
229 memset(&port->devlink_port, 0, sizeof(port->devlink_port)); 209 for (port = 0; port < ds->num_ports; port++) {
210 dp = &ds->ports[port];
230 211
231 return devlink_port_register(ds->devlink, &port->devlink_port, 212 if (dsa_port_is_cpu(dp))
232 port->index); 213 return dp;
233} 214 }
215 }
234 216
235static void dsa_dsa_port_unapply(struct dsa_port *port) 217 return NULL;
236{
237 devlink_port_unregister(&port->devlink_port);
238 dsa_cpu_dsa_destroy(port);
239} 218}
240 219
241static int dsa_cpu_port_apply(struct dsa_port *port) 220static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
242{ 221{
243 struct dsa_switch *ds = port->ds; 222 struct dsa_switch *ds;
244 int err; 223 struct dsa_port *dp;
224 int device, port;
245 225
246 err = dsa_cpu_dsa_setup(port); 226 /* DSA currently only supports a single CPU port */
247 if (err) { 227 dst->cpu_dp = dsa_tree_find_first_cpu(dst);
248 dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n", 228 if (!dst->cpu_dp) {
249 port->index, err); 229 pr_warn("Tree has no master device\n");
250 return err; 230 return -EINVAL;
251 } 231 }
252 232
253 memset(&port->devlink_port, 0, sizeof(port->devlink_port)); 233 /* Assign the default CPU port to all ports of the fabric */
254 err = devlink_port_register(ds->devlink, &port->devlink_port, 234 for (device = 0; device < DSA_MAX_SWITCHES; device++) {
255 port->index); 235 ds = dst->ds[device];
256 return err; 236 if (!ds)
237 continue;
238
239 for (port = 0; port < ds->num_ports; port++) {
240 dp = &ds->ports[port];
241
242 if (dsa_port_is_user(dp))
243 dp->cpu_dp = dst->cpu_dp;
244 }
245 }
246
247 return 0;
257} 248}
258 249
259static void dsa_cpu_port_unapply(struct dsa_port *port) 250static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
260{ 251{
261 devlink_port_unregister(&port->devlink_port); 252 /* DSA currently only supports a single CPU port */
262 dsa_cpu_dsa_destroy(port); 253 dst->cpu_dp = NULL;
263 port->ds->cpu_port_mask &= ~BIT(port->index);
264
265} 254}
266 255
267static int dsa_user_port_apply(struct dsa_port *port) 256static int dsa_port_setup(struct dsa_port *dp)
268{ 257{
269 struct dsa_switch *ds = port->ds; 258 struct dsa_switch *ds = dp->ds;
270 const char *name = port->name;
271 int err; 259 int err;
272 260
273 if (port->dn) 261 memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
274 name = of_get_property(port->dn, "label", NULL);
275 if (!name)
276 name = "eth%d";
277 262
278 err = dsa_slave_create(port, name); 263 err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index);
279 if (err) {
280 dev_warn(ds->dev, "Failed to create slave %d: %d\n",
281 port->index, err);
282 port->netdev = NULL;
283 return err;
284 }
285
286 memset(&port->devlink_port, 0, sizeof(port->devlink_port));
287 err = devlink_port_register(ds->devlink, &port->devlink_port,
288 port->index);
289 if (err) 264 if (err)
290 return err; 265 return err;
291 266
292 devlink_port_type_eth_set(&port->devlink_port, port->netdev); 267 switch (dp->type) {
268 case DSA_PORT_TYPE_UNUSED:
269 break;
270 case DSA_PORT_TYPE_CPU:
271 case DSA_PORT_TYPE_DSA:
272 err = dsa_port_fixed_link_register_of(dp);
273 if (err) {
274 dev_err(ds->dev, "failed to register fixed link for port %d.%d\n",
275 ds->index, dp->index);
276 return err;
277 }
278
279 break;
280 case DSA_PORT_TYPE_USER:
281 err = dsa_slave_create(dp);
282 if (err)
283 dev_err(ds->dev, "failed to create slave for port %d.%d\n",
284 ds->index, dp->index);
285 else
286 devlink_port_type_eth_set(&dp->devlink_port, dp->slave);
287 break;
288 }
293 289
294 return 0; 290 return 0;
295} 291}
296 292
297static void dsa_user_port_unapply(struct dsa_port *port) 293static void dsa_port_teardown(struct dsa_port *dp)
298{ 294{
299 devlink_port_unregister(&port->devlink_port); 295 devlink_port_unregister(&dp->devlink_port);
300 if (port->netdev) { 296
301 dsa_slave_destroy(port->netdev); 297 switch (dp->type) {
302 port->netdev = NULL; 298 case DSA_PORT_TYPE_UNUSED:
303 port->ds->enabled_port_mask &= ~(1 << port->index); 299 break;
300 case DSA_PORT_TYPE_CPU:
301 case DSA_PORT_TYPE_DSA:
302 dsa_port_fixed_link_unregister_of(dp);
303 break;
304 case DSA_PORT_TYPE_USER:
305 if (dp->slave) {
306 dsa_slave_destroy(dp->slave);
307 dp->slave = NULL;
308 }
309 break;
304 } 310 }
305} 311}
306 312
307static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) 313static int dsa_switch_setup(struct dsa_switch *ds)
308{ 314{
309 struct dsa_port *port;
310 u32 index;
311 int err; 315 int err;
312 316
313 /* Initialize ds->phys_mii_mask before registering the slave MDIO bus 317 /* Initialize ds->phys_mii_mask before registering the slave MDIO bus
@@ -315,7 +319,7 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
315 * the slave MDIO bus driver rely on these values for probing PHY 319 * the slave MDIO bus driver rely on these values for probing PHY
316 * devices or not 320 * devices or not
317 */ 321 */
318 ds->phys_mii_mask = ds->enabled_port_mask; 322 ds->phys_mii_mask |= dsa_user_ports(ds);
319 323
320 /* Add the switch to devlink before calling setup, so that setup can 324 /* Add the switch to devlink before calling setup, so that setup can
321 * add dpipe tables 325 * add dpipe tables
@@ -336,12 +340,6 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
336 if (err) 340 if (err)
337 return err; 341 return err;
338 342
339 if (ds->ops->set_addr) {
340 err = ds->ops->set_addr(ds, dst->cpu_dp->netdev->dev_addr);
341 if (err < 0)
342 return err;
343 }
344
345 if (!ds->slave_mii_bus && ds->ops->phy_read) { 343 if (!ds->slave_mii_bus && ds->ops->phy_read) {
346 ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); 344 ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
347 if (!ds->slave_mii_bus) 345 if (!ds->slave_mii_bus)
@@ -354,56 +352,11 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
354 return err; 352 return err;
355 } 353 }
356 354
357 for (index = 0; index < ds->num_ports; index++) {
358 port = &ds->ports[index];
359 if (!dsa_port_is_valid(port))
360 continue;
361
362 if (dsa_port_is_dsa(port)) {
363 err = dsa_dsa_port_apply(port);
364 if (err)
365 return err;
366 continue;
367 }
368
369 if (dsa_port_is_cpu(port)) {
370 err = dsa_cpu_port_apply(port);
371 if (err)
372 return err;
373 continue;
374 }
375
376 err = dsa_user_port_apply(port);
377 if (err)
378 continue;
379 }
380
381 return 0; 355 return 0;
382} 356}
383 357
384static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) 358static void dsa_switch_teardown(struct dsa_switch *ds)
385{ 359{
386 struct dsa_port *port;
387 u32 index;
388
389 for (index = 0; index < ds->num_ports; index++) {
390 port = &ds->ports[index];
391 if (!dsa_port_is_valid(port))
392 continue;
393
394 if (dsa_port_is_dsa(port)) {
395 dsa_dsa_port_unapply(port);
396 continue;
397 }
398
399 if (dsa_port_is_cpu(port)) {
400 dsa_cpu_port_unapply(port);
401 continue;
402 }
403
404 dsa_user_port_unapply(port);
405 }
406
407 if (ds->slave_mii_bus && ds->ops->phy_read) 360 if (ds->slave_mii_bus && ds->ops->phy_read)
408 mdiobus_unregister(ds->slave_mii_bus); 361 mdiobus_unregister(ds->slave_mii_bus);
409 362
@@ -417,199 +370,228 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
417 370
418} 371}
419 372
420static int dsa_dst_apply(struct dsa_switch_tree *dst) 373static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
421{ 374{
422 struct dsa_switch *ds; 375 struct dsa_switch *ds;
423 u32 index; 376 struct dsa_port *dp;
377 int device, port;
424 int err; 378 int err;
425 379
426 for (index = 0; index < DSA_MAX_SWITCHES; index++) { 380 for (device = 0; device < DSA_MAX_SWITCHES; device++) {
427 ds = dst->ds[index]; 381 ds = dst->ds[device];
428 if (!ds) 382 if (!ds)
429 continue; 383 continue;
430 384
431 err = dsa_ds_apply(dst, ds); 385 err = dsa_switch_setup(ds);
432 if (err) 386 if (err)
433 return err; 387 return err;
434 }
435 388
436 if (dst->cpu_dp) { 389 for (port = 0; port < ds->num_ports; port++) {
437 err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); 390 dp = &ds->ports[port];
438 if (err)
439 return err;
440 }
441 391
442 /* If we use a tagging format that doesn't have an ethertype 392 err = dsa_port_setup(dp);
443 * field, make sure that all packets from this point on get 393 if (err)
444 * sent to the tag format's receive function. 394 return err;
445 */ 395 }
446 wmb(); 396 }
447 dst->cpu_dp->netdev->dsa_ptr = dst;
448 dst->applied = true;
449 397
450 return 0; 398 return 0;
451} 399}
452 400
453static void dsa_dst_unapply(struct dsa_switch_tree *dst) 401static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
454{ 402{
455 struct dsa_switch *ds; 403 struct dsa_switch *ds;
456 u32 index; 404 struct dsa_port *dp;
457 405 int device, port;
458 if (!dst->applied)
459 return;
460
461 dst->cpu_dp->netdev->dsa_ptr = NULL;
462
463 /* If we used a tagging format that doesn't have an ethertype
464 * field, make sure that all packets from this point get sent
465 * without the tag and go through the regular receive path.
466 */
467 wmb();
468 406
469 for (index = 0; index < DSA_MAX_SWITCHES; index++) { 407 for (device = 0; device < DSA_MAX_SWITCHES; device++) {
470 ds = dst->ds[index]; 408 ds = dst->ds[device];
471 if (!ds) 409 if (!ds)
472 continue; 410 continue;
473 411
474 dsa_ds_unapply(dst, ds); 412 for (port = 0; port < ds->num_ports; port++) {
475 } 413 dp = &ds->ports[port];
476 414
477 if (dst->cpu_dp) { 415 dsa_port_teardown(dp);
478 dsa_cpu_port_ethtool_restore(dst->cpu_dp); 416 }
479 dst->cpu_dp = NULL; 417
418 dsa_switch_teardown(ds);
480 } 419 }
420}
421
422static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
423{
424 struct dsa_port *cpu_dp = dst->cpu_dp;
425 struct net_device *master = cpu_dp->master;
481 426
482 pr_info("DSA: tree %d unapplied\n", dst->tree); 427 /* DSA currently supports a single pair of CPU port and master device */
483 dst->applied = false; 428 return dsa_master_setup(master, cpu_dp);
484} 429}
485 430
486static int dsa_cpu_parse(struct dsa_port *port, u32 index, 431static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
487 struct dsa_switch_tree *dst,
488 struct dsa_switch *ds)
489{ 432{
490 enum dsa_tag_protocol tag_protocol; 433 struct dsa_port *cpu_dp = dst->cpu_dp;
491 struct net_device *ethernet_dev; 434 struct net_device *master = cpu_dp->master;
492 struct device_node *ethernet;
493 435
494 if (port->dn) { 436 return dsa_master_teardown(master);
495 ethernet = of_parse_phandle(port->dn, "ethernet", 0); 437}
496 if (!ethernet)
497 return -EINVAL;
498 ethernet_dev = of_find_net_device_by_node(ethernet);
499 if (!ethernet_dev)
500 return -EPROBE_DEFER;
501 } else {
502 ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]);
503 if (!ethernet_dev)
504 return -EPROBE_DEFER;
505 dev_put(ethernet_dev);
506 }
507 438
508 if (!dst->cpu_dp) { 439static int dsa_tree_setup(struct dsa_switch_tree *dst)
509 dst->cpu_dp = port; 440{
510 dst->cpu_dp->netdev = ethernet_dev; 441 bool complete;
442 int err;
443
444 if (dst->setup) {
445 pr_err("DSA: tree %d already setup! Disjoint trees?\n",
446 dst->index);
447 return -EEXIST;
511 } 448 }
512 449
513 /* Initialize cpu_port_mask now for drv->setup() 450 complete = dsa_tree_setup_routing_table(dst);
514 * to have access to a correct value, just like what 451 if (!complete)
515 * net/dsa/dsa.c::dsa_switch_setup_one does. 452 return 0;
516 */
517 ds->cpu_port_mask |= BIT(index);
518 453
519 tag_protocol = ds->ops->get_tag_protocol(ds); 454 err = dsa_tree_setup_default_cpu(dst);
520 dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); 455 if (err)
521 if (IS_ERR(dst->tag_ops)) { 456 return err;
522 dev_warn(ds->dev, "No tagger for this switch\n");
523 ds->cpu_port_mask &= ~BIT(index);
524 return PTR_ERR(dst->tag_ops);
525 }
526 457
527 dst->rcv = dst->tag_ops->rcv; 458 err = dsa_tree_setup_switches(dst);
459 if (err)
460 return err;
461
462 err = dsa_tree_setup_master(dst);
463 if (err)
464 return err;
465
466 dst->setup = true;
467
468 pr_info("DSA: tree %d setup\n", dst->index);
528 469
529 return 0; 470 return 0;
530} 471}
531 472
532static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) 473static void dsa_tree_teardown(struct dsa_switch_tree *dst)
474{
475 if (!dst->setup)
476 return;
477
478 dsa_tree_teardown_master(dst);
479
480 dsa_tree_teardown_switches(dst);
481
482 dsa_tree_teardown_default_cpu(dst);
483
484 pr_info("DSA: tree %d torn down\n", dst->index);
485
486 dst->setup = false;
487}
488
489static void dsa_tree_remove_switch(struct dsa_switch_tree *dst,
490 unsigned int index)
533{ 491{
534 struct dsa_port *port; 492 dsa_tree_teardown(dst);
535 u32 index; 493
494 dst->ds[index] = NULL;
495 dsa_tree_put(dst);
496}
497
498static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
499 struct dsa_switch *ds)
500{
501 unsigned int index = ds->index;
536 int err; 502 int err;
537 503
538 for (index = 0; index < ds->num_ports; index++) { 504 if (dst->ds[index])
539 port = &ds->ports[index]; 505 return -EBUSY;
540 if (!dsa_port_is_valid(port) ||
541 dsa_port_is_dsa(port))
542 continue;
543 506
544 if (dsa_port_is_cpu(port)) { 507 dsa_tree_get(dst);
545 err = dsa_cpu_parse(port, index, dst, ds); 508 dst->ds[index] = ds;
546 if (err)
547 return err;
548 } else {
549 /* Initialize enabled_port_mask now for drv->setup()
550 * to have access to a correct value, just like what
551 * net/dsa/dsa.c::dsa_switch_setup_one does.
552 */
553 ds->enabled_port_mask |= BIT(index);
554 }
555 509
556 } 510 err = dsa_tree_setup(dst);
511 if (err)
512 dsa_tree_remove_switch(dst, index);
557 513
558 pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index); 514 return err;
515}
516
517static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
518{
519 if (!name)
520 name = "eth%d";
521
522 dp->type = DSA_PORT_TYPE_USER;
523 dp->name = name;
559 524
560 return 0; 525 return 0;
561} 526}
562 527
563static int dsa_dst_parse(struct dsa_switch_tree *dst) 528static int dsa_port_parse_dsa(struct dsa_port *dp)
564{ 529{
565 struct dsa_switch *ds; 530 dp->type = DSA_PORT_TYPE_DSA;
566 struct dsa_port *dp;
567 u32 index;
568 int port;
569 int err;
570 531
571 for (index = 0; index < DSA_MAX_SWITCHES; index++) { 532 return 0;
572 ds = dst->ds[index]; 533}
573 if (!ds)
574 continue;
575 534
576 err = dsa_ds_parse(dst, ds); 535static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
577 if (err) 536{
578 return err; 537 struct dsa_switch *ds = dp->ds;
579 } 538 struct dsa_switch_tree *dst = ds->dst;
539 const struct dsa_device_ops *tag_ops;
540 enum dsa_tag_protocol tag_protocol;
580 541
581 if (!dst->cpu_dp) { 542 tag_protocol = ds->ops->get_tag_protocol(ds, dp->index);
582 pr_warn("Tree has no master device\n"); 543 tag_ops = dsa_resolve_tag_protocol(tag_protocol);
583 return -EINVAL; 544 if (IS_ERR(tag_ops)) {
545 dev_warn(ds->dev, "No tagger for this switch\n");
546 return PTR_ERR(tag_ops);
584 } 547 }
585 548
586 /* Assign the default CPU port to all ports of the fabric */ 549 dp->type = DSA_PORT_TYPE_CPU;
587 for (index = 0; index < DSA_MAX_SWITCHES; index++) { 550 dp->rcv = tag_ops->rcv;
588 ds = dst->ds[index]; 551 dp->tag_ops = tag_ops;
589 if (!ds) 552 dp->master = master;
590 continue; 553 dp->dst = dst;
591 554
592 for (port = 0; port < ds->num_ports; port++) { 555 return 0;
593 dp = &ds->ports[port]; 556}
594 if (!dsa_port_is_valid(dp) ||
595 dsa_port_is_dsa(dp) ||
596 dsa_port_is_cpu(dp))
597 continue;
598 557
599 dp->cpu_dp = dst->cpu_dp; 558static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
600 } 559{
560 struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0);
561 const char *name = of_get_property(dn, "label", NULL);
562 bool link = of_property_read_bool(dn, "link");
563
564 dp->dn = dn;
565
566 if (ethernet) {
567 struct net_device *master;
568
569 master = of_find_net_device_by_node(ethernet);
570 if (!master)
571 return -EPROBE_DEFER;
572
573 return dsa_port_parse_cpu(dp, master);
601 } 574 }
602 575
603 pr_info("DSA: tree %d parsed\n", dst->tree); 576 if (link)
577 return dsa_port_parse_dsa(dp);
604 578
605 return 0; 579 return dsa_port_parse_user(dp, name);
606} 580}
607 581
608static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) 582static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
583 struct device_node *dn)
609{ 584{
610 struct device_node *port; 585 struct device_node *ports, *port;
611 int err; 586 struct dsa_port *dp;
612 u32 reg; 587 u32 reg;
588 int err;
589
590 ports = of_get_child_by_name(dn, "ports");
591 if (!ports) {
592 dev_err(ds->dev, "no ports child node found\n");
593 return -EINVAL;
594 }
613 595
614 for_each_available_child_of_node(ports, port) { 596 for_each_available_child_of_node(ports, port) {
615 err = of_property_read_u32(port, "reg", &reg); 597 err = of_property_read_u32(port, "reg", &reg);
@@ -619,174 +601,140 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
619 if (reg >= ds->num_ports) 601 if (reg >= ds->num_ports)
620 return -EINVAL; 602 return -EINVAL;
621 603
622 ds->ports[reg].dn = port; 604 dp = &ds->ports[reg];
605
606 err = dsa_port_parse_of(dp, port);
607 if (err)
608 return err;
623 } 609 }
624 610
625 return 0; 611 return 0;
626} 612}
627 613
628static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) 614static int dsa_switch_parse_member_of(struct dsa_switch *ds,
615 struct device_node *dn)
629{ 616{
630 bool valid_name_found = false; 617 u32 m[2] = { 0, 0 };
631 unsigned int i; 618 int sz;
632 619
633 for (i = 0; i < DSA_MAX_PORTS; i++) { 620 /* Don't error out if this optional property isn't found */
634 if (!cd->port_names[i]) 621 sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2);
635 continue; 622 if (sz < 0 && sz != -EINVAL)
623 return sz;
636 624
637 ds->ports[i].name = cd->port_names[i]; 625 ds->index = m[1];
638 valid_name_found = true; 626 if (ds->index >= DSA_MAX_SWITCHES)
639 }
640
641 if (!valid_name_found && i == DSA_MAX_PORTS)
642 return -EINVAL; 627 return -EINVAL;
643 628
629 ds->dst = dsa_tree_touch(m[0]);
630 if (!ds->dst)
631 return -ENOMEM;
632
644 return 0; 633 return 0;
645} 634}
646 635
647static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index) 636static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
648{ 637{
649 int err; 638 int err;
650 639
651 *tree = *index = 0; 640 err = dsa_switch_parse_member_of(ds, dn);
652
653 err = of_property_read_u32_index(np, "dsa,member", 0, tree);
654 if (err) {
655 /* Does not exist, but it is optional */
656 if (err == -EINVAL)
657 return 0;
658 return err;
659 }
660
661 err = of_property_read_u32_index(np, "dsa,member", 1, index);
662 if (err) 641 if (err)
663 return err; 642 return err;
664 643
665 if (*index >= DSA_MAX_SWITCHES) 644 return dsa_switch_parse_ports_of(ds, dn);
666 return -EINVAL;
667
668 return 0;
669} 645}
670 646
671static int dsa_parse_member(struct dsa_chip_data *pd, u32 *tree, u32 *index) 647static int dsa_port_parse(struct dsa_port *dp, const char *name,
648 struct device *dev)
672{ 649{
673 if (!pd) 650 if (!strcmp(name, "cpu")) {
674 return -ENODEV; 651 struct net_device *master;
675
676 /* We do not support complex trees with dsa_chip_data */
677 *tree = 0;
678 *index = 0;
679 652
680 return 0; 653 master = dsa_dev_to_net_device(dev);
681} 654 if (!master)
655 return -EPROBE_DEFER;
682 656
683static struct device_node *dsa_get_ports(struct dsa_switch *ds, 657 dev_put(master);
684 struct device_node *np)
685{
686 struct device_node *ports;
687 658
688 ports = of_get_child_by_name(np, "ports"); 659 return dsa_port_parse_cpu(dp, master);
689 if (!ports) {
690 dev_err(ds->dev, "no ports child node found\n");
691 return ERR_PTR(-EINVAL);
692 } 660 }
693 661
694 return ports; 662 if (!strcmp(name, "dsa"))
663 return dsa_port_parse_dsa(dp);
664
665 return dsa_port_parse_user(dp, name);
695} 666}
696 667
697static int _dsa_register_switch(struct dsa_switch *ds) 668static int dsa_switch_parse_ports(struct dsa_switch *ds,
669 struct dsa_chip_data *cd)
698{ 670{
699 struct dsa_chip_data *pdata = ds->dev->platform_data; 671 bool valid_name_found = false;
700 struct device_node *np = ds->dev->of_node; 672 struct dsa_port *dp;
701 struct dsa_switch_tree *dst; 673 struct device *dev;
702 struct device_node *ports; 674 const char *name;
703 u32 tree, index; 675 unsigned int i;
704 int i, err; 676 int err;
705
706 if (np) {
707 err = dsa_parse_member_dn(np, &tree, &index);
708 if (err)
709 return err;
710 677
711 ports = dsa_get_ports(ds, np); 678 for (i = 0; i < DSA_MAX_PORTS; i++) {
712 if (IS_ERR(ports)) 679 name = cd->port_names[i];
713 return PTR_ERR(ports); 680 dev = cd->netdev[i];
681 dp = &ds->ports[i];
714 682
715 err = dsa_parse_ports_dn(ports, ds); 683 if (!name)
716 if (err) 684 continue;
717 return err;
718 } else {
719 err = dsa_parse_member(pdata, &tree, &index);
720 if (err)
721 return err;
722 685
723 err = dsa_parse_ports(pdata, ds); 686 err = dsa_port_parse(dp, name, dev);
724 if (err) 687 if (err)
725 return err; 688 return err;
726 }
727 689
728 dst = dsa_get_dst(tree); 690 valid_name_found = true;
729 if (!dst) {
730 dst = dsa_add_dst(tree);
731 if (!dst)
732 return -ENOMEM;
733 }
734
735 if (dst->ds[index]) {
736 err = -EBUSY;
737 goto out;
738 } 691 }
739 692
740 ds->dst = dst; 693 if (!valid_name_found && i == DSA_MAX_PORTS)
741 ds->index = index; 694 return -EINVAL;
742 ds->cd = pdata;
743
744 /* Initialize the routing table */
745 for (i = 0; i < DSA_MAX_SWITCHES; ++i)
746 ds->rtable[i] = DSA_RTABLE_NONE;
747 695
748 dsa_dst_add_ds(dst, ds, index); 696 return 0;
697}
749 698
750 err = dsa_dst_complete(dst); 699static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
751 if (err < 0) 700{
752 goto out_del_dst; 701 ds->cd = cd;
753 702
754 if (err == 1) { 703 /* We don't support interconnected switches nor multiple trees via
755 /* Not all switches registered yet */ 704 * platform data, so this is the unique switch of the tree.
756 err = 0; 705 */
757 goto out; 706 ds->index = 0;
758 } 707 ds->dst = dsa_tree_touch(0);
708 if (!ds->dst)
709 return -ENOMEM;
759 710
760 if (dst->applied) { 711 return dsa_switch_parse_ports(ds, cd);
761 pr_info("DSA: Disjoint trees?\n"); 712}
762 return -EINVAL;
763 }
764 713
765 err = dsa_dst_parse(dst); 714static int dsa_switch_add(struct dsa_switch *ds)
766 if (err) { 715{
767 if (err == -EPROBE_DEFER) { 716 struct dsa_switch_tree *dst = ds->dst;
768 dsa_dst_del_ds(dst, ds, ds->index);
769 return err;
770 }
771 717
772 goto out_del_dst; 718 return dsa_tree_add_switch(dst, ds);
773 } 719}
774 720
775 err = dsa_dst_apply(dst); 721static int dsa_switch_probe(struct dsa_switch *ds)
776 if (err) { 722{
777 dsa_dst_unapply(dst); 723 struct dsa_chip_data *pdata = ds->dev->platform_data;
778 goto out_del_dst; 724 struct device_node *np = ds->dev->of_node;
779 } 725 int err;
780 726
781 dsa_put_dst(dst); 727 if (np)
782 return 0; 728 err = dsa_switch_parse_of(ds, np);
729 else if (pdata)
730 err = dsa_switch_parse(ds, pdata);
731 else
732 err = -ENODEV;
783 733
784out_del_dst: 734 if (err)
785 dsa_dst_del_ds(dst, ds, ds->index); 735 return err;
786out:
787 dsa_put_dst(dst);
788 736
789 return err; 737 return dsa_switch_add(ds);
790} 738}
791 739
792struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) 740struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
@@ -816,26 +764,25 @@ int dsa_register_switch(struct dsa_switch *ds)
816 int err; 764 int err;
817 765
818 mutex_lock(&dsa2_mutex); 766 mutex_lock(&dsa2_mutex);
819 err = _dsa_register_switch(ds); 767 err = dsa_switch_probe(ds);
820 mutex_unlock(&dsa2_mutex); 768 mutex_unlock(&dsa2_mutex);
821 769
822 return err; 770 return err;
823} 771}
824EXPORT_SYMBOL_GPL(dsa_register_switch); 772EXPORT_SYMBOL_GPL(dsa_register_switch);
825 773
826static void _dsa_unregister_switch(struct dsa_switch *ds) 774static void dsa_switch_remove(struct dsa_switch *ds)
827{ 775{
828 struct dsa_switch_tree *dst = ds->dst; 776 struct dsa_switch_tree *dst = ds->dst;
777 unsigned int index = ds->index;
829 778
830 dsa_dst_unapply(dst); 779 dsa_tree_remove_switch(dst, index);
831
832 dsa_dst_del_ds(dst, ds, ds->index);
833} 780}
834 781
835void dsa_unregister_switch(struct dsa_switch *ds) 782void dsa_unregister_switch(struct dsa_switch *ds)
836{ 783{
837 mutex_lock(&dsa2_mutex); 784 mutex_lock(&dsa2_mutex);
838 _dsa_unregister_switch(ds); 785 dsa_switch_remove(ds);
839 mutex_unlock(&dsa2_mutex); 786 mutex_unlock(&dsa2_mutex);
840} 787}
841EXPORT_SYMBOL_GPL(dsa_unregister_switch); 788EXPORT_SYMBOL_GPL(dsa_unregister_switch);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 9c3eeb72462d..7d036696e8c4 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -66,7 +66,7 @@ struct dsa_notifier_vlan_info {
66}; 66};
67 67
68struct dsa_slave_priv { 68struct dsa_slave_priv {
69 /* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */ 69 /* Copy of CPU port xmit for faster access in slave transmit hot path */
70 struct sk_buff * (*xmit)(struct sk_buff *skb, 70 struct sk_buff * (*xmit)(struct sk_buff *skb,
71 struct net_device *dev); 71 struct net_device *dev);
72 72
@@ -79,7 +79,6 @@ struct dsa_slave_priv {
79 * The phylib phy_device pointer for the PHY connected 79 * The phylib phy_device pointer for the PHY connected
80 * to this port. 80 * to this port.
81 */ 81 */
82 struct phy_device *phy;
83 phy_interface_t phy_interface; 82 phy_interface_t phy_interface;
84 int old_link; 83 int old_link;
85 int old_pause; 84 int old_pause;
@@ -94,11 +93,7 @@ struct dsa_slave_priv {
94}; 93};
95 94
96/* dsa.c */ 95/* dsa.c */
97int dsa_cpu_dsa_setup(struct dsa_port *port);
98void dsa_cpu_dsa_destroy(struct dsa_port *dport);
99const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); 96const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
100int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp);
101void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp);
102bool dsa_schedule_work(struct work_struct *work); 97bool dsa_schedule_work(struct work_struct *work);
103 98
104/* legacy.c */ 99/* legacy.c */
@@ -112,10 +107,35 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
112 struct net_device *dev, 107 struct net_device *dev,
113 const unsigned char *addr, u16 vid); 108 const unsigned char *addr, u16 vid);
114 109
110/* master.c */
111int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
112void dsa_master_teardown(struct net_device *dev);
113
114static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
115 int device, int port)
116{
117 struct dsa_port *cpu_dp = dev->dsa_ptr;
118 struct dsa_switch_tree *dst = cpu_dp->dst;
119 struct dsa_switch *ds;
120
121 if (device < 0 || device >= DSA_MAX_SWITCHES)
122 return NULL;
123
124 ds = dst->ds[device];
125 if (!ds)
126 return NULL;
127
128 if (port < 0 || port >= ds->num_ports)
129 return NULL;
130
131 return ds->ports[port].slave;
132}
133
115/* port.c */ 134/* port.c */
116int dsa_port_set_state(struct dsa_port *dp, u8 state, 135int dsa_port_set_state(struct dsa_port *dp, u8 state,
117 struct switchdev_trans *trans); 136 struct switchdev_trans *trans);
118void dsa_port_set_state_now(struct dsa_port *dp, u8 state); 137int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
138void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy);
119int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); 139int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
120void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); 140void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
121int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, 141int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
@@ -126,33 +146,52 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
126 u16 vid); 146 u16 vid);
127int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, 147int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
128 u16 vid); 148 u16 vid);
129int dsa_port_mdb_add(struct dsa_port *dp, 149int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
150int dsa_port_mdb_add(const struct dsa_port *dp,
130 const struct switchdev_obj_port_mdb *mdb, 151 const struct switchdev_obj_port_mdb *mdb,
131 struct switchdev_trans *trans); 152 struct switchdev_trans *trans);
132int dsa_port_mdb_del(struct dsa_port *dp, 153int dsa_port_mdb_del(const struct dsa_port *dp,
133 const struct switchdev_obj_port_mdb *mdb); 154 const struct switchdev_obj_port_mdb *mdb);
134int dsa_port_vlan_add(struct dsa_port *dp, 155int dsa_port_vlan_add(struct dsa_port *dp,
135 const struct switchdev_obj_port_vlan *vlan, 156 const struct switchdev_obj_port_vlan *vlan,
136 struct switchdev_trans *trans); 157 struct switchdev_trans *trans);
137int dsa_port_vlan_del(struct dsa_port *dp, 158int dsa_port_vlan_del(struct dsa_port *dp,
138 const struct switchdev_obj_port_vlan *vlan); 159 const struct switchdev_obj_port_vlan *vlan);
160int dsa_port_fixed_link_register_of(struct dsa_port *dp);
161void dsa_port_fixed_link_unregister_of(struct dsa_port *dp);
162
139/* slave.c */ 163/* slave.c */
140extern const struct dsa_device_ops notag_netdev_ops; 164extern const struct dsa_device_ops notag_netdev_ops;
141void dsa_slave_mii_bus_init(struct dsa_switch *ds); 165void dsa_slave_mii_bus_init(struct dsa_switch *ds);
142void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops); 166int dsa_slave_create(struct dsa_port *dp);
143int dsa_slave_create(struct dsa_port *port, const char *name);
144void dsa_slave_destroy(struct net_device *slave_dev); 167void dsa_slave_destroy(struct net_device *slave_dev);
145int dsa_slave_suspend(struct net_device *slave_dev); 168int dsa_slave_suspend(struct net_device *slave_dev);
146int dsa_slave_resume(struct net_device *slave_dev); 169int dsa_slave_resume(struct net_device *slave_dev);
147int dsa_slave_register_notifier(void); 170int dsa_slave_register_notifier(void);
148void dsa_slave_unregister_notifier(void); 171void dsa_slave_unregister_notifier(void);
149 172
173static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
174{
175 struct dsa_slave_priv *p = netdev_priv(dev);
176
177 return p->dp;
178}
179
180static inline struct net_device *
181dsa_slave_to_master(const struct net_device *dev)
182{
183 struct dsa_port *dp = dsa_slave_to_port(dev);
184
185 return dp->cpu_dp->master;
186}
187
150/* switch.c */ 188/* switch.c */
151int dsa_switch_register_notifier(struct dsa_switch *ds); 189int dsa_switch_register_notifier(struct dsa_switch *ds);
152void dsa_switch_unregister_notifier(struct dsa_switch *ds); 190void dsa_switch_unregister_notifier(struct dsa_switch *ds);
153 191
154/* tag_brcm.c */ 192/* tag_brcm.c */
155extern const struct dsa_device_ops brcm_netdev_ops; 193extern const struct dsa_device_ops brcm_netdev_ops;
194extern const struct dsa_device_ops brcm_prepend_netdev_ops;
156 195
157/* tag_dsa.c */ 196/* tag_dsa.c */
158extern const struct dsa_device_ops dsa_netdev_ops; 197extern const struct dsa_device_ops dsa_netdev_ops;
@@ -175,14 +214,4 @@ extern const struct dsa_device_ops qca_netdev_ops;
175/* tag_trailer.c */ 214/* tag_trailer.c */
176extern const struct dsa_device_ops trailer_netdev_ops; 215extern const struct dsa_device_ops trailer_netdev_ops;
177 216
178static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p)
179{
180 return p->dp->cpu_dp->netdev;
181}
182
183static inline struct dsa_port *dsa_get_cpu_port(struct dsa_switch_tree *dst)
184{
185 return dst->cpu_dp;
186}
187
188#endif 217#endif
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 91e6f7981d39..84611d7fcfa2 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
86 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) 86 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
87 continue; 87 continue;
88 88
89 ret = dsa_cpu_dsa_setup(&ds->ports[port]); 89 ret = dsa_port_fixed_link_register_of(&ds->ports[port]);
90 if (ret) 90 if (ret)
91 return ret; 91 return ret;
92 } 92 }
@@ -101,6 +101,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
101 struct dsa_chip_data *cd = ds->cd; 101 struct dsa_chip_data *cd = ds->cd;
102 bool valid_name_found = false; 102 bool valid_name_found = false;
103 int index = ds->index; 103 int index = ds->index;
104 struct dsa_port *dp;
104 int i, ret; 105 int i, ret;
105 106
106 /* 107 /*
@@ -109,9 +110,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
109 for (i = 0; i < ds->num_ports; i++) { 110 for (i = 0; i < ds->num_ports; i++) {
110 char *name; 111 char *name;
111 112
113 dp = &ds->ports[i];
114
112 name = cd->port_names[i]; 115 name = cd->port_names[i];
113 if (name == NULL) 116 if (name == NULL)
114 continue; 117 continue;
118 dp->name = name;
115 119
116 if (!strcmp(name, "cpu")) { 120 if (!strcmp(name, "cpu")) {
117 if (dst->cpu_dp) { 121 if (dst->cpu_dp) {
@@ -120,12 +124,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
120 return -EINVAL; 124 return -EINVAL;
121 } 125 }
122 dst->cpu_dp = &ds->ports[i]; 126 dst->cpu_dp = &ds->ports[i];
123 dst->cpu_dp->netdev = master; 127 dst->cpu_dp->master = master;
124 ds->cpu_port_mask |= 1 << i; 128 dp->type = DSA_PORT_TYPE_CPU;
125 } else if (!strcmp(name, "dsa")) { 129 } else if (!strcmp(name, "dsa")) {
126 ds->dsa_port_mask |= 1 << i; 130 dp->type = DSA_PORT_TYPE_DSA;
127 } else { 131 } else {
128 ds->enabled_port_mask |= 1 << i; 132 dp->type = DSA_PORT_TYPE_USER;
129 } 133 }
130 valid_name_found = true; 134 valid_name_found = true;
131 } 135 }
@@ -136,7 +140,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
136 /* Make the built-in MII bus mask match the number of ports, 140 /* Make the built-in MII bus mask match the number of ports,
137 * switch drivers can override this later 141 * switch drivers can override this later
138 */ 142 */
139 ds->phys_mii_mask = ds->enabled_port_mask; 143 ds->phys_mii_mask |= dsa_user_ports(ds);
140 144
141 /* 145 /*
142 * If the CPU connects to this switch, set the switch tree 146 * If the CPU connects to this switch, set the switch tree
@@ -144,14 +148,19 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
144 * switch. 148 * switch.
145 */ 149 */
146 if (dst->cpu_dp->ds == ds) { 150 if (dst->cpu_dp->ds == ds) {
151 const struct dsa_device_ops *tag_ops;
147 enum dsa_tag_protocol tag_protocol; 152 enum dsa_tag_protocol tag_protocol;
148 153
149 tag_protocol = ops->get_tag_protocol(ds); 154 tag_protocol = ops->get_tag_protocol(ds, dst->cpu_dp->index);
150 dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); 155 tag_ops = dsa_resolve_tag_protocol(tag_protocol);
151 if (IS_ERR(dst->tag_ops)) 156 if (IS_ERR(tag_ops))
152 return PTR_ERR(dst->tag_ops); 157 return PTR_ERR(tag_ops);
158
159 dst->cpu_dp->tag_ops = tag_ops;
153 160
154 dst->rcv = dst->tag_ops->rcv; 161 /* Few copies for faster access in master receive hot path */
162 dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv;
163 dst->cpu_dp->dst = dst;
155 } 164 }
156 165
157 memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable)); 166 memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
@@ -167,12 +176,6 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
167 if (ret) 176 if (ret)
168 return ret; 177 return ret;
169 178
170 if (ops->set_addr) {
171 ret = ops->set_addr(ds, master->dev_addr);
172 if (ret < 0)
173 return ret;
174 }
175
176 if (!ds->slave_mii_bus && ops->phy_read) { 179 if (!ds->slave_mii_bus && ops->phy_read) {
177 ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); 180 ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
178 if (!ds->slave_mii_bus) 181 if (!ds->slave_mii_bus)
@@ -191,10 +194,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
191 ds->ports[i].dn = cd->port_dn[i]; 194 ds->ports[i].dn = cd->port_dn[i];
192 ds->ports[i].cpu_dp = dst->cpu_dp; 195 ds->ports[i].cpu_dp = dst->cpu_dp;
193 196
194 if (!(ds->enabled_port_mask & (1 << i))) 197 if (dsa_is_user_port(ds, i))
195 continue; 198 continue;
196 199
197 ret = dsa_slave_create(&ds->ports[i], cd->port_names[i]); 200 ret = dsa_slave_create(&ds->ports[i]);
198 if (ret < 0) 201 if (ret < 0)
199 netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n", 202 netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
200 index, i, cd->port_names[i], ret); 203 index, i, cd->port_names[i], ret);
@@ -206,10 +209,6 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
206 netdev_err(master, "[%d] : can't configure CPU and DSA ports\n", 209 netdev_err(master, "[%d] : can't configure CPU and DSA ports\n",
207 index); 210 index);
208 211
209 ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp);
210 if (ret)
211 return ret;
212
213 return 0; 212 return 0;
214} 213}
215 214
@@ -263,24 +262,20 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
263 262
264 /* Destroy network devices for physical switch ports. */ 263 /* Destroy network devices for physical switch ports. */
265 for (port = 0; port < ds->num_ports; port++) { 264 for (port = 0; port < ds->num_ports; port++) {
266 if (!(ds->enabled_port_mask & (1 << port))) 265 if (!dsa_is_user_port(ds, port))
267 continue; 266 continue;
268 267
269 if (!ds->ports[port].netdev) 268 if (!ds->ports[port].slave)
270 continue; 269 continue;
271 270
272 dsa_slave_destroy(ds->ports[port].netdev); 271 dsa_slave_destroy(ds->ports[port].slave);
273 } 272 }
274 273
275 /* Disable configuration of the CPU and DSA ports */ 274 /* Disable configuration of the CPU and DSA ports */
276 for (port = 0; port < ds->num_ports; port++) { 275 for (port = 0; port < ds->num_ports; port++) {
277 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) 276 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
278 continue; 277 continue;
279 dsa_cpu_dsa_destroy(&ds->ports[port]); 278 dsa_port_fixed_link_unregister_of(&ds->ports[port]);
280
281 /* Clearing a bit which is not set does no harm */
282 ds->cpu_port_mask |= ~(1 << port);
283 ds->dsa_port_mask |= ~(1 << port);
284 } 279 }
285 280
286 if (ds->slave_mii_bus && ds->ops->phy_read) 281 if (ds->slave_mii_bus && ds->ops->phy_read)
@@ -598,15 +593,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
598 if (!configured) 593 if (!configured)
599 return -EPROBE_DEFER; 594 return -EPROBE_DEFER;
600 595
601 /* 596 return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp);
602 * If we use a tagging format that doesn't have an ethertype
603 * field, make sure that all packets from this point on get
604 * sent to the tag format's receive function.
605 */
606 wmb();
607 dev->dsa_ptr = dst;
608
609 return 0;
610} 597}
611 598
612static int dsa_probe(struct platform_device *pdev) 599static int dsa_probe(struct platform_device *pdev)
@@ -671,13 +658,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
671{ 658{
672 int i; 659 int i;
673 660
674 dst->cpu_dp->netdev->dsa_ptr = NULL; 661 dsa_master_teardown(dst->cpu_dp->master);
675
676 /* If we used a tagging format that doesn't have an ethertype
677 * field, make sure that all packets from this point get sent
678 * without the tag and go through the regular receive path.
679 */
680 wmb();
681 662
682 for (i = 0; i < dst->pd->nr_chips; i++) { 663 for (i = 0; i < dst->pd->nr_chips; i++) {
683 struct dsa_switch *ds = dst->ds[i]; 664 struct dsa_switch *ds = dst->ds[i];
@@ -686,9 +667,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
686 dsa_switch_destroy(ds); 667 dsa_switch_destroy(ds);
687 } 668 }
688 669
689 dsa_cpu_port_ethtool_restore(dst->cpu_dp); 670 dev_put(dst->cpu_dp->master);
690
691 dev_put(dst->cpu_dp->netdev);
692} 671}
693 672
694static int dsa_remove(struct platform_device *pdev) 673static int dsa_remove(struct platform_device *pdev)
@@ -745,8 +724,7 @@ int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
745 const unsigned char *addr, u16 vid, 724 const unsigned char *addr, u16 vid,
746 u16 flags) 725 u16 flags)
747{ 726{
748 struct dsa_slave_priv *p = netdev_priv(dev); 727 struct dsa_port *dp = dsa_slave_to_port(dev);
749 struct dsa_port *dp = p->dp;
750 728
751 return dsa_port_fdb_add(dp, addr, vid); 729 return dsa_port_fdb_add(dp, addr, vid);
752} 730}
@@ -755,8 +733,7 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
755 struct net_device *dev, 733 struct net_device *dev,
756 const unsigned char *addr, u16 vid) 734 const unsigned char *addr, u16 vid)
757{ 735{
758 struct dsa_slave_priv *p = netdev_priv(dev); 736 struct dsa_port *dp = dsa_slave_to_port(dev);
759 struct dsa_port *dp = p->dp;
760 737
761 return dsa_port_fdb_del(dp, addr, vid); 738 return dsa_port_fdb_del(dp, addr, vid);
762} 739}
diff --git a/net/dsa/master.c b/net/dsa/master.c
new file mode 100644
index 000000000000..00589147f042
--- /dev/null
+++ b/net/dsa/master.c
@@ -0,0 +1,143 @@
1/*
2 * Handling of a master device, switching frames via its switch fabric CPU port
3 *
4 * Copyright (c) 2017 Savoir-faire Linux Inc.
5 * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 */
12
13#include "dsa_priv.h"
14
15static void dsa_master_get_ethtool_stats(struct net_device *dev,
16 struct ethtool_stats *stats,
17 uint64_t *data)
18{
19 struct dsa_port *cpu_dp = dev->dsa_ptr;
20 const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
21 struct dsa_switch *ds = cpu_dp->ds;
22 int port = cpu_dp->index;
23 int count = 0;
24
25 if (ops && ops->get_sset_count && ops->get_ethtool_stats) {
26 count = ops->get_sset_count(dev, ETH_SS_STATS);
27 ops->get_ethtool_stats(dev, stats, data);
28 }
29
30 if (ds->ops->get_ethtool_stats)
31 ds->ops->get_ethtool_stats(ds, port, data + count);
32}
33
34static int dsa_master_get_sset_count(struct net_device *dev, int sset)
35{
36 struct dsa_port *cpu_dp = dev->dsa_ptr;
37 const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
38 struct dsa_switch *ds = cpu_dp->ds;
39 int count = 0;
40
41 if (ops && ops->get_sset_count)
42 count += ops->get_sset_count(dev, sset);
43
44 if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
45 count += ds->ops->get_sset_count(ds);
46
47 return count;
48}
49
50static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
51 uint8_t *data)
52{
53 struct dsa_port *cpu_dp = dev->dsa_ptr;
54 const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
55 struct dsa_switch *ds = cpu_dp->ds;
56 int port = cpu_dp->index;
57 int len = ETH_GSTRING_LEN;
58 int mcount = 0, count;
59 unsigned int i;
60 uint8_t pfx[4];
61 uint8_t *ndata;
62
63 snprintf(pfx, sizeof(pfx), "p%.2d", port);
64 /* We do not want to be NULL-terminated, since this is a prefix */
65 pfx[sizeof(pfx) - 1] = '_';
66
67 if (ops && ops->get_sset_count && ops->get_strings) {
68 mcount = ops->get_sset_count(dev, ETH_SS_STATS);
69 ops->get_strings(dev, stringset, data);
70 }
71
72 if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
73 ndata = data + mcount * len;
74 /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
75 * the output after to prepend our CPU port prefix we
76 * constructed earlier
77 */
78 ds->ops->get_strings(ds, port, ndata);
79 count = ds->ops->get_sset_count(ds);
80 for (i = 0; i < count; i++) {
81 memmove(ndata + (i * len + sizeof(pfx)),
82 ndata + i * len, len - sizeof(pfx));
83 memcpy(ndata + i * len, pfx, sizeof(pfx));
84 }
85 }
86}
87
88static int dsa_master_ethtool_setup(struct net_device *dev)
89{
90 struct dsa_port *cpu_dp = dev->dsa_ptr;
91 struct dsa_switch *ds = cpu_dp->ds;
92 struct ethtool_ops *ops;
93
94 ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL);
95 if (!ops)
96 return -ENOMEM;
97
98 cpu_dp->orig_ethtool_ops = dev->ethtool_ops;
99 if (cpu_dp->orig_ethtool_ops)
100 memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops));
101
102 ops->get_sset_count = dsa_master_get_sset_count;
103 ops->get_ethtool_stats = dsa_master_get_ethtool_stats;
104 ops->get_strings = dsa_master_get_strings;
105
106 dev->ethtool_ops = ops;
107
108 return 0;
109}
110
111static void dsa_master_ethtool_teardown(struct net_device *dev)
112{
113 struct dsa_port *cpu_dp = dev->dsa_ptr;
114
115 dev->ethtool_ops = cpu_dp->orig_ethtool_ops;
116 cpu_dp->orig_ethtool_ops = NULL;
117}
118
119int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
120{
121 /* If we use a tagging format that doesn't have an ethertype
122 * field, make sure that all packets from this point on get
123 * sent to the tag format's receive function.
124 */
125 wmb();
126
127 dev->dsa_ptr = cpu_dp;
128
129 return dsa_master_ethtool_setup(dev);
130}
131
132void dsa_master_teardown(struct net_device *dev)
133{
134 dsa_master_ethtool_teardown(dev);
135
136 dev->dsa_ptr = NULL;
137
138 /* If we used a tagging format that doesn't have an ethertype
139 * field, make sure that all packets from this point get sent
140 * without the tag and go through the regular receive path.
141 */
142 wmb();
143}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 659676ba3f8b..bb4be2679904 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -12,10 +12,12 @@
12 12
13#include <linux/if_bridge.h> 13#include <linux/if_bridge.h>
14#include <linux/notifier.h> 14#include <linux/notifier.h>
15#include <linux/of_mdio.h>
16#include <linux/of_net.h>
15 17
16#include "dsa_priv.h" 18#include "dsa_priv.h"
17 19
18static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v) 20static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
19{ 21{
20 struct raw_notifier_head *nh = &dp->ds->dst->nh; 22 struct raw_notifier_head *nh = &dp->ds->dst->nh;
21 int err; 23 int err;
@@ -56,7 +58,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state,
56 return 0; 58 return 0;
57} 59}
58 60
59void dsa_port_set_state_now(struct dsa_port *dp, u8 state) 61static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
60{ 62{
61 int err; 63 int err;
62 64
@@ -65,6 +67,35 @@ void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
65 pr_err("DSA: failed to set STP state %u (%d)\n", state, err); 67 pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
66} 68}
67 69
70int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
71{
72 u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
73 struct dsa_switch *ds = dp->ds;
74 int port = dp->index;
75 int err;
76
77 if (ds->ops->port_enable) {
78 err = ds->ops->port_enable(ds, port, phy);
79 if (err)
80 return err;
81 }
82
83 dsa_port_set_state_now(dp, stp_state);
84
85 return 0;
86}
87
88void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy)
89{
90 struct dsa_switch *ds = dp->ds;
91 int port = dp->index;
92
93 dsa_port_set_state_now(dp, BR_STATE_DISABLED);
94
95 if (ds->ops->port_disable)
96 ds->ops->port_disable(ds, port, phy);
97}
98
68int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) 99int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
69{ 100{
70 struct dsa_notifier_bridge_info info = { 101 struct dsa_notifier_bridge_info info = {
@@ -173,7 +204,18 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
173 return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info); 204 return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
174} 205}
175 206
176int dsa_port_mdb_add(struct dsa_port *dp, 207int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
208{
209 struct dsa_switch *ds = dp->ds;
210 int port = dp->index;
211
212 if (!ds->ops->port_fdb_dump)
213 return -EOPNOTSUPP;
214
215 return ds->ops->port_fdb_dump(ds, port, cb, data);
216}
217
218int dsa_port_mdb_add(const struct dsa_port *dp,
177 const struct switchdev_obj_port_mdb *mdb, 219 const struct switchdev_obj_port_mdb *mdb,
178 struct switchdev_trans *trans) 220 struct switchdev_trans *trans)
179{ 221{
@@ -187,7 +229,7 @@ int dsa_port_mdb_add(struct dsa_port *dp,
187 return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info); 229 return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
188} 230}
189 231
190int dsa_port_mdb_del(struct dsa_port *dp, 232int dsa_port_mdb_del(const struct dsa_port *dp,
191 const struct switchdev_obj_port_mdb *mdb) 233 const struct switchdev_obj_port_mdb *mdb)
192{ 234{
193 struct dsa_notifier_mdb_info info = { 235 struct dsa_notifier_mdb_info info = {
@@ -210,7 +252,10 @@ int dsa_port_vlan_add(struct dsa_port *dp,
210 .vlan = vlan, 252 .vlan = vlan,
211 }; 253 };
212 254
213 return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); 255 if (br_vlan_enabled(dp->bridge_dev))
256 return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
257
258 return 0;
214} 259}
215 260
216int dsa_port_vlan_del(struct dsa_port *dp, 261int dsa_port_vlan_del(struct dsa_port *dp,
@@ -222,5 +267,53 @@ int dsa_port_vlan_del(struct dsa_port *dp,
222 .vlan = vlan, 267 .vlan = vlan,
223 }; 268 };
224 269
225 return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); 270 if (br_vlan_enabled(dp->bridge_dev))
271 return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
272
273 return 0;
274}
275
276int dsa_port_fixed_link_register_of(struct dsa_port *dp)
277{
278 struct device_node *dn = dp->dn;
279 struct dsa_switch *ds = dp->ds;
280 struct phy_device *phydev;
281 int port = dp->index;
282 int mode;
283 int err;
284
285 if (of_phy_is_fixed_link(dn)) {
286 err = of_phy_register_fixed_link(dn);
287 if (err) {
288 dev_err(ds->dev,
289 "failed to register the fixed PHY of port %d\n",
290 port);
291 return err;
292 }
293
294 phydev = of_phy_find_device(dn);
295
296 mode = of_get_phy_mode(dn);
297 if (mode < 0)
298 mode = PHY_INTERFACE_MODE_NA;
299 phydev->interface = mode;
300
301 genphy_config_init(phydev);
302 genphy_read_status(phydev);
303
304 if (ds->ops->adjust_link)
305 ds->ops->adjust_link(ds, port, phydev);
306
307 put_device(&phydev->mdio.dev);
308 }
309
310 return 0;
311}
312
313void dsa_port_fixed_link_unregister_of(struct dsa_port *dp)
314{
315 struct device_node *dn = dp->dn;
316
317 if (of_phy_is_fixed_link(dn))
318 of_phy_deregister_fixed_link(dn);
226} 319}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 865e29e62bad..d6e7a642493b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -55,7 +55,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
55 ds->slave_mii_bus->read = dsa_slave_phy_read; 55 ds->slave_mii_bus->read = dsa_slave_phy_read;
56 ds->slave_mii_bus->write = dsa_slave_phy_write; 56 ds->slave_mii_bus->write = dsa_slave_phy_write;
57 snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d", 57 snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d",
58 ds->dst->tree, ds->index); 58 ds->dst->index, ds->index);
59 ds->slave_mii_bus->parent = ds->dev; 59 ds->slave_mii_bus->parent = ds->dev;
60 ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; 60 ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
61} 61}
@@ -64,18 +64,13 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
64/* slave device handling ****************************************************/ 64/* slave device handling ****************************************************/
65static int dsa_slave_get_iflink(const struct net_device *dev) 65static int dsa_slave_get_iflink(const struct net_device *dev)
66{ 66{
67 struct dsa_slave_priv *p = netdev_priv(dev); 67 return dsa_slave_to_master(dev)->ifindex;
68
69 return dsa_master_netdev(p)->ifindex;
70} 68}
71 69
72static int dsa_slave_open(struct net_device *dev) 70static int dsa_slave_open(struct net_device *dev)
73{ 71{
74 struct dsa_slave_priv *p = netdev_priv(dev); 72 struct net_device *master = dsa_slave_to_master(dev);
75 struct dsa_port *dp = p->dp; 73 struct dsa_port *dp = dsa_slave_to_port(dev);
76 struct dsa_switch *ds = dp->ds;
77 struct net_device *master = dsa_master_netdev(p);
78 u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
79 int err; 74 int err;
80 75
81 if (!(master->flags & IFF_UP)) 76 if (!(master->flags & IFF_UP))
@@ -98,16 +93,12 @@ static int dsa_slave_open(struct net_device *dev)
98 goto clear_allmulti; 93 goto clear_allmulti;
99 } 94 }
100 95
101 if (ds->ops->port_enable) { 96 err = dsa_port_enable(dp, dev->phydev);
102 err = ds->ops->port_enable(ds, p->dp->index, p->phy); 97 if (err)
103 if (err) 98 goto clear_promisc;
104 goto clear_promisc;
105 }
106
107 dsa_port_set_state_now(p->dp, stp_state);
108 99
109 if (p->phy) 100 if (dev->phydev)
110 phy_start(p->phy); 101 phy_start(dev->phydev);
111 102
112 return 0; 103 return 0;
113 104
@@ -126,12 +117,13 @@ out:
126 117
127static int dsa_slave_close(struct net_device *dev) 118static int dsa_slave_close(struct net_device *dev)
128{ 119{
129 struct dsa_slave_priv *p = netdev_priv(dev); 120 struct net_device *master = dsa_slave_to_master(dev);
130 struct net_device *master = dsa_master_netdev(p); 121 struct dsa_port *dp = dsa_slave_to_port(dev);
131 struct dsa_switch *ds = p->dp->ds; 122
123 if (dev->phydev)
124 phy_stop(dev->phydev);
132 125
133 if (p->phy) 126 dsa_port_disable(dp, dev->phydev);
134 phy_stop(p->phy);
135 127
136 dev_mc_unsync(master, dev); 128 dev_mc_unsync(master, dev);
137 dev_uc_unsync(master, dev); 129 dev_uc_unsync(master, dev);
@@ -143,18 +135,12 @@ static int dsa_slave_close(struct net_device *dev)
143 if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) 135 if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
144 dev_uc_del(master, dev->dev_addr); 136 dev_uc_del(master, dev->dev_addr);
145 137
146 if (ds->ops->port_disable)
147 ds->ops->port_disable(ds, p->dp->index, p->phy);
148
149 dsa_port_set_state_now(p->dp, BR_STATE_DISABLED);
150
151 return 0; 138 return 0;
152} 139}
153 140
154static void dsa_slave_change_rx_flags(struct net_device *dev, int change) 141static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
155{ 142{
156 struct dsa_slave_priv *p = netdev_priv(dev); 143 struct net_device *master = dsa_slave_to_master(dev);
157 struct net_device *master = dsa_master_netdev(p);
158 144
159 if (change & IFF_ALLMULTI) 145 if (change & IFF_ALLMULTI)
160 dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); 146 dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
@@ -164,8 +150,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
164 150
165static void dsa_slave_set_rx_mode(struct net_device *dev) 151static void dsa_slave_set_rx_mode(struct net_device *dev)
166{ 152{
167 struct dsa_slave_priv *p = netdev_priv(dev); 153 struct net_device *master = dsa_slave_to_master(dev);
168 struct net_device *master = dsa_master_netdev(p);
169 154
170 dev_mc_sync(master, dev); 155 dev_mc_sync(master, dev);
171 dev_uc_sync(master, dev); 156 dev_uc_sync(master, dev);
@@ -173,8 +158,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev)
173 158
174static int dsa_slave_set_mac_address(struct net_device *dev, void *a) 159static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
175{ 160{
176 struct dsa_slave_priv *p = netdev_priv(dev); 161 struct net_device *master = dsa_slave_to_master(dev);
177 struct net_device *master = dsa_master_netdev(p);
178 struct sockaddr *addr = a; 162 struct sockaddr *addr = a;
179 int err; 163 int err;
180 164
@@ -255,43 +239,34 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
255 struct net_device *dev, struct net_device *filter_dev, 239 struct net_device *dev, struct net_device *filter_dev,
256 int *idx) 240 int *idx)
257{ 241{
242 struct dsa_port *dp = dsa_slave_to_port(dev);
258 struct dsa_slave_dump_ctx dump = { 243 struct dsa_slave_dump_ctx dump = {
259 .dev = dev, 244 .dev = dev,
260 .skb = skb, 245 .skb = skb,
261 .cb = cb, 246 .cb = cb,
262 .idx = *idx, 247 .idx = *idx,
263 }; 248 };
264 struct dsa_slave_priv *p = netdev_priv(dev);
265 struct dsa_port *dp = p->dp;
266 struct dsa_switch *ds = dp->ds;
267 int err; 249 int err;
268 250
269 if (!ds->ops->port_fdb_dump) 251 err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump);
270 return -EOPNOTSUPP;
271
272 err = ds->ops->port_fdb_dump(ds, dp->index,
273 dsa_slave_port_fdb_do_dump,
274 &dump);
275 *idx = dump.idx; 252 *idx = dump.idx;
253
276 return err; 254 return err;
277} 255}
278 256
279static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 257static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
280{ 258{
281 struct dsa_slave_priv *p = netdev_priv(dev); 259 if (!dev->phydev)
282 260 return -ENODEV;
283 if (p->phy != NULL)
284 return phy_mii_ioctl(p->phy, ifr, cmd);
285 261
286 return -EOPNOTSUPP; 262 return phy_mii_ioctl(dev->phydev, ifr, cmd);
287} 263}
288 264
289static int dsa_slave_port_attr_set(struct net_device *dev, 265static int dsa_slave_port_attr_set(struct net_device *dev,
290 const struct switchdev_attr *attr, 266 const struct switchdev_attr *attr,
291 struct switchdev_trans *trans) 267 struct switchdev_trans *trans)
292{ 268{
293 struct dsa_slave_priv *p = netdev_priv(dev); 269 struct dsa_port *dp = dsa_slave_to_port(dev);
294 struct dsa_port *dp = p->dp;
295 int ret; 270 int ret;
296 271
297 switch (attr->id) { 272 switch (attr->id) {
@@ -317,8 +292,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
317 const struct switchdev_obj *obj, 292 const struct switchdev_obj *obj,
318 struct switchdev_trans *trans) 293 struct switchdev_trans *trans)
319{ 294{
320 struct dsa_slave_priv *p = netdev_priv(dev); 295 struct dsa_port *dp = dsa_slave_to_port(dev);
321 struct dsa_port *dp = p->dp;
322 int err; 296 int err;
323 297
324 /* For the prepare phase, ensure the full set of changes is feasable in 298 /* For the prepare phase, ensure the full set of changes is feasable in
@@ -330,6 +304,13 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
330 case SWITCHDEV_OBJ_ID_PORT_MDB: 304 case SWITCHDEV_OBJ_ID_PORT_MDB:
331 err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); 305 err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
332 break; 306 break;
307 case SWITCHDEV_OBJ_ID_HOST_MDB:
308 /* DSA can directly translate this to a normal MDB add,
309 * but on the CPU port.
310 */
311 err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj),
312 trans);
313 break;
333 case SWITCHDEV_OBJ_ID_PORT_VLAN: 314 case SWITCHDEV_OBJ_ID_PORT_VLAN:
334 err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), 315 err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
335 trans); 316 trans);
@@ -345,14 +326,19 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
345static int dsa_slave_port_obj_del(struct net_device *dev, 326static int dsa_slave_port_obj_del(struct net_device *dev,
346 const struct switchdev_obj *obj) 327 const struct switchdev_obj *obj)
347{ 328{
348 struct dsa_slave_priv *p = netdev_priv(dev); 329 struct dsa_port *dp = dsa_slave_to_port(dev);
349 struct dsa_port *dp = p->dp;
350 int err; 330 int err;
351 331
352 switch (obj->id) { 332 switch (obj->id) {
353 case SWITCHDEV_OBJ_ID_PORT_MDB: 333 case SWITCHDEV_OBJ_ID_PORT_MDB:
354 err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); 334 err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
355 break; 335 break;
336 case SWITCHDEV_OBJ_ID_HOST_MDB:
337 /* DSA can directly translate this to a normal MDB add,
338 * but on the CPU port.
339 */
340 err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
341 break;
356 case SWITCHDEV_OBJ_ID_PORT_VLAN: 342 case SWITCHDEV_OBJ_ID_PORT_VLAN:
357 err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); 343 err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
358 break; 344 break;
@@ -367,13 +353,14 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
367static int dsa_slave_port_attr_get(struct net_device *dev, 353static int dsa_slave_port_attr_get(struct net_device *dev,
368 struct switchdev_attr *attr) 354 struct switchdev_attr *attr)
369{ 355{
370 struct dsa_slave_priv *p = netdev_priv(dev); 356 struct dsa_port *dp = dsa_slave_to_port(dev);
371 struct dsa_switch *ds = p->dp->ds; 357 struct dsa_switch *ds = dp->ds;
358 struct dsa_switch_tree *dst = ds->dst;
372 359
373 switch (attr->id) { 360 switch (attr->id) {
374 case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: 361 case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
375 attr->u.ppid.id_len = sizeof(ds->index); 362 attr->u.ppid.id_len = sizeof(dst->index);
376 memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len); 363 memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len);
377 break; 364 break;
378 case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: 365 case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
379 attr->u.brport_flags_support = 0; 366 attr->u.brport_flags_support = 0;
@@ -385,10 +372,12 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
385 return 0; 372 return 0;
386} 373}
387 374
388static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p, 375static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
389 struct sk_buff *skb) 376 struct sk_buff *skb)
390{ 377{
391#ifdef CONFIG_NET_POLL_CONTROLLER 378#ifdef CONFIG_NET_POLL_CONTROLLER
379 struct dsa_slave_priv *p = netdev_priv(dev);
380
392 if (p->netpoll) 381 if (p->netpoll)
393 netpoll_send_skb(p->netpoll, skb); 382 netpoll_send_skb(p->netpoll, skb);
394#else 383#else
@@ -422,43 +411,18 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
422 * tag to be successfully transmitted 411 * tag to be successfully transmitted
423 */ 412 */
424 if (unlikely(netpoll_tx_running(dev))) 413 if (unlikely(netpoll_tx_running(dev)))
425 return dsa_netpoll_send_skb(p, nskb); 414 return dsa_slave_netpoll_send_skb(dev, nskb);
426 415
427 /* Queue the SKB for transmission on the parent interface, but 416 /* Queue the SKB for transmission on the parent interface, but
428 * do not modify its EtherType 417 * do not modify its EtherType
429 */ 418 */
430 nskb->dev = dsa_master_netdev(p); 419 nskb->dev = dsa_slave_to_master(dev);
431 dev_queue_xmit(nskb); 420 dev_queue_xmit(nskb);
432 421
433 return NETDEV_TX_OK; 422 return NETDEV_TX_OK;
434} 423}
435 424
436/* ethtool operations *******************************************************/ 425/* ethtool operations *******************************************************/
437static int
438dsa_slave_get_link_ksettings(struct net_device *dev,
439 struct ethtool_link_ksettings *cmd)
440{
441 struct dsa_slave_priv *p = netdev_priv(dev);
442
443 if (!p->phy)
444 return -EOPNOTSUPP;
445
446 phy_ethtool_ksettings_get(p->phy, cmd);
447
448 return 0;
449}
450
451static int
452dsa_slave_set_link_ksettings(struct net_device *dev,
453 const struct ethtool_link_ksettings *cmd)
454{
455 struct dsa_slave_priv *p = netdev_priv(dev);
456
457 if (p->phy != NULL)
458 return phy_ethtool_ksettings_set(p->phy, cmd);
459
460 return -EOPNOTSUPP;
461}
462 426
463static void dsa_slave_get_drvinfo(struct net_device *dev, 427static void dsa_slave_get_drvinfo(struct net_device *dev,
464 struct ethtool_drvinfo *drvinfo) 428 struct ethtool_drvinfo *drvinfo)
@@ -470,11 +434,11 @@ static void dsa_slave_get_drvinfo(struct net_device *dev,
470 434
471static int dsa_slave_get_regs_len(struct net_device *dev) 435static int dsa_slave_get_regs_len(struct net_device *dev)
472{ 436{
473 struct dsa_slave_priv *p = netdev_priv(dev); 437 struct dsa_port *dp = dsa_slave_to_port(dev);
474 struct dsa_switch *ds = p->dp->ds; 438 struct dsa_switch *ds = dp->ds;
475 439
476 if (ds->ops->get_regs_len) 440 if (ds->ops->get_regs_len)
477 return ds->ops->get_regs_len(ds, p->dp->index); 441 return ds->ops->get_regs_len(ds, dp->index);
478 442
479 return -EOPNOTSUPP; 443 return -EOPNOTSUPP;
480} 444}
@@ -482,39 +446,27 @@ static int dsa_slave_get_regs_len(struct net_device *dev)
482static void 446static void
483dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) 447dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
484{ 448{
485 struct dsa_slave_priv *p = netdev_priv(dev); 449 struct dsa_port *dp = dsa_slave_to_port(dev);
486 struct dsa_switch *ds = p->dp->ds; 450 struct dsa_switch *ds = dp->ds;
487 451
488 if (ds->ops->get_regs) 452 if (ds->ops->get_regs)
489 ds->ops->get_regs(ds, p->dp->index, regs, _p); 453 ds->ops->get_regs(ds, dp->index, regs, _p);
490}
491
492static int dsa_slave_nway_reset(struct net_device *dev)
493{
494 struct dsa_slave_priv *p = netdev_priv(dev);
495
496 if (p->phy != NULL)
497 return genphy_restart_aneg(p->phy);
498
499 return -EOPNOTSUPP;
500} 454}
501 455
502static u32 dsa_slave_get_link(struct net_device *dev) 456static u32 dsa_slave_get_link(struct net_device *dev)
503{ 457{
504 struct dsa_slave_priv *p = netdev_priv(dev); 458 if (!dev->phydev)
459 return -ENODEV;
505 460
506 if (p->phy != NULL) { 461 genphy_update_link(dev->phydev);
507 genphy_update_link(p->phy);
508 return p->phy->link;
509 }
510 462
511 return -EOPNOTSUPP; 463 return dev->phydev->link;
512} 464}
513 465
514static int dsa_slave_get_eeprom_len(struct net_device *dev) 466static int dsa_slave_get_eeprom_len(struct net_device *dev)
515{ 467{
516 struct dsa_slave_priv *p = netdev_priv(dev); 468 struct dsa_port *dp = dsa_slave_to_port(dev);
517 struct dsa_switch *ds = p->dp->ds; 469 struct dsa_switch *ds = dp->ds;
518 470
519 if (ds->cd && ds->cd->eeprom_len) 471 if (ds->cd && ds->cd->eeprom_len)
520 return ds->cd->eeprom_len; 472 return ds->cd->eeprom_len;
@@ -528,8 +480,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
528static int dsa_slave_get_eeprom(struct net_device *dev, 480static int dsa_slave_get_eeprom(struct net_device *dev,
529 struct ethtool_eeprom *eeprom, u8 *data) 481 struct ethtool_eeprom *eeprom, u8 *data)
530{ 482{
531 struct dsa_slave_priv *p = netdev_priv(dev); 483 struct dsa_port *dp = dsa_slave_to_port(dev);
532 struct dsa_switch *ds = p->dp->ds; 484 struct dsa_switch *ds = dp->ds;
533 485
534 if (ds->ops->get_eeprom) 486 if (ds->ops->get_eeprom)
535 return ds->ops->get_eeprom(ds, eeprom, data); 487 return ds->ops->get_eeprom(ds, eeprom, data);
@@ -540,8 +492,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev,
540static int dsa_slave_set_eeprom(struct net_device *dev, 492static int dsa_slave_set_eeprom(struct net_device *dev,
541 struct ethtool_eeprom *eeprom, u8 *data) 493 struct ethtool_eeprom *eeprom, u8 *data)
542{ 494{
543 struct dsa_slave_priv *p = netdev_priv(dev); 495 struct dsa_port *dp = dsa_slave_to_port(dev);
544 struct dsa_switch *ds = p->dp->ds; 496 struct dsa_switch *ds = dp->ds;
545 497
546 if (ds->ops->set_eeprom) 498 if (ds->ops->set_eeprom)
547 return ds->ops->set_eeprom(ds, eeprom, data); 499 return ds->ops->set_eeprom(ds, eeprom, data);
@@ -552,8 +504,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev,
552static void dsa_slave_get_strings(struct net_device *dev, 504static void dsa_slave_get_strings(struct net_device *dev,
553 uint32_t stringset, uint8_t *data) 505 uint32_t stringset, uint8_t *data)
554{ 506{
555 struct dsa_slave_priv *p = netdev_priv(dev); 507 struct dsa_port *dp = dsa_slave_to_port(dev);
556 struct dsa_switch *ds = p->dp->ds; 508 struct dsa_switch *ds = dp->ds;
557 509
558 if (stringset == ETH_SS_STATS) { 510 if (stringset == ETH_SS_STATS) {
559 int len = ETH_GSTRING_LEN; 511 int len = ETH_GSTRING_LEN;
@@ -563,80 +515,7 @@ static void dsa_slave_get_strings(struct net_device *dev,
563 strncpy(data + 2 * len, "rx_packets", len); 515 strncpy(data + 2 * len, "rx_packets", len);
564 strncpy(data + 3 * len, "rx_bytes", len); 516 strncpy(data + 3 * len, "rx_bytes", len);
565 if (ds->ops->get_strings) 517 if (ds->ops->get_strings)
566 ds->ops->get_strings(ds, p->dp->index, data + 4 * len); 518 ds->ops->get_strings(ds, dp->index, data + 4 * len);
567 }
568}
569
570static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
571 struct ethtool_stats *stats,
572 uint64_t *data)
573{
574 struct dsa_switch_tree *dst = dev->dsa_ptr;
575 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
576 struct dsa_switch *ds = cpu_dp->ds;
577 s8 cpu_port = cpu_dp->index;
578 int count = 0;
579
580 if (cpu_dp->ethtool_ops.get_sset_count) {
581 count = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
582 cpu_dp->ethtool_ops.get_ethtool_stats(dev, stats, data);
583 }
584
585 if (ds->ops->get_ethtool_stats)
586 ds->ops->get_ethtool_stats(ds, cpu_port, data + count);
587}
588
589static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
590{
591 struct dsa_switch_tree *dst = dev->dsa_ptr;
592 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
593 struct dsa_switch *ds = cpu_dp->ds;
594 int count = 0;
595
596 if (cpu_dp->ethtool_ops.get_sset_count)
597 count += cpu_dp->ethtool_ops.get_sset_count(dev, sset);
598
599 if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
600 count += ds->ops->get_sset_count(ds);
601
602 return count;
603}
604
605static void dsa_cpu_port_get_strings(struct net_device *dev,
606 uint32_t stringset, uint8_t *data)
607{
608 struct dsa_switch_tree *dst = dev->dsa_ptr;
609 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
610 struct dsa_switch *ds = cpu_dp->ds;
611 s8 cpu_port = cpu_dp->index;
612 int len = ETH_GSTRING_LEN;
613 int mcount = 0, count;
614 unsigned int i;
615 uint8_t pfx[4];
616 uint8_t *ndata;
617
618 snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port);
619 /* We do not want to be NULL-terminated, since this is a prefix */
620 pfx[sizeof(pfx) - 1] = '_';
621
622 if (cpu_dp->ethtool_ops.get_sset_count) {
623 mcount = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
624 cpu_dp->ethtool_ops.get_strings(dev, stringset, data);
625 }
626
627 if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
628 ndata = data + mcount * len;
629 /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
630 * the output after to prepend our CPU port prefix we
631 * constructed earlier
632 */
633 ds->ops->get_strings(ds, cpu_port, ndata);
634 count = ds->ops->get_sset_count(ds);
635 for (i = 0; i < count; i++) {
636 memmove(ndata + (i * len + sizeof(pfx)),
637 ndata + i * len, len - sizeof(pfx));
638 memcpy(ndata + i * len, pfx, sizeof(pfx));
639 }
640 } 519 }
641} 520}
642 521
@@ -644,8 +523,9 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
644 struct ethtool_stats *stats, 523 struct ethtool_stats *stats,
645 uint64_t *data) 524 uint64_t *data)
646{ 525{
526 struct dsa_port *dp = dsa_slave_to_port(dev);
647 struct dsa_slave_priv *p = netdev_priv(dev); 527 struct dsa_slave_priv *p = netdev_priv(dev);
648 struct dsa_switch *ds = p->dp->ds; 528 struct dsa_switch *ds = dp->ds;
649 struct pcpu_sw_netstats *s; 529 struct pcpu_sw_netstats *s;
650 unsigned int start; 530 unsigned int start;
651 int i; 531 int i;
@@ -667,13 +547,13 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
667 data[3] += rx_bytes; 547 data[3] += rx_bytes;
668 } 548 }
669 if (ds->ops->get_ethtool_stats) 549 if (ds->ops->get_ethtool_stats)
670 ds->ops->get_ethtool_stats(ds, p->dp->index, data + 4); 550 ds->ops->get_ethtool_stats(ds, dp->index, data + 4);
671} 551}
672 552
673static int dsa_slave_get_sset_count(struct net_device *dev, int sset) 553static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
674{ 554{
675 struct dsa_slave_priv *p = netdev_priv(dev); 555 struct dsa_port *dp = dsa_slave_to_port(dev);
676 struct dsa_switch *ds = p->dp->ds; 556 struct dsa_switch *ds = dp->ds;
677 557
678 if (sset == ETH_SS_STATS) { 558 if (sset == ETH_SS_STATS) {
679 int count; 559 int count;
@@ -690,77 +570,77 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
690 570
691static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) 571static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
692{ 572{
693 struct dsa_slave_priv *p = netdev_priv(dev); 573 struct dsa_port *dp = dsa_slave_to_port(dev);
694 struct dsa_switch *ds = p->dp->ds; 574 struct dsa_switch *ds = dp->ds;
695 575
696 if (ds->ops->get_wol) 576 if (ds->ops->get_wol)
697 ds->ops->get_wol(ds, p->dp->index, w); 577 ds->ops->get_wol(ds, dp->index, w);
698} 578}
699 579
700static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) 580static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
701{ 581{
702 struct dsa_slave_priv *p = netdev_priv(dev); 582 struct dsa_port *dp = dsa_slave_to_port(dev);
703 struct dsa_switch *ds = p->dp->ds; 583 struct dsa_switch *ds = dp->ds;
704 int ret = -EOPNOTSUPP; 584 int ret = -EOPNOTSUPP;
705 585
706 if (ds->ops->set_wol) 586 if (ds->ops->set_wol)
707 ret = ds->ops->set_wol(ds, p->dp->index, w); 587 ret = ds->ops->set_wol(ds, dp->index, w);
708 588
709 return ret; 589 return ret;
710} 590}
711 591
712static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) 592static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
713{ 593{
714 struct dsa_slave_priv *p = netdev_priv(dev); 594 struct dsa_port *dp = dsa_slave_to_port(dev);
715 struct dsa_switch *ds = p->dp->ds; 595 struct dsa_switch *ds = dp->ds;
716 int ret; 596 int ret;
717 597
718 /* Port's PHY and MAC both need to be EEE capable */ 598 /* Port's PHY and MAC both need to be EEE capable */
719 if (!p->phy) 599 if (!dev->phydev)
720 return -ENODEV; 600 return -ENODEV;
721 601
722 if (!ds->ops->set_mac_eee) 602 if (!ds->ops->set_mac_eee)
723 return -EOPNOTSUPP; 603 return -EOPNOTSUPP;
724 604
725 ret = ds->ops->set_mac_eee(ds, p->dp->index, e); 605 ret = ds->ops->set_mac_eee(ds, dp->index, e);
726 if (ret) 606 if (ret)
727 return ret; 607 return ret;
728 608
729 if (e->eee_enabled) { 609 if (e->eee_enabled) {
730 ret = phy_init_eee(p->phy, 0); 610 ret = phy_init_eee(dev->phydev, 0);
731 if (ret) 611 if (ret)
732 return ret; 612 return ret;
733 } 613 }
734 614
735 return phy_ethtool_set_eee(p->phy, e); 615 return phy_ethtool_set_eee(dev->phydev, e);
736} 616}
737 617
738static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) 618static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
739{ 619{
740 struct dsa_slave_priv *p = netdev_priv(dev); 620 struct dsa_port *dp = dsa_slave_to_port(dev);
741 struct dsa_switch *ds = p->dp->ds; 621 struct dsa_switch *ds = dp->ds;
742 int ret; 622 int ret;
743 623
744 /* Port's PHY and MAC both need to be EEE capable */ 624 /* Port's PHY and MAC both need to be EEE capable */
745 if (!p->phy) 625 if (!dev->phydev)
746 return -ENODEV; 626 return -ENODEV;
747 627
748 if (!ds->ops->get_mac_eee) 628 if (!ds->ops->get_mac_eee)
749 return -EOPNOTSUPP; 629 return -EOPNOTSUPP;
750 630
751 ret = ds->ops->get_mac_eee(ds, p->dp->index, e); 631 ret = ds->ops->get_mac_eee(ds, dp->index, e);
752 if (ret) 632 if (ret)
753 return ret; 633 return ret;
754 634
755 return phy_ethtool_get_eee(p->phy, e); 635 return phy_ethtool_get_eee(dev->phydev, e);
756} 636}
757 637
758#ifdef CONFIG_NET_POLL_CONTROLLER 638#ifdef CONFIG_NET_POLL_CONTROLLER
759static int dsa_slave_netpoll_setup(struct net_device *dev, 639static int dsa_slave_netpoll_setup(struct net_device *dev,
760 struct netpoll_info *ni) 640 struct netpoll_info *ni)
761{ 641{
642 struct net_device *master = dsa_slave_to_master(dev);
762 struct dsa_slave_priv *p = netdev_priv(dev); 643 struct dsa_slave_priv *p = netdev_priv(dev);
763 struct net_device *master = dsa_master_netdev(p);
764 struct netpoll *netpoll; 644 struct netpoll *netpoll;
765 int err = 0; 645 int err = 0;
766 646
@@ -800,18 +680,18 @@ static void dsa_slave_poll_controller(struct net_device *dev)
800static int dsa_slave_get_phys_port_name(struct net_device *dev, 680static int dsa_slave_get_phys_port_name(struct net_device *dev,
801 char *name, size_t len) 681 char *name, size_t len)
802{ 682{
803 struct dsa_slave_priv *p = netdev_priv(dev); 683 struct dsa_port *dp = dsa_slave_to_port(dev);
804 684
805 if (snprintf(name, len, "p%d", p->dp->index) >= len) 685 if (snprintf(name, len, "p%d", dp->index) >= len)
806 return -EINVAL; 686 return -EINVAL;
807 687
808 return 0; 688 return 0;
809} 689}
810 690
811static struct dsa_mall_tc_entry * 691static struct dsa_mall_tc_entry *
812dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p, 692dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
813 unsigned long cookie)
814{ 693{
694 struct dsa_slave_priv *p = netdev_priv(dev);
815 struct dsa_mall_tc_entry *mall_tc_entry; 695 struct dsa_mall_tc_entry *mall_tc_entry;
816 696
817 list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) 697 list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list)
@@ -825,14 +705,15 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
825 struct tc_cls_matchall_offload *cls, 705 struct tc_cls_matchall_offload *cls,
826 bool ingress) 706 bool ingress)
827{ 707{
708 struct dsa_port *dp = dsa_slave_to_port(dev);
828 struct dsa_slave_priv *p = netdev_priv(dev); 709 struct dsa_slave_priv *p = netdev_priv(dev);
829 struct dsa_mall_tc_entry *mall_tc_entry; 710 struct dsa_mall_tc_entry *mall_tc_entry;
830 __be16 protocol = cls->common.protocol; 711 __be16 protocol = cls->common.protocol;
831 struct dsa_switch *ds = p->dp->ds;
832 struct net *net = dev_net(dev); 712 struct net *net = dev_net(dev);
833 struct dsa_slave_priv *to_p; 713 struct dsa_switch *ds = dp->ds;
834 struct net_device *to_dev; 714 struct net_device *to_dev;
835 const struct tc_action *a; 715 const struct tc_action *a;
716 struct dsa_port *to_dp;
836 int err = -EOPNOTSUPP; 717 int err = -EOPNOTSUPP;
837 LIST_HEAD(actions); 718 LIST_HEAD(actions);
838 int ifindex; 719 int ifindex;
@@ -865,13 +746,12 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
865 mall_tc_entry->type = DSA_PORT_MALL_MIRROR; 746 mall_tc_entry->type = DSA_PORT_MALL_MIRROR;
866 mirror = &mall_tc_entry->mirror; 747 mirror = &mall_tc_entry->mirror;
867 748
868 to_p = netdev_priv(to_dev); 749 to_dp = dsa_slave_to_port(to_dev);
869 750
870 mirror->to_local_port = to_p->dp->index; 751 mirror->to_local_port = to_dp->index;
871 mirror->ingress = ingress; 752 mirror->ingress = ingress;
872 753
873 err = ds->ops->port_mirror_add(ds, p->dp->index, mirror, 754 err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress);
874 ingress);
875 if (err) { 755 if (err) {
876 kfree(mall_tc_entry); 756 kfree(mall_tc_entry);
877 return err; 757 return err;
@@ -886,14 +766,14 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
886static void dsa_slave_del_cls_matchall(struct net_device *dev, 766static void dsa_slave_del_cls_matchall(struct net_device *dev,
887 struct tc_cls_matchall_offload *cls) 767 struct tc_cls_matchall_offload *cls)
888{ 768{
889 struct dsa_slave_priv *p = netdev_priv(dev); 769 struct dsa_port *dp = dsa_slave_to_port(dev);
890 struct dsa_mall_tc_entry *mall_tc_entry; 770 struct dsa_mall_tc_entry *mall_tc_entry;
891 struct dsa_switch *ds = p->dp->ds; 771 struct dsa_switch *ds = dp->ds;
892 772
893 if (!ds->ops->port_mirror_del) 773 if (!ds->ops->port_mirror_del)
894 return; 774 return;
895 775
896 mall_tc_entry = dsa_slave_mall_tc_entry_find(p, cls->cookie); 776 mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie);
897 if (!mall_tc_entry) 777 if (!mall_tc_entry)
898 return; 778 return;
899 779
@@ -901,8 +781,7 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
901 781
902 switch (mall_tc_entry->type) { 782 switch (mall_tc_entry->type) {
903 case DSA_PORT_MALL_MIRROR: 783 case DSA_PORT_MALL_MIRROR:
904 ds->ops->port_mirror_del(ds, p->dp->index, 784 ds->ops->port_mirror_del(ds, dp->index, &mall_tc_entry->mirror);
905 &mall_tc_entry->mirror);
906 break; 785 break;
907 default: 786 default:
908 WARN_ON(1); 787 WARN_ON(1);
@@ -912,17 +791,9 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
912} 791}
913 792
914static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev, 793static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
915 struct tc_cls_matchall_offload *cls) 794 struct tc_cls_matchall_offload *cls,
795 bool ingress)
916{ 796{
917 bool ingress;
918
919 if (is_classid_clsact_ingress(cls->common.classid))
920 ingress = true;
921 else if (is_classid_clsact_egress(cls->common.classid))
922 ingress = false;
923 else
924 return -EOPNOTSUPP;
925
926 if (cls->common.chain_index) 797 if (cls->common.chain_index)
927 return -EOPNOTSUPP; 798 return -EOPNOTSUPP;
928 799
@@ -937,12 +808,63 @@ static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
937 } 808 }
938} 809}
939 810
811static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
812 void *cb_priv, bool ingress)
813{
814 struct net_device *dev = cb_priv;
815
816 if (!tc_can_offload(dev))
817 return -EOPNOTSUPP;
818
819 switch (type) {
820 case TC_SETUP_CLSMATCHALL:
821 return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress);
822 default:
823 return -EOPNOTSUPP;
824 }
825}
826
827static int dsa_slave_setup_tc_block_cb_ig(enum tc_setup_type type,
828 void *type_data, void *cb_priv)
829{
830 return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, true);
831}
832
833static int dsa_slave_setup_tc_block_cb_eg(enum tc_setup_type type,
834 void *type_data, void *cb_priv)
835{
836 return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, false);
837}
838
839static int dsa_slave_setup_tc_block(struct net_device *dev,
840 struct tc_block_offload *f)
841{
842 tc_setup_cb_t *cb;
843
844 if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
845 cb = dsa_slave_setup_tc_block_cb_ig;
846 else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
847 cb = dsa_slave_setup_tc_block_cb_eg;
848 else
849 return -EOPNOTSUPP;
850
851 switch (f->command) {
852 case TC_BLOCK_BIND:
853 return tcf_block_cb_register(f->block, cb, dev, dev);
854 case TC_BLOCK_UNBIND:
855 tcf_block_cb_unregister(f->block, cb, dev);
856 return 0;
857 default:
858 return -EOPNOTSUPP;
859 }
860}
861
940static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, 862static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
941 void *type_data) 863 void *type_data)
942{ 864{
943 switch (type) { 865 switch (type) {
944 case TC_SETUP_CLSMATCHALL: 866 case TC_SETUP_BLOCK:
945 return dsa_slave_setup_tc_cls_matchall(dev, type_data); 867 return dsa_slave_setup_tc_block(dev, type_data);
946 default: 868 default:
947 return -EOPNOTSUPP; 869 return -EOPNOTSUPP;
948 } 870 }
@@ -976,42 +898,35 @@ static void dsa_slave_get_stats64(struct net_device *dev,
976 } 898 }
977} 899}
978 900
979void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
980{
981 ops->get_sset_count = dsa_cpu_port_get_sset_count;
982 ops->get_ethtool_stats = dsa_cpu_port_get_ethtool_stats;
983 ops->get_strings = dsa_cpu_port_get_strings;
984}
985
986static int dsa_slave_get_rxnfc(struct net_device *dev, 901static int dsa_slave_get_rxnfc(struct net_device *dev,
987 struct ethtool_rxnfc *nfc, u32 *rule_locs) 902 struct ethtool_rxnfc *nfc, u32 *rule_locs)
988{ 903{
989 struct dsa_slave_priv *p = netdev_priv(dev); 904 struct dsa_port *dp = dsa_slave_to_port(dev);
990 struct dsa_switch *ds = p->dp->ds; 905 struct dsa_switch *ds = dp->ds;
991 906
992 if (!ds->ops->get_rxnfc) 907 if (!ds->ops->get_rxnfc)
993 return -EOPNOTSUPP; 908 return -EOPNOTSUPP;
994 909
995 return ds->ops->get_rxnfc(ds, p->dp->index, nfc, rule_locs); 910 return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs);
996} 911}
997 912
998static int dsa_slave_set_rxnfc(struct net_device *dev, 913static int dsa_slave_set_rxnfc(struct net_device *dev,
999 struct ethtool_rxnfc *nfc) 914 struct ethtool_rxnfc *nfc)
1000{ 915{
1001 struct dsa_slave_priv *p = netdev_priv(dev); 916 struct dsa_port *dp = dsa_slave_to_port(dev);
1002 struct dsa_switch *ds = p->dp->ds; 917 struct dsa_switch *ds = dp->ds;
1003 918
1004 if (!ds->ops->set_rxnfc) 919 if (!ds->ops->set_rxnfc)
1005 return -EOPNOTSUPP; 920 return -EOPNOTSUPP;
1006 921
1007 return ds->ops->set_rxnfc(ds, p->dp->index, nfc); 922 return ds->ops->set_rxnfc(ds, dp->index, nfc);
1008} 923}
1009 924
1010static const struct ethtool_ops dsa_slave_ethtool_ops = { 925static const struct ethtool_ops dsa_slave_ethtool_ops = {
1011 .get_drvinfo = dsa_slave_get_drvinfo, 926 .get_drvinfo = dsa_slave_get_drvinfo,
1012 .get_regs_len = dsa_slave_get_regs_len, 927 .get_regs_len = dsa_slave_get_regs_len,
1013 .get_regs = dsa_slave_get_regs, 928 .get_regs = dsa_slave_get_regs,
1014 .nway_reset = dsa_slave_nway_reset, 929 .nway_reset = phy_ethtool_nway_reset,
1015 .get_link = dsa_slave_get_link, 930 .get_link = dsa_slave_get_link,
1016 .get_eeprom_len = dsa_slave_get_eeprom_len, 931 .get_eeprom_len = dsa_slave_get_eeprom_len,
1017 .get_eeprom = dsa_slave_get_eeprom, 932 .get_eeprom = dsa_slave_get_eeprom,
@@ -1023,8 +938,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
1023 .get_wol = dsa_slave_get_wol, 938 .get_wol = dsa_slave_get_wol,
1024 .set_eee = dsa_slave_set_eee, 939 .set_eee = dsa_slave_set_eee,
1025 .get_eee = dsa_slave_get_eee, 940 .get_eee = dsa_slave_get_eee,
1026 .get_link_ksettings = dsa_slave_get_link_ksettings, 941 .get_link_ksettings = phy_ethtool_get_link_ksettings,
1027 .set_link_ksettings = dsa_slave_set_link_ksettings, 942 .set_link_ksettings = phy_ethtool_set_link_ksettings,
1028 .get_rxnfc = dsa_slave_get_rxnfc, 943 .get_rxnfc = dsa_slave_get_rxnfc,
1029 .set_rxnfc = dsa_slave_set_rxnfc, 944 .set_rxnfc = dsa_slave_set_rxnfc,
1030}; 945};
@@ -1064,78 +979,81 @@ static struct device_type dsa_type = {
1064 979
1065static void dsa_slave_adjust_link(struct net_device *dev) 980static void dsa_slave_adjust_link(struct net_device *dev)
1066{ 981{
982 struct dsa_port *dp = dsa_slave_to_port(dev);
1067 struct dsa_slave_priv *p = netdev_priv(dev); 983 struct dsa_slave_priv *p = netdev_priv(dev);
1068 struct dsa_switch *ds = p->dp->ds; 984 struct dsa_switch *ds = dp->ds;
1069 unsigned int status_changed = 0; 985 unsigned int status_changed = 0;
1070 986
1071 if (p->old_link != p->phy->link) { 987 if (p->old_link != dev->phydev->link) {
1072 status_changed = 1; 988 status_changed = 1;
1073 p->old_link = p->phy->link; 989 p->old_link = dev->phydev->link;
1074 } 990 }
1075 991
1076 if (p->old_duplex != p->phy->duplex) { 992 if (p->old_duplex != dev->phydev->duplex) {
1077 status_changed = 1; 993 status_changed = 1;
1078 p->old_duplex = p->phy->duplex; 994 p->old_duplex = dev->phydev->duplex;
1079 } 995 }
1080 996
1081 if (p->old_pause != p->phy->pause) { 997 if (p->old_pause != dev->phydev->pause) {
1082 status_changed = 1; 998 status_changed = 1;
1083 p->old_pause = p->phy->pause; 999 p->old_pause = dev->phydev->pause;
1084 } 1000 }
1085 1001
1086 if (ds->ops->adjust_link && status_changed) 1002 if (ds->ops->adjust_link && status_changed)
1087 ds->ops->adjust_link(ds, p->dp->index, p->phy); 1003 ds->ops->adjust_link(ds, dp->index, dev->phydev);
1088 1004
1089 if (status_changed) 1005 if (status_changed)
1090 phy_print_status(p->phy); 1006 phy_print_status(dev->phydev);
1091} 1007}
1092 1008
1093static int dsa_slave_fixed_link_update(struct net_device *dev, 1009static int dsa_slave_fixed_link_update(struct net_device *dev,
1094 struct fixed_phy_status *status) 1010 struct fixed_phy_status *status)
1095{ 1011{
1096 struct dsa_slave_priv *p;
1097 struct dsa_switch *ds; 1012 struct dsa_switch *ds;
1013 struct dsa_port *dp;
1098 1014
1099 if (dev) { 1015 if (dev) {
1100 p = netdev_priv(dev); 1016 dp = dsa_slave_to_port(dev);
1101 ds = p->dp->ds; 1017 ds = dp->ds;
1102 if (ds->ops->fixed_link_update) 1018 if (ds->ops->fixed_link_update)
1103 ds->ops->fixed_link_update(ds, p->dp->index, status); 1019 ds->ops->fixed_link_update(ds, dp->index, status);
1104 } 1020 }
1105 1021
1106 return 0; 1022 return 0;
1107} 1023}
1108 1024
1109/* slave device setup *******************************************************/ 1025/* slave device setup *******************************************************/
1110static int dsa_slave_phy_connect(struct dsa_slave_priv *p, 1026static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
1111 struct net_device *slave_dev,
1112 int addr)
1113{ 1027{
1114 struct dsa_switch *ds = p->dp->ds; 1028 struct dsa_port *dp = dsa_slave_to_port(slave_dev);
1029 struct dsa_slave_priv *p = netdev_priv(slave_dev);
1030 struct dsa_switch *ds = dp->ds;
1115 1031
1116 p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr); 1032 slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr);
1117 if (!p->phy) { 1033 if (!slave_dev->phydev) {
1118 netdev_err(slave_dev, "no phy at %d\n", addr); 1034 netdev_err(slave_dev, "no phy at %d\n", addr);
1119 return -ENODEV; 1035 return -ENODEV;
1120 } 1036 }
1121 1037
1122 /* Use already configured phy mode */ 1038 /* Use already configured phy mode */
1123 if (p->phy_interface == PHY_INTERFACE_MODE_NA) 1039 if (p->phy_interface == PHY_INTERFACE_MODE_NA)
1124 p->phy_interface = p->phy->interface; 1040 p->phy_interface = slave_dev->phydev->interface;
1125 return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, 1041
1126 p->phy_interface); 1042 return phy_connect_direct(slave_dev, slave_dev->phydev,
1043 dsa_slave_adjust_link, p->phy_interface);
1127} 1044}
1128 1045
1129static int dsa_slave_phy_setup(struct dsa_slave_priv *p, 1046static int dsa_slave_phy_setup(struct net_device *slave_dev)
1130 struct net_device *slave_dev)
1131{ 1047{
1132 struct dsa_switch *ds = p->dp->ds; 1048 struct dsa_port *dp = dsa_slave_to_port(slave_dev);
1133 struct device_node *phy_dn, *port_dn; 1049 struct dsa_slave_priv *p = netdev_priv(slave_dev);
1050 struct device_node *port_dn = dp->dn;
1051 struct dsa_switch *ds = dp->ds;
1052 struct device_node *phy_dn;
1134 bool phy_is_fixed = false; 1053 bool phy_is_fixed = false;
1135 u32 phy_flags = 0; 1054 u32 phy_flags = 0;
1136 int mode, ret; 1055 int mode, ret;
1137 1056
1138 port_dn = p->dp->dn;
1139 mode = of_get_phy_mode(port_dn); 1057 mode = of_get_phy_mode(port_dn);
1140 if (mode < 0) 1058 if (mode < 0)
1141 mode = PHY_INTERFACE_MODE_NA; 1059 mode = PHY_INTERFACE_MODE_NA;
@@ -1156,52 +1074,35 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
1156 } 1074 }
1157 1075
1158 if (ds->ops->get_phy_flags) 1076 if (ds->ops->get_phy_flags)
1159 phy_flags = ds->ops->get_phy_flags(ds, p->dp->index); 1077 phy_flags = ds->ops->get_phy_flags(ds, dp->index);
1160 1078
1161 if (phy_dn) { 1079 if (phy_dn) {
1162 int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn); 1080 slave_dev->phydev = of_phy_connect(slave_dev, phy_dn,
1163 1081 dsa_slave_adjust_link,
1164 /* If this PHY address is part of phys_mii_mask, which means 1082 phy_flags,
1165 * that we need to divert reads and writes to/from it, then we 1083 p->phy_interface);
1166 * want to bind this device using the slave MII bus created by
1167 * DSA to make that happen.
1168 */
1169 if (!phy_is_fixed && phy_id >= 0 &&
1170 (ds->phys_mii_mask & (1 << phy_id))) {
1171 ret = dsa_slave_phy_connect(p, slave_dev, phy_id);
1172 if (ret) {
1173 netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret);
1174 of_node_put(phy_dn);
1175 return ret;
1176 }
1177 } else {
1178 p->phy = of_phy_connect(slave_dev, phy_dn,
1179 dsa_slave_adjust_link,
1180 phy_flags,
1181 p->phy_interface);
1182 }
1183
1184 of_node_put(phy_dn); 1084 of_node_put(phy_dn);
1185 } 1085 }
1186 1086
1187 if (p->phy && phy_is_fixed) 1087 if (slave_dev->phydev && phy_is_fixed)
1188 fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update); 1088 fixed_phy_set_link_update(slave_dev->phydev,
1089 dsa_slave_fixed_link_update);
1189 1090
1190 /* We could not connect to a designated PHY, so use the switch internal 1091 /* We could not connect to a designated PHY, so use the switch internal
1191 * MDIO bus instead 1092 * MDIO bus instead
1192 */ 1093 */
1193 if (!p->phy) { 1094 if (!slave_dev->phydev) {
1194 ret = dsa_slave_phy_connect(p, slave_dev, p->dp->index); 1095 ret = dsa_slave_phy_connect(slave_dev, dp->index);
1195 if (ret) { 1096 if (ret) {
1196 netdev_err(slave_dev, "failed to connect to port %d: %d\n", 1097 netdev_err(slave_dev, "failed to connect to port %d: %d\n",
1197 p->dp->index, ret); 1098 dp->index, ret);
1198 if (phy_is_fixed) 1099 if (phy_is_fixed)
1199 of_phy_deregister_fixed_link(port_dn); 1100 of_phy_deregister_fixed_link(port_dn);
1200 return ret; 1101 return ret;
1201 } 1102 }
1202 } 1103 }
1203 1104
1204 phy_attached_info(p->phy); 1105 phy_attached_info(slave_dev->phydev);
1205 1106
1206 return 0; 1107 return 0;
1207} 1108}
@@ -1221,12 +1122,12 @@ int dsa_slave_suspend(struct net_device *slave_dev)
1221 1122
1222 netif_device_detach(slave_dev); 1123 netif_device_detach(slave_dev);
1223 1124
1224 if (p->phy) { 1125 if (slave_dev->phydev) {
1225 phy_stop(p->phy); 1126 phy_stop(slave_dev->phydev);
1226 p->old_pause = -1; 1127 p->old_pause = -1;
1227 p->old_link = -1; 1128 p->old_link = -1;
1228 p->old_duplex = -1; 1129 p->old_duplex = -1;
1229 phy_suspend(p->phy); 1130 phy_suspend(slave_dev->phydev);
1230 } 1131 }
1231 1132
1232 return 0; 1133 return 0;
@@ -1234,31 +1135,40 @@ int dsa_slave_suspend(struct net_device *slave_dev)
1234 1135
1235int dsa_slave_resume(struct net_device *slave_dev) 1136int dsa_slave_resume(struct net_device *slave_dev)
1236{ 1137{
1237 struct dsa_slave_priv *p = netdev_priv(slave_dev);
1238
1239 netif_device_attach(slave_dev); 1138 netif_device_attach(slave_dev);
1240 1139
1241 if (p->phy) { 1140 if (slave_dev->phydev) {
1242 phy_resume(p->phy); 1141 phy_resume(slave_dev->phydev);
1243 phy_start(p->phy); 1142 phy_start(slave_dev->phydev);
1244 } 1143 }
1245 1144
1246 return 0; 1145 return 0;
1247} 1146}
1248 1147
1249int dsa_slave_create(struct dsa_port *port, const char *name) 1148static void dsa_slave_notify(struct net_device *dev, unsigned long val)
1149{
1150 struct net_device *master = dsa_slave_to_master(dev);
1151 struct dsa_port *dp = dsa_slave_to_port(dev);
1152 struct dsa_notifier_register_info rinfo = {
1153 .switch_number = dp->ds->index,
1154 .port_number = dp->index,
1155 .master = master,
1156 .info.dev = dev,
1157 };
1158
1159 call_dsa_notifiers(val, dev, &rinfo.info);
1160}
1161
1162int dsa_slave_create(struct dsa_port *port)
1250{ 1163{
1164 const struct dsa_port *cpu_dp = port->cpu_dp;
1165 struct net_device *master = cpu_dp->master;
1251 struct dsa_switch *ds = port->ds; 1166 struct dsa_switch *ds = port->ds;
1252 struct dsa_switch_tree *dst = ds->dst; 1167 const char *name = port->name;
1253 struct net_device *master;
1254 struct net_device *slave_dev; 1168 struct net_device *slave_dev;
1255 struct dsa_slave_priv *p; 1169 struct dsa_slave_priv *p;
1256 struct dsa_port *cpu_dp;
1257 int ret; 1170 int ret;
1258 1171
1259 cpu_dp = ds->dst->cpu_dp;
1260 master = cpu_dp->netdev;
1261
1262 if (!ds->num_tx_queues) 1172 if (!ds->num_tx_queues)
1263 ds->num_tx_queues = 1; 1173 ds->num_tx_queues = 1;
1264 1174
@@ -1294,22 +1204,24 @@ int dsa_slave_create(struct dsa_port *port, const char *name)
1294 } 1204 }
1295 p->dp = port; 1205 p->dp = port;
1296 INIT_LIST_HEAD(&p->mall_tc_list); 1206 INIT_LIST_HEAD(&p->mall_tc_list);
1297 p->xmit = dst->tag_ops->xmit; 1207 p->xmit = cpu_dp->tag_ops->xmit;
1298 1208
1299 p->old_pause = -1; 1209 p->old_pause = -1;
1300 p->old_link = -1; 1210 p->old_link = -1;
1301 p->old_duplex = -1; 1211 p->old_duplex = -1;
1302 1212
1303 port->netdev = slave_dev; 1213 port->slave = slave_dev;
1304 1214
1305 netif_carrier_off(slave_dev); 1215 netif_carrier_off(slave_dev);
1306 1216
1307 ret = dsa_slave_phy_setup(p, slave_dev); 1217 ret = dsa_slave_phy_setup(slave_dev);
1308 if (ret) { 1218 if (ret) {
1309 netdev_err(master, "error %d setting up slave phy\n", ret); 1219 netdev_err(master, "error %d setting up slave phy\n", ret);
1310 goto out_free; 1220 goto out_free;
1311 } 1221 }
1312 1222
1223 dsa_slave_notify(slave_dev, DSA_PORT_REGISTER);
1224
1313 ret = register_netdev(slave_dev); 1225 ret = register_netdev(slave_dev);
1314 if (ret) { 1226 if (ret) {
1315 netdev_err(master, "error %d registering interface %s\n", 1227 netdev_err(master, "error %d registering interface %s\n",
@@ -1320,30 +1232,30 @@ int dsa_slave_create(struct dsa_port *port, const char *name)
1320 return 0; 1232 return 0;
1321 1233
1322out_phy: 1234out_phy:
1323 phy_disconnect(p->phy); 1235 phy_disconnect(slave_dev->phydev);
1324 if (of_phy_is_fixed_link(p->dp->dn)) 1236 if (of_phy_is_fixed_link(port->dn))
1325 of_phy_deregister_fixed_link(p->dp->dn); 1237 of_phy_deregister_fixed_link(port->dn);
1326out_free: 1238out_free:
1327 free_percpu(p->stats64); 1239 free_percpu(p->stats64);
1328 free_netdev(slave_dev); 1240 free_netdev(slave_dev);
1329 port->netdev = NULL; 1241 port->slave = NULL;
1330 return ret; 1242 return ret;
1331} 1243}
1332 1244
1333void dsa_slave_destroy(struct net_device *slave_dev) 1245void dsa_slave_destroy(struct net_device *slave_dev)
1334{ 1246{
1247 struct dsa_port *dp = dsa_slave_to_port(slave_dev);
1335 struct dsa_slave_priv *p = netdev_priv(slave_dev); 1248 struct dsa_slave_priv *p = netdev_priv(slave_dev);
1336 struct device_node *port_dn; 1249 struct device_node *port_dn = dp->dn;
1337
1338 port_dn = p->dp->dn;
1339 1250
1340 netif_carrier_off(slave_dev); 1251 netif_carrier_off(slave_dev);
1341 if (p->phy) { 1252 if (slave_dev->phydev) {
1342 phy_disconnect(p->phy); 1253 phy_disconnect(slave_dev->phydev);
1343 1254
1344 if (of_phy_is_fixed_link(port_dn)) 1255 if (of_phy_is_fixed_link(port_dn))
1345 of_phy_deregister_fixed_link(port_dn); 1256 of_phy_deregister_fixed_link(port_dn);
1346 } 1257 }
1258 dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
1347 unregister_netdev(slave_dev); 1259 unregister_netdev(slave_dev);
1348 free_percpu(p->stats64); 1260 free_percpu(p->stats64);
1349 free_netdev(slave_dev); 1261 free_netdev(slave_dev);
@@ -1357,8 +1269,7 @@ static bool dsa_slave_dev_check(struct net_device *dev)
1357static int dsa_slave_changeupper(struct net_device *dev, 1269static int dsa_slave_changeupper(struct net_device *dev,
1358 struct netdev_notifier_changeupper_info *info) 1270 struct netdev_notifier_changeupper_info *info)
1359{ 1271{
1360 struct dsa_slave_priv *p = netdev_priv(dev); 1272 struct dsa_port *dp = dsa_slave_to_port(dev);
1361 struct dsa_port *dp = p->dp;
1362 int err = NOTIFY_DONE; 1273 int err = NOTIFY_DONE;
1363 1274
1364 if (netif_is_bridge_master(info->upper_dev)) { 1275 if (netif_is_bridge_master(info->upper_dev)) {
@@ -1379,7 +1290,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
1379{ 1290{
1380 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1291 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1381 1292
1382 if (dev->netdev_ops != &dsa_slave_netdev_ops) 1293 if (!dsa_slave_dev_check(dev))
1383 return NOTIFY_DONE; 1294 return NOTIFY_DONE;
1384 1295
1385 if (event == NETDEV_CHANGEUPPER) 1296 if (event == NETDEV_CHANGEUPPER)
@@ -1401,14 +1312,14 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
1401 container_of(work, struct dsa_switchdev_event_work, work); 1312 container_of(work, struct dsa_switchdev_event_work, work);
1402 struct net_device *dev = switchdev_work->dev; 1313 struct net_device *dev = switchdev_work->dev;
1403 struct switchdev_notifier_fdb_info *fdb_info; 1314 struct switchdev_notifier_fdb_info *fdb_info;
1404 struct dsa_slave_priv *p = netdev_priv(dev); 1315 struct dsa_port *dp = dsa_slave_to_port(dev);
1405 int err; 1316 int err;
1406 1317
1407 rtnl_lock(); 1318 rtnl_lock();
1408 switch (switchdev_work->event) { 1319 switch (switchdev_work->event) {
1409 case SWITCHDEV_FDB_ADD_TO_DEVICE: 1320 case SWITCHDEV_FDB_ADD_TO_DEVICE:
1410 fdb_info = &switchdev_work->fdb_info; 1321 fdb_info = &switchdev_work->fdb_info;
1411 err = dsa_port_fdb_add(p->dp, fdb_info->addr, fdb_info->vid); 1322 err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid);
1412 if (err) { 1323 if (err) {
1413 netdev_dbg(dev, "fdb add failed err=%d\n", err); 1324 netdev_dbg(dev, "fdb add failed err=%d\n", err);
1414 break; 1325 break;
@@ -1419,7 +1330,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
1419 1330
1420 case SWITCHDEV_FDB_DEL_TO_DEVICE: 1331 case SWITCHDEV_FDB_DEL_TO_DEVICE:
1421 fdb_info = &switchdev_work->fdb_info; 1332 fdb_info = &switchdev_work->fdb_info;
1422 err = dsa_port_fdb_del(p->dp, fdb_info->addr, fdb_info->vid); 1333 err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid);
1423 if (err) { 1334 if (err) {
1424 netdev_dbg(dev, "fdb del failed err=%d\n", err); 1335 netdev_dbg(dev, "fdb del failed err=%d\n", err);
1425 dev_close(dev); 1336 dev_close(dev);
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 1e2929f4290a..29608d087a7c 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -121,7 +121,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
121 if (ds->index == info->sw_index) 121 if (ds->index == info->sw_index)
122 set_bit(info->port, group); 122 set_bit(info->port, group);
123 for (port = 0; port < ds->num_ports; port++) 123 for (port = 0; port < ds->num_ports; port++)
124 if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) 124 if (dsa_is_dsa_port(ds, port))
125 set_bit(port, group); 125 set_bit(port, group);
126 126
127 if (switchdev_trans_ph_prepare(trans)) { 127 if (switchdev_trans_ph_prepare(trans)) {
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index dbb016434ace..e6e0b7b6025c 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -59,9 +59,11 @@
59#define BRCM_EG_TC_MASK 0x7 59#define BRCM_EG_TC_MASK 0x7
60#define BRCM_EG_PID_MASK 0x1f 60#define BRCM_EG_PID_MASK 0x1f
61 61
62static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev) 62static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
63 struct net_device *dev,
64 unsigned int offset)
63{ 65{
64 struct dsa_slave_priv *p = netdev_priv(dev); 66 struct dsa_port *dp = dsa_slave_to_port(dev);
65 u16 queue = skb_get_queue_mapping(skb); 67 u16 queue = skb_get_queue_mapping(skb);
66 u8 *brcm_tag; 68 u8 *brcm_tag;
67 69
@@ -70,10 +72,10 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
70 72
71 skb_push(skb, BRCM_TAG_LEN); 73 skb_push(skb, BRCM_TAG_LEN);
72 74
73 memmove(skb->data, skb->data + BRCM_TAG_LEN, 2 * ETH_ALEN); 75 if (offset)
76 memmove(skb->data, skb->data + BRCM_TAG_LEN, offset);
74 77
75 /* Build the tag after the MAC Source Address */ 78 brcm_tag = skb->data + offset;
76 brcm_tag = skb->data + 2 * ETH_ALEN;
77 79
78 /* Set the ingress opcode, traffic class, tag enforcment is 80 /* Set the ingress opcode, traffic class, tag enforcment is
79 * deprecated 81 * deprecated
@@ -82,27 +84,30 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
82 ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT); 84 ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT);
83 brcm_tag[1] = 0; 85 brcm_tag[1] = 0;
84 brcm_tag[2] = 0; 86 brcm_tag[2] = 0;
85 if (p->dp->index == 8) 87 if (dp->index == 8)
86 brcm_tag[2] = BRCM_IG_DSTMAP2_MASK; 88 brcm_tag[2] = BRCM_IG_DSTMAP2_MASK;
87 brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK; 89 brcm_tag[3] = (1 << dp->index) & BRCM_IG_DSTMAP1_MASK;
90
91 /* Now tell the master network device about the desired output queue
92 * as well
93 */
94 skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(dp->index, queue));
88 95
89 return skb; 96 return skb;
90} 97}
91 98
92static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, 99static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
93 struct packet_type *pt) 100 struct net_device *dev,
101 struct packet_type *pt,
102 unsigned int offset)
94{ 103{
95 struct dsa_switch_tree *dst = dev->dsa_ptr;
96 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
97 struct dsa_switch *ds = cpu_dp->ds;
98 int source_port; 104 int source_port;
99 u8 *brcm_tag; 105 u8 *brcm_tag;
100 106
101 if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN))) 107 if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
102 return NULL; 108 return NULL;
103 109
104 /* skb->data points to the EtherType, the tag is right before it */ 110 brcm_tag = skb->data - offset;
105 brcm_tag = skb->data - 2;
106 111
107 /* The opcode should never be different than 0b000 */ 112 /* The opcode should never be different than 0b000 */
108 if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK)) 113 if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK))
@@ -117,24 +122,67 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
117 /* Locate which port this is coming from */ 122 /* Locate which port this is coming from */
118 source_port = brcm_tag[3] & BRCM_EG_PID_MASK; 123 source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
119 124
120 /* Validate port against switch setup, either the port is totally */ 125 skb->dev = dsa_master_find_slave(dev, 0, source_port);
121 if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) 126 if (!skb->dev)
122 return NULL; 127 return NULL;
123 128
124 /* Remove Broadcom tag and update checksum */ 129 /* Remove Broadcom tag and update checksum */
125 skb_pull_rcsum(skb, BRCM_TAG_LEN); 130 skb_pull_rcsum(skb, BRCM_TAG_LEN);
126 131
132 return skb;
133}
134
135#ifdef CONFIG_NET_DSA_TAG_BRCM
136static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb,
137 struct net_device *dev)
138{
139 /* Build the tag after the MAC Source Address */
140 return brcm_tag_xmit_ll(skb, dev, 2 * ETH_ALEN);
141}
142
143
144static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
145 struct packet_type *pt)
146{
147 struct sk_buff *nskb;
148
149 /* skb->data points to the EtherType, the tag is right before it */
150 nskb = brcm_tag_rcv_ll(skb, dev, pt, 2);
151 if (!nskb)
152 return nskb;
153
127 /* Move the Ethernet DA and SA */ 154 /* Move the Ethernet DA and SA */
128 memmove(skb->data - ETH_HLEN, 155 memmove(nskb->data - ETH_HLEN,
129 skb->data - ETH_HLEN - BRCM_TAG_LEN, 156 nskb->data - ETH_HLEN - BRCM_TAG_LEN,
130 2 * ETH_ALEN); 157 2 * ETH_ALEN);
131 158
132 skb->dev = ds->ports[source_port].netdev; 159 return nskb;
133
134 return skb;
135} 160}
136 161
137const struct dsa_device_ops brcm_netdev_ops = { 162const struct dsa_device_ops brcm_netdev_ops = {
138 .xmit = brcm_tag_xmit, 163 .xmit = brcm_tag_xmit,
139 .rcv = brcm_tag_rcv, 164 .rcv = brcm_tag_rcv,
140}; 165};
166#endif
167
168#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
169static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
170 struct net_device *dev)
171{
172 /* tag is prepended to the packet */
173 return brcm_tag_xmit_ll(skb, dev, 0);
174}
175
176static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
177 struct net_device *dev,
178 struct packet_type *pt)
179{
180 /* tag is prepended to the packet */
181 return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
182}
183
184const struct dsa_device_ops brcm_prepend_netdev_ops = {
185 .xmit = brcm_tag_xmit_prepend,
186 .rcv = brcm_tag_rcv_prepend,
187};
188#endif
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index fbf9ca954773..cd13cfc542ce 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -18,7 +18,7 @@
18 18
19static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) 19static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
20{ 20{
21 struct dsa_slave_priv *p = netdev_priv(dev); 21 struct dsa_port *dp = dsa_slave_to_port(dev);
22 u8 *dsa_header; 22 u8 *dsa_header;
23 23
24 /* 24 /*
@@ -34,8 +34,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
34 * Construct tagged FROM_CPU DSA tag from 802.1q tag. 34 * Construct tagged FROM_CPU DSA tag from 802.1q tag.
35 */ 35 */
36 dsa_header = skb->data + 2 * ETH_ALEN; 36 dsa_header = skb->data + 2 * ETH_ALEN;
37 dsa_header[0] = 0x60 | p->dp->ds->index; 37 dsa_header[0] = 0x60 | dp->ds->index;
38 dsa_header[1] = p->dp->index << 3; 38 dsa_header[1] = dp->index << 3;
39 39
40 /* 40 /*
41 * Move CFI field from byte 2 to byte 1. 41 * Move CFI field from byte 2 to byte 1.
@@ -55,8 +55,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
55 * Construct untagged FROM_CPU DSA tag. 55 * Construct untagged FROM_CPU DSA tag.
56 */ 56 */
57 dsa_header = skb->data + 2 * ETH_ALEN; 57 dsa_header = skb->data + 2 * ETH_ALEN;
58 dsa_header[0] = 0x40 | p->dp->ds->index; 58 dsa_header[0] = 0x40 | dp->ds->index;
59 dsa_header[1] = p->dp->index << 3; 59 dsa_header[1] = dp->index << 3;
60 dsa_header[2] = 0x00; 60 dsa_header[2] = 0x00;
61 dsa_header[3] = 0x00; 61 dsa_header[3] = 0x00;
62 } 62 }
@@ -67,8 +67,6 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
67static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, 67static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
68 struct packet_type *pt) 68 struct packet_type *pt)
69{ 69{
70 struct dsa_switch_tree *dst = dev->dsa_ptr;
71 struct dsa_switch *ds;
72 u8 *dsa_header; 70 u8 *dsa_header;
73 int source_device; 71 int source_device;
74 int source_port; 72 int source_port;
@@ -93,18 +91,8 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
93 source_device = dsa_header[0] & 0x1f; 91 source_device = dsa_header[0] & 0x1f;
94 source_port = (dsa_header[1] >> 3) & 0x1f; 92 source_port = (dsa_header[1] >> 3) & 0x1f;
95 93
96 /* 94 skb->dev = dsa_master_find_slave(dev, source_device, source_port);
97 * Check that the source device exists and that the source 95 if (!skb->dev)
98 * port is a registered DSA port.
99 */
100 if (source_device >= DSA_MAX_SWITCHES)
101 return NULL;
102
103 ds = dst->ds[source_device];
104 if (!ds)
105 return NULL;
106
107 if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
108 return NULL; 96 return NULL;
109 97
110 /* 98 /*
@@ -153,7 +141,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
153 2 * ETH_ALEN); 141 2 * ETH_ALEN);
154 } 142 }
155 143
156 skb->dev = ds->ports[source_port].netdev; 144 skb->offload_fwd_mark = 1;
157 145
158 return skb; 146 return skb;
159} 147}
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 76367ba1b2e2..4083326b806e 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -19,7 +19,7 @@
19 19
20static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) 20static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
21{ 21{
22 struct dsa_slave_priv *p = netdev_priv(dev); 22 struct dsa_port *dp = dsa_slave_to_port(dev);
23 u8 *edsa_header; 23 u8 *edsa_header;
24 24
25 /* 25 /*
@@ -43,8 +43,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
43 edsa_header[1] = ETH_P_EDSA & 0xff; 43 edsa_header[1] = ETH_P_EDSA & 0xff;
44 edsa_header[2] = 0x00; 44 edsa_header[2] = 0x00;
45 edsa_header[3] = 0x00; 45 edsa_header[3] = 0x00;
46 edsa_header[4] = 0x60 | p->dp->ds->index; 46 edsa_header[4] = 0x60 | dp->ds->index;
47 edsa_header[5] = p->dp->index << 3; 47 edsa_header[5] = dp->index << 3;
48 48
49 /* 49 /*
50 * Move CFI field from byte 6 to byte 5. 50 * Move CFI field from byte 6 to byte 5.
@@ -68,8 +68,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
68 edsa_header[1] = ETH_P_EDSA & 0xff; 68 edsa_header[1] = ETH_P_EDSA & 0xff;
69 edsa_header[2] = 0x00; 69 edsa_header[2] = 0x00;
70 edsa_header[3] = 0x00; 70 edsa_header[3] = 0x00;
71 edsa_header[4] = 0x40 | p->dp->ds->index; 71 edsa_header[4] = 0x40 | dp->ds->index;
72 edsa_header[5] = p->dp->index << 3; 72 edsa_header[5] = dp->index << 3;
73 edsa_header[6] = 0x00; 73 edsa_header[6] = 0x00;
74 edsa_header[7] = 0x00; 74 edsa_header[7] = 0x00;
75 } 75 }
@@ -80,8 +80,6 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
80static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, 80static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
81 struct packet_type *pt) 81 struct packet_type *pt)
82{ 82{
83 struct dsa_switch_tree *dst = dev->dsa_ptr;
84 struct dsa_switch *ds;
85 u8 *edsa_header; 83 u8 *edsa_header;
86 int source_device; 84 int source_device;
87 int source_port; 85 int source_port;
@@ -106,18 +104,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
106 source_device = edsa_header[0] & 0x1f; 104 source_device = edsa_header[0] & 0x1f;
107 source_port = (edsa_header[1] >> 3) & 0x1f; 105 source_port = (edsa_header[1] >> 3) & 0x1f;
108 106
109 /* 107 skb->dev = dsa_master_find_slave(dev, source_device, source_port);
110 * Check that the source device exists and that the source 108 if (!skb->dev)
111 * port is a registered DSA port.
112 */
113 if (source_device >= DSA_MAX_SWITCHES)
114 return NULL;
115
116 ds = dst->ds[source_device];
117 if (!ds)
118 return NULL;
119
120 if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
121 return NULL; 109 return NULL;
122 110
123 /* 111 /*
@@ -172,7 +160,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
172 2 * ETH_ALEN); 160 2 * ETH_ALEN);
173 } 161 }
174 162
175 skb->dev = ds->ports[source_port].netdev; 163 skb->offload_fwd_mark = 1;
176 164
177 return skb; 165 return skb;
178} 166}
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 010ca0a336c4..0f62effad88f 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -34,7 +34,7 @@
34 34
35static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) 35static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
36{ 36{
37 struct dsa_slave_priv *p = netdev_priv(dev); 37 struct dsa_port *dp = dsa_slave_to_port(dev);
38 struct sk_buff *nskb; 38 struct sk_buff *nskb;
39 int padlen; 39 int padlen;
40 u8 *tag; 40 u8 *tag;
@@ -72,7 +72,7 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
72 72
73 tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); 73 tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
74 tag[0] = 0; 74 tag[0] = 0;
75 tag[1] = 1 << p->dp->index; /* destination port */ 75 tag[1] = 1 << dp->index; /* destination port */
76 76
77 return nskb; 77 return nskb;
78} 78}
@@ -80,22 +80,19 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
80static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev, 80static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
81 struct packet_type *pt) 81 struct packet_type *pt)
82{ 82{
83 struct dsa_switch_tree *dst = dev->dsa_ptr;
84 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
85 struct dsa_switch *ds = cpu_dp->ds;
86 u8 *tag; 83 u8 *tag;
87 int source_port; 84 int source_port;
88 85
89 tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; 86 tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
90 87
91 source_port = tag[0] & 7; 88 source_port = tag[0] & 7;
92 if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) 89
90 skb->dev = dsa_master_find_slave(dev, 0, source_port);
91 if (!skb->dev)
93 return NULL; 92 return NULL;
94 93
95 pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN); 94 pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN);
96 95
97 skb->dev = ds->ports[source_port].netdev;
98
99 return skb; 96 return skb;
100} 97}
101 98
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 0b9826105e42..548c00254c07 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -11,6 +11,7 @@
11 * GNU General Public License for more details. 11 * GNU General Public License for more details.
12 * 12 *
13 */ 13 */
14#include <linux/dsa/lan9303.h>
14#include <linux/etherdevice.h> 15#include <linux/etherdevice.h>
15#include <linux/list.h> 16#include <linux/list.h>
16#include <linux/slab.h> 17#include <linux/slab.h>
@@ -39,10 +40,30 @@
39 */ 40 */
40 41
41#define LAN9303_TAG_LEN 4 42#define LAN9303_TAG_LEN 4
43# define LAN9303_TAG_TX_USE_ALR BIT(3)
44# define LAN9303_TAG_TX_STP_OVERRIDE BIT(4)
45# define LAN9303_TAG_RX_IGMP BIT(3)
46# define LAN9303_TAG_RX_STP BIT(4)
47# define LAN9303_TAG_RX_TRAPPED_TO_CPU (LAN9303_TAG_RX_IGMP | \
48 LAN9303_TAG_RX_STP)
49
50/* Decide whether to transmit using ALR lookup, or transmit directly to
51 * port using tag. ALR learning is performed only when using ALR lookup.
52 * If the two external ports are bridged and the frame is unicast,
53 * then use ALR lookup to allow ALR learning on CPU port.
54 * Otherwise transmit directly to port with STP state override.
55 * See also: lan9303_separate_ports() and lan9303.pdf 6.4.10.1
56 */
57static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr)
58{
59 struct lan9303 *chip = dp->ds->priv;
60
61 return chip->is_bridged && !is_multicast_ether_addr(dest_addr);
62}
42 63
43static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) 64static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
44{ 65{
45 struct dsa_slave_priv *p = netdev_priv(dev); 66 struct dsa_port *dp = dsa_slave_to_port(dev);
46 u16 *lan9303_tag; 67 u16 *lan9303_tag;
47 68
48 /* insert a special VLAN tag between the MAC addresses 69 /* insert a special VLAN tag between the MAC addresses
@@ -62,26 +83,21 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
62 83
63 lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN); 84 lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN);
64 lan9303_tag[0] = htons(ETH_P_8021Q); 85 lan9303_tag[0] = htons(ETH_P_8021Q);
65 lan9303_tag[1] = htons(p->dp->index | BIT(4)); 86 lan9303_tag[1] = lan9303_xmit_use_arl(dp, skb->data) ?
87 LAN9303_TAG_TX_USE_ALR :
88 dp->index | LAN9303_TAG_TX_STP_OVERRIDE;
89 lan9303_tag[1] = htons(lan9303_tag[1]);
66 90
67 return skb; 91 return skb;
68} 92}
69 93
70static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, 94static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
71 struct packet_type *pt) 95 struct packet_type *pt)
72{ 96{
73 u16 *lan9303_tag; 97 u16 *lan9303_tag;
74 struct dsa_switch_tree *dst = dev->dsa_ptr; 98 u16 lan9303_tag1;
75 struct dsa_switch *ds;
76 unsigned int source_port; 99 unsigned int source_port;
77 100
78 ds = dst->ds[0];
79
80 if (unlikely(!ds)) {
81 dev_warn_ratelimited(&dev->dev, "Dropping packet, due to missing DSA switch device\n");
82 return NULL;
83 }
84
85 if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) { 101 if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) {
86 dev_warn_ratelimited(&dev->dev, 102 dev_warn_ratelimited(&dev->dev,
87 "Dropping packet, cannot pull\n"); 103 "Dropping packet, cannot pull\n");
@@ -101,27 +117,22 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
101 return NULL; 117 return NULL;
102 } 118 }
103 119
104 source_port = ntohs(lan9303_tag[1]) & 0x3; 120 lan9303_tag1 = ntohs(lan9303_tag[1]);
121 source_port = lan9303_tag1 & 0x3;
105 122
106 if (source_port >= ds->num_ports) { 123 skb->dev = dsa_master_find_slave(dev, 0, source_port);
124 if (!skb->dev) {
107 dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n"); 125 dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n");
108 return NULL; 126 return NULL;
109 } 127 }
110 128
111 if (!ds->ports[source_port].netdev) {
112 dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid netdev or device\n");
113 return NULL;
114 }
115
116 /* remove the special VLAN tag between the MAC addresses 129 /* remove the special VLAN tag between the MAC addresses
117 * and the current ethertype field. 130 * and the current ethertype field.
118 */ 131 */
119 skb_pull_rcsum(skb, 2 + 2); 132 skb_pull_rcsum(skb, 2 + 2);
120 memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN), 133 memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
121 2 * ETH_ALEN); 134 2 * ETH_ALEN);
122 135 skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU);
123 /* forward the packet to the dedicated interface */
124 skb->dev = ds->ports[source_port].netdev;
125 136
126 return skb; 137 return skb;
127} 138}
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index ec8ee5f43255..8475434af7d5 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -23,7 +23,7 @@
23static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, 23static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
24 struct net_device *dev) 24 struct net_device *dev)
25{ 25{
26 struct dsa_slave_priv *p = netdev_priv(dev); 26 struct dsa_port *dp = dsa_slave_to_port(dev);
27 u8 *mtk_tag; 27 u8 *mtk_tag;
28 28
29 if (skb_cow_head(skb, MTK_HDR_LEN) < 0) 29 if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
@@ -36,7 +36,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
36 /* Build the tag after the MAC Source Address */ 36 /* Build the tag after the MAC Source Address */
37 mtk_tag = skb->data + 2 * ETH_ALEN; 37 mtk_tag = skb->data + 2 * ETH_ALEN;
38 mtk_tag[0] = 0; 38 mtk_tag[0] = 0;
39 mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; 39 mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
40 mtk_tag[2] = 0; 40 mtk_tag[2] = 0;
41 mtk_tag[3] = 0; 41 mtk_tag[3] = 0;
42 42
@@ -46,8 +46,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
46static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, 46static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
47 struct packet_type *pt) 47 struct packet_type *pt)
48{ 48{
49 struct dsa_switch_tree *dst = dev->dsa_ptr;
50 struct dsa_switch *ds;
51 int port; 49 int port;
52 __be16 *phdr, hdr; 50 __be16 *phdr, hdr;
53 51
@@ -68,20 +66,12 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
68 skb->data - ETH_HLEN - MTK_HDR_LEN, 66 skb->data - ETH_HLEN - MTK_HDR_LEN,
69 2 * ETH_ALEN); 67 2 * ETH_ALEN);
70 68
71 /* This protocol doesn't support cascading multiple
72 * switches so it's safe to assume the switch is first
73 * in the tree.
74 */
75 ds = dst->ds[0];
76 if (!ds)
77 return NULL;
78
79 /* Get source port information */ 69 /* Get source port information */
80 port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); 70 port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
81 if (!ds->ports[port].netdev)
82 return NULL;
83 71
84 skb->dev = ds->ports[port].netdev; 72 skb->dev = dsa_master_find_slave(dev, 0, port);
73 if (!skb->dev)
74 return NULL;
85 75
86 return skb; 76 return skb;
87} 77}
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 1d4c70711c0f..613f4ee97771 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -38,7 +38,7 @@
38 38
39static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) 39static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
40{ 40{
41 struct dsa_slave_priv *p = netdev_priv(dev); 41 struct dsa_port *dp = dsa_slave_to_port(dev);
42 u16 *phdr, hdr; 42 u16 *phdr, hdr;
43 43
44 dev->stats.tx_packets++; 44 dev->stats.tx_packets++;
@@ -54,8 +54,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
54 54
55 /* Set the version field, and set destination port information */ 55 /* Set the version field, and set destination port information */
56 hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S | 56 hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
57 QCA_HDR_XMIT_FROM_CPU | 57 QCA_HDR_XMIT_FROM_CPU | BIT(dp->index);
58 BIT(p->dp->index);
59 58
60 *phdr = htons(hdr); 59 *phdr = htons(hdr);
61 60
@@ -65,9 +64,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
65static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, 64static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
66 struct packet_type *pt) 65 struct packet_type *pt)
67{ 66{
68 struct dsa_switch_tree *dst = dev->dsa_ptr;
69 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
70 struct dsa_switch *ds;
71 u8 ver; 67 u8 ver;
72 int port; 68 int port;
73 __be16 *phdr, hdr; 69 __be16 *phdr, hdr;
@@ -92,20 +88,12 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
92 memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN, 88 memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN,
93 ETH_HLEN - QCA_HDR_LEN); 89 ETH_HLEN - QCA_HDR_LEN);
94 90
95 /* This protocol doesn't support cascading multiple switches so it's
96 * safe to assume the switch is first in the tree
97 */
98 ds = cpu_dp->ds;
99 if (!ds)
100 return NULL;
101
102 /* Get source port information */ 91 /* Get source port information */
103 port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); 92 port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
104 if (!ds->ports[port].netdev)
105 return NULL;
106 93
107 /* Update skb & forward the frame accordingly */ 94 skb->dev = dsa_master_find_slave(dev, 0, port);
108 skb->dev = ds->ports[port].netdev; 95 if (!skb->dev)
96 return NULL;
109 97
110 return skb; 98 return skb;
111} 99}
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index d2fd4923aa3e..7d20e1f3de28 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -16,7 +16,7 @@
16 16
17static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) 17static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
18{ 18{
19 struct dsa_slave_priv *p = netdev_priv(dev); 19 struct dsa_port *dp = dsa_slave_to_port(dev);
20 struct sk_buff *nskb; 20 struct sk_buff *nskb;
21 int padlen; 21 int padlen;
22 u8 *trailer; 22 u8 *trailer;
@@ -48,7 +48,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
48 48
49 trailer = skb_put(nskb, 4); 49 trailer = skb_put(nskb, 4);
50 trailer[0] = 0x80; 50 trailer[0] = 0x80;
51 trailer[1] = 1 << p->dp->index; 51 trailer[1] = 1 << dp->index;
52 trailer[2] = 0x10; 52 trailer[2] = 0x10;
53 trailer[3] = 0x00; 53 trailer[3] = 0x00;
54 54
@@ -58,9 +58,6 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
58static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, 58static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
59 struct packet_type *pt) 59 struct packet_type *pt)
60{ 60{
61 struct dsa_switch_tree *dst = dev->dsa_ptr;
62 struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
63 struct dsa_switch *ds = cpu_dp->ds;
64 u8 *trailer; 61 u8 *trailer;
65 int source_port; 62 int source_port;
66 63
@@ -73,13 +70,13 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
73 return NULL; 70 return NULL;
74 71
75 source_port = trailer[1] & 7; 72 source_port = trailer[1] & 7;
76 if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) 73
74 skb->dev = dsa_master_find_slave(dev, 0, source_port);
75 if (!skb->dev)
77 return NULL; 76 return NULL;
78 77
79 pskb_trim_rcsum(skb, skb->len - 4); 78 pskb_trim_rcsum(skb, skb->len - 4);
80 79
81 skb->dev = ds->ports[source_port].netdev;
82
83 return skb; 80 return skb;
84} 81}
85 82
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 172d8309f89e..b8cd43c9ed5b 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -328,12 +328,12 @@ out:
328 328
329/* Announce (supervision frame) timer function 329/* Announce (supervision frame) timer function
330 */ 330 */
331static void hsr_announce(unsigned long data) 331static void hsr_announce(struct timer_list *t)
332{ 332{
333 struct hsr_priv *hsr; 333 struct hsr_priv *hsr;
334 struct hsr_port *master; 334 struct hsr_port *master;
335 335
336 hsr = (struct hsr_priv *) data; 336 hsr = from_timer(hsr, t, announce_timer);
337 337
338 rcu_read_lock(); 338 rcu_read_lock();
339 master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); 339 master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
@@ -463,9 +463,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
463 hsr->sequence_nr = HSR_SEQNR_START; 463 hsr->sequence_nr = HSR_SEQNR_START;
464 hsr->sup_sequence_nr = HSR_SUP_SEQNR_START; 464 hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
465 465
466 setup_timer(&hsr->announce_timer, hsr_announce, (unsigned long)hsr); 466 timer_setup(&hsr->announce_timer, hsr_announce, 0);
467 467 timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
468 setup_timer(&hsr->prune_timer, hsr_prune_nodes, (unsigned long)hsr);
469 468
470 ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); 469 ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr);
471 hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; 470 hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 284a9b820df8..286ceb41ac0c 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -365,16 +365,14 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr,
365/* Remove stale sequence_nr records. Called by timer every 365/* Remove stale sequence_nr records. Called by timer every
366 * HSR_LIFE_CHECK_INTERVAL (two seconds or so). 366 * HSR_LIFE_CHECK_INTERVAL (two seconds or so).
367 */ 367 */
368void hsr_prune_nodes(unsigned long data) 368void hsr_prune_nodes(struct timer_list *t)
369{ 369{
370 struct hsr_priv *hsr; 370 struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
371 struct hsr_node *node; 371 struct hsr_node *node;
372 struct hsr_port *port; 372 struct hsr_port *port;
373 unsigned long timestamp; 373 unsigned long timestamp;
374 unsigned long time_a, time_b; 374 unsigned long time_a, time_b;
375 375
376 hsr = (struct hsr_priv *) data;
377
378 rcu_read_lock(); 376 rcu_read_lock();
379 list_for_each_entry_rcu(node, &hsr->node_db, mac_list) { 377 list_for_each_entry_rcu(node, &hsr->node_db, mac_list) {
380 /* Shorthand */ 378 /* Shorthand */
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 4e04f0e868e9..370b45998121 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -33,7 +33,7 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
33int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, 33int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
34 u16 sequence_nr); 34 u16 sequence_nr);
35 35
36void hsr_prune_nodes(unsigned long data); 36void hsr_prune_nodes(struct timer_list *t);
37 37
38int hsr_create_self_node(struct list_head *self_node_db, 38int hsr_create_self_node(struct list_head *self_node_db,
39 unsigned char addr_a[ETH_ALEN], 39 unsigned char addr_a[ETH_ALEN],
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index f85b08baff16..85bf86ad6b18 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -80,12 +80,13 @@ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
80 fq->daddr = *arg->dst; 80 fq->daddr = *arg->dst;
81} 81}
82 82
83static void lowpan_frag_expire(unsigned long data) 83static void lowpan_frag_expire(struct timer_list *t)
84{ 84{
85 struct inet_frag_queue *frag = from_timer(frag, t, timer);
85 struct frag_queue *fq; 86 struct frag_queue *fq;
86 struct net *net; 87 struct net *net;
87 88
88 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); 89 fq = container_of(frag, struct frag_queue, q);
89 net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags); 90 net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags);
90 91
91 spin_lock(&fq->q.lock); 92 spin_lock(&fq->q.lock);
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 6bde9e5a5503..96636e3b7aa9 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -89,7 +89,7 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info)
89 return genlmsg_reply(msg, info); 89 return genlmsg_reply(msg, info);
90} 90}
91 91
92static const struct genl_ops ieee8021154_ops[] = { 92static const struct genl_ops ieee802154_ops[] = {
93 /* see nl-phy.c */ 93 /* see nl-phy.c */
94 IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy, 94 IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy,
95 ieee802154_dump_phy), 95 ieee802154_dump_phy),
@@ -137,8 +137,8 @@ struct genl_family nl802154_family __ro_after_init = {
137 .version = 1, 137 .version = 1,
138 .maxattr = IEEE802154_ATTR_MAX, 138 .maxattr = IEEE802154_ATTR_MAX,
139 .module = THIS_MODULE, 139 .module = THIS_MODULE,
140 .ops = ieee8021154_ops, 140 .ops = ieee802154_ops,
141 .n_ops = ARRAY_SIZE(ieee8021154_ops), 141 .n_ops = ARRAY_SIZE(ieee802154_ops),
142 .mcgrps = ieee802154_mcgrps, 142 .mcgrps = ieee802154_mcgrps,
143 .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps), 143 .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps),
144}; 144};
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e31108e5ef79..ce4aa827be05 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog)
195{ 195{
196 struct sock *sk = sock->sk; 196 struct sock *sk = sock->sk;
197 unsigned char old_state; 197 unsigned char old_state;
198 int err; 198 int err, tcp_fastopen;
199 199
200 lock_sock(sk); 200 lock_sock(sk);
201 201
@@ -217,11 +217,12 @@ int inet_listen(struct socket *sock, int backlog)
217 * because the socket was in TCP_LISTEN state previously but 217 * because the socket was in TCP_LISTEN state previously but
218 * was shutdown() rather than close(). 218 * was shutdown() rather than close().
219 */ 219 */
220 if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && 220 tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
221 (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && 221 if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
222 (tcp_fastopen & TFO_SERVER_ENABLE) &&
222 !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { 223 !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
223 fastopen_queue_tune(sk, backlog); 224 fastopen_queue_tune(sk, backlog);
224 tcp_fastopen_init_key_once(true); 225 tcp_fastopen_init_key_once(sock_net(sk));
225 } 226 }
226 227
227 err = inet_csk_listen_start(sk, backlog); 228 err = inet_csk_listen_start(sk, backlog);
@@ -826,6 +827,7 @@ int inet_shutdown(struct socket *sock, int how)
826 err = -ENOTCONN; 827 err = -ENOTCONN;
827 /* Hack to wake up other listeners, who can poll for 828 /* Hack to wake up other listeners, who can poll for
828 POLLHUP, even on eg. unconnected UDP sockets -- RR */ 829 POLLHUP, even on eg. unconnected UDP sockets -- RR */
830 /* fall through */
829 default: 831 default:
830 sk->sk_shutdown |= how; 832 sk->sk_shutdown |= how;
831 if (sk->sk_prot->shutdown) 833 if (sk->sk_prot->shutdown)
@@ -839,7 +841,7 @@ int inet_shutdown(struct socket *sock, int how)
839 case TCP_LISTEN: 841 case TCP_LISTEN:
840 if (!(how & RCV_SHUTDOWN)) 842 if (!(how & RCV_SHUTDOWN))
841 break; 843 break;
842 /* Fall through */ 844 /* fall through */
843 case TCP_SYN_SENT: 845 case TCP_SYN_SENT:
844 err = sk->sk_prot->disconnect(sk, O_NONBLOCK); 846 err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
845 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; 847 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 37db44f60718..4dd95cdd8070 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -240,7 +240,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
240 if (err == -EINPROGRESS) 240 if (err == -EINPROGRESS)
241 goto out; 241 goto out;
242 242
243 if (err == -EBUSY) 243 if (err == -ENOSPC)
244 err = NET_XMIT_DROP; 244 err = NET_XMIT_DROP;
245 goto out_free; 245 goto out_free;
246 } 246 }
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7c45b8896709..a8d7c5a9fb05 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1180,6 +1180,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1180 case SIOCSARP: 1180 case SIOCSARP:
1181 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1181 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1182 return -EPERM; 1182 return -EPERM;
1183 /* fall through */
1183 case SIOCGARP: 1184 case SIOCGARP:
1184 err = copy_from_user(&r, arg, sizeof(struct arpreq)); 1185 err = copy_from_user(&r, arg, sizeof(struct arpreq));
1185 if (err) 1186 if (err)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d7adc0616599..a4573bccd6da 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -137,22 +137,12 @@ static void inet_hash_remove(struct in_ifaddr *ifa)
137 */ 137 */
138struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) 138struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139{ 139{
140 u32 hash = inet_addr_hash(net, addr);
141 struct net_device *result = NULL; 140 struct net_device *result = NULL;
142 struct in_ifaddr *ifa; 141 struct in_ifaddr *ifa;
143 142
144 rcu_read_lock(); 143 rcu_read_lock();
145 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) { 144 ifa = inet_lookup_ifaddr_rcu(net, addr);
146 if (ifa->ifa_local == addr) { 145 if (!ifa) {
147 struct net_device *dev = ifa->ifa_dev->dev;
148
149 if (!net_eq(dev_net(dev), net))
150 continue;
151 result = dev;
152 break;
153 }
154 }
155 if (!result) {
156 struct flowi4 fl4 = { .daddr = addr }; 146 struct flowi4 fl4 = { .daddr = addr };
157 struct fib_result res = { 0 }; 147 struct fib_result res = { 0 };
158 struct fib_table *local; 148 struct fib_table *local;
@@ -165,6 +155,8 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
165 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) && 155 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 res.type == RTN_LOCAL) 156 res.type == RTN_LOCAL)
167 result = FIB_RES_DEV(res); 157 result = FIB_RES_DEV(res);
158 } else {
159 result = ifa->ifa_dev->dev;
168 } 160 }
169 if (result && devref) 161 if (result && devref)
170 dev_hold(result); 162 dev_hold(result);
@@ -173,6 +165,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
173} 165}
174EXPORT_SYMBOL(__ip_dev_find); 166EXPORT_SYMBOL(__ip_dev_find);
175 167
168/* called under RCU lock */
169struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
170{
171 u32 hash = inet_addr_hash(net, addr);
172 struct in_ifaddr *ifa;
173
174 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
175 if (ifa->ifa_local == addr &&
176 net_eq(dev_net(ifa->ifa_dev->dev), net))
177 return ifa;
178
179 return NULL;
180}
181
176static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); 182static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177 183
178static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); 184static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
@@ -438,7 +444,7 @@ static void check_lifetime(struct work_struct *work);
438static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); 444static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
439 445
440static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, 446static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
441 u32 portid) 447 u32 portid, struct netlink_ext_ack *extack)
442{ 448{
443 struct in_device *in_dev = ifa->ifa_dev; 449 struct in_device *in_dev = ifa->ifa_dev;
444 struct in_ifaddr *ifa1, **ifap, **last_primary; 450 struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -483,6 +489,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
483 */ 489 */
484 ivi.ivi_addr = ifa->ifa_address; 490 ivi.ivi_addr = ifa->ifa_address;
485 ivi.ivi_dev = ifa->ifa_dev; 491 ivi.ivi_dev = ifa->ifa_dev;
492 ivi.extack = extack;
486 ret = blocking_notifier_call_chain(&inetaddr_validator_chain, 493 ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
487 NETDEV_UP, &ivi); 494 NETDEV_UP, &ivi);
488 ret = notifier_to_errno(ret); 495 ret = notifier_to_errno(ret);
@@ -515,7 +522,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
515 522
516static int inet_insert_ifa(struct in_ifaddr *ifa) 523static int inet_insert_ifa(struct in_ifaddr *ifa)
517{ 524{
518 return __inet_insert_ifa(ifa, NULL, 0); 525 return __inet_insert_ifa(ifa, NULL, 0, NULL);
519} 526}
520 527
521static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) 528static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
@@ -896,7 +903,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
896 return ret; 903 return ret;
897 } 904 }
898 } 905 }
899 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); 906 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
907 extack);
900 } else { 908 } else {
901 inet_free_ifa(ifa); 909 inet_free_ifa(ifa);
902 910
@@ -1516,6 +1524,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1516 if (inetdev_valid_mtu(dev->mtu)) 1524 if (inetdev_valid_mtu(dev->mtu))
1517 break; 1525 break;
1518 /* disable IP when MTU is not enough */ 1526 /* disable IP when MTU is not enough */
1527 /* fall through */
1519 case NETDEV_UNREGISTER: 1528 case NETDEV_UNREGISTER:
1520 inetdev_destroy(in_dev); 1529 inetdev_destroy(in_dev);
1521 break; 1530 break;
@@ -1751,7 +1760,7 @@ static int inet_validate_link_af(const struct net_device *dev,
1751 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1760 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1752 int err, rem; 1761 int err, rem;
1753 1762
1754 if (dev && !__in_dev_get_rtnl(dev)) 1763 if (dev && !__in_dev_get_rcu(dev))
1755 return -EAFNOSUPPORT; 1764 return -EAFNOSUPPORT;
1756 1765
1757 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL); 1766 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
@@ -1775,7 +1784,7 @@ static int inet_validate_link_af(const struct net_device *dev,
1775 1784
1776static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) 1785static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1777{ 1786{
1778 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1787 struct in_device *in_dev = __in_dev_get_rcu(dev);
1779 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1788 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1780 int rem; 1789 int rem;
1781 1790
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b00e4a43b4dc..d57aa64fa7c7 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -432,7 +432,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
432 case -EINPROGRESS: 432 case -EINPROGRESS:
433 goto error; 433 goto error;
434 434
435 case -EBUSY: 435 case -ENOSPC:
436 err = NET_XMIT_DROP; 436 err = NET_XMIT_DROP;
437 break; 437 break;
438 438
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 37819ab4cc74..f52d27a422c3 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -73,6 +73,11 @@ fail:
73 fib_free_table(main_table); 73 fib_free_table(main_table);
74 return -ENOMEM; 74 return -ENOMEM;
75} 75}
76
77static bool fib4_has_custom_rules(struct net *net)
78{
79 return false;
80}
76#else 81#else
77 82
78struct fib_table *fib_new_table(struct net *net, u32 id) 83struct fib_table *fib_new_table(struct net *net, u32 id)
@@ -128,6 +133,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
128 } 133 }
129 return NULL; 134 return NULL;
130} 135}
136
137static bool fib4_has_custom_rules(struct net *net)
138{
139 return net->ipv4.fib_has_custom_rules;
140}
131#endif /* CONFIG_IP_MULTIPLE_TABLES */ 141#endif /* CONFIG_IP_MULTIPLE_TABLES */
132 142
133static void fib_replace_table(struct net *net, struct fib_table *old, 143static void fib_replace_table(struct net *net, struct fib_table *old,
@@ -345,9 +355,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
345 if (res.type != RTN_UNICAST && 355 if (res.type != RTN_UNICAST &&
346 (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) 356 (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
347 goto e_inval; 357 goto e_inval;
348 if (!rpf && !fib_num_tclassid_users(net) &&
349 (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev)))
350 goto last_resort;
351 fib_combine_itag(itag, &res); 358 fib_combine_itag(itag, &res);
352 dev_match = false; 359 dev_match = false;
353 360
@@ -402,13 +409,28 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
402 struct in_device *idev, u32 *itag) 409 struct in_device *idev, u32 *itag)
403{ 410{
404 int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); 411 int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
412 struct net *net = dev_net(dev);
405 413
406 if (!r && !fib_num_tclassid_users(dev_net(dev)) && 414 if (!r && !fib_num_tclassid_users(net) &&
407 IN_DEV_ACCEPT_LOCAL(idev) &&
408 (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { 415 (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
416 if (IN_DEV_ACCEPT_LOCAL(idev))
417 goto ok;
418 /* with custom local routes in place, checking local addresses
419 * only will be too optimistic, with custom rules, checking
420 * local addresses only can be too strict, e.g. due to vrf
421 */
422 if (net->ipv4.fib_has_custom_local_routes ||
423 fib4_has_custom_rules(net))
424 goto full_check;
425 if (inet_lookup_ifaddr_rcu(net, src))
426 return -EINVAL;
427
428ok:
409 *itag = 0; 429 *itag = 0;
410 return 0; 430 return 0;
411 } 431 }
432
433full_check:
412 return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); 434 return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
413} 435}
414 436
@@ -759,6 +781,8 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
759 } 781 }
760 782
761 err = fib_table_insert(net, tb, &cfg, extack); 783 err = fib_table_insert(net, tb, &cfg, extack);
784 if (!err && cfg.fc_type == RTN_LOCAL)
785 net->ipv4.fib_has_custom_local_routes = true;
762errout: 786errout:
763 return err; 787 return err;
764} 788}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 01ed22139ac2..f04d944f8abe 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -601,17 +601,9 @@ static void fib_rebalance(struct fib_info *fi)
601 atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); 601 atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
602 } endfor_nexthops(fi); 602 } endfor_nexthops(fi);
603} 603}
604
605static inline void fib_add_weight(struct fib_info *fi,
606 const struct fib_nh *nh)
607{
608 fi->fib_weight += nh->nh_weight;
609}
610
611#else /* CONFIG_IP_ROUTE_MULTIPATH */ 604#else /* CONFIG_IP_ROUTE_MULTIPATH */
612 605
613#define fib_rebalance(fi) do { } while (0) 606#define fib_rebalance(fi) do { } while (0)
614#define fib_add_weight(fi, nh) do { } while (0)
615 607
616#endif /* CONFIG_IP_ROUTE_MULTIPATH */ 608#endif /* CONFIG_IP_ROUTE_MULTIPATH */
617 609
@@ -718,7 +710,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
718 bool ecn_ca = false; 710 bool ecn_ca = false;
719 711
720 nla_strlcpy(tmp, nla, sizeof(tmp)); 712 nla_strlcpy(tmp, nla, sizeof(tmp));
721 val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 713 val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
722 } else { 714 } else {
723 val = nla_get_u32(nla); 715 val = nla_get_u32(nla);
724 } 716 }
@@ -774,8 +766,8 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
774 * | 766 * |
775 * |-> {local prefix} (terminal node) 767 * |-> {local prefix} (terminal node)
776 */ 768 */
777static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 769static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
778 struct fib_nh *nh, struct netlink_ext_ack *extack) 770 struct netlink_ext_ack *extack)
779{ 771{
780 int err = 0; 772 int err = 0;
781 struct net *net; 773 struct net *net;
@@ -1038,7 +1030,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
1038 char tmp[TCP_CA_NAME_MAX]; 1030 char tmp[TCP_CA_NAME_MAX];
1039 1031
1040 nla_strlcpy(tmp, nla, sizeof(tmp)); 1032 nla_strlcpy(tmp, nla, sizeof(tmp));
1041 val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 1033 val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
1042 if (val == TCP_CA_UNSPEC) 1034 if (val == TCP_CA_UNSPEC)
1043 return -EINVAL; 1035 return -EINVAL;
1044 } else { 1036 } else {
@@ -1258,7 +1250,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
1258 int linkdown = 0; 1250 int linkdown = 0;
1259 1251
1260 change_nexthops(fi) { 1252 change_nexthops(fi) {
1261 err = fib_check_nh(cfg, fi, nexthop_nh, extack); 1253 err = fib_check_nh(cfg, nexthop_nh, extack);
1262 if (err != 0) 1254 if (err != 0)
1263 goto failure; 1255 goto failure;
1264 if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) 1256 if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
@@ -1275,7 +1267,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
1275 1267
1276 change_nexthops(fi) { 1268 change_nexthops(fi) {
1277 fib_info_update_nh_saddr(net, nexthop_nh); 1269 fib_info_update_nh_saddr(net, nexthop_nh);
1278 fib_add_weight(fi, nexthop_nh);
1279 } endfor_nexthops(fi) 1270 } endfor_nexthops(fi)
1280 1271
1281 fib_rebalance(fi); 1272 fib_rebalance(fi);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c636650a6a70..5ddc4aefff12 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -87,32 +87,32 @@
87 87
88static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, 88static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
89 enum fib_event_type event_type, u32 dst, 89 enum fib_event_type event_type, u32 dst,
90 int dst_len, struct fib_info *fi, 90 int dst_len, struct fib_alias *fa)
91 u8 tos, u8 type, u32 tb_id)
92{ 91{
93 struct fib_entry_notifier_info info = { 92 struct fib_entry_notifier_info info = {
94 .dst = dst, 93 .dst = dst,
95 .dst_len = dst_len, 94 .dst_len = dst_len,
96 .fi = fi, 95 .fi = fa->fa_info,
97 .tos = tos, 96 .tos = fa->fa_tos,
98 .type = type, 97 .type = fa->fa_type,
99 .tb_id = tb_id, 98 .tb_id = fa->tb_id,
100 }; 99 };
101 return call_fib4_notifier(nb, net, event_type, &info.info); 100 return call_fib4_notifier(nb, net, event_type, &info.info);
102} 101}
103 102
104static int call_fib_entry_notifiers(struct net *net, 103static int call_fib_entry_notifiers(struct net *net,
105 enum fib_event_type event_type, u32 dst, 104 enum fib_event_type event_type, u32 dst,
106 int dst_len, struct fib_info *fi, 105 int dst_len, struct fib_alias *fa,
107 u8 tos, u8 type, u32 tb_id) 106 struct netlink_ext_ack *extack)
108{ 107{
109 struct fib_entry_notifier_info info = { 108 struct fib_entry_notifier_info info = {
109 .info.extack = extack,
110 .dst = dst, 110 .dst = dst,
111 .dst_len = dst_len, 111 .dst_len = dst_len,
112 .fi = fi, 112 .fi = fa->fa_info,
113 .tos = tos, 113 .tos = fa->fa_tos,
114 .type = type, 114 .type = fa->fa_type,
115 .tb_id = tb_id, 115 .tb_id = fa->tb_id,
116 }; 116 };
117 return call_fib4_notifiers(net, event_type, &info.info); 117 return call_fib4_notifiers(net, event_type, &info.info);
118} 118}
@@ -1216,9 +1216,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
1216 new_fa->fa_default = -1; 1216 new_fa->fa_default = -1;
1217 1217
1218 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, 1218 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1219 key, plen, fi, 1219 key, plen, new_fa, extack);
1220 new_fa->fa_tos, cfg->fc_type,
1221 tb->tb_id);
1222 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, 1220 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1223 tb->tb_id, &cfg->fc_nlinfo, nlflags); 1221 tb->tb_id, &cfg->fc_nlinfo, nlflags);
1224 1222
@@ -1273,8 +1271,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
1273 tb->tb_num_default++; 1271 tb->tb_num_default++;
1274 1272
1275 rt_cache_flush(cfg->fc_nlinfo.nl_net); 1273 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1276 call_fib_entry_notifiers(net, event, key, plen, fi, tos, cfg->fc_type, 1274 call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
1277 tb->tb_id);
1278 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, 1275 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
1279 &cfg->fc_nlinfo, nlflags); 1276 &cfg->fc_nlinfo, nlflags);
1280succeeded: 1277succeeded:
@@ -1574,8 +1571,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
1574 return -ESRCH; 1571 return -ESRCH;
1575 1572
1576 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, 1573 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
1577 fa_to_delete->fa_info, tos, 1574 fa_to_delete, extack);
1578 fa_to_delete->fa_type, tb->tb_id);
1579 rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, 1575 rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
1580 &cfg->fc_nlinfo, 0); 1576 &cfg->fc_nlinfo, 0);
1581 1577
@@ -1892,9 +1888,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
1892 1888
1893 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, 1889 call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
1894 n->key, 1890 n->key,
1895 KEYLENGTH - fa->fa_slen, 1891 KEYLENGTH - fa->fa_slen, fa,
1896 fi, fa->fa_tos, fa->fa_type, 1892 NULL);
1897 tb->tb_id);
1898 hlist_del_rcu(&fa->fa_list); 1893 hlist_del_rcu(&fa->fa_list);
1899 fib_release_info(fa->fa_info); 1894 fib_release_info(fa->fa_info);
1900 alias_free_mem_rcu(fa); 1895 alias_free_mem_rcu(fa);
@@ -1932,8 +1927,7 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
1932 continue; 1927 continue;
1933 1928
1934 call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key, 1929 call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
1935 KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, 1930 KEYLENGTH - fa->fa_slen, fa);
1936 fa->fa_type, fa->tb_id);
1937 } 1931 }
1938} 1932}
1939 1933
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 681e33998e03..1617604c9284 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -782,7 +782,7 @@ static bool icmp_tag_validation(int proto)
782} 782}
783 783
784/* 784/*
785 * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and 785 * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and
786 * ICMP_PARAMETERPROB. 786 * ICMP_PARAMETERPROB.
787 */ 787 */
788 788
@@ -810,7 +810,8 @@ static bool icmp_unreach(struct sk_buff *skb)
810 if (iph->ihl < 5) /* Mangled header, drop. */ 810 if (iph->ihl < 5) /* Mangled header, drop. */
811 goto out_err; 811 goto out_err;
812 812
813 if (icmph->type == ICMP_DEST_UNREACH) { 813 switch (icmph->type) {
814 case ICMP_DEST_UNREACH:
814 switch (icmph->code & 15) { 815 switch (icmph->code & 15) {
815 case ICMP_NET_UNREACH: 816 case ICMP_NET_UNREACH:
816 case ICMP_HOST_UNREACH: 817 case ICMP_HOST_UNREACH:
@@ -846,8 +847,16 @@ static bool icmp_unreach(struct sk_buff *skb)
846 } 847 }
847 if (icmph->code > NR_ICMP_UNREACH) 848 if (icmph->code > NR_ICMP_UNREACH)
848 goto out; 849 goto out;
849 } else if (icmph->type == ICMP_PARAMETERPROB) 850 break;
851 case ICMP_PARAMETERPROB:
850 info = ntohl(icmph->un.gateway) >> 24; 852 info = ntohl(icmph->un.gateway) >> 24;
853 break;
854 case ICMP_TIME_EXCEEDED:
855 __ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS);
856 if (icmph->code == ICMP_EXC_FRAGTIME)
857 goto out;
858 break;
859 }
851 860
852 /* 861 /*
853 * Throw it at our lower layers 862 * Throw it at our lower layers
@@ -959,8 +968,9 @@ static bool icmp_timestamp(struct sk_buff *skb)
959 */ 968 */
960 icmp_param.data.times[1] = inet_current_timestamp(); 969 icmp_param.data.times[1] = inet_current_timestamp();
961 icmp_param.data.times[2] = icmp_param.data.times[1]; 970 icmp_param.data.times[2] = icmp_param.data.times[1];
962 if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)) 971
963 BUG(); 972 BUG_ON(skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4));
973
964 icmp_param.data.icmph = *icmp_hdr(skb); 974 icmp_param.data.icmph = *icmp_hdr(skb);
965 icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY; 975 icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
966 icmp_param.data.icmph.code = 0; 976 icmp_param.data.icmph.code = 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index ab183af0b5b6..d1f8f302dbf3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -752,18 +752,18 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
752 return ip_local_out(net, skb->sk, skb); 752 return ip_local_out(net, skb->sk, skb);
753} 753}
754 754
755static void igmp_gq_timer_expire(unsigned long data) 755static void igmp_gq_timer_expire(struct timer_list *t)
756{ 756{
757 struct in_device *in_dev = (struct in_device *)data; 757 struct in_device *in_dev = from_timer(in_dev, t, mr_gq_timer);
758 758
759 in_dev->mr_gq_running = 0; 759 in_dev->mr_gq_running = 0;
760 igmpv3_send_report(in_dev, NULL); 760 igmpv3_send_report(in_dev, NULL);
761 in_dev_put(in_dev); 761 in_dev_put(in_dev);
762} 762}
763 763
764static void igmp_ifc_timer_expire(unsigned long data) 764static void igmp_ifc_timer_expire(struct timer_list *t)
765{ 765{
766 struct in_device *in_dev = (struct in_device *)data; 766 struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer);
767 767
768 igmpv3_send_cr(in_dev); 768 igmpv3_send_cr(in_dev);
769 if (in_dev->mr_ifc_count) { 769 if (in_dev->mr_ifc_count) {
@@ -784,9 +784,9 @@ static void igmp_ifc_event(struct in_device *in_dev)
784} 784}
785 785
786 786
787static void igmp_timer_expire(unsigned long data) 787static void igmp_timer_expire(struct timer_list *t)
788{ 788{
789 struct ip_mc_list *im = (struct ip_mc_list *)data; 789 struct ip_mc_list *im = from_timer(im, t, timer);
790 struct in_device *in_dev = im->interface; 790 struct in_device *in_dev = im->interface;
791 791
792 spin_lock(&im->lock); 792 spin_lock(&im->lock);
@@ -1385,7 +1385,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1385 refcount_set(&im->refcnt, 1); 1385 refcount_set(&im->refcnt, 1);
1386 spin_lock_init(&im->lock); 1386 spin_lock_init(&im->lock);
1387#ifdef CONFIG_IP_MULTICAST 1387#ifdef CONFIG_IP_MULTICAST
1388 setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); 1388 timer_setup(&im->timer, igmp_timer_expire, 0);
1389 im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; 1389 im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
1390#endif 1390#endif
1391 1391
@@ -1695,10 +1695,8 @@ void ip_mc_init_dev(struct in_device *in_dev)
1695 ASSERT_RTNL(); 1695 ASSERT_RTNL();
1696 1696
1697#ifdef CONFIG_IP_MULTICAST 1697#ifdef CONFIG_IP_MULTICAST
1698 setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 1698 timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0);
1699 (unsigned long)in_dev); 1699 timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0);
1700 setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
1701 (unsigned long)in_dev);
1702 in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; 1700 in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
1703#endif 1701#endif
1704 1702
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b47a59cb3573..4ca46dc08e63 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -39,11 +39,11 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
39 * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, 39 * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
40 * and 0.0.0.0 equals to 0.0.0.0 only 40 * and 0.0.0.0 equals to 0.0.0.0 only
41 */ 41 */
42static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, 42static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
43 const struct in6_addr *sk2_rcv_saddr6, 43 const struct in6_addr *sk2_rcv_saddr6,
44 __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, 44 __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
45 bool sk1_ipv6only, bool sk2_ipv6only, 45 bool sk1_ipv6only, bool sk2_ipv6only,
46 bool match_wildcard) 46 bool match_wildcard)
47{ 47{
48 int addr_type = ipv6_addr_type(sk1_rcv_saddr6); 48 int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
49 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; 49 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -52,29 +52,29 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
52 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { 52 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
53 if (!sk2_ipv6only) { 53 if (!sk2_ipv6only) {
54 if (sk1_rcv_saddr == sk2_rcv_saddr) 54 if (sk1_rcv_saddr == sk2_rcv_saddr)
55 return 1; 55 return true;
56 if (!sk1_rcv_saddr || !sk2_rcv_saddr) 56 if (!sk1_rcv_saddr || !sk2_rcv_saddr)
57 return match_wildcard; 57 return match_wildcard;
58 } 58 }
59 return 0; 59 return false;
60 } 60 }
61 61
62 if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) 62 if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
63 return 1; 63 return true;
64 64
65 if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && 65 if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
66 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) 66 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
67 return 1; 67 return true;
68 68
69 if (addr_type == IPV6_ADDR_ANY && match_wildcard && 69 if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
70 !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) 70 !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
71 return 1; 71 return true;
72 72
73 if (sk2_rcv_saddr6 && 73 if (sk2_rcv_saddr6 &&
74 ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6)) 74 ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
75 return 1; 75 return true;
76 76
77 return 0; 77 return false;
78} 78}
79#endif 79#endif
80 80
@@ -82,20 +82,20 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
82 * match_wildcard == false: addresses must be exactly the same, i.e. 82 * match_wildcard == false: addresses must be exactly the same, i.e.
83 * 0.0.0.0 only equals to 0.0.0.0 83 * 0.0.0.0 only equals to 0.0.0.0
84 */ 84 */
85static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, 85static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
86 bool sk2_ipv6only, bool match_wildcard) 86 bool sk2_ipv6only, bool match_wildcard)
87{ 87{
88 if (!sk2_ipv6only) { 88 if (!sk2_ipv6only) {
89 if (sk1_rcv_saddr == sk2_rcv_saddr) 89 if (sk1_rcv_saddr == sk2_rcv_saddr)
90 return 1; 90 return true;
91 if (!sk1_rcv_saddr || !sk2_rcv_saddr) 91 if (!sk1_rcv_saddr || !sk2_rcv_saddr)
92 return match_wildcard; 92 return match_wildcard;
93 } 93 }
94 return 0; 94 return false;
95} 95}
96 96
97int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, 97bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
98 bool match_wildcard) 98 bool match_wildcard)
99{ 99{
100#if IS_ENABLED(CONFIG_IPV6) 100#if IS_ENABLED(CONFIG_IPV6)
101 if (sk->sk_family == AF_INET6) 101 if (sk->sk_family == AF_INET6)
@@ -495,17 +495,15 @@ EXPORT_SYMBOL(inet_csk_accept);
495 * to optimize. 495 * to optimize.
496 */ 496 */
497void inet_csk_init_xmit_timers(struct sock *sk, 497void inet_csk_init_xmit_timers(struct sock *sk,
498 void (*retransmit_handler)(unsigned long), 498 void (*retransmit_handler)(struct timer_list *t),
499 void (*delack_handler)(unsigned long), 499 void (*delack_handler)(struct timer_list *t),
500 void (*keepalive_handler)(unsigned long)) 500 void (*keepalive_handler)(struct timer_list *t))
501{ 501{
502 struct inet_connection_sock *icsk = inet_csk(sk); 502 struct inet_connection_sock *icsk = inet_csk(sk);
503 503
504 setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler, 504 timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
505 (unsigned long)sk); 505 timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
506 setup_timer(&icsk->icsk_delack_timer, delack_handler, 506 timer_setup(&sk->sk_timer, keepalive_handler, 0);
507 (unsigned long)sk);
508 setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
509 icsk->icsk_pending = icsk->icsk_ack.pending = 0; 507 icsk->icsk_pending = icsk->icsk_ack.pending = 0;
510} 508}
511EXPORT_SYMBOL(inet_csk_init_xmit_timers); 509EXPORT_SYMBOL(inet_csk_init_xmit_timers);
@@ -676,9 +674,9 @@ void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req
676} 674}
677EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); 675EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
678 676
679static void reqsk_timer_handler(unsigned long data) 677static void reqsk_timer_handler(struct timer_list *t)
680{ 678{
681 struct request_sock *req = (struct request_sock *)data; 679 struct request_sock *req = from_timer(req, t, rsk_timer);
682 struct sock *sk_listener = req->rsk_listener; 680 struct sock *sk_listener = req->rsk_listener;
683 struct net *net = sock_net(sk_listener); 681 struct net *net = sock_net(sk_listener);
684 struct inet_connection_sock *icsk = inet_csk(sk_listener); 682 struct inet_connection_sock *icsk = inet_csk(sk_listener);
@@ -749,8 +747,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
749 req->num_timeout = 0; 747 req->num_timeout = 0;
750 req->sk = NULL; 748 req->sk = NULL;
751 749
752 setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler, 750 timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
753 (unsigned long)req);
754 mod_timer(&req->rsk_timer, jiffies + timeout); 751 mod_timer(&req->rsk_timer, jiffies + timeout);
755 752
756 inet_ehash_insert(req_to_sk(req), NULL); 753 inet_ehash_insert(req_to_sk(req), NULL);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index f9597ba26599..26a3d0315728 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -147,7 +147,7 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
147 spin_unlock(&hb->chain_lock); 147 spin_unlock(&hb->chain_lock);
148 148
149 hlist_for_each_entry_safe(fq, n, &expired, list_evictor) 149 hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
150 f->frag_expire((unsigned long) fq); 150 f->frag_expire(&fq->timer);
151 151
152 return evicted; 152 return evicted;
153} 153}
@@ -366,7 +366,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
366 f->constructor(q, arg); 366 f->constructor(q, arg);
367 add_frag_mem_limit(nf, f->qsize); 367 add_frag_mem_limit(nf, f->qsize);
368 368
369 setup_timer(&q->timer, f->frag_expire, (unsigned long)q); 369 timer_setup(&q->timer, f->frag_expire, 0);
370 spin_lock_init(&q->lock); 370 spin_lock_init(&q->lock);
371 refcount_set(&q->refcnt, 1); 371 refcount_set(&q->refcnt, 1);
372 372
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 5b039159e67a..c690cd0d9b3f 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -9,7 +9,6 @@
9 */ 9 */
10 10
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/kmemcheck.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
14#include <linux/module.h> 13#include <linux/module.h>
15#include <net/inet_hashtables.h> 14#include <net/inet_hashtables.h>
@@ -142,9 +141,9 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
142} 141}
143EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); 142EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
144 143
145static void tw_timer_handler(unsigned long data) 144static void tw_timer_handler(struct timer_list *t)
146{ 145{
147 struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data; 146 struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
148 147
149 if (tw->tw_kill) 148 if (tw->tw_kill)
150 __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); 149 __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
@@ -167,8 +166,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
167 if (tw) { 166 if (tw) {
168 const struct inet_sock *inet = inet_sk(sk); 167 const struct inet_sock *inet = inet_sk(sk);
169 168
170 kmemcheck_annotate_bitfield(tw, flags);
171
172 tw->tw_dr = dr; 169 tw->tw_dr = dr;
173 /* Give us an identity. */ 170 /* Give us an identity. */
174 tw->tw_daddr = inet->inet_daddr; 171 tw->tw_daddr = inet->inet_daddr;
@@ -188,8 +185,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
188 tw->tw_prot = sk->sk_prot_creator; 185 tw->tw_prot = sk->sk_prot_creator;
189 atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); 186 atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
190 twsk_net_set(tw, sock_net(sk)); 187 twsk_net_set(tw, sock_net(sk));
191 setup_pinned_timer(&tw->tw_timer, tw_timer_handler, 188 timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED);
192 (unsigned long)tw);
193 /* 189 /*
194 * Because we use RCU lookups, we should not set tw_refcnt 190 * Because we use RCU lookups, we should not set tw_refcnt
195 * to a non null value before everything is setup for this 191 * to a non null value before everything is setup for this
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index b20c8ac64081..914d56928578 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -284,14 +284,17 @@ EXPORT_SYMBOL(inet_peer_xrlim_allow);
284 284
285void inetpeer_invalidate_tree(struct inet_peer_base *base) 285void inetpeer_invalidate_tree(struct inet_peer_base *base)
286{ 286{
287 struct inet_peer *p, *n; 287 struct rb_node *p = rb_first(&base->rb_root);
288 288
289 rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) { 289 while (p) {
290 inet_putpeer(p); 290 struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node);
291
292 p = rb_next(p);
293 rb_erase(&peer->rb_node, &base->rb_root);
294 inet_putpeer(peer);
291 cond_resched(); 295 cond_resched();
292 } 296 }
293 297
294 base->rb_root = RB_ROOT;
295 base->total = 0; 298 base->total = 0;
296} 299}
297EXPORT_SYMBOL(inetpeer_invalidate_tree); 300EXPORT_SYMBOL(inetpeer_invalidate_tree);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index df8fe0503de0..bbf1b94942c0 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -191,12 +191,13 @@ static bool frag_expire_skip_icmp(u32 user)
191/* 191/*
192 * Oops, a fragment queue timed out. Kill it and send an ICMP reply. 192 * Oops, a fragment queue timed out. Kill it and send an ICMP reply.
193 */ 193 */
194static void ip_expire(unsigned long arg) 194static void ip_expire(struct timer_list *t)
195{ 195{
196 struct inet_frag_queue *frag = from_timer(frag, t, timer);
196 struct ipq *qp; 197 struct ipq *qp;
197 struct net *net; 198 struct net *net;
198 199
199 qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); 200 qp = container_of(frag, struct ipq, q);
200 net = container_of(qp->q.net, struct net, ipv4.frags); 201 net = container_of(qp->q.net, struct net, ipv4.frags);
201 202
202 rcu_read_lock(); 203 rcu_read_lock();
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 467e44d7587d..bb6239169b1a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -579,8 +579,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
579 if (gre_handle_offloads(skb, false)) 579 if (gre_handle_offloads(skb, false))
580 goto err_free_rt; 580 goto err_free_rt;
581 581
582 if (skb->len > dev->mtu) { 582 if (skb->len > dev->mtu + dev->hard_header_len) {
583 pskb_trim(skb, dev->mtu); 583 pskb_trim(skb, dev->mtu + dev->hard_header_len);
584 truncate = true; 584 truncate = true;
585 } 585 }
586 586
@@ -731,8 +731,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
731 if (skb_cow_head(skb, dev->needed_headroom)) 731 if (skb_cow_head(skb, dev->needed_headroom))
732 goto free_skb; 732 goto free_skb;
733 733
734 if (skb->len - dev->hard_header_len > dev->mtu) { 734 if (skb->len > dev->mtu + dev->hard_header_len) {
735 pskb_trim(skb, dev->mtu); 735 pskb_trim(skb, dev->mtu + dev->hard_header_len);
736 truncate = true; 736 truncate = true;
737 } 737 }
738 738
@@ -773,20 +773,46 @@ free_skb:
773 return NETDEV_TX_OK; 773 return NETDEV_TX_OK;
774} 774}
775 775
776static void ipgre_link_update(struct net_device *dev, bool set_mtu)
777{
778 struct ip_tunnel *tunnel = netdev_priv(dev);
779 int len;
780
781 len = tunnel->tun_hlen;
782 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
783 len = tunnel->tun_hlen - len;
784 tunnel->hlen = tunnel->hlen + len;
785
786 dev->needed_headroom = dev->needed_headroom + len;
787 if (set_mtu)
788 dev->mtu = max_t(int, dev->mtu - len, 68);
789
790 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
791 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
792 tunnel->encap.type == TUNNEL_ENCAP_NONE) {
793 dev->features |= NETIF_F_GSO_SOFTWARE;
794 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
795 }
796 dev->features |= NETIF_F_LLTX;
797 }
798}
799
776static int ipgre_tunnel_ioctl(struct net_device *dev, 800static int ipgre_tunnel_ioctl(struct net_device *dev,
777 struct ifreq *ifr, int cmd) 801 struct ifreq *ifr, int cmd)
778{ 802{
779 int err;
780 struct ip_tunnel_parm p; 803 struct ip_tunnel_parm p;
804 int err;
781 805
782 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 806 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
783 return -EFAULT; 807 return -EFAULT;
808
784 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { 809 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
785 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 810 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
786 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || 811 p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
787 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) 812 ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
788 return -EINVAL; 813 return -EINVAL;
789 } 814 }
815
790 p.i_flags = gre_flags_to_tnl_flags(p.i_flags); 816 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
791 p.o_flags = gre_flags_to_tnl_flags(p.o_flags); 817 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
792 818
@@ -794,11 +820,22 @@ static int ipgre_tunnel_ioctl(struct net_device *dev,
794 if (err) 820 if (err)
795 return err; 821 return err;
796 822
823 if (cmd == SIOCCHGTUNNEL) {
824 struct ip_tunnel *t = netdev_priv(dev);
825
826 t->parms.i_flags = p.i_flags;
827 t->parms.o_flags = p.o_flags;
828
829 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
830 ipgre_link_update(dev, true);
831 }
832
797 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); 833 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
798 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); 834 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
799 835
800 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 836 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
801 return -EFAULT; 837 return -EFAULT;
838
802 return 0; 839 return 0;
803} 840}
804 841
@@ -1011,15 +1048,14 @@ static int __net_init ipgre_init_net(struct net *net)
1011 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); 1048 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1012} 1049}
1013 1050
1014static void __net_exit ipgre_exit_net(struct net *net) 1051static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1015{ 1052{
1016 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); 1053 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
1017 ip_tunnel_delete_net(itn, &ipgre_link_ops);
1018} 1054}
1019 1055
1020static struct pernet_operations ipgre_net_ops = { 1056static struct pernet_operations ipgre_net_ops = {
1021 .init = ipgre_init_net, 1057 .init = ipgre_init_net,
1022 .exit = ipgre_exit_net, 1058 .exit_batch = ipgre_exit_batch_net,
1023 .id = &ipgre_net_id, 1059 .id = &ipgre_net_id,
1024 .size = sizeof(struct ip_tunnel_net), 1060 .size = sizeof(struct ip_tunnel_net),
1025}; 1061};
@@ -1308,9 +1344,9 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1308 struct netlink_ext_ack *extack) 1344 struct netlink_ext_ack *extack)
1309{ 1345{
1310 struct ip_tunnel *t = netdev_priv(dev); 1346 struct ip_tunnel *t = netdev_priv(dev);
1311 struct ip_tunnel_parm p;
1312 struct ip_tunnel_encap ipencap; 1347 struct ip_tunnel_encap ipencap;
1313 __u32 fwmark = t->fwmark; 1348 __u32 fwmark = t->fwmark;
1349 struct ip_tunnel_parm p;
1314 int err; 1350 int err;
1315 1351
1316 if (ipgre_netlink_encap_parms(data, &ipencap)) { 1352 if (ipgre_netlink_encap_parms(data, &ipencap)) {
@@ -1323,7 +1359,18 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1323 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); 1359 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1324 if (err < 0) 1360 if (err < 0)
1325 return err; 1361 return err;
1326 return ip_tunnel_changelink(dev, tb, &p, fwmark); 1362
1363 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1364 if (err < 0)
1365 return err;
1366
1367 t->parms.i_flags = p.i_flags;
1368 t->parms.o_flags = p.o_flags;
1369
1370 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
1371 ipgre_link_update(dev, !tb[IFLA_MTU]);
1372
1373 return 0;
1327} 1374}
1328 1375
1329static size_t ipgre_get_size(const struct net_device *dev) 1376static size_t ipgre_get_size(const struct net_device *dev)
@@ -1542,15 +1589,14 @@ static int __net_init ipgre_tap_init_net(struct net *net)
1542 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); 1589 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1543} 1590}
1544 1591
1545static void __net_exit ipgre_tap_exit_net(struct net *net) 1592static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1546{ 1593{
1547 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); 1594 ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
1548 ip_tunnel_delete_net(itn, &ipgre_tap_ops);
1549} 1595}
1550 1596
1551static struct pernet_operations ipgre_tap_net_ops = { 1597static struct pernet_operations ipgre_tap_net_ops = {
1552 .init = ipgre_tap_init_net, 1598 .init = ipgre_tap_init_net,
1553 .exit = ipgre_tap_exit_net, 1599 .exit_batch = ipgre_tap_exit_batch_net,
1554 .id = &gre_tap_net_id, 1600 .id = &gre_tap_net_id,
1555 .size = sizeof(struct ip_tunnel_net), 1601 .size = sizeof(struct ip_tunnel_net),
1556}; 1602};
@@ -1561,16 +1607,14 @@ static int __net_init erspan_init_net(struct net *net)
1561 &erspan_link_ops, "erspan0"); 1607 &erspan_link_ops, "erspan0");
1562} 1608}
1563 1609
1564static void __net_exit erspan_exit_net(struct net *net) 1610static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1565{ 1611{
1566 struct ip_tunnel_net *itn = net_generic(net, erspan_net_id); 1612 ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
1567
1568 ip_tunnel_delete_net(itn, &erspan_link_ops);
1569} 1613}
1570 1614
1571static struct pernet_operations erspan_net_ops = { 1615static struct pernet_operations erspan_net_ops = {
1572 .init = erspan_init_net, 1616 .init = erspan_init_net,
1573 .exit = erspan_exit_net, 1617 .exit_batch = erspan_exit_batch_net,
1574 .id = &erspan_net_id, 1618 .id = &erspan_net_id,
1575 .size = sizeof(struct ip_tunnel_net), 1619 .size = sizeof(struct ip_tunnel_net),
1576}; 1620};
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e9805ad664ac..fe6fee728ce4 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1061,16 +1061,22 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1061 } 1061 }
1062} 1062}
1063 1063
1064void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) 1064void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1065 struct rtnl_link_ops *ops)
1065{ 1066{
1067 struct ip_tunnel_net *itn;
1068 struct net *net;
1066 LIST_HEAD(list); 1069 LIST_HEAD(list);
1067 1070
1068 rtnl_lock(); 1071 rtnl_lock();
1069 ip_tunnel_destroy(itn, &list, ops); 1072 list_for_each_entry(net, net_list, exit_list) {
1073 itn = net_generic(net, id);
1074 ip_tunnel_destroy(itn, &list, ops);
1075 }
1070 unregister_netdevice_many(&list); 1076 unregister_netdevice_many(&list);
1071 rtnl_unlock(); 1077 rtnl_unlock();
1072} 1078}
1073EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); 1079EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1074 1080
1075int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], 1081int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1076 struct ip_tunnel_parm *p, __u32 fwmark) 1082 struct ip_tunnel_parm *p, __u32 fwmark)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 89453cf62158..949f432a5f04 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -198,15 +198,6 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
198 goto tx_error; 198 goto tx_error;
199 } 199 }
200 200
201 if (tunnel->err_count > 0) {
202 if (time_before(jiffies,
203 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
204 tunnel->err_count--;
205 dst_link_failure(skb);
206 } else
207 tunnel->err_count = 0;
208 }
209
210 mtu = dst_mtu(dst); 201 mtu = dst_mtu(dst);
211 if (skb->len > mtu) { 202 if (skb->len > mtu) {
212 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 203 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
@@ -453,15 +444,14 @@ static int __net_init vti_init_net(struct net *net)
453 return 0; 444 return 0;
454} 445}
455 446
456static void __net_exit vti_exit_net(struct net *net) 447static void __net_exit vti_exit_batch_net(struct list_head *list_net)
457{ 448{
458 struct ip_tunnel_net *itn = net_generic(net, vti_net_id); 449 ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops);
459 ip_tunnel_delete_net(itn, &vti_link_ops);
460} 450}
461 451
462static struct pernet_operations vti_net_ops = { 452static struct pernet_operations vti_net_ops = {
463 .init = vti_init_net, 453 .init = vti_init_net,
464 .exit = vti_exit_net, 454 .exit_batch = vti_exit_batch_net,
465 .id = &vti_net_id, 455 .id = &vti_net_id,
466 .size = sizeof(struct ip_tunnel_net), 456 .size = sizeof(struct ip_tunnel_net),
467}; 457};
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index cdd627355ed1..c891235b4966 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -659,15 +659,14 @@ static int __net_init ipip_init_net(struct net *net)
659 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); 659 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
660} 660}
661 661
662static void __net_exit ipip_exit_net(struct net *net) 662static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
663{ 663{
664 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); 664 ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
665 ip_tunnel_delete_net(itn, &ipip_link_ops);
666} 665}
667 666
668static struct pernet_operations ipip_net_ops = { 667static struct pernet_operations ipip_net_ops = {
669 .init = ipip_init_net, 668 .init = ipip_init_net,
670 .exit = ipip_exit_net, 669 .exit_batch = ipip_exit_batch_net,
671 .id = &ipip_net_id, 670 .id = &ipip_net_id,
672 .size = sizeof(struct ip_tunnel_net), 671 .size = sizeof(struct ip_tunnel_net),
673}; 672};
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c9b3e6e069ae..fd5f19c988e4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -67,6 +67,7 @@
67#include <net/fib_rules.h> 67#include <net/fib_rules.h>
68#include <linux/netconf.h> 68#include <linux/netconf.h>
69#include <net/nexthop.h> 69#include <net/nexthop.h>
70#include <net/switchdev.h>
70 71
71struct ipmr_rule { 72struct ipmr_rule {
72 struct fib_rule common; 73 struct fib_rule common;
@@ -111,7 +112,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
111 int cmd); 112 int cmd);
112static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); 113static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
113static void mroute_clean_tables(struct mr_table *mrt, bool all); 114static void mroute_clean_tables(struct mr_table *mrt, bool all);
114static void ipmr_expire_process(unsigned long arg); 115static void ipmr_expire_process(struct timer_list *t);
115 116
116#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 117#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
117#define ipmr_for_each_table(mrt, net) \ 118#define ipmr_for_each_table(mrt, net) \
@@ -264,6 +265,22 @@ static void __net_exit ipmr_rules_exit(struct net *net)
264 fib_rules_unregister(net->ipv4.mr_rules_ops); 265 fib_rules_unregister(net->ipv4.mr_rules_ops);
265 rtnl_unlock(); 266 rtnl_unlock();
266} 267}
268
269static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
270{
271 return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
272}
273
274static unsigned int ipmr_rules_seq_read(struct net *net)
275{
276 return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
277}
278
279bool ipmr_rule_default(const struct fib_rule *rule)
280{
281 return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
282}
283EXPORT_SYMBOL(ipmr_rule_default);
267#else 284#else
268#define ipmr_for_each_table(mrt, net) \ 285#define ipmr_for_each_table(mrt, net) \
269 for (mrt = net->ipv4.mrt; mrt; mrt = NULL) 286 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
@@ -298,6 +315,22 @@ static void __net_exit ipmr_rules_exit(struct net *net)
298 net->ipv4.mrt = NULL; 315 net->ipv4.mrt = NULL;
299 rtnl_unlock(); 316 rtnl_unlock();
300} 317}
318
319static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
320{
321 return 0;
322}
323
324static unsigned int ipmr_rules_seq_read(struct net *net)
325{
326 return 0;
327}
328
329bool ipmr_rule_default(const struct fib_rule *rule)
330{
331 return true;
332}
333EXPORT_SYMBOL(ipmr_rule_default);
301#endif 334#endif
302 335
303static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, 336static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -342,8 +375,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
342 INIT_LIST_HEAD(&mrt->mfc_cache_list); 375 INIT_LIST_HEAD(&mrt->mfc_cache_list);
343 INIT_LIST_HEAD(&mrt->mfc_unres_queue); 376 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
344 377
345 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, 378 timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
346 (unsigned long)mrt);
347 379
348 mrt->mroute_reg_vif_num = -1; 380 mrt->mroute_reg_vif_num = -1;
349#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 381#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -587,6 +619,82 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
587} 619}
588#endif 620#endif
589 621
622static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
623 struct net *net,
624 enum fib_event_type event_type,
625 struct vif_device *vif,
626 vifi_t vif_index, u32 tb_id)
627{
628 struct vif_entry_notifier_info info = {
629 .info = {
630 .family = RTNL_FAMILY_IPMR,
631 .net = net,
632 },
633 .dev = vif->dev,
634 .vif_index = vif_index,
635 .vif_flags = vif->flags,
636 .tb_id = tb_id,
637 };
638
639 return call_fib_notifier(nb, net, event_type, &info.info);
640}
641
642static int call_ipmr_vif_entry_notifiers(struct net *net,
643 enum fib_event_type event_type,
644 struct vif_device *vif,
645 vifi_t vif_index, u32 tb_id)
646{
647 struct vif_entry_notifier_info info = {
648 .info = {
649 .family = RTNL_FAMILY_IPMR,
650 .net = net,
651 },
652 .dev = vif->dev,
653 .vif_index = vif_index,
654 .vif_flags = vif->flags,
655 .tb_id = tb_id,
656 };
657
658 ASSERT_RTNL();
659 net->ipv4.ipmr_seq++;
660 return call_fib_notifiers(net, event_type, &info.info);
661}
662
663static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
664 struct net *net,
665 enum fib_event_type event_type,
666 struct mfc_cache *mfc, u32 tb_id)
667{
668 struct mfc_entry_notifier_info info = {
669 .info = {
670 .family = RTNL_FAMILY_IPMR,
671 .net = net,
672 },
673 .mfc = mfc,
674 .tb_id = tb_id
675 };
676
677 return call_fib_notifier(nb, net, event_type, &info.info);
678}
679
680static int call_ipmr_mfc_entry_notifiers(struct net *net,
681 enum fib_event_type event_type,
682 struct mfc_cache *mfc, u32 tb_id)
683{
684 struct mfc_entry_notifier_info info = {
685 .info = {
686 .family = RTNL_FAMILY_IPMR,
687 .net = net,
688 },
689 .mfc = mfc,
690 .tb_id = tb_id
691 };
692
693 ASSERT_RTNL();
694 net->ipv4.ipmr_seq++;
695 return call_fib_notifiers(net, event_type, &info.info);
696}
697
590/** 698/**
591 * vif_delete - Delete a VIF entry 699 * vif_delete - Delete a VIF entry
592 * @notify: Set to 1, if the caller is a notifier_call 700 * @notify: Set to 1, if the caller is a notifier_call
@@ -594,6 +702,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
594static int vif_delete(struct mr_table *mrt, int vifi, int notify, 702static int vif_delete(struct mr_table *mrt, int vifi, int notify,
595 struct list_head *head) 703 struct list_head *head)
596{ 704{
705 struct net *net = read_pnet(&mrt->net);
597 struct vif_device *v; 706 struct vif_device *v;
598 struct net_device *dev; 707 struct net_device *dev;
599 struct in_device *in_dev; 708 struct in_device *in_dev;
@@ -603,6 +712,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
603 712
604 v = &mrt->vif_table[vifi]; 713 v = &mrt->vif_table[vifi];
605 714
715 if (VIF_EXISTS(mrt, vifi))
716 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
717 mrt->id);
718
606 write_lock_bh(&mrt_lock); 719 write_lock_bh(&mrt_lock);
607 dev = v->dev; 720 dev = v->dev;
608 v->dev = NULL; 721 v->dev = NULL;
@@ -652,10 +765,11 @@ static void ipmr_cache_free_rcu(struct rcu_head *head)
652 kmem_cache_free(mrt_cachep, c); 765 kmem_cache_free(mrt_cachep, c);
653} 766}
654 767
655static inline void ipmr_cache_free(struct mfc_cache *c) 768void ipmr_cache_free(struct mfc_cache *c)
656{ 769{
657 call_rcu(&c->rcu, ipmr_cache_free_rcu); 770 call_rcu(&c->rcu, ipmr_cache_free_rcu);
658} 771}
772EXPORT_SYMBOL(ipmr_cache_free);
659 773
660/* Destroy an unresolved cache entry, killing queued skbs 774/* Destroy an unresolved cache entry, killing queued skbs
661 * and reporting error to netlink readers. 775 * and reporting error to netlink readers.
@@ -689,9 +803,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
689} 803}
690 804
691/* Timer process for the unresolved queue. */ 805/* Timer process for the unresolved queue. */
692static void ipmr_expire_process(unsigned long arg) 806static void ipmr_expire_process(struct timer_list *t)
693{ 807{
694 struct mr_table *mrt = (struct mr_table *)arg; 808 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
695 unsigned long now; 809 unsigned long now;
696 unsigned long expires; 810 unsigned long expires;
697 struct mfc_cache *c, *next; 811 struct mfc_cache *c, *next;
@@ -754,6 +868,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
754 struct vifctl *vifc, int mrtsock) 868 struct vifctl *vifc, int mrtsock)
755{ 869{
756 int vifi = vifc->vifc_vifi; 870 int vifi = vifc->vifc_vifi;
871 struct switchdev_attr attr = {
872 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
873 };
757 struct vif_device *v = &mrt->vif_table[vifi]; 874 struct vif_device *v = &mrt->vif_table[vifi];
758 struct net_device *dev; 875 struct net_device *dev;
759 struct in_device *in_dev; 876 struct in_device *in_dev;
@@ -828,6 +945,13 @@ static int vif_add(struct net *net, struct mr_table *mrt,
828 945
829 /* Fill in the VIF structures */ 946 /* Fill in the VIF structures */
830 947
948 attr.orig_dev = dev;
949 if (!switchdev_port_attr_get(dev, &attr)) {
950 memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
951 v->dev_parent_id.id_len = attr.u.ppid.id_len;
952 } else {
953 v->dev_parent_id.id_len = 0;
954 }
831 v->rate_limit = vifc->vifc_rate_limit; 955 v->rate_limit = vifc->vifc_rate_limit;
832 v->local = vifc->vifc_lcl_addr.s_addr; 956 v->local = vifc->vifc_lcl_addr.s_addr;
833 v->remote = vifc->vifc_rmt_addr.s_addr; 957 v->remote = vifc->vifc_rmt_addr.s_addr;
@@ -851,6 +975,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
851 if (vifi+1 > mrt->maxvif) 975 if (vifi+1 > mrt->maxvif)
852 mrt->maxvif = vifi+1; 976 mrt->maxvif = vifi+1;
853 write_unlock_bh(&mrt_lock); 977 write_unlock_bh(&mrt_lock);
978 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
854 return 0; 979 return 0;
855} 980}
856 981
@@ -949,6 +1074,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
949 if (c) { 1074 if (c) {
950 c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; 1075 c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
951 c->mfc_un.res.minvif = MAXVIFS; 1076 c->mfc_un.res.minvif = MAXVIFS;
1077 refcount_set(&c->mfc_un.res.refcount, 1);
952 } 1078 }
953 return c; 1079 return c;
954} 1080}
@@ -1150,6 +1276,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
1150 1276
1151static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) 1277static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
1152{ 1278{
1279 struct net *net = read_pnet(&mrt->net);
1153 struct mfc_cache *c; 1280 struct mfc_cache *c;
1154 1281
1155 /* The entries are added/deleted only under RTNL */ 1282 /* The entries are added/deleted only under RTNL */
@@ -1161,8 +1288,9 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
1161 return -ENOENT; 1288 return -ENOENT;
1162 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); 1289 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
1163 list_del_rcu(&c->list); 1290 list_del_rcu(&c->list);
1291 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
1164 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1292 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1165 ipmr_cache_free(c); 1293 ipmr_cache_put(c);
1166 1294
1167 return 0; 1295 return 0;
1168} 1296}
@@ -1189,6 +1317,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1189 if (!mrtsock) 1317 if (!mrtsock)
1190 c->mfc_flags |= MFC_STATIC; 1318 c->mfc_flags |= MFC_STATIC;
1191 write_unlock_bh(&mrt_lock); 1319 write_unlock_bh(&mrt_lock);
1320 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
1321 mrt->id);
1192 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1322 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1193 return 0; 1323 return 0;
1194 } 1324 }
@@ -1238,6 +1368,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1238 ipmr_cache_resolve(net, mrt, uc, c); 1368 ipmr_cache_resolve(net, mrt, uc, c);
1239 ipmr_cache_free(uc); 1369 ipmr_cache_free(uc);
1240 } 1370 }
1371 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
1241 mroute_netlink_event(mrt, c, RTM_NEWROUTE); 1372 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1242 return 0; 1373 return 0;
1243} 1374}
@@ -1245,6 +1376,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1245/* Close the multicast socket, and clear the vif tables etc */ 1376/* Close the multicast socket, and clear the vif tables etc */
1246static void mroute_clean_tables(struct mr_table *mrt, bool all) 1377static void mroute_clean_tables(struct mr_table *mrt, bool all)
1247{ 1378{
1379 struct net *net = read_pnet(&mrt->net);
1248 struct mfc_cache *c, *tmp; 1380 struct mfc_cache *c, *tmp;
1249 LIST_HEAD(list); 1381 LIST_HEAD(list);
1250 int i; 1382 int i;
@@ -1263,8 +1395,10 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
1263 continue; 1395 continue;
1264 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); 1396 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
1265 list_del_rcu(&c->list); 1397 list_del_rcu(&c->list);
1398 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
1399 mrt->id);
1266 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1400 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1267 ipmr_cache_free(c); 1401 ipmr_cache_put(c);
1268 } 1402 }
1269 1403
1270 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { 1404 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
@@ -1393,6 +1527,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
1393 case MRT_ADD_MFC: 1527 case MRT_ADD_MFC:
1394 case MRT_DEL_MFC: 1528 case MRT_DEL_MFC:
1395 parent = -1; 1529 parent = -1;
1530 /* fall through */
1396 case MRT_ADD_MFC_PROXY: 1531 case MRT_ADD_MFC_PROXY:
1397 case MRT_DEL_MFC_PROXY: 1532 case MRT_DEL_MFC_PROXY:
1398 if (optlen != sizeof(mfc)) { 1533 if (optlen != sizeof(mfc)) {
@@ -1724,10 +1859,33 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
1724 return dst_output(net, sk, skb); 1859 return dst_output(net, sk, skb);
1725} 1860}
1726 1861
1862#ifdef CONFIG_NET_SWITCHDEV
1863static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
1864 int in_vifi, int out_vifi)
1865{
1866 struct vif_device *out_vif = &mrt->vif_table[out_vifi];
1867 struct vif_device *in_vif = &mrt->vif_table[in_vifi];
1868
1869 if (!skb->offload_mr_fwd_mark)
1870 return false;
1871 if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
1872 return false;
1873 return netdev_phys_item_id_same(&out_vif->dev_parent_id,
1874 &in_vif->dev_parent_id);
1875}
1876#else
1877static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
1878 int in_vifi, int out_vifi)
1879{
1880 return false;
1881}
1882#endif
1883
1727/* Processing handlers for ipmr_forward */ 1884/* Processing handlers for ipmr_forward */
1728 1885
1729static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, 1886static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1730 struct sk_buff *skb, struct mfc_cache *c, int vifi) 1887 int in_vifi, struct sk_buff *skb,
1888 struct mfc_cache *c, int vifi)
1731{ 1889{
1732 const struct iphdr *iph = ip_hdr(skb); 1890 const struct iphdr *iph = ip_hdr(skb);
1733 struct vif_device *vif = &mrt->vif_table[vifi]; 1891 struct vif_device *vif = &mrt->vif_table[vifi];
@@ -1748,6 +1906,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1748 goto out_free; 1906 goto out_free;
1749 } 1907 }
1750 1908
1909 if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
1910 goto out_free;
1911
1751 if (vif->flags & VIFF_TUNNEL) { 1912 if (vif->flags & VIFF_TUNNEL) {
1752 rt = ip_route_output_ports(net, &fl4, NULL, 1913 rt = ip_route_output_ports(net, &fl4, NULL,
1753 vif->remote, vif->local, 1914 vif->remote, vif->local,
@@ -1925,8 +2086,8 @@ forward:
1925 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2086 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1926 2087
1927 if (skb2) 2088 if (skb2)
1928 ipmr_queue_xmit(net, mrt, skb2, cache, 2089 ipmr_queue_xmit(net, mrt, true_vifi,
1929 psend); 2090 skb2, cache, psend);
1930 } 2091 }
1931 psend = ct; 2092 psend = ct;
1932 } 2093 }
@@ -1937,9 +2098,10 @@ last_forward:
1937 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 2098 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1938 2099
1939 if (skb2) 2100 if (skb2)
1940 ipmr_queue_xmit(net, mrt, skb2, cache, psend); 2101 ipmr_queue_xmit(net, mrt, true_vifi, skb2,
2102 cache, psend);
1941 } else { 2103 } else {
1942 ipmr_queue_xmit(net, mrt, skb, cache, psend); 2104 ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
1943 return; 2105 return;
1944 } 2106 }
1945 } 2107 }
@@ -2156,6 +2318,9 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2156 nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) 2318 nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
2157 return -EMSGSIZE; 2319 return -EMSGSIZE;
2158 2320
2321 if (c->mfc_flags & MFC_OFFLOAD)
2322 rtm->rtm_flags |= RTNH_F_OFFLOAD;
2323
2159 if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) 2324 if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
2160 return -EMSGSIZE; 2325 return -EMSGSIZE;
2161 2326
@@ -3048,14 +3213,87 @@ static const struct net_protocol pim_protocol = {
3048}; 3213};
3049#endif 3214#endif
3050 3215
3216static unsigned int ipmr_seq_read(struct net *net)
3217{
3218 ASSERT_RTNL();
3219
3220 return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
3221}
3222
3223static int ipmr_dump(struct net *net, struct notifier_block *nb)
3224{
3225 struct mr_table *mrt;
3226 int err;
3227
3228 err = ipmr_rules_dump(net, nb);
3229 if (err)
3230 return err;
3231
3232 ipmr_for_each_table(mrt, net) {
3233 struct vif_device *v = &mrt->vif_table[0];
3234 struct mfc_cache *mfc;
3235 int vifi;
3236
3237 /* Notifiy on table VIF entries */
3238 read_lock(&mrt_lock);
3239 for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
3240 if (!v->dev)
3241 continue;
3242
3243 call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
3244 v, vifi, mrt->id);
3245 }
3246 read_unlock(&mrt_lock);
3247
3248 /* Notify on table MFC entries */
3249 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
3250 call_ipmr_mfc_entry_notifier(nb, net,
3251 FIB_EVENT_ENTRY_ADD, mfc,
3252 mrt->id);
3253 }
3254
3255 return 0;
3256}
3257
3258static const struct fib_notifier_ops ipmr_notifier_ops_template = {
3259 .family = RTNL_FAMILY_IPMR,
3260 .fib_seq_read = ipmr_seq_read,
3261 .fib_dump = ipmr_dump,
3262 .owner = THIS_MODULE,
3263};
3264
3265static int __net_init ipmr_notifier_init(struct net *net)
3266{
3267 struct fib_notifier_ops *ops;
3268
3269 net->ipv4.ipmr_seq = 0;
3270
3271 ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
3272 if (IS_ERR(ops))
3273 return PTR_ERR(ops);
3274 net->ipv4.ipmr_notifier_ops = ops;
3275
3276 return 0;
3277}
3278
3279static void __net_exit ipmr_notifier_exit(struct net *net)
3280{
3281 fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
3282 net->ipv4.ipmr_notifier_ops = NULL;
3283}
3284
3051/* Setup for IP multicast routing */ 3285/* Setup for IP multicast routing */
3052static int __net_init ipmr_net_init(struct net *net) 3286static int __net_init ipmr_net_init(struct net *net)
3053{ 3287{
3054 int err; 3288 int err;
3055 3289
3290 err = ipmr_notifier_init(net);
3291 if (err)
3292 goto ipmr_notifier_fail;
3293
3056 err = ipmr_rules_init(net); 3294 err = ipmr_rules_init(net);
3057 if (err < 0) 3295 if (err < 0)
3058 goto fail; 3296 goto ipmr_rules_fail;
3059 3297
3060#ifdef CONFIG_PROC_FS 3298#ifdef CONFIG_PROC_FS
3061 err = -ENOMEM; 3299 err = -ENOMEM;
@@ -3072,7 +3310,9 @@ proc_cache_fail:
3072proc_vif_fail: 3310proc_vif_fail:
3073 ipmr_rules_exit(net); 3311 ipmr_rules_exit(net);
3074#endif 3312#endif
3075fail: 3313ipmr_rules_fail:
3314 ipmr_notifier_exit(net);
3315ipmr_notifier_fail:
3076 return err; 3316 return err;
3077} 3317}
3078 3318
@@ -3082,6 +3322,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
3082 remove_proc_entry("ip_mr_cache", net->proc_net); 3322 remove_proc_entry("ip_mr_cache", net->proc_net);
3083 remove_proc_entry("ip_mr_vif", net->proc_net); 3323 remove_proc_entry("ip_mr_vif", net->proc_net);
3084#endif 3324#endif
3325 ipmr_notifier_exit(net);
3085 ipmr_rules_exit(net); 3326 ipmr_rules_exit(net);
3086} 3327}
3087 3328
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 9e2770fd00be..f88221aebc9d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -634,6 +634,25 @@ static void get_counters(const struct xt_table_info *t,
634 } 634 }
635} 635}
636 636
637static void get_old_counters(const struct xt_table_info *t,
638 struct xt_counters counters[])
639{
640 struct arpt_entry *iter;
641 unsigned int cpu, i;
642
643 for_each_possible_cpu(cpu) {
644 i = 0;
645 xt_entry_foreach(iter, t->entries, t->size) {
646 struct xt_counters *tmp;
647
648 tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
649 ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
650 ++i;
651 }
652 cond_resched();
653 }
654}
655
637static struct xt_counters *alloc_counters(const struct xt_table *table) 656static struct xt_counters *alloc_counters(const struct xt_table *table)
638{ 657{
639 unsigned int countersize; 658 unsigned int countersize;
@@ -910,8 +929,7 @@ static int __do_replace(struct net *net, const char *name,
910 (newinfo->number <= oldinfo->initial_entries)) 929 (newinfo->number <= oldinfo->initial_entries))
911 module_put(t->me); 930 module_put(t->me);
912 931
913 /* Get the old counters, and synchronize with replace */ 932 get_old_counters(oldinfo, counters);
914 get_counters(oldinfo, counters);
915 933
916 /* Decrease module usage counts and free resource */ 934 /* Decrease module usage counts and free resource */
917 loc_cpu_old_entry = oldinfo->entries; 935 loc_cpu_old_entry = oldinfo->entries;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 39286e543ee6..4cbe5e80f3bf 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -781,6 +781,26 @@ get_counters(const struct xt_table_info *t,
781 } 781 }
782} 782}
783 783
784static void get_old_counters(const struct xt_table_info *t,
785 struct xt_counters counters[])
786{
787 struct ipt_entry *iter;
788 unsigned int cpu, i;
789
790 for_each_possible_cpu(cpu) {
791 i = 0;
792 xt_entry_foreach(iter, t->entries, t->size) {
793 const struct xt_counters *tmp;
794
795 tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
796 ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
797 ++i; /* macro does multi eval of i */
798 }
799
800 cond_resched();
801 }
802}
803
784static struct xt_counters *alloc_counters(const struct xt_table *table) 804static struct xt_counters *alloc_counters(const struct xt_table *table)
785{ 805{
786 unsigned int countersize; 806 unsigned int countersize;
@@ -1070,8 +1090,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1070 (newinfo->number <= oldinfo->initial_entries)) 1090 (newinfo->number <= oldinfo->initial_entries))
1071 module_put(t->me); 1091 module_put(t->me);
1072 1092
1073 /* Get the old counters, and synchronize with replace */ 1093 get_old_counters(oldinfo, counters);
1074 get_counters(oldinfo, counters);
1075 1094
1076 /* Decrease module usage counts and free resource */ 1095 /* Decrease module usage counts and free resource */
1077 xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) 1096 xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index fe374da4bc13..89af9d88ca21 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -344,7 +344,7 @@ static void ipv4_hooks_unregister(struct net *net)
344 mutex_unlock(&register_ipv4_hooks); 344 mutex_unlock(&register_ipv4_hooks);
345} 345}
346 346
347struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { 347const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
348 .l3proto = PF_INET, 348 .l3proto = PF_INET,
349 .pkt_to_tuple = ipv4_pkt_to_tuple, 349 .pkt_to_tuple = ipv4_pkt_to_tuple,
350 .invert_tuple = ipv4_invert_tuple, 350 .invert_tuple = ipv4_invert_tuple,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index a046c298413a..1849fedd9b81 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -81,7 +81,6 @@ static int icmp_packet(struct nf_conn *ct,
81 const struct sk_buff *skb, 81 const struct sk_buff *skb,
82 unsigned int dataoff, 82 unsigned int dataoff,
83 enum ip_conntrack_info ctinfo, 83 enum ip_conntrack_info ctinfo,
84 u_int8_t pf,
85 unsigned int *timeout) 84 unsigned int *timeout)
86{ 85{
87 /* Do not immediately delete the connection after the first 86 /* Do not immediately delete the connection after the first
@@ -165,6 +164,12 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
165 return NF_ACCEPT; 164 return NF_ACCEPT;
166} 165}
167 166
167static void icmp_error_log(const struct sk_buff *skb, struct net *net,
168 u8 pf, const char *msg)
169{
170 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
171}
172
168/* Small and modified version of icmp_rcv */ 173/* Small and modified version of icmp_rcv */
169static int 174static int
170icmp_error(struct net *net, struct nf_conn *tmpl, 175icmp_error(struct net *net, struct nf_conn *tmpl,
@@ -177,18 +182,14 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
177 /* Not enough header? */ 182 /* Not enough header? */
178 icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); 183 icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
179 if (icmph == NULL) { 184 if (icmph == NULL) {
180 if (LOG_INVALID(net, IPPROTO_ICMP)) 185 icmp_error_log(skb, net, pf, "short packet");
181 nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
182 NULL, "nf_ct_icmp: short packet ");
183 return -NF_ACCEPT; 186 return -NF_ACCEPT;
184 } 187 }
185 188
186 /* See ip_conntrack_proto_tcp.c */ 189 /* See ip_conntrack_proto_tcp.c */
187 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 190 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
188 nf_ip_checksum(skb, hooknum, dataoff, 0)) { 191 nf_ip_checksum(skb, hooknum, dataoff, 0)) {
189 if (LOG_INVALID(net, IPPROTO_ICMP)) 192 icmp_error_log(skb, net, pf, "bad hw icmp checksum");
190 nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
191 "nf_ct_icmp: bad HW ICMP checksum ");
192 return -NF_ACCEPT; 193 return -NF_ACCEPT;
193 } 194 }
194 195
@@ -199,9 +200,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
199 * discarded. 200 * discarded.
200 */ 201 */
201 if (icmph->type > NR_ICMP_TYPES) { 202 if (icmph->type > NR_ICMP_TYPES) {
202 if (LOG_INVALID(net, IPPROTO_ICMP)) 203 icmp_error_log(skb, net, pf, "invalid icmp type");
203 nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
204 "nf_ct_icmp: invalid ICMP type ");
205 return -NF_ACCEPT; 204 return -NF_ACCEPT;
206 } 205 }
207 206
@@ -259,9 +258,14 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
259 return 0; 258 return 0;
260} 259}
261 260
262static int icmp_nlattr_tuple_size(void) 261static unsigned int icmp_nlattr_tuple_size(void)
263{ 262{
264 return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); 263 static unsigned int size __read_mostly;
264
265 if (!size)
266 size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
267
268 return size;
265} 269}
266#endif 270#endif
267 271
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index a0f37b208268..0443ca4120b0 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -276,7 +276,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
276 else 276 else
277 return NF_ACCEPT; 277 return NF_ACCEPT;
278 } 278 }
279 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ 279 /* Only ICMPs can be IP_CT_IS_REPLY: */
280 /* fall through */
280 case IP_CT_NEW: 281 case IP_CT_NEW:
281 /* Seen it before? This can happen for loopback, retrans, 282 /* Seen it before? This can happen for loopback, retrans,
282 * or local packets. 283 * or local packets.
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 127153f1ed8a..9f37c4727861 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -212,7 +212,6 @@ static const struct snmp_mib snmp4_net_list[] = {
212 SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY), 212 SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY),
213 SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY), 213 SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY),
214 SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING), 214 SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING),
215 SNMP_MIB_ITEM("TCPFACKReorder", LINUX_MIB_TCPFACKREORDER),
216 SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER), 215 SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER),
217 SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER), 216 SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER),
218 SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER), 217 SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c0864562083b..43b69af242e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -651,9 +651,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
651 struct fnhe_hash_bucket *hash; 651 struct fnhe_hash_bucket *hash;
652 struct fib_nh_exception *fnhe; 652 struct fib_nh_exception *fnhe;
653 struct rtable *rt; 653 struct rtable *rt;
654 u32 genid, hval;
654 unsigned int i; 655 unsigned int i;
655 int depth; 656 int depth;
656 u32 hval = fnhe_hashfun(daddr); 657
658 genid = fnhe_genid(dev_net(nh->nh_dev));
659 hval = fnhe_hashfun(daddr);
657 660
658 spin_lock_bh(&fnhe_lock); 661 spin_lock_bh(&fnhe_lock);
659 662
@@ -676,12 +679,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
676 } 679 }
677 680
678 if (fnhe) { 681 if (fnhe) {
682 if (fnhe->fnhe_genid != genid)
683 fnhe->fnhe_genid = genid;
679 if (gw) 684 if (gw)
680 fnhe->fnhe_gw = gw; 685 fnhe->fnhe_gw = gw;
681 if (pmtu) { 686 if (pmtu)
682 fnhe->fnhe_pmtu = pmtu; 687 fnhe->fnhe_pmtu = pmtu;
683 fnhe->fnhe_expires = max(1UL, expires); 688 fnhe->fnhe_expires = max(1UL, expires);
684 }
685 /* Update all cached dsts too */ 689 /* Update all cached dsts too */
686 rt = rcu_dereference(fnhe->fnhe_rth_input); 690 rt = rcu_dereference(fnhe->fnhe_rth_input);
687 if (rt) 691 if (rt)
@@ -700,7 +704,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
700 fnhe->fnhe_next = hash->chain; 704 fnhe->fnhe_next = hash->chain;
701 rcu_assign_pointer(hash->chain, fnhe); 705 rcu_assign_pointer(hash->chain, fnhe);
702 } 706 }
703 fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); 707 fnhe->fnhe_genid = genid;
704 fnhe->fnhe_daddr = daddr; 708 fnhe->fnhe_daddr = daddr;
705 fnhe->fnhe_gw = gw; 709 fnhe->fnhe_gw = gw;
706 fnhe->fnhe_pmtu = pmtu; 710 fnhe->fnhe_pmtu = pmtu;
@@ -1250,7 +1254,7 @@ static void set_class_tag(struct rtable *rt, u32 tag)
1250static unsigned int ipv4_default_advmss(const struct dst_entry *dst) 1254static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1251{ 1255{
1252 unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); 1256 unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
1253 unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size, 1257 unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
1254 ip_rt_min_advmss); 1258 ip_rt_min_advmss);
1255 1259
1256 return min(advmss, IPV4_MAX_PMTU - header_size); 1260 return min(advmss, IPV4_MAX_PMTU - header_size);
@@ -3038,7 +3042,6 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
3038 3042
3039int __init ip_rt_init(void) 3043int __init ip_rt_init(void)
3040{ 3044{
3041 int rc = 0;
3042 int cpu; 3045 int cpu;
3043 3046
3044 ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); 3047 ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
@@ -3095,7 +3098,7 @@ int __init ip_rt_init(void)
3095#endif 3098#endif
3096 register_pernet_subsys(&rt_genid_ops); 3099 register_pernet_subsys(&rt_genid_ops);
3097 register_pernet_subsys(&ipv4_inetpeer_ops); 3100 register_pernet_subsys(&ipv4_inetpeer_ops);
3098 return rc; 3101 return 0;
3099} 3102}
3100 3103
3101#ifdef CONFIG_SYSCTL 3104#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 77cf32a80952..fda37f2862c9 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -385,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
385 /* Try to redo what tcp_v4_send_synack did. */ 385 /* Try to redo what tcp_v4_send_synack did. */
386 req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); 386 req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
387 387
388 tcp_select_initial_window(tcp_full_space(sk), req->mss, 388 tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
389 &req->rsk_rcv_wnd, &req->rsk_window_clamp, 389 &req->rsk_rcv_wnd, &req->rsk_window_clamp,
390 ireq->wscale_ok, &rcv_wscale, 390 ireq->wscale_ok, &rcv_wscale,
391 dst_metric(&rt->dst, RTAX_INITRWND)); 391 dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0989e739d098..93e172118a94 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -26,6 +26,7 @@
26#include <net/inet_frag.h> 26#include <net/inet_frag.h>
27#include <net/ping.h> 27#include <net/ping.h>
28#include <net/protocol.h> 28#include <net/protocol.h>
29#include <net/netevent.h>
29 30
30static int zero; 31static int zero;
31static int one = 1; 32static int one = 1;
@@ -200,6 +201,8 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
200static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, 201static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
201 void __user *buffer, size_t *lenp, loff_t *ppos) 202 void __user *buffer, size_t *lenp, loff_t *ppos)
202{ 203{
204 struct net *net = container_of(ctl->data, struct net,
205 ipv4.tcp_congestion_control);
203 char val[TCP_CA_NAME_MAX]; 206 char val[TCP_CA_NAME_MAX];
204 struct ctl_table tbl = { 207 struct ctl_table tbl = {
205 .data = val, 208 .data = val,
@@ -207,11 +210,11 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
207 }; 210 };
208 int ret; 211 int ret;
209 212
210 tcp_get_default_congestion_control(val); 213 tcp_get_default_congestion_control(net, val);
211 214
212 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 215 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
213 if (write && ret == 0) 216 if (write && ret == 0)
214 ret = tcp_set_default_congestion_control(val); 217 ret = tcp_set_default_congestion_control(net, val);
215 return ret; 218 return ret;
216} 219}
217 220
@@ -252,10 +255,12 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
252 return ret; 255 return ret;
253} 256}
254 257
255static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, 258static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
256 void __user *buffer, size_t *lenp, 259 void __user *buffer, size_t *lenp,
257 loff_t *ppos) 260 loff_t *ppos)
258{ 261{
262 struct net *net = container_of(table->data, struct net,
263 ipv4.sysctl_tcp_fastopen);
259 struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; 264 struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
260 struct tcp_fastopen_context *ctxt; 265 struct tcp_fastopen_context *ctxt;
261 int ret; 266 int ret;
@@ -266,7 +271,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
266 return -ENOMEM; 271 return -ENOMEM;
267 272
268 rcu_read_lock(); 273 rcu_read_lock();
269 ctxt = rcu_dereference(tcp_fastopen_ctx); 274 ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
270 if (ctxt) 275 if (ctxt)
271 memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); 276 memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
272 else 277 else
@@ -283,12 +288,8 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
283 ret = -EINVAL; 288 ret = -EINVAL;
284 goto bad_key; 289 goto bad_key;
285 } 290 }
286 /* Generate a dummy secret but don't publish it. This 291 tcp_fastopen_reset_cipher(net, NULL, user_key,
287 * is needed so we don't regenerate a new key on the 292 TCP_FASTOPEN_KEY_LENGTH);
288 * first invocation of tcp_fastopen_cookie_gen
289 */
290 tcp_fastopen_init_key_once(false);
291 tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
292 } 293 }
293 294
294bad_key: 295bad_key:
@@ -359,11 +360,13 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
359 void __user *buffer, 360 void __user *buffer,
360 size_t *lenp, loff_t *ppos) 361 size_t *lenp, loff_t *ppos)
361{ 362{
363 struct net *net = container_of(table->data, struct net,
364 ipv4.sysctl_tcp_fastopen_blackhole_timeout);
362 int ret; 365 int ret;
363 366
364 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 367 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
365 if (write && ret == 0) 368 if (write && ret == 0)
366 tcp_fastopen_active_timeout_reset(); 369 atomic_set(&net->ipv4.tfo_active_disable_times, 0);
367 370
368 return ret; 371 return ret;
369} 372}
@@ -386,15 +389,25 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
386 return ret; 389 return ret;
387} 390}
388 391
392#ifdef CONFIG_IP_ROUTE_MULTIPATH
393static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
394 void __user *buffer, size_t *lenp,
395 loff_t *ppos)
396{
397 struct net *net = container_of(table->data, struct net,
398 ipv4.sysctl_fib_multipath_hash_policy);
399 int ret;
400
401 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
402 if (write && ret == 0)
403 call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
404
405 return ret;
406}
407#endif
408
389static struct ctl_table ipv4_table[] = { 409static struct ctl_table ipv4_table[] = {
390 { 410 {
391 .procname = "tcp_retrans_collapse",
392 .data = &sysctl_tcp_retrans_collapse,
393 .maxlen = sizeof(int),
394 .mode = 0644,
395 .proc_handler = proc_dointvec
396 },
397 {
398 .procname = "tcp_max_orphans", 411 .procname = "tcp_max_orphans",
399 .data = &sysctl_tcp_max_orphans, 412 .data = &sysctl_tcp_max_orphans,
400 .maxlen = sizeof(int), 413 .maxlen = sizeof(int),
@@ -402,48 +415,6 @@ static struct ctl_table ipv4_table[] = {
402 .proc_handler = proc_dointvec 415 .proc_handler = proc_dointvec
403 }, 416 },
404 { 417 {
405 .procname = "tcp_fastopen",
406 .data = &sysctl_tcp_fastopen,
407 .maxlen = sizeof(int),
408 .mode = 0644,
409 .proc_handler = proc_dointvec,
410 },
411 {
412 .procname = "tcp_fastopen_key",
413 .mode = 0600,
414 .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
415 .proc_handler = proc_tcp_fastopen_key,
416 },
417 {
418 .procname = "tcp_fastopen_blackhole_timeout_sec",
419 .data = &sysctl_tcp_fastopen_blackhole_timeout,
420 .maxlen = sizeof(int),
421 .mode = 0644,
422 .proc_handler = proc_tfo_blackhole_detect_timeout,
423 .extra1 = &zero,
424 },
425 {
426 .procname = "tcp_abort_on_overflow",
427 .data = &sysctl_tcp_abort_on_overflow,
428 .maxlen = sizeof(int),
429 .mode = 0644,
430 .proc_handler = proc_dointvec
431 },
432 {
433 .procname = "tcp_stdurg",
434 .data = &sysctl_tcp_stdurg,
435 .maxlen = sizeof(int),
436 .mode = 0644,
437 .proc_handler = proc_dointvec
438 },
439 {
440 .procname = "tcp_rfc1337",
441 .data = &sysctl_tcp_rfc1337,
442 .maxlen = sizeof(int),
443 .mode = 0644,
444 .proc_handler = proc_dointvec
445 },
446 {
447 .procname = "inet_peer_threshold", 418 .procname = "inet_peer_threshold",
448 .data = &inet_peer_threshold, 419 .data = &inet_peer_threshold,
449 .maxlen = sizeof(int), 420 .maxlen = sizeof(int),
@@ -465,34 +436,6 @@ static struct ctl_table ipv4_table[] = {
465 .proc_handler = proc_dointvec_jiffies, 436 .proc_handler = proc_dointvec_jiffies,
466 }, 437 },
467 { 438 {
468 .procname = "tcp_fack",
469 .data = &sysctl_tcp_fack,
470 .maxlen = sizeof(int),
471 .mode = 0644,
472 .proc_handler = proc_dointvec
473 },
474 {
475 .procname = "tcp_recovery",
476 .data = &sysctl_tcp_recovery,
477 .maxlen = sizeof(int),
478 .mode = 0644,
479 .proc_handler = proc_dointvec,
480 },
481 {
482 .procname = "tcp_max_reordering",
483 .data = &sysctl_tcp_max_reordering,
484 .maxlen = sizeof(int),
485 .mode = 0644,
486 .proc_handler = proc_dointvec
487 },
488 {
489 .procname = "tcp_dsack",
490 .data = &sysctl_tcp_dsack,
491 .maxlen = sizeof(int),
492 .mode = 0644,
493 .proc_handler = proc_dointvec
494 },
495 {
496 .procname = "tcp_mem", 439 .procname = "tcp_mem",
497 .maxlen = sizeof(sysctl_tcp_mem), 440 .maxlen = sizeof(sysctl_tcp_mem),
498 .data = &sysctl_tcp_mem, 441 .data = &sysctl_tcp_mem,
@@ -500,113 +443,12 @@ static struct ctl_table ipv4_table[] = {
500 .proc_handler = proc_doulongvec_minmax, 443 .proc_handler = proc_doulongvec_minmax,
501 }, 444 },
502 { 445 {
503 .procname = "tcp_wmem",
504 .data = &sysctl_tcp_wmem,
505 .maxlen = sizeof(sysctl_tcp_wmem),
506 .mode = 0644,
507 .proc_handler = proc_dointvec_minmax,
508 .extra1 = &one,
509 },
510 {
511 .procname = "tcp_rmem",
512 .data = &sysctl_tcp_rmem,
513 .maxlen = sizeof(sysctl_tcp_rmem),
514 .mode = 0644,
515 .proc_handler = proc_dointvec_minmax,
516 .extra1 = &one,
517 },
518 {
519 .procname = "tcp_app_win",
520 .data = &sysctl_tcp_app_win,
521 .maxlen = sizeof(int),
522 .mode = 0644,
523 .proc_handler = proc_dointvec
524 },
525 {
526 .procname = "tcp_adv_win_scale",
527 .data = &sysctl_tcp_adv_win_scale,
528 .maxlen = sizeof(int),
529 .mode = 0644,
530 .proc_handler = proc_dointvec_minmax,
531 .extra1 = &tcp_adv_win_scale_min,
532 .extra2 = &tcp_adv_win_scale_max,
533 },
534 {
535 .procname = "tcp_frto",
536 .data = &sysctl_tcp_frto,
537 .maxlen = sizeof(int),
538 .mode = 0644,
539 .proc_handler = proc_dointvec
540 },
541 {
542 .procname = "tcp_min_rtt_wlen",
543 .data = &sysctl_tcp_min_rtt_wlen,
544 .maxlen = sizeof(int),
545 .mode = 0644,
546 .proc_handler = proc_dointvec
547 },
548 {
549 .procname = "tcp_low_latency", 446 .procname = "tcp_low_latency",
550 .data = &sysctl_tcp_low_latency, 447 .data = &sysctl_tcp_low_latency,
551 .maxlen = sizeof(int), 448 .maxlen = sizeof(int),
552 .mode = 0644, 449 .mode = 0644,
553 .proc_handler = proc_dointvec 450 .proc_handler = proc_dointvec
554 }, 451 },
555 {
556 .procname = "tcp_no_metrics_save",
557 .data = &sysctl_tcp_nometrics_save,
558 .maxlen = sizeof(int),
559 .mode = 0644,
560 .proc_handler = proc_dointvec,
561 },
562 {
563 .procname = "tcp_moderate_rcvbuf",
564 .data = &sysctl_tcp_moderate_rcvbuf,
565 .maxlen = sizeof(int),
566 .mode = 0644,
567 .proc_handler = proc_dointvec,
568 },
569 {
570 .procname = "tcp_tso_win_divisor",
571 .data = &sysctl_tcp_tso_win_divisor,
572 .maxlen = sizeof(int),
573 .mode = 0644,
574 .proc_handler = proc_dointvec,
575 },
576 {
577 .procname = "tcp_congestion_control",
578 .mode = 0644,
579 .maxlen = TCP_CA_NAME_MAX,
580 .proc_handler = proc_tcp_congestion_control,
581 },
582 {
583 .procname = "tcp_workaround_signed_windows",
584 .data = &sysctl_tcp_workaround_signed_windows,
585 .maxlen = sizeof(int),
586 .mode = 0644,
587 .proc_handler = proc_dointvec
588 },
589 {
590 .procname = "tcp_limit_output_bytes",
591 .data = &sysctl_tcp_limit_output_bytes,
592 .maxlen = sizeof(int),
593 .mode = 0644,
594 .proc_handler = proc_dointvec
595 },
596 {
597 .procname = "tcp_challenge_ack_limit",
598 .data = &sysctl_tcp_challenge_ack_limit,
599 .maxlen = sizeof(int),
600 .mode = 0644,
601 .proc_handler = proc_dointvec
602 },
603 {
604 .procname = "tcp_slow_start_after_idle",
605 .data = &sysctl_tcp_slow_start_after_idle,
606 .maxlen = sizeof(int),
607 .mode = 0644,
608 .proc_handler = proc_dointvec
609 },
610#ifdef CONFIG_NETLABEL 452#ifdef CONFIG_NETLABEL
611 { 453 {
612 .procname = "cipso_cache_enable", 454 .procname = "cipso_cache_enable",
@@ -650,65 +492,6 @@ static struct ctl_table ipv4_table[] = {
650 .proc_handler = proc_allowed_congestion_control, 492 .proc_handler = proc_allowed_congestion_control,
651 }, 493 },
652 { 494 {
653 .procname = "tcp_thin_linear_timeouts",
654 .data = &sysctl_tcp_thin_linear_timeouts,
655 .maxlen = sizeof(int),
656 .mode = 0644,
657 .proc_handler = proc_dointvec
658 },
659 {
660 .procname = "tcp_early_retrans",
661 .data = &sysctl_tcp_early_retrans,
662 .maxlen = sizeof(int),
663 .mode = 0644,
664 .proc_handler = proc_dointvec_minmax,
665 .extra1 = &zero,
666 .extra2 = &four,
667 },
668 {
669 .procname = "tcp_min_tso_segs",
670 .data = &sysctl_tcp_min_tso_segs,
671 .maxlen = sizeof(int),
672 .mode = 0644,
673 .proc_handler = proc_dointvec_minmax,
674 .extra1 = &one,
675 .extra2 = &gso_max_segs,
676 },
677 {
678 .procname = "tcp_pacing_ss_ratio",
679 .data = &sysctl_tcp_pacing_ss_ratio,
680 .maxlen = sizeof(int),
681 .mode = 0644,
682 .proc_handler = proc_dointvec_minmax,
683 .extra1 = &zero,
684 .extra2 = &thousand,
685 },
686 {
687 .procname = "tcp_pacing_ca_ratio",
688 .data = &sysctl_tcp_pacing_ca_ratio,
689 .maxlen = sizeof(int),
690 .mode = 0644,
691 .proc_handler = proc_dointvec_minmax,
692 .extra1 = &zero,
693 .extra2 = &thousand,
694 },
695 {
696 .procname = "tcp_autocorking",
697 .data = &sysctl_tcp_autocorking,
698 .maxlen = sizeof(int),
699 .mode = 0644,
700 .proc_handler = proc_dointvec_minmax,
701 .extra1 = &zero,
702 .extra2 = &one,
703 },
704 {
705 .procname = "tcp_invalid_ratelimit",
706 .data = &sysctl_tcp_invalid_ratelimit,
707 .maxlen = sizeof(int),
708 .mode = 0644,
709 .proc_handler = proc_dointvec_ms_jiffies,
710 },
711 {
712 .procname = "tcp_available_ulp", 495 .procname = "tcp_available_ulp",
713 .maxlen = TCP_ULP_BUF_MAX, 496 .maxlen = TCP_ULP_BUF_MAX,
714 .mode = 0444, 497 .mode = 0444,
@@ -977,6 +760,13 @@ static struct ctl_table ipv4_net_table[] = {
977 }, 760 },
978#endif 761#endif
979 { 762 {
763 .procname = "tcp_congestion_control",
764 .data = &init_net.ipv4.tcp_congestion_control,
765 .mode = 0644,
766 .maxlen = TCP_CA_NAME_MAX,
767 .proc_handler = proc_tcp_congestion_control,
768 },
769 {
980 .procname = "tcp_keepalive_time", 770 .procname = "tcp_keepalive_time",
981 .data = &init_net.ipv4.sysctl_tcp_keepalive_time, 771 .data = &init_net.ipv4.sysctl_tcp_keepalive_time,
982 .maxlen = sizeof(int), 772 .maxlen = sizeof(int),
@@ -1086,6 +876,28 @@ static struct ctl_table ipv4_net_table[] = {
1086 .mode = 0644, 876 .mode = 0644,
1087 .proc_handler = proc_dointvec 877 .proc_handler = proc_dointvec
1088 }, 878 },
879 {
880 .procname = "tcp_fastopen",
881 .data = &init_net.ipv4.sysctl_tcp_fastopen,
882 .maxlen = sizeof(int),
883 .mode = 0644,
884 .proc_handler = proc_dointvec,
885 },
886 {
887 .procname = "tcp_fastopen_key",
888 .mode = 0600,
889 .data = &init_net.ipv4.sysctl_tcp_fastopen,
890 .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
891 .proc_handler = proc_tcp_fastopen_key,
892 },
893 {
894 .procname = "tcp_fastopen_blackhole_timeout_sec",
895 .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
896 .maxlen = sizeof(int),
897 .mode = 0644,
898 .proc_handler = proc_tfo_blackhole_detect_timeout,
899 .extra1 = &zero,
900 },
1089#ifdef CONFIG_IP_ROUTE_MULTIPATH 901#ifdef CONFIG_IP_ROUTE_MULTIPATH
1090 { 902 {
1091 .procname = "fib_multipath_use_neigh", 903 .procname = "fib_multipath_use_neigh",
@@ -1101,7 +913,7 @@ static struct ctl_table ipv4_net_table[] = {
1101 .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy, 913 .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
1102 .maxlen = sizeof(int), 914 .maxlen = sizeof(int),
1103 .mode = 0644, 915 .mode = 0644,
1104 .proc_handler = proc_dointvec_minmax, 916 .proc_handler = proc_fib_multipath_hash_policy,
1105 .extra1 = &zero, 917 .extra1 = &zero,
1106 .extra2 = &one, 918 .extra2 = &one,
1107 }, 919 },
@@ -1145,6 +957,216 @@ static struct ctl_table ipv4_net_table[] = {
1145 .mode = 0644, 957 .mode = 0644,
1146 .proc_handler = proc_dointvec 958 .proc_handler = proc_dointvec
1147 }, 959 },
960 {
961 .procname = "tcp_early_retrans",
962 .data = &init_net.ipv4.sysctl_tcp_early_retrans,
963 .maxlen = sizeof(int),
964 .mode = 0644,
965 .proc_handler = proc_dointvec_minmax,
966 .extra1 = &zero,
967 .extra2 = &four,
968 },
969 {
970 .procname = "tcp_recovery",
971 .data = &init_net.ipv4.sysctl_tcp_recovery,
972 .maxlen = sizeof(int),
973 .mode = 0644,
974 .proc_handler = proc_dointvec,
975 },
976 {
977 .procname = "tcp_thin_linear_timeouts",
978 .data = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts,
979 .maxlen = sizeof(int),
980 .mode = 0644,
981 .proc_handler = proc_dointvec
982 },
983 {
984 .procname = "tcp_slow_start_after_idle",
985 .data = &init_net.ipv4.sysctl_tcp_slow_start_after_idle,
986 .maxlen = sizeof(int),
987 .mode = 0644,
988 .proc_handler = proc_dointvec
989 },
990 {
991 .procname = "tcp_retrans_collapse",
992 .data = &init_net.ipv4.sysctl_tcp_retrans_collapse,
993 .maxlen = sizeof(int),
994 .mode = 0644,
995 .proc_handler = proc_dointvec
996 },
997 {
998 .procname = "tcp_stdurg",
999 .data = &init_net.ipv4.sysctl_tcp_stdurg,
1000 .maxlen = sizeof(int),
1001 .mode = 0644,
1002 .proc_handler = proc_dointvec
1003 },
1004 {
1005 .procname = "tcp_rfc1337",
1006 .data = &init_net.ipv4.sysctl_tcp_rfc1337,
1007 .maxlen = sizeof(int),
1008 .mode = 0644,
1009 .proc_handler = proc_dointvec
1010 },
1011 {
1012 .procname = "tcp_abort_on_overflow",
1013 .data = &init_net.ipv4.sysctl_tcp_abort_on_overflow,
1014 .maxlen = sizeof(int),
1015 .mode = 0644,
1016 .proc_handler = proc_dointvec
1017 },
1018 {
1019 .procname = "tcp_fack",
1020 .data = &init_net.ipv4.sysctl_tcp_fack,
1021 .maxlen = sizeof(int),
1022 .mode = 0644,
1023 .proc_handler = proc_dointvec
1024 },
1025 {
1026 .procname = "tcp_max_reordering",
1027 .data = &init_net.ipv4.sysctl_tcp_max_reordering,
1028 .maxlen = sizeof(int),
1029 .mode = 0644,
1030 .proc_handler = proc_dointvec
1031 },
1032 {
1033 .procname = "tcp_dsack",
1034 .data = &init_net.ipv4.sysctl_tcp_dsack,
1035 .maxlen = sizeof(int),
1036 .mode = 0644,
1037 .proc_handler = proc_dointvec
1038 },
1039 {
1040 .procname = "tcp_app_win",
1041 .data = &init_net.ipv4.sysctl_tcp_app_win,
1042 .maxlen = sizeof(int),
1043 .mode = 0644,
1044 .proc_handler = proc_dointvec
1045 },
1046 {
1047 .procname = "tcp_adv_win_scale",
1048 .data = &init_net.ipv4.sysctl_tcp_adv_win_scale,
1049 .maxlen = sizeof(int),
1050 .mode = 0644,
1051 .proc_handler = proc_dointvec_minmax,
1052 .extra1 = &tcp_adv_win_scale_min,
1053 .extra2 = &tcp_adv_win_scale_max,
1054 },
1055 {
1056 .procname = "tcp_frto",
1057 .data = &init_net.ipv4.sysctl_tcp_frto,
1058 .maxlen = sizeof(int),
1059 .mode = 0644,
1060 .proc_handler = proc_dointvec
1061 },
1062 {
1063 .procname = "tcp_no_metrics_save",
1064 .data = &init_net.ipv4.sysctl_tcp_nometrics_save,
1065 .maxlen = sizeof(int),
1066 .mode = 0644,
1067 .proc_handler = proc_dointvec,
1068 },
1069 {
1070 .procname = "tcp_moderate_rcvbuf",
1071 .data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,
1072 .maxlen = sizeof(int),
1073 .mode = 0644,
1074 .proc_handler = proc_dointvec,
1075 },
1076 {
1077 .procname = "tcp_tso_win_divisor",
1078 .data = &init_net.ipv4.sysctl_tcp_tso_win_divisor,
1079 .maxlen = sizeof(int),
1080 .mode = 0644,
1081 .proc_handler = proc_dointvec,
1082 },
1083 {
1084 .procname = "tcp_workaround_signed_windows",
1085 .data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows,
1086 .maxlen = sizeof(int),
1087 .mode = 0644,
1088 .proc_handler = proc_dointvec
1089 },
1090 {
1091 .procname = "tcp_limit_output_bytes",
1092 .data = &init_net.ipv4.sysctl_tcp_limit_output_bytes,
1093 .maxlen = sizeof(int),
1094 .mode = 0644,
1095 .proc_handler = proc_dointvec
1096 },
1097 {
1098 .procname = "tcp_challenge_ack_limit",
1099 .data = &init_net.ipv4.sysctl_tcp_challenge_ack_limit,
1100 .maxlen = sizeof(int),
1101 .mode = 0644,
1102 .proc_handler = proc_dointvec
1103 },
1104 {
1105 .procname = "tcp_min_tso_segs",
1106 .data = &init_net.ipv4.sysctl_tcp_min_tso_segs,
1107 .maxlen = sizeof(int),
1108 .mode = 0644,
1109 .proc_handler = proc_dointvec_minmax,
1110 .extra1 = &one,
1111 .extra2 = &gso_max_segs,
1112 },
1113 {
1114 .procname = "tcp_min_rtt_wlen",
1115 .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
1116 .maxlen = sizeof(int),
1117 .mode = 0644,
1118 .proc_handler = proc_dointvec
1119 },
1120 {
1121 .procname = "tcp_autocorking",
1122 .data = &init_net.ipv4.sysctl_tcp_autocorking,
1123 .maxlen = sizeof(int),
1124 .mode = 0644,
1125 .proc_handler = proc_dointvec_minmax,
1126 .extra1 = &zero,
1127 .extra2 = &one,
1128 },
1129 {
1130 .procname = "tcp_invalid_ratelimit",
1131 .data = &init_net.ipv4.sysctl_tcp_invalid_ratelimit,
1132 .maxlen = sizeof(int),
1133 .mode = 0644,
1134 .proc_handler = proc_dointvec_ms_jiffies,
1135 },
1136 {
1137 .procname = "tcp_pacing_ss_ratio",
1138 .data = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio,
1139 .maxlen = sizeof(int),
1140 .mode = 0644,
1141 .proc_handler = proc_dointvec_minmax,
1142 .extra1 = &zero,
1143 .extra2 = &thousand,
1144 },
1145 {
1146 .procname = "tcp_pacing_ca_ratio",
1147 .data = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio,
1148 .maxlen = sizeof(int),
1149 .mode = 0644,
1150 .proc_handler = proc_dointvec_minmax,
1151 .extra1 = &zero,
1152 .extra2 = &thousand,
1153 },
1154 {
1155 .procname = "tcp_wmem",
1156 .data = &init_net.ipv4.sysctl_tcp_wmem,
1157 .maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem),
1158 .mode = 0644,
1159 .proc_handler = proc_dointvec_minmax,
1160 .extra1 = &one,
1161 },
1162 {
1163 .procname = "tcp_rmem",
1164 .data = &init_net.ipv4.sysctl_tcp_rmem,
1165 .maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem),
1166 .mode = 0644,
1167 .proc_handler = proc_dointvec_minmax,
1168 .extra1 = &one,
1169 },
1148 { } 1170 { }
1149}; 1171};
1150 1172
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5091402720ab..bf97317e6c97 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -270,6 +270,7 @@
270#include <linux/time.h> 270#include <linux/time.h>
271#include <linux/slab.h> 271#include <linux/slab.h>
272#include <linux/errqueue.h> 272#include <linux/errqueue.h>
273#include <linux/static_key.h>
273 274
274#include <net/icmp.h> 275#include <net/icmp.h>
275#include <net/inet_common.h> 276#include <net/inet_common.h>
@@ -282,24 +283,22 @@
282#include <asm/ioctls.h> 283#include <asm/ioctls.h>
283#include <net/busy_poll.h> 284#include <net/busy_poll.h>
284 285
285int sysctl_tcp_min_tso_segs __read_mostly = 2; 286#include <trace/events/tcp.h>
286
287int sysctl_tcp_autocorking __read_mostly = 1;
288 287
289struct percpu_counter tcp_orphan_count; 288struct percpu_counter tcp_orphan_count;
290EXPORT_SYMBOL_GPL(tcp_orphan_count); 289EXPORT_SYMBOL_GPL(tcp_orphan_count);
291 290
292long sysctl_tcp_mem[3] __read_mostly; 291long sysctl_tcp_mem[3] __read_mostly;
293int sysctl_tcp_wmem[3] __read_mostly;
294int sysctl_tcp_rmem[3] __read_mostly;
295
296EXPORT_SYMBOL(sysctl_tcp_mem); 292EXPORT_SYMBOL(sysctl_tcp_mem);
297EXPORT_SYMBOL(sysctl_tcp_rmem);
298EXPORT_SYMBOL(sysctl_tcp_wmem);
299 293
300atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ 294atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
301EXPORT_SYMBOL(tcp_memory_allocated); 295EXPORT_SYMBOL(tcp_memory_allocated);
302 296
297#if IS_ENABLED(CONFIG_SMC)
298DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
299EXPORT_SYMBOL(tcp_have_smc);
300#endif
301
303/* 302/*
304 * Current number of TCP sockets. 303 * Current number of TCP sockets.
305 */ 304 */
@@ -413,8 +412,10 @@ void tcp_init_sock(struct sock *sk)
413 struct tcp_sock *tp = tcp_sk(sk); 412 struct tcp_sock *tp = tcp_sk(sk);
414 413
415 tp->out_of_order_queue = RB_ROOT; 414 tp->out_of_order_queue = RB_ROOT;
415 sk->tcp_rtx_queue = RB_ROOT;
416 tcp_init_xmit_timers(sk); 416 tcp_init_xmit_timers(sk);
417 INIT_LIST_HEAD(&tp->tsq_node); 417 INIT_LIST_HEAD(&tp->tsq_node);
418 INIT_LIST_HEAD(&tp->tsorted_sent_queue);
418 419
419 icsk->icsk_rto = TCP_TIMEOUT_INIT; 420 icsk->icsk_rto = TCP_TIMEOUT_INIT;
420 tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); 421 tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -441,6 +442,7 @@ void tcp_init_sock(struct sock *sk)
441 tcp_assign_congestion_control(sk); 442 tcp_assign_congestion_control(sk);
442 443
443 tp->tsoffset = 0; 444 tp->tsoffset = 0;
445 tp->rack.reo_wnd_steps = 1;
444 446
445 sk->sk_state = TCP_CLOSE; 447 sk->sk_state = TCP_CLOSE;
446 448
@@ -449,15 +451,29 @@ void tcp_init_sock(struct sock *sk)
449 451
450 icsk->icsk_sync_mss = tcp_sync_mss; 452 icsk->icsk_sync_mss = tcp_sync_mss;
451 453
452 sk->sk_sndbuf = sysctl_tcp_wmem[1]; 454 sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
453 sk->sk_rcvbuf = sysctl_tcp_rmem[1]; 455 sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
454 456
455 sk_sockets_allocated_inc(sk); 457 sk_sockets_allocated_inc(sk);
456} 458}
457EXPORT_SYMBOL(tcp_init_sock); 459EXPORT_SYMBOL(tcp_init_sock);
458 460
459static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) 461void tcp_init_transfer(struct sock *sk, int bpf_op)
460{ 462{
463 struct inet_connection_sock *icsk = inet_csk(sk);
464
465 tcp_mtup_init(sk);
466 icsk->icsk_af_ops->rebuild_header(sk);
467 tcp_init_metrics(sk);
468 tcp_call_bpf(sk, bpf_op);
469 tcp_init_congestion_control(sk);
470 tcp_init_buffer_space(sk);
471}
472
473static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
474{
475 struct sk_buff *skb = tcp_write_queue_tail(sk);
476
461 if (tsflags && skb) { 477 if (tsflags && skb) {
462 struct skb_shared_info *shinfo = skb_shinfo(skb); 478 struct skb_shared_info *shinfo = skb_shinfo(skb);
463 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); 479 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -675,7 +691,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
675 int size_goal) 691 int size_goal)
676{ 692{
677 return skb->len < size_goal && 693 return skb->len < size_goal &&
678 sysctl_tcp_autocorking && 694 sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
679 skb != tcp_write_queue_head(sk) && 695 skb != tcp_write_queue_head(sk) &&
680 refcount_read(&sk->sk_wmem_alloc) > skb->truesize; 696 refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
681} 697}
@@ -686,10 +702,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
686 struct tcp_sock *tp = tcp_sk(sk); 702 struct tcp_sock *tp = tcp_sk(sk);
687 struct sk_buff *skb; 703 struct sk_buff *skb;
688 704
689 if (!tcp_send_head(sk))
690 return;
691
692 skb = tcp_write_queue_tail(sk); 705 skb = tcp_write_queue_tail(sk);
706 if (!skb)
707 return;
693 if (!(flags & MSG_MORE) || forced_push(tp)) 708 if (!(flags & MSG_MORE) || forced_push(tp))
694 tcp_mark_push(tp, skb); 709 tcp_mark_push(tp, skb);
695 710
@@ -869,6 +884,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
869 * available to the caller, no more, no less. 884 * available to the caller, no more, no less.
870 */ 885 */
871 skb->reserved_tailroom = skb->end - skb->tail - size; 886 skb->reserved_tailroom = skb->end - skb->tail - size;
887 INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
872 return skb; 888 return skb;
873 } 889 }
874 __kfree_skb(skb); 890 __kfree_skb(skb);
@@ -948,14 +964,14 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
948 int copy, i; 964 int copy, i;
949 bool can_coalesce; 965 bool can_coalesce;
950 966
951 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 || 967 if (!skb || (copy = size_goal - skb->len) <= 0 ||
952 !tcp_skb_can_collapse_to(skb)) { 968 !tcp_skb_can_collapse_to(skb)) {
953new_segment: 969new_segment:
954 if (!sk_stream_memory_free(sk)) 970 if (!sk_stream_memory_free(sk))
955 goto wait_for_sndbuf; 971 goto wait_for_sndbuf;
956 972
957 skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, 973 skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
958 skb_queue_empty(&sk->sk_write_queue)); 974 tcp_rtx_and_write_queues_empty(sk));
959 if (!skb) 975 if (!skb)
960 goto wait_for_memory; 976 goto wait_for_memory;
961 977
@@ -1027,7 +1043,7 @@ wait_for_memory:
1027 1043
1028out: 1044out:
1029 if (copied) { 1045 if (copied) {
1030 tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk)); 1046 tcp_tx_timestamp(sk, sk->sk_tsflags);
1031 if (!(flags & MSG_SENDPAGE_NOTLAST)) 1047 if (!(flags & MSG_SENDPAGE_NOTLAST))
1032 tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); 1048 tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
1033 } 1049 }
@@ -1126,7 +1142,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
1126 struct sockaddr *uaddr = msg->msg_name; 1142 struct sockaddr *uaddr = msg->msg_name;
1127 int err, flags; 1143 int err, flags;
1128 1144
1129 if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || 1145 if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
1130 (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && 1146 (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
1131 uaddr->sa_family == AF_UNSPEC)) 1147 uaddr->sa_family == AF_UNSPEC))
1132 return -EOPNOTSUPP; 1148 return -EOPNOTSUPP;
@@ -1183,7 +1199,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
1183 goto out_err; 1199 goto out_err;
1184 } 1200 }
1185 1201
1186 skb = tcp_send_head(sk) ? tcp_write_queue_tail(sk) : NULL; 1202 skb = tcp_write_queue_tail(sk);
1187 uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb)); 1203 uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
1188 if (!uarg) { 1204 if (!uarg) {
1189 err = -ENOBUFS; 1205 err = -ENOBUFS;
@@ -1259,7 +1275,7 @@ restart:
1259 int max = size_goal; 1275 int max = size_goal;
1260 1276
1261 skb = tcp_write_queue_tail(sk); 1277 skb = tcp_write_queue_tail(sk);
1262 if (tcp_send_head(sk)) { 1278 if (skb) {
1263 if (skb->ip_summed == CHECKSUM_NONE) 1279 if (skb->ip_summed == CHECKSUM_NONE)
1264 max = mss_now; 1280 max = mss_now;
1265 copy = max - skb->len; 1281 copy = max - skb->len;
@@ -1279,7 +1295,7 @@ new_segment:
1279 process_backlog = false; 1295 process_backlog = false;
1280 goto restart; 1296 goto restart;
1281 } 1297 }
1282 first_skb = skb_queue_empty(&sk->sk_write_queue); 1298 first_skb = tcp_rtx_and_write_queues_empty(sk);
1283 skb = sk_stream_alloc_skb(sk, 1299 skb = sk_stream_alloc_skb(sk,
1284 select_size(sk, sg, first_skb), 1300 select_size(sk, sg, first_skb),
1285 sk->sk_allocation, 1301 sk->sk_allocation,
@@ -1404,7 +1420,7 @@ wait_for_memory:
1404 1420
1405out: 1421out:
1406 if (copied) { 1422 if (copied) {
1407 tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk)); 1423 tcp_tx_timestamp(sk, sockc.tsflags);
1408 tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); 1424 tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
1409 } 1425 }
1410out_nopush: 1426out_nopush:
@@ -1505,6 +1521,13 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
1505 1521
1506 /* XXX -- need to support SO_PEEK_OFF */ 1522 /* XXX -- need to support SO_PEEK_OFF */
1507 1523
1524 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
1525 err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
1526 if (err)
1527 return err;
1528 copied += skb->len;
1529 }
1530
1508 skb_queue_walk(&sk->sk_write_queue, skb) { 1531 skb_queue_walk(&sk->sk_write_queue, skb) {
1509 err = skb_copy_datagram_msg(skb, 0, msg, skb->len); 1532 err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
1510 if (err) 1533 if (err)
@@ -2017,6 +2040,8 @@ void tcp_set_state(struct sock *sk, int state)
2017{ 2040{
2018 int oldstate = sk->sk_state; 2041 int oldstate = sk->sk_state;
2019 2042
2043 trace_tcp_set_state(sk, oldstate, state);
2044
2020 switch (state) { 2045 switch (state) {
2021 case TCP_ESTABLISHED: 2046 case TCP_ESTABLISHED:
2022 if (oldstate != TCP_ESTABLISHED) 2047 if (oldstate != TCP_ESTABLISHED)
@@ -2304,6 +2329,37 @@ static inline bool tcp_need_reset(int state)
2304 TCPF_FIN_WAIT2 | TCPF_SYN_RECV); 2329 TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
2305} 2330}
2306 2331
2332static void tcp_rtx_queue_purge(struct sock *sk)
2333{
2334 struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
2335
2336 while (p) {
2337 struct sk_buff *skb = rb_to_skb(p);
2338
2339 p = rb_next(p);
2340 /* Since we are deleting whole queue, no need to
2341 * list_del(&skb->tcp_tsorted_anchor)
2342 */
2343 tcp_rtx_queue_unlink(skb, sk);
2344 sk_wmem_free_skb(sk, skb);
2345 }
2346}
2347
2348void tcp_write_queue_purge(struct sock *sk)
2349{
2350 struct sk_buff *skb;
2351
2352 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
2353 while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
2354 tcp_skb_tsorted_anchor_cleanup(skb);
2355 sk_wmem_free_skb(sk, skb);
2356 }
2357 tcp_rtx_queue_purge(sk);
2358 INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
2359 sk_mem_reclaim(sk);
2360 tcp_clear_all_retrans_hints(tcp_sk(sk));
2361}
2362
2307int tcp_disconnect(struct sock *sk, int flags) 2363int tcp_disconnect(struct sock *sk, int flags)
2308{ 2364{
2309 struct inet_sock *inet = inet_sk(sk); 2365 struct inet_sock *inet = inet_sk(sk);
@@ -2362,7 +2418,6 @@ int tcp_disconnect(struct sock *sk, int flags)
2362 * issue in __tcp_select_window() 2418 * issue in __tcp_select_window()
2363 */ 2419 */
2364 icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; 2420 icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
2365 tcp_init_send_head(sk);
2366 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); 2421 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
2367 __sk_dst_reset(sk); 2422 __sk_dst_reset(sk);
2368 dst_release(sk->sk_rx_dst); 2423 dst_release(sk->sk_rx_dst);
@@ -2454,8 +2509,6 @@ static int tcp_repair_options_est(struct sock *sk,
2454 return -EINVAL; 2509 return -EINVAL;
2455 2510
2456 tp->rx_opt.sack_ok |= TCP_SACK_SEEN; 2511 tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
2457 if (sysctl_tcp_fack)
2458 tcp_enable_fack(tp);
2459 break; 2512 break;
2460 case TCPOPT_TIMESTAMP: 2513 case TCPOPT_TIMESTAMP:
2461 if (opt.opt_val != 0) 2514 if (opt.opt_val != 0)
@@ -2518,6 +2571,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2518 release_sock(sk); 2571 release_sock(sk);
2519 return err; 2572 return err;
2520 } 2573 }
2574 case TCP_FASTOPEN_KEY: {
2575 __u8 key[TCP_FASTOPEN_KEY_LENGTH];
2576
2577 if (optlen != sizeof(key))
2578 return -EINVAL;
2579
2580 if (copy_from_user(key, optval, optlen))
2581 return -EFAULT;
2582
2583 return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
2584 }
2521 default: 2585 default:
2522 /* fallthru */ 2586 /* fallthru */
2523 break; 2587 break;
@@ -2749,7 +2813,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2749 case TCP_FASTOPEN: 2813 case TCP_FASTOPEN:
2750 if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | 2814 if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
2751 TCPF_LISTEN))) { 2815 TCPF_LISTEN))) {
2752 tcp_fastopen_init_key_once(true); 2816 tcp_fastopen_init_key_once(net);
2753 2817
2754 fastopen_queue_tune(sk, val); 2818 fastopen_queue_tune(sk, val);
2755 } else { 2819 } else {
@@ -2759,7 +2823,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2759 case TCP_FASTOPEN_CONNECT: 2823 case TCP_FASTOPEN_CONNECT:
2760 if (val > 1 || val < 0) { 2824 if (val > 1 || val < 0) {
2761 err = -EINVAL; 2825 err = -EINVAL;
2762 } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { 2826 } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
2763 if (sk->sk_state == TCP_CLOSE) 2827 if (sk->sk_state == TCP_CLOSE)
2764 tp->fastopen_connect = val; 2828 tp->fastopen_connect = val;
2765 else 2829 else
@@ -2768,6 +2832,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2768 err = -EOPNOTSUPP; 2832 err = -EOPNOTSUPP;
2769 } 2833 }
2770 break; 2834 break;
2835 case TCP_FASTOPEN_NO_COOKIE:
2836 if (val > 1 || val < 0)
2837 err = -EINVAL;
2838 else if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
2839 err = -EINVAL;
2840 else
2841 tp->fastopen_no_cookie = val;
2842 break;
2771 case TCP_TIMESTAMP: 2843 case TCP_TIMESTAMP:
2772 if (!tp->repair) 2844 if (!tp->repair)
2773 err = -EPERM; 2845 err = -EPERM;
@@ -2905,7 +2977,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2905 2977
2906 info->tcpi_lost = tp->lost_out; 2978 info->tcpi_lost = tp->lost_out;
2907 info->tcpi_retrans = tp->retrans_out; 2979 info->tcpi_retrans = tp->retrans_out;
2908 info->tcpi_fackets = tp->fackets_out;
2909 2980
2910 now = tcp_jiffies32; 2981 now = tcp_jiffies32;
2911 info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); 2982 info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
@@ -3104,6 +3175,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
3104 return -EFAULT; 3175 return -EFAULT;
3105 return 0; 3176 return 0;
3106 3177
3178 case TCP_FASTOPEN_KEY: {
3179 __u8 key[TCP_FASTOPEN_KEY_LENGTH];
3180 struct tcp_fastopen_context *ctx;
3181
3182 if (get_user(len, optlen))
3183 return -EFAULT;
3184
3185 rcu_read_lock();
3186 ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
3187 if (ctx)
3188 memcpy(key, ctx->key, sizeof(key));
3189 else
3190 len = 0;
3191 rcu_read_unlock();
3192
3193 len = min_t(unsigned int, len, sizeof(key));
3194 if (put_user(len, optlen))
3195 return -EFAULT;
3196 if (copy_to_user(optval, key, len))
3197 return -EFAULT;
3198 return 0;
3199 }
3107 case TCP_THIN_LINEAR_TIMEOUTS: 3200 case TCP_THIN_LINEAR_TIMEOUTS:
3108 val = tp->thin_lto; 3201 val = tp->thin_lto;
3109 break; 3202 break;
@@ -3166,6 +3259,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
3166 val = tp->fastopen_connect; 3259 val = tp->fastopen_connect;
3167 break; 3260 break;
3168 3261
3262 case TCP_FASTOPEN_NO_COOKIE:
3263 val = tp->fastopen_no_cookie;
3264 break;
3265
3169 case TCP_TIMESTAMP: 3266 case TCP_TIMESTAMP:
3170 val = tcp_time_stamp_raw() + tp->tsoffset; 3267 val = tcp_time_stamp_raw() + tp->tsoffset;
3171 break; 3268 break;
@@ -3531,13 +3628,13 @@ void __init tcp_init(void)
3531 max_wshare = min(4UL*1024*1024, limit); 3628 max_wshare = min(4UL*1024*1024, limit);
3532 max_rshare = min(6UL*1024*1024, limit); 3629 max_rshare = min(6UL*1024*1024, limit);
3533 3630
3534 sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; 3631 init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
3535 sysctl_tcp_wmem[1] = 16*1024; 3632 init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
3536 sysctl_tcp_wmem[2] = max(64*1024, max_wshare); 3633 init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
3537 3634
3538 sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; 3635 init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
3539 sysctl_tcp_rmem[1] = 87380; 3636 init_net.ipv4.sysctl_tcp_rmem[1] = 87380;
3540 sysctl_tcp_rmem[2] = max(87380, max_rshare); 3637 init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare);
3541 3638
3542 pr_info("Hash tables configured (established %u bind %u)\n", 3639 pr_info("Hash tables configured (established %u bind %u)\n",
3543 tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); 3640 tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 66ac69f7bd19..06fbe102a425 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -389,7 +389,7 @@ static void tcp_cdg_release(struct sock *sk)
389 kfree(ca->gradients); 389 kfree(ca->gradients);
390} 390}
391 391
392struct tcp_congestion_ops tcp_cdg __read_mostly = { 392static struct tcp_congestion_ops tcp_cdg __read_mostly = {
393 .cong_avoid = tcp_cdg_cong_avoid, 393 .cong_avoid = tcp_cdg_cong_avoid,
394 .cwnd_event = tcp_cdg_cwnd_event, 394 .cwnd_event = tcp_cdg_cwnd_event,
395 .pkts_acked = tcp_cdg_acked, 395 .pkts_acked = tcp_cdg_acked,
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2f26124fd160..bc6c02f16243 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -33,9 +33,11 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
33} 33}
34 34
35/* Must be called with rcu lock held */ 35/* Must be called with rcu lock held */
36static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name) 36static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
37 const char *name)
37{ 38{
38 const struct tcp_congestion_ops *ca = tcp_ca_find(name); 39 struct tcp_congestion_ops *ca = tcp_ca_find(name);
40
39#ifdef CONFIG_MODULES 41#ifdef CONFIG_MODULES
40 if (!ca && capable(CAP_NET_ADMIN)) { 42 if (!ca && capable(CAP_NET_ADMIN)) {
41 rcu_read_unlock(); 43 rcu_read_unlock();
@@ -115,7 +117,7 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
115} 117}
116EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); 118EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
117 119
118u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) 120u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
119{ 121{
120 const struct tcp_congestion_ops *ca; 122 const struct tcp_congestion_ops *ca;
121 u32 key = TCP_CA_UNSPEC; 123 u32 key = TCP_CA_UNSPEC;
@@ -123,7 +125,7 @@ u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
123 might_sleep(); 125 might_sleep();
124 126
125 rcu_read_lock(); 127 rcu_read_lock();
126 ca = __tcp_ca_find_autoload(name); 128 ca = tcp_ca_find_autoload(net, name);
127 if (ca) { 129 if (ca) {
128 key = ca->key; 130 key = ca->key;
129 *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; 131 *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
@@ -153,23 +155,18 @@ EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key);
153/* Assign choice of congestion control. */ 155/* Assign choice of congestion control. */
154void tcp_assign_congestion_control(struct sock *sk) 156void tcp_assign_congestion_control(struct sock *sk)
155{ 157{
158 struct net *net = sock_net(sk);
156 struct inet_connection_sock *icsk = inet_csk(sk); 159 struct inet_connection_sock *icsk = inet_csk(sk);
157 struct tcp_congestion_ops *ca; 160 const struct tcp_congestion_ops *ca;
158 161
159 rcu_read_lock(); 162 rcu_read_lock();
160 list_for_each_entry_rcu(ca, &tcp_cong_list, list) { 163 ca = rcu_dereference(net->ipv4.tcp_congestion_control);
161 if (likely(try_module_get(ca->owner))) { 164 if (unlikely(!try_module_get(ca->owner)))
162 icsk->icsk_ca_ops = ca; 165 ca = &tcp_reno;
163 goto out; 166 icsk->icsk_ca_ops = ca;
164 }
165 /* Fallback to next available. The last really
166 * guaranteed fallback is Reno from this list.
167 */
168 }
169out:
170 rcu_read_unlock(); 167 rcu_read_unlock();
171 memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
172 168
169 memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
173 if (ca->flags & TCP_CONG_NEEDS_ECN) 170 if (ca->flags & TCP_CONG_NEEDS_ECN)
174 INET_ECN_xmit(sk); 171 INET_ECN_xmit(sk);
175 else 172 else
@@ -214,29 +211,27 @@ void tcp_cleanup_congestion_control(struct sock *sk)
214} 211}
215 212
216/* Used by sysctl to change default congestion control */ 213/* Used by sysctl to change default congestion control */
217int tcp_set_default_congestion_control(const char *name) 214int tcp_set_default_congestion_control(struct net *net, const char *name)
218{ 215{
219 struct tcp_congestion_ops *ca; 216 struct tcp_congestion_ops *ca;
220 int ret = -ENOENT; 217 const struct tcp_congestion_ops *prev;
221 218 int ret;
222 spin_lock(&tcp_cong_list_lock);
223 ca = tcp_ca_find(name);
224#ifdef CONFIG_MODULES
225 if (!ca && capable(CAP_NET_ADMIN)) {
226 spin_unlock(&tcp_cong_list_lock);
227 219
228 request_module("tcp_%s", name); 220 rcu_read_lock();
229 spin_lock(&tcp_cong_list_lock); 221 ca = tcp_ca_find_autoload(net, name);
230 ca = tcp_ca_find(name); 222 if (!ca) {
231 } 223 ret = -ENOENT;
232#endif 224 } else if (!try_module_get(ca->owner)) {
225 ret = -EBUSY;
226 } else {
227 prev = xchg(&net->ipv4.tcp_congestion_control, ca);
228 if (prev)
229 module_put(prev->owner);
233 230
234 if (ca) { 231 ca->flags |= TCP_CONG_NON_RESTRICTED;
235 ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */
236 list_move(&ca->list, &tcp_cong_list);
237 ret = 0; 232 ret = 0;
238 } 233 }
239 spin_unlock(&tcp_cong_list_lock); 234 rcu_read_unlock();
240 235
241 return ret; 236 return ret;
242} 237}
@@ -244,7 +239,8 @@ int tcp_set_default_congestion_control(const char *name)
244/* Set default value from kernel configuration at bootup */ 239/* Set default value from kernel configuration at bootup */
245static int __init tcp_congestion_default(void) 240static int __init tcp_congestion_default(void)
246{ 241{
247 return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); 242 return tcp_set_default_congestion_control(&init_net,
243 CONFIG_DEFAULT_TCP_CONG);
248} 244}
249late_initcall(tcp_congestion_default); 245late_initcall(tcp_congestion_default);
250 246
@@ -264,14 +260,12 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen)
264} 260}
265 261
266/* Get current default congestion control */ 262/* Get current default congestion control */
267void tcp_get_default_congestion_control(char *name) 263void tcp_get_default_congestion_control(struct net *net, char *name)
268{ 264{
269 struct tcp_congestion_ops *ca; 265 const struct tcp_congestion_ops *ca;
270 /* We will always have reno... */
271 BUG_ON(list_empty(&tcp_cong_list));
272 266
273 rcu_read_lock(); 267 rcu_read_lock();
274 ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list); 268 ca = rcu_dereference(net->ipv4.tcp_congestion_control);
275 strncpy(name, ca->name, TCP_CA_NAME_MAX); 269 strncpy(name, ca->name, TCP_CA_NAME_MAX);
276 rcu_read_unlock(); 270 rcu_read_unlock();
277} 271}
@@ -351,12 +345,14 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo
351 if (!load) 345 if (!load)
352 ca = tcp_ca_find(name); 346 ca = tcp_ca_find(name);
353 else 347 else
354 ca = __tcp_ca_find_autoload(name); 348 ca = tcp_ca_find_autoload(sock_net(sk), name);
349
355 /* No change asking for existing value */ 350 /* No change asking for existing value */
356 if (ca == icsk->icsk_ca_ops) { 351 if (ca == icsk->icsk_ca_ops) {
357 icsk->icsk_ca_setsockopt = 1; 352 icsk->icsk_ca_setsockopt = 1;
358 goto out; 353 goto out;
359 } 354 }
355
360 if (!ca) { 356 if (!ca) {
361 err = -ENOENT; 357 err = -ENOENT;
362 } else if (!load) { 358 } else if (!load) {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fbbeda647774..78c192ee03a4 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -10,15 +10,18 @@
10#include <net/inetpeer.h> 10#include <net/inetpeer.h>
11#include <net/tcp.h> 11#include <net/tcp.h>
12 12
13int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE; 13void tcp_fastopen_init_key_once(struct net *net)
14
15struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
16
17static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
18
19void tcp_fastopen_init_key_once(bool publish)
20{ 14{
21 static u8 key[TCP_FASTOPEN_KEY_LENGTH]; 15 u8 key[TCP_FASTOPEN_KEY_LENGTH];
16 struct tcp_fastopen_context *ctxt;
17
18 rcu_read_lock();
19 ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
20 if (ctxt) {
21 rcu_read_unlock();
22 return;
23 }
24 rcu_read_unlock();
22 25
23 /* tcp_fastopen_reset_cipher publishes the new context 26 /* tcp_fastopen_reset_cipher publishes the new context
24 * atomically, so we allow this race happening here. 27 * atomically, so we allow this race happening here.
@@ -26,8 +29,8 @@ void tcp_fastopen_init_key_once(bool publish)
26 * All call sites of tcp_fastopen_cookie_gen also check 29 * All call sites of tcp_fastopen_cookie_gen also check
27 * for a valid cookie, so this is an acceptable risk. 30 * for a valid cookie, so this is an acceptable risk.
28 */ 31 */
29 if (net_get_random_once(key, sizeof(key)) && publish) 32 get_random_bytes(key, sizeof(key));
30 tcp_fastopen_reset_cipher(key, sizeof(key)); 33 tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key));
31} 34}
32 35
33static void tcp_fastopen_ctx_free(struct rcu_head *head) 36static void tcp_fastopen_ctx_free(struct rcu_head *head)
@@ -38,10 +41,37 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head)
38 kfree(ctx); 41 kfree(ctx);
39} 42}
40 43
41int tcp_fastopen_reset_cipher(void *key, unsigned int len) 44void tcp_fastopen_destroy_cipher(struct sock *sk)
45{
46 struct tcp_fastopen_context *ctx;
47
48 ctx = rcu_dereference_protected(
49 inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1);
50 if (ctx)
51 call_rcu(&ctx->rcu, tcp_fastopen_ctx_free);
52}
53
54void tcp_fastopen_ctx_destroy(struct net *net)
55{
56 struct tcp_fastopen_context *ctxt;
57
58 spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
59
60 ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
61 lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
62 rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
63 spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
64
65 if (ctxt)
66 call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
67}
68
69int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
70 void *key, unsigned int len)
42{ 71{
43 int err;
44 struct tcp_fastopen_context *ctx, *octx; 72 struct tcp_fastopen_context *ctx, *octx;
73 struct fastopen_queue *q;
74 int err;
45 75
46 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 76 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
47 if (!ctx) 77 if (!ctx)
@@ -62,26 +92,37 @@ error: kfree(ctx);
62 } 92 }
63 memcpy(ctx->key, key, len); 93 memcpy(ctx->key, key, len);
64 94
65 spin_lock(&tcp_fastopen_ctx_lock);
66 95
67 octx = rcu_dereference_protected(tcp_fastopen_ctx, 96 spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
68 lockdep_is_held(&tcp_fastopen_ctx_lock)); 97 if (sk) {
69 rcu_assign_pointer(tcp_fastopen_ctx, ctx); 98 q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
70 spin_unlock(&tcp_fastopen_ctx_lock); 99 octx = rcu_dereference_protected(q->ctx,
100 lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
101 rcu_assign_pointer(q->ctx, ctx);
102 } else {
103 octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
104 lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
105 rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
106 }
107 spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
71 108
72 if (octx) 109 if (octx)
73 call_rcu(&octx->rcu, tcp_fastopen_ctx_free); 110 call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
74 return err; 111 return err;
75} 112}
76 113
77static bool __tcp_fastopen_cookie_gen(const void *path, 114static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
78 struct tcp_fastopen_cookie *foc) 115 struct tcp_fastopen_cookie *foc)
79{ 116{
80 struct tcp_fastopen_context *ctx; 117 struct tcp_fastopen_context *ctx;
81 bool ok = false; 118 bool ok = false;
82 119
83 rcu_read_lock(); 120 rcu_read_lock();
84 ctx = rcu_dereference(tcp_fastopen_ctx); 121
122 ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
123 if (!ctx)
124 ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
125
85 if (ctx) { 126 if (ctx) {
86 crypto_cipher_encrypt_one(ctx->tfm, foc->val, path); 127 crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
87 foc->len = TCP_FASTOPEN_COOKIE_SIZE; 128 foc->len = TCP_FASTOPEN_COOKIE_SIZE;
@@ -97,7 +138,8 @@ static bool __tcp_fastopen_cookie_gen(const void *path,
97 * 138 *
98 * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE. 139 * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
99 */ 140 */
100static bool tcp_fastopen_cookie_gen(struct request_sock *req, 141static bool tcp_fastopen_cookie_gen(struct sock *sk,
142 struct request_sock *req,
101 struct sk_buff *syn, 143 struct sk_buff *syn,
102 struct tcp_fastopen_cookie *foc) 144 struct tcp_fastopen_cookie *foc)
103{ 145{
@@ -105,7 +147,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
105 const struct iphdr *iph = ip_hdr(syn); 147 const struct iphdr *iph = ip_hdr(syn);
106 148
107 __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 }; 149 __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
108 return __tcp_fastopen_cookie_gen(path, foc); 150 return __tcp_fastopen_cookie_gen(sk, path, foc);
109 } 151 }
110 152
111#if IS_ENABLED(CONFIG_IPV6) 153#if IS_ENABLED(CONFIG_IPV6)
@@ -113,13 +155,13 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
113 const struct ipv6hdr *ip6h = ipv6_hdr(syn); 155 const struct ipv6hdr *ip6h = ipv6_hdr(syn);
114 struct tcp_fastopen_cookie tmp; 156 struct tcp_fastopen_cookie tmp;
115 157
116 if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { 158 if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) {
117 struct in6_addr *buf = &tmp.addr; 159 struct in6_addr *buf = &tmp.addr;
118 int i; 160 int i;
119 161
120 for (i = 0; i < 4; i++) 162 for (i = 0; i < 4; i++)
121 buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; 163 buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
122 return __tcp_fastopen_cookie_gen(buf, foc); 164 return __tcp_fastopen_cookie_gen(sk, buf, foc);
123 } 165 }
124 } 166 }
125#endif 167#endif
@@ -217,12 +259,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
217 refcount_set(&req->rsk_refcnt, 2); 259 refcount_set(&req->rsk_refcnt, 2);
218 260
219 /* Now finish processing the fastopen child socket. */ 261 /* Now finish processing the fastopen child socket. */
220 inet_csk(child)->icsk_af_ops->rebuild_header(child); 262 tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
221 tcp_init_congestion_control(child);
222 tcp_mtup_init(child);
223 tcp_init_metrics(child);
224 tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
225 tcp_init_buffer_space(child);
226 263
227 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; 264 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
228 265
@@ -272,33 +309,45 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
272 return true; 309 return true;
273} 310}
274 311
312static bool tcp_fastopen_no_cookie(const struct sock *sk,
313 const struct dst_entry *dst,
314 int flag)
315{
316 return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
317 tcp_sk(sk)->fastopen_no_cookie ||
318 (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
319}
320
275/* Returns true if we should perform Fast Open on the SYN. The cookie (foc) 321/* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
276 * may be updated and return the client in the SYN-ACK later. E.g., Fast Open 322 * may be updated and return the client in the SYN-ACK later. E.g., Fast Open
277 * cookie request (foc->len == 0). 323 * cookie request (foc->len == 0).
278 */ 324 */
279struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, 325struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
280 struct request_sock *req, 326 struct request_sock *req,
281 struct tcp_fastopen_cookie *foc) 327 struct tcp_fastopen_cookie *foc,
328 const struct dst_entry *dst)
282{ 329{
283 struct tcp_fastopen_cookie valid_foc = { .len = -1 };
284 bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; 330 bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
331 int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
332 struct tcp_fastopen_cookie valid_foc = { .len = -1 };
285 struct sock *child; 333 struct sock *child;
286 334
287 if (foc->len == 0) /* Client requests a cookie */ 335 if (foc->len == 0) /* Client requests a cookie */
288 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); 336 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
289 337
290 if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && 338 if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
291 (syn_data || foc->len >= 0) && 339 (syn_data || foc->len >= 0) &&
292 tcp_fastopen_queue_check(sk))) { 340 tcp_fastopen_queue_check(sk))) {
293 foc->len = -1; 341 foc->len = -1;
294 return NULL; 342 return NULL;
295 } 343 }
296 344
297 if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) 345 if (syn_data &&
346 tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
298 goto fastopen; 347 goto fastopen;
299 348
300 if (foc->len >= 0 && /* Client presents or requests a cookie */ 349 if (foc->len >= 0 && /* Client presents or requests a cookie */
301 tcp_fastopen_cookie_gen(req, skb, &valid_foc) && 350 tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) &&
302 foc->len == TCP_FASTOPEN_COOKIE_SIZE && 351 foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
303 foc->len == valid_foc.len && 352 foc->len == valid_foc.len &&
304 !memcmp(foc->val, valid_foc.val, foc->len)) { 353 !memcmp(foc->val, valid_foc.val, foc->len)) {
@@ -331,6 +380,7 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
331 struct tcp_fastopen_cookie *cookie) 380 struct tcp_fastopen_cookie *cookie)
332{ 381{
333 unsigned long last_syn_loss = 0; 382 unsigned long last_syn_loss = 0;
383 const struct dst_entry *dst;
334 int syn_loss = 0; 384 int syn_loss = 0;
335 385
336 tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss); 386 tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss);
@@ -348,7 +398,9 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
348 return false; 398 return false;
349 } 399 }
350 400
351 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { 401 dst = __sk_dst_get(sk);
402
403 if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) {
352 cookie->len = -1; 404 cookie->len = -1;
353 return true; 405 return true;
354 } 406 }
@@ -402,25 +454,16 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect);
402 * TFO connection with data exchanges. 454 * TFO connection with data exchanges.
403 */ 455 */
404 456
405/* Default to 1hr */
406unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
407static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
408static unsigned long tfo_active_disable_stamp __read_mostly;
409
410/* Disable active TFO and record current jiffies and 457/* Disable active TFO and record current jiffies and
411 * tfo_active_disable_times 458 * tfo_active_disable_times
412 */ 459 */
413void tcp_fastopen_active_disable(struct sock *sk) 460void tcp_fastopen_active_disable(struct sock *sk)
414{ 461{
415 atomic_inc(&tfo_active_disable_times); 462 struct net *net = sock_net(sk);
416 tfo_active_disable_stamp = jiffies;
417 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE);
418}
419 463
420/* Reset tfo_active_disable_times to 0 */ 464 atomic_inc(&net->ipv4.tfo_active_disable_times);
421void tcp_fastopen_active_timeout_reset(void) 465 net->ipv4.tfo_active_disable_stamp = jiffies;
422{ 466 NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
423 atomic_set(&tfo_active_disable_times, 0);
424} 467}
425 468
426/* Calculate timeout for tfo active disable 469/* Calculate timeout for tfo active disable
@@ -429,17 +472,18 @@ void tcp_fastopen_active_timeout_reset(void)
429 */ 472 */
430bool tcp_fastopen_active_should_disable(struct sock *sk) 473bool tcp_fastopen_active_should_disable(struct sock *sk)
431{ 474{
432 int tfo_da_times = atomic_read(&tfo_active_disable_times); 475 unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
433 int multiplier; 476 int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
434 unsigned long timeout; 477 unsigned long timeout;
478 int multiplier;
435 479
436 if (!tfo_da_times) 480 if (!tfo_da_times)
437 return false; 481 return false;
438 482
439 /* Limit timout to max: 2^6 * initial timeout */ 483 /* Limit timout to max: 2^6 * initial timeout */
440 multiplier = 1 << min(tfo_da_times - 1, 6); 484 multiplier = 1 << min(tfo_da_times - 1, 6);
441 timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ; 485 timeout = multiplier * tfo_bh_timeout * HZ;
442 if (time_before(jiffies, tfo_active_disable_stamp + timeout)) 486 if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
443 return true; 487 return true;
444 488
445 /* Mark check bit so we can check for successful active TFO 489 /* Mark check bit so we can check for successful active TFO
@@ -458,27 +502,25 @@ bool tcp_fastopen_active_should_disable(struct sock *sk)
458void tcp_fastopen_active_disable_ofo_check(struct sock *sk) 502void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
459{ 503{
460 struct tcp_sock *tp = tcp_sk(sk); 504 struct tcp_sock *tp = tcp_sk(sk);
461 struct rb_node *p;
462 struct sk_buff *skb;
463 struct dst_entry *dst; 505 struct dst_entry *dst;
506 struct sk_buff *skb;
464 507
465 if (!tp->syn_fastopen) 508 if (!tp->syn_fastopen)
466 return; 509 return;
467 510
468 if (!tp->data_segs_in) { 511 if (!tp->data_segs_in) {
469 p = rb_first(&tp->out_of_order_queue); 512 skb = skb_rb_first(&tp->out_of_order_queue);
470 if (p && !rb_next(p)) { 513 if (skb && !skb_rb_next(skb)) {
471 skb = rb_entry(p, struct sk_buff, rbnode);
472 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { 514 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
473 tcp_fastopen_active_disable(sk); 515 tcp_fastopen_active_disable(sk);
474 return; 516 return;
475 } 517 }
476 } 518 }
477 } else if (tp->syn_fastopen_ch && 519 } else if (tp->syn_fastopen_ch &&
478 atomic_read(&tfo_active_disable_times)) { 520 atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
479 dst = sk_dst_get(sk); 521 dst = sk_dst_get(sk);
480 if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))) 522 if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
481 tcp_fastopen_active_timeout_reset(); 523 atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
482 dst_release(dst); 524 dst_release(dst);
483 } 525 }
484} 526}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 887585045b27..734cfc8ff76e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -76,25 +76,10 @@
76#include <linux/ipsec.h> 76#include <linux/ipsec.h>
77#include <asm/unaligned.h> 77#include <asm/unaligned.h>
78#include <linux/errqueue.h> 78#include <linux/errqueue.h>
79#include <trace/events/tcp.h>
80#include <linux/static_key.h>
79 81
80int sysctl_tcp_fack __read_mostly;
81int sysctl_tcp_max_reordering __read_mostly = 300;
82int sysctl_tcp_dsack __read_mostly = 1;
83int sysctl_tcp_app_win __read_mostly = 31;
84int sysctl_tcp_adv_win_scale __read_mostly = 1;
85EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
86
87/* rfc5961 challenge ack rate limiting */
88int sysctl_tcp_challenge_ack_limit = 1000;
89
90int sysctl_tcp_stdurg __read_mostly;
91int sysctl_tcp_rfc1337 __read_mostly;
92int sysctl_tcp_max_orphans __read_mostly = NR_FILE; 82int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
93int sysctl_tcp_frto __read_mostly = 2;
94int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
95int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
96int sysctl_tcp_early_retrans __read_mostly = 3;
97int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
98 83
99#define FLAG_DATA 0x01 /* Incoming frame contained data. */ 84#define FLAG_DATA 0x01 /* Incoming frame contained data. */
100#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ 85#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -335,7 +320,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
335 sndmem *= nr_segs * per_mss; 320 sndmem *= nr_segs * per_mss;
336 321
337 if (sk->sk_sndbuf < sndmem) 322 if (sk->sk_sndbuf < sndmem)
338 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); 323 sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
339} 324}
340 325
341/* 2. Tuning advertised window (window_clamp, rcv_ssthresh) 326/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -368,8 +353,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
368{ 353{
369 struct tcp_sock *tp = tcp_sk(sk); 354 struct tcp_sock *tp = tcp_sk(sk);
370 /* Optimize this! */ 355 /* Optimize this! */
371 int truesize = tcp_win_from_space(skb->truesize) >> 1; 356 int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
372 int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1; 357 int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
373 358
374 while (tp->rcv_ssthresh <= window) { 359 while (tp->rcv_ssthresh <= window) {
375 if (truesize <= skb->len) 360 if (truesize <= skb->len)
@@ -394,7 +379,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
394 /* Check #2. Increase window, if skb with such overhead 379 /* Check #2. Increase window, if skb with such overhead
395 * will fit to rcvbuf in future. 380 * will fit to rcvbuf in future.
396 */ 381 */
397 if (tcp_win_from_space(skb->truesize) <= skb->len) 382 if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
398 incr = 2 * tp->advmss; 383 incr = 2 * tp->advmss;
399 else 384 else
400 incr = __tcp_grow_window(sk, skb); 385 incr = __tcp_grow_window(sk, skb);
@@ -420,11 +405,11 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
420 /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency 405 /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
421 * Allow enough cushion so that sender is not limited by our window 406 * Allow enough cushion so that sender is not limited by our window
422 */ 407 */
423 if (sysctl_tcp_moderate_rcvbuf) 408 if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
424 rcvmem <<= 2; 409 rcvmem <<= 2;
425 410
426 if (sk->sk_rcvbuf < rcvmem) 411 if (sk->sk_rcvbuf < rcvmem)
427 sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); 412 sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
428} 413}
429 414
430/* 4. Try to fixup all. It is made immediately after connection enters 415/* 4. Try to fixup all. It is made immediately after connection enters
@@ -432,6 +417,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
432 */ 417 */
433void tcp_init_buffer_space(struct sock *sk) 418void tcp_init_buffer_space(struct sock *sk)
434{ 419{
420 int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
435 struct tcp_sock *tp = tcp_sk(sk); 421 struct tcp_sock *tp = tcp_sk(sk);
436 int maxwin; 422 int maxwin;
437 423
@@ -450,14 +436,14 @@ void tcp_init_buffer_space(struct sock *sk)
450 if (tp->window_clamp >= maxwin) { 436 if (tp->window_clamp >= maxwin) {
451 tp->window_clamp = maxwin; 437 tp->window_clamp = maxwin;
452 438
453 if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss) 439 if (tcp_app_win && maxwin > 4 * tp->advmss)
454 tp->window_clamp = max(maxwin - 440 tp->window_clamp = max(maxwin -
455 (maxwin >> sysctl_tcp_app_win), 441 (maxwin >> tcp_app_win),
456 4 * tp->advmss); 442 4 * tp->advmss);
457 } 443 }
458 444
459 /* Force reservation of one segment. */ 445 /* Force reservation of one segment. */
460 if (sysctl_tcp_app_win && 446 if (tcp_app_win &&
461 tp->window_clamp > 2 * tp->advmss && 447 tp->window_clamp > 2 * tp->advmss &&
462 tp->window_clamp + tp->advmss > maxwin) 448 tp->window_clamp + tp->advmss > maxwin)
463 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss); 449 tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
@@ -471,15 +457,16 @@ static void tcp_clamp_window(struct sock *sk)
471{ 457{
472 struct tcp_sock *tp = tcp_sk(sk); 458 struct tcp_sock *tp = tcp_sk(sk);
473 struct inet_connection_sock *icsk = inet_csk(sk); 459 struct inet_connection_sock *icsk = inet_csk(sk);
460 struct net *net = sock_net(sk);
474 461
475 icsk->icsk_ack.quick = 0; 462 icsk->icsk_ack.quick = 0;
476 463
477 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && 464 if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
478 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && 465 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
479 !tcp_under_memory_pressure(sk) && 466 !tcp_under_memory_pressure(sk) &&
480 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { 467 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
481 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), 468 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
482 sysctl_tcp_rmem[2]); 469 net->ipv4.sysctl_tcp_rmem[2]);
483 } 470 }
484 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) 471 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
485 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); 472 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -610,7 +597,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
610 * <prev RTT . ><current RTT .. ><next RTT .... > 597 * <prev RTT . ><current RTT .. ><next RTT .... >
611 */ 598 */
612 599
613 if (sysctl_tcp_moderate_rcvbuf && 600 if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
614 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { 601 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
615 int rcvwin, rcvmem, rcvbuf; 602 int rcvwin, rcvmem, rcvbuf;
616 603
@@ -634,10 +621,11 @@ void tcp_rcv_space_adjust(struct sock *sk)
634 } 621 }
635 622
636 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); 623 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
637 while (tcp_win_from_space(rcvmem) < tp->advmss) 624 while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
638 rcvmem += 128; 625 rcvmem += 128;
639 626
640 rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]); 627 rcvbuf = min(rcvwin / tp->advmss * rcvmem,
628 sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
641 if (rcvbuf > sk->sk_rcvbuf) { 629 if (rcvbuf > sk->sk_rcvbuf) {
642 sk->sk_rcvbuf = rcvbuf; 630 sk->sk_rcvbuf = rcvbuf;
643 631
@@ -781,15 +769,6 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
781 tp->srtt_us = max(1U, srtt); 769 tp->srtt_us = max(1U, srtt);
782} 770}
783 771
784/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
785 * Note: TCP stack does not yet implement pacing.
786 * FQ packet scheduler can be used to implement cheap but effective
787 * TCP pacing, to smooth the burst on large writes when packets
788 * in flight is significantly lower than cwnd (or rwin)
789 */
790int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
791int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
792
793static void tcp_update_pacing_rate(struct sock *sk) 772static void tcp_update_pacing_rate(struct sock *sk)
794{ 773{
795 const struct tcp_sock *tp = tcp_sk(sk); 774 const struct tcp_sock *tp = tcp_sk(sk);
@@ -807,9 +786,9 @@ static void tcp_update_pacing_rate(struct sock *sk)
807 * end of slow start and should slow down. 786 * end of slow start and should slow down.
808 */ 787 */
809 if (tp->snd_cwnd < tp->snd_ssthresh / 2) 788 if (tp->snd_cwnd < tp->snd_ssthresh / 2)
810 rate *= sysctl_tcp_pacing_ss_ratio; 789 rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
811 else 790 else
812 rate *= sysctl_tcp_pacing_ca_ratio; 791 rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
813 792
814 rate *= max(tp->snd_cwnd, tp->packets_out); 793 rate *= max(tp->snd_cwnd, tp->packets_out);
815 794
@@ -863,60 +842,46 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
863 return min_t(__u32, cwnd, tp->snd_cwnd_clamp); 842 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
864} 843}
865 844
866/*
867 * Packet counting of FACK is based on in-order assumptions, therefore TCP
868 * disables it when reordering is detected
869 */
870void tcp_disable_fack(struct tcp_sock *tp)
871{
872 /* RFC3517 uses different metric in lost marker => reset on change */
873 if (tcp_is_fack(tp))
874 tp->lost_skb_hint = NULL;
875 tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
876}
877
878/* Take a notice that peer is sending D-SACKs */ 845/* Take a notice that peer is sending D-SACKs */
879static void tcp_dsack_seen(struct tcp_sock *tp) 846static void tcp_dsack_seen(struct tcp_sock *tp)
880{ 847{
881 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; 848 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
849 tp->rack.dsack_seen = 1;
882} 850}
883 851
884static void tcp_update_reordering(struct sock *sk, const int metric, 852/* It's reordering when higher sequence was delivered (i.e. sacked) before
885 const int ts) 853 * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
854 * distance is approximated in full-mss packet distance ("reordering").
855 */
856static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
857 const int ts)
886{ 858{
887 struct tcp_sock *tp = tcp_sk(sk); 859 struct tcp_sock *tp = tcp_sk(sk);
888 int mib_idx; 860 const u32 mss = tp->mss_cache;
861 u32 fack, metric;
889 862
890 if (WARN_ON_ONCE(metric < 0)) 863 fack = tcp_highest_sack_seq(tp);
864 if (!before(low_seq, fack))
891 return; 865 return;
892 866
893 if (metric > tp->reordering) { 867 metric = fack - low_seq;
894 tp->reordering = min(sysctl_tcp_max_reordering, metric); 868 if ((metric > tp->reordering * mss) && mss) {
895
896#if FASTRETRANS_DEBUG > 1 869#if FASTRETRANS_DEBUG > 1
897 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", 870 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
898 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, 871 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
899 tp->reordering, 872 tp->reordering,
900 tp->fackets_out, 873 0,
901 tp->sacked_out, 874 tp->sacked_out,
902 tp->undo_marker ? tp->undo_retrans : 0); 875 tp->undo_marker ? tp->undo_retrans : 0);
903#endif 876#endif
904 tcp_disable_fack(tp); 877 tp->reordering = min_t(u32, (metric + mss - 1) / mss,
878 sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
905 } 879 }
906 880
907 tp->rack.reord = 1; 881 tp->rack.reord = 1;
908
909 /* This exciting event is worth to be remembered. 8) */ 882 /* This exciting event is worth to be remembered. 8) */
910 if (ts) 883 NET_INC_STATS(sock_net(sk),
911 mib_idx = LINUX_MIB_TCPTSREORDER; 884 ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
912 else if (tcp_is_reno(tp))
913 mib_idx = LINUX_MIB_TCPRENOREORDER;
914 else if (tcp_is_fack(tp))
915 mib_idx = LINUX_MIB_TCPFACKREORDER;
916 else
917 mib_idx = LINUX_MIB_TCPSACKREORDER;
918
919 NET_INC_STATS(sock_net(sk), mib_idx);
920} 885}
921 886
922/* This must be called before lost_out is incremented */ 887/* This must be called before lost_out is incremented */
@@ -990,7 +955,6 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
990 * 3. Loss detection event of two flavors: 955 * 3. Loss detection event of two flavors:
991 * A. Scoreboard estimator decided the packet is lost. 956 * A. Scoreboard estimator decided the packet is lost.
992 * A'. Reno "three dupacks" marks head of queue lost. 957 * A'. Reno "three dupacks" marks head of queue lost.
993 * A''. Its FACK modification, head until snd.fack is lost.
994 * B. SACK arrives sacking SND.NXT at the moment, when the 958 * B. SACK arrives sacking SND.NXT at the moment, when the
995 * segment was retransmitted. 959 * segment was retransmitted.
996 * 4. D-SACK added new rule: D-SACK changes any tag to S. 960 * 4. D-SACK added new rule: D-SACK changes any tag to S.
@@ -1133,8 +1097,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1133} 1097}
1134 1098
1135struct tcp_sacktag_state { 1099struct tcp_sacktag_state {
1136 int reord; 1100 u32 reord;
1137 int fack_count;
1138 /* Timestamps for earliest and latest never-retransmitted segment 1101 /* Timestamps for earliest and latest never-retransmitted segment
1139 * that was SACKed. RTO needs the earliest RTT to stay conservative, 1102 * that was SACKed. RTO needs the earliest RTT to stay conservative,
1140 * but congestion control should still get an accurate delay signal. 1103 * but congestion control should still get an accurate delay signal.
@@ -1143,6 +1106,7 @@ struct tcp_sacktag_state {
1143 u64 last_sackt; 1106 u64 last_sackt;
1144 struct rate_sample *rate; 1107 struct rate_sample *rate;
1145 int flag; 1108 int flag;
1109 unsigned int mss_now;
1146}; 1110};
1147 1111
1148/* Check if skb is fully within the SACK block. In presence of GSO skbs, 1112/* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1192,7 +1156,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1192 if (pkt_len >= skb->len && !in_sack) 1156 if (pkt_len >= skb->len && !in_sack)
1193 return 0; 1157 return 0;
1194 1158
1195 err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC); 1159 err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
1160 pkt_len, mss, GFP_ATOMIC);
1196 if (err < 0) 1161 if (err < 0)
1197 return err; 1162 return err;
1198 } 1163 }
@@ -1208,15 +1173,15 @@ static u8 tcp_sacktag_one(struct sock *sk,
1208 u64 xmit_time) 1173 u64 xmit_time)
1209{ 1174{
1210 struct tcp_sock *tp = tcp_sk(sk); 1175 struct tcp_sock *tp = tcp_sk(sk);
1211 int fack_count = state->fack_count;
1212 1176
1213 /* Account D-SACK for retransmitted packet. */ 1177 /* Account D-SACK for retransmitted packet. */
1214 if (dup_sack && (sacked & TCPCB_RETRANS)) { 1178 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1215 if (tp->undo_marker && tp->undo_retrans > 0 && 1179 if (tp->undo_marker && tp->undo_retrans > 0 &&
1216 after(end_seq, tp->undo_marker)) 1180 after(end_seq, tp->undo_marker))
1217 tp->undo_retrans--; 1181 tp->undo_retrans--;
1218 if (sacked & TCPCB_SACKED_ACKED) 1182 if ((sacked & TCPCB_SACKED_ACKED) &&
1219 state->reord = min(fack_count, state->reord); 1183 before(start_seq, state->reord))
1184 state->reord = start_seq;
1220 } 1185 }
1221 1186
1222 /* Nothing to do; acked frame is about to be dropped (was ACKed). */ 1187 /* Nothing to do; acked frame is about to be dropped (was ACKed). */
@@ -1242,9 +1207,10 @@ static u8 tcp_sacktag_one(struct sock *sk,
1242 * which was in hole. It is reordering. 1207 * which was in hole. It is reordering.
1243 */ 1208 */
1244 if (before(start_seq, 1209 if (before(start_seq,
1245 tcp_highest_sack_seq(tp))) 1210 tcp_highest_sack_seq(tp)) &&
1246 state->reord = min(fack_count, 1211 before(start_seq, state->reord))
1247 state->reord); 1212 state->reord = start_seq;
1213
1248 if (!after(end_seq, tp->high_seq)) 1214 if (!after(end_seq, tp->high_seq))
1249 state->flag |= FLAG_ORIG_SACK_ACKED; 1215 state->flag |= FLAG_ORIG_SACK_ACKED;
1250 if (state->first_sackt == 0) 1216 if (state->first_sackt == 0)
@@ -1263,15 +1229,10 @@ static u8 tcp_sacktag_one(struct sock *sk,
1263 tp->sacked_out += pcount; 1229 tp->sacked_out += pcount;
1264 tp->delivered += pcount; /* Out-of-order packets delivered */ 1230 tp->delivered += pcount; /* Out-of-order packets delivered */
1265 1231
1266 fack_count += pcount;
1267
1268 /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ 1232 /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
1269 if (!tcp_is_fack(tp) && tp->lost_skb_hint && 1233 if (tp->lost_skb_hint &&
1270 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) 1234 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1271 tp->lost_cnt_hint += pcount; 1235 tp->lost_cnt_hint += pcount;
1272
1273 if (fack_count > tp->fackets_out)
1274 tp->fackets_out = fack_count;
1275 } 1236 }
1276 1237
1277 /* D-SACK. We can detect redundant retransmission in S|R and plain R 1238 /* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1289,13 +1250,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
1289/* Shift newly-SACKed bytes from this skb to the immediately previous 1250/* Shift newly-SACKed bytes from this skb to the immediately previous
1290 * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. 1251 * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
1291 */ 1252 */
1292static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, 1253static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
1254 struct sk_buff *skb,
1293 struct tcp_sacktag_state *state, 1255 struct tcp_sacktag_state *state,
1294 unsigned int pcount, int shifted, int mss, 1256 unsigned int pcount, int shifted, int mss,
1295 bool dup_sack) 1257 bool dup_sack)
1296{ 1258{
1297 struct tcp_sock *tp = tcp_sk(sk); 1259 struct tcp_sock *tp = tcp_sk(sk);
1298 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1299 u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */ 1260 u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */
1300 u32 end_seq = start_seq + shifted; /* end of newly-SACKed */ 1261 u32 end_seq = start_seq + shifted; /* end of newly-SACKed */
1301 1262
@@ -1364,8 +1325,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1364 if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp)) 1325 if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
1365 TCP_SKB_CB(prev)->tx.delivered_mstamp = 0; 1326 TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
1366 1327
1367 tcp_unlink_write_queue(skb, sk); 1328 tcp_rtx_queue_unlink_and_free(skb, sk);
1368 sk_wmem_free_skb(sk, skb);
1369 1329
1370 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED); 1330 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
1371 1331
@@ -1415,9 +1375,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1415 goto fallback; 1375 goto fallback;
1416 1376
1417 /* Can only happen with delayed DSACK + discard craziness */ 1377 /* Can only happen with delayed DSACK + discard craziness */
1418 if (unlikely(skb == tcp_write_queue_head(sk))) 1378 prev = skb_rb_prev(skb);
1379 if (!prev)
1419 goto fallback; 1380 goto fallback;
1420 prev = tcp_write_queue_prev(sk, skb);
1421 1381
1422 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) 1382 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1423 goto fallback; 1383 goto fallback;
@@ -1496,18 +1456,17 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1496 1456
1497 if (!skb_shift(prev, skb, len)) 1457 if (!skb_shift(prev, skb, len))
1498 goto fallback; 1458 goto fallback;
1499 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) 1459 if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
1500 goto out; 1460 goto out;
1501 1461
1502 /* Hole filled allows collapsing with the next as well, this is very 1462 /* Hole filled allows collapsing with the next as well, this is very
1503 * useful when hole on every nth skb pattern happens 1463 * useful when hole on every nth skb pattern happens
1504 */ 1464 */
1505 if (prev == tcp_write_queue_tail(sk)) 1465 skb = skb_rb_next(prev);
1466 if (!skb)
1506 goto out; 1467 goto out;
1507 skb = tcp_write_queue_next(sk, prev);
1508 1468
1509 if (!skb_can_shift(skb) || 1469 if (!skb_can_shift(skb) ||
1510 (skb == tcp_send_head(sk)) ||
1511 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) || 1470 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1512 (mss != tcp_skb_seglen(skb))) 1471 (mss != tcp_skb_seglen(skb)))
1513 goto out; 1472 goto out;
@@ -1515,11 +1474,11 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1515 len = skb->len; 1474 len = skb->len;
1516 if (skb_shift(prev, skb, len)) { 1475 if (skb_shift(prev, skb, len)) {
1517 pcount += tcp_skb_pcount(skb); 1476 pcount += tcp_skb_pcount(skb);
1518 tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0); 1477 tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
1478 len, mss, 0);
1519 } 1479 }
1520 1480
1521out: 1481out:
1522 state->fack_count += pcount;
1523 return prev; 1482 return prev;
1524 1483
1525noop: 1484noop:
@@ -1539,13 +1498,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1539 struct tcp_sock *tp = tcp_sk(sk); 1498 struct tcp_sock *tp = tcp_sk(sk);
1540 struct sk_buff *tmp; 1499 struct sk_buff *tmp;
1541 1500
1542 tcp_for_write_queue_from(skb, sk) { 1501 skb_rbtree_walk_from(skb) {
1543 int in_sack = 0; 1502 int in_sack = 0;
1544 bool dup_sack = dup_sack_in; 1503 bool dup_sack = dup_sack_in;
1545 1504
1546 if (skb == tcp_send_head(sk))
1547 break;
1548
1549 /* queue is in-order => we can short-circuit the walk early */ 1505 /* queue is in-order => we can short-circuit the walk early */
1550 if (!before(TCP_SKB_CB(skb)->seq, end_seq)) 1506 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1551 break; 1507 break;
@@ -1594,34 +1550,48 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1594 tcp_skb_pcount(skb), 1550 tcp_skb_pcount(skb),
1595 skb->skb_mstamp); 1551 skb->skb_mstamp);
1596 tcp_rate_skb_delivered(sk, skb, state->rate); 1552 tcp_rate_skb_delivered(sk, skb, state->rate);
1553 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
1554 list_del_init(&skb->tcp_tsorted_anchor);
1597 1555
1598 if (!before(TCP_SKB_CB(skb)->seq, 1556 if (!before(TCP_SKB_CB(skb)->seq,
1599 tcp_highest_sack_seq(tp))) 1557 tcp_highest_sack_seq(tp)))
1600 tcp_advance_highest_sack(sk, skb); 1558 tcp_advance_highest_sack(sk, skb);
1601 } 1559 }
1602
1603 state->fack_count += tcp_skb_pcount(skb);
1604 } 1560 }
1605 return skb; 1561 return skb;
1606} 1562}
1607 1563
1608/* Avoid all extra work that is being done by sacktag while walking in 1564static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
1609 * a normal way 1565 struct tcp_sacktag_state *state,
1610 */ 1566 u32 seq)
1567{
1568 struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
1569 struct sk_buff *skb;
1570
1571 while (*p) {
1572 parent = *p;
1573 skb = rb_to_skb(parent);
1574 if (before(seq, TCP_SKB_CB(skb)->seq)) {
1575 p = &parent->rb_left;
1576 continue;
1577 }
1578 if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
1579 p = &parent->rb_right;
1580 continue;
1581 }
1582 return skb;
1583 }
1584 return NULL;
1585}
1586
1611static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, 1587static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1612 struct tcp_sacktag_state *state, 1588 struct tcp_sacktag_state *state,
1613 u32 skip_to_seq) 1589 u32 skip_to_seq)
1614{ 1590{
1615 tcp_for_write_queue_from(skb, sk) { 1591 if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
1616 if (skb == tcp_send_head(sk)) 1592 return skb;
1617 break;
1618
1619 if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1620 break;
1621 1593
1622 state->fack_count += tcp_skb_pcount(skb); 1594 return tcp_sacktag_bsearch(sk, state, skip_to_seq);
1623 }
1624 return skb;
1625} 1595}
1626 1596
1627static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, 1597static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
@@ -1666,13 +1636,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1666 int first_sack_index; 1636 int first_sack_index;
1667 1637
1668 state->flag = 0; 1638 state->flag = 0;
1669 state->reord = tp->packets_out; 1639 state->reord = tp->snd_nxt;
1670 1640
1671 if (!tp->sacked_out) { 1641 if (!tp->sacked_out)
1672 if (WARN_ON(tp->fackets_out))
1673 tp->fackets_out = 0;
1674 tcp_highest_sack_reset(sk); 1642 tcp_highest_sack_reset(sk);
1675 }
1676 1643
1677 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, 1644 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1678 num_sacks, prior_snd_una); 1645 num_sacks, prior_snd_una);
@@ -1743,8 +1710,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1743 } 1710 }
1744 } 1711 }
1745 1712
1746 skb = tcp_write_queue_head(sk); 1713 state->mss_now = tcp_current_mss(sk);
1747 state->fack_count = 0; 1714 skb = NULL;
1748 i = 0; 1715 i = 0;
1749 1716
1750 if (!tp->sacked_out) { 1717 if (!tp->sacked_out) {
@@ -1801,7 +1768,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1801 skb = tcp_highest_sack(sk); 1768 skb = tcp_highest_sack(sk);
1802 if (!skb) 1769 if (!skb)
1803 break; 1770 break;
1804 state->fack_count = tp->fackets_out;
1805 cache++; 1771 cache++;
1806 goto walk; 1772 goto walk;
1807 } 1773 }
@@ -1816,7 +1782,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1816 skb = tcp_highest_sack(sk); 1782 skb = tcp_highest_sack(sk);
1817 if (!skb) 1783 if (!skb)
1818 break; 1784 break;
1819 state->fack_count = tp->fackets_out;
1820 } 1785 }
1821 skb = tcp_sacktag_skip(skb, sk, state, start_seq); 1786 skb = tcp_sacktag_skip(skb, sk, state, start_seq);
1822 1787
@@ -1836,9 +1801,8 @@ advance_sp:
1836 for (j = 0; j < used_sacks; j++) 1801 for (j = 0; j < used_sacks; j++)
1837 tp->recv_sack_cache[i++] = sp[j]; 1802 tp->recv_sack_cache[i++] = sp[j];
1838 1803
1839 if ((state->reord < tp->fackets_out) && 1804 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
1840 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) 1805 tcp_check_sack_reordering(sk, state->reord, 0);
1841 tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
1842 1806
1843 tcp_verify_left_out(tp); 1807 tcp_verify_left_out(tp);
1844out: 1808out:
@@ -1876,8 +1840,13 @@ static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1876static void tcp_check_reno_reordering(struct sock *sk, const int addend) 1840static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1877{ 1841{
1878 struct tcp_sock *tp = tcp_sk(sk); 1842 struct tcp_sock *tp = tcp_sk(sk);
1879 if (tcp_limit_reno_sacked(tp)) 1843
1880 tcp_update_reordering(sk, tp->packets_out + addend, 0); 1844 if (!tcp_limit_reno_sacked(tp))
1845 return;
1846
1847 tp->reordering = min_t(u32, tp->packets_out + addend,
1848 sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
1849 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
1881} 1850}
1882 1851
1883/* Emulate SACKs for SACKless connection: account for a new dupack. */ 1852/* Emulate SACKs for SACKless connection: account for a new dupack. */
@@ -1923,7 +1892,6 @@ void tcp_clear_retrans(struct tcp_sock *tp)
1923 tp->lost_out = 0; 1892 tp->lost_out = 0;
1924 tp->undo_marker = 0; 1893 tp->undo_marker = 0;
1925 tp->undo_retrans = -1; 1894 tp->undo_retrans = -1;
1926 tp->fackets_out = 0;
1927 tp->sacked_out = 0; 1895 tp->sacked_out = 0;
1928} 1896}
1929 1897
@@ -1968,19 +1936,15 @@ void tcp_enter_loss(struct sock *sk)
1968 if (tcp_is_reno(tp)) 1936 if (tcp_is_reno(tp))
1969 tcp_reset_reno_sack(tp); 1937 tcp_reset_reno_sack(tp);
1970 1938
1971 skb = tcp_write_queue_head(sk); 1939 skb = tcp_rtx_queue_head(sk);
1972 is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); 1940 is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
1973 if (is_reneg) { 1941 if (is_reneg) {
1974 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); 1942 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
1975 tp->sacked_out = 0; 1943 tp->sacked_out = 0;
1976 tp->fackets_out = 0;
1977 } 1944 }
1978 tcp_clear_all_retrans_hints(tp); 1945 tcp_clear_all_retrans_hints(tp);
1979 1946
1980 tcp_for_write_queue(skb, sk) { 1947 skb_rbtree_walk_from(skb) {
1981 if (skb == tcp_send_head(sk))
1982 break;
1983
1984 mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || 1948 mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
1985 is_reneg); 1949 is_reneg);
1986 if (mark_lost) 1950 if (mark_lost)
@@ -2014,7 +1978,7 @@ void tcp_enter_loss(struct sock *sk)
2014 * falsely raise the receive window, which results in repeated 1978 * falsely raise the receive window, which results in repeated
2015 * timeouts and stop-and-go behavior. 1979 * timeouts and stop-and-go behavior.
2016 */ 1980 */
2017 tp->frto = sysctl_tcp_frto && 1981 tp->frto = net->ipv4.sysctl_tcp_frto &&
2018 (new_recovery || icsk->icsk_retransmits) && 1982 (new_recovery || icsk->icsk_retransmits) &&
2019 !inet_csk(sk)->icsk_mtup.probe_size; 1983 !inet_csk(sk)->icsk_mtup.probe_size;
2020} 1984}
@@ -2043,19 +2007,10 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2043 return false; 2007 return false;
2044} 2008}
2045 2009
2046static inline int tcp_fackets_out(const struct tcp_sock *tp)
2047{
2048 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
2049}
2050
2051/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs 2010/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
2052 * counter when SACK is enabled (without SACK, sacked_out is used for 2011 * counter when SACK is enabled (without SACK, sacked_out is used for
2053 * that purpose). 2012 * that purpose).
2054 * 2013 *
2055 * Instead, with FACK TCP uses fackets_out that includes both SACKed
2056 * segments up to the highest received SACK block so far and holes in
2057 * between them.
2058 *
2059 * With reordering, holes may still be in flight, so RFC3517 recovery 2014 * With reordering, holes may still be in flight, so RFC3517 recovery
2060 * uses pure sacked_out (total number of SACKed segments) even though 2015 * uses pure sacked_out (total number of SACKed segments) even though
2061 * it violates the RFC that uses duplicate ACKs, often these are equal 2016 * it violates the RFC that uses duplicate ACKs, often these are equal
@@ -2065,10 +2020,10 @@ static inline int tcp_fackets_out(const struct tcp_sock *tp)
2065 */ 2020 */
2066static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) 2021static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2067{ 2022{
2068 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; 2023 return tp->sacked_out + 1;
2069} 2024}
2070 2025
2071/* Linux NewReno/SACK/FACK/ECN state machine. 2026/* Linux NewReno/SACK/ECN state machine.
2072 * -------------------------------------- 2027 * --------------------------------------
2073 * 2028 *
2074 * "Open" Normal state, no dubious events, fast path. 2029 * "Open" Normal state, no dubious events, fast path.
@@ -2133,16 +2088,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2133 * dynamically measured and adjusted. This is implemented in 2088 * dynamically measured and adjusted. This is implemented in
2134 * tcp_rack_mark_lost. 2089 * tcp_rack_mark_lost.
2135 * 2090 *
2136 * FACK (Disabled by default. Subsumbed by RACK):
2137 * It is the simplest heuristics. As soon as we decided
2138 * that something is lost, we decide that _all_ not SACKed
2139 * packets until the most forward SACK are lost. I.e.
2140 * lost_out = fackets_out - sacked_out and left_out = fackets_out.
2141 * It is absolutely correct estimate, if network does not reorder
2142 * packets. And it loses any connection to reality when reordering
2143 * takes place. We use FACK by default until reordering
2144 * is suspected on the path to this destination.
2145 *
2146 * If the receiver does not support SACK: 2091 * If the receiver does not support SACK:
2147 * 2092 *
2148 * NewReno (RFC6582): in Recovery we assume that one segment 2093 * NewReno (RFC6582): in Recovery we assume that one segment
@@ -2191,7 +2136,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
2191} 2136}
2192 2137
2193/* Detect loss in event "A" above by marking head of queue up as lost. 2138/* Detect loss in event "A" above by marking head of queue up as lost.
2194 * For FACK or non-SACK(Reno) senders, the first "packets" number of segments 2139 * For non-SACK(Reno) senders, the first "packets" number of segments
2195 * are considered lost. For RFC3517 SACK, a segment is considered lost if it 2140 * are considered lost. For RFC3517 SACK, a segment is considered lost if it
2196 * has at least tp->reordering SACKed seqments above it; "packets" refers to 2141 * has at least tp->reordering SACKed seqments above it; "packets" refers to
2197 * the maximum SACKed segments to pass before reaching this limit. 2142 * the maximum SACKed segments to pass before reaching this limit.
@@ -2206,20 +2151,18 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2206 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; 2151 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
2207 2152
2208 WARN_ON(packets > tp->packets_out); 2153 WARN_ON(packets > tp->packets_out);
2209 if (tp->lost_skb_hint) { 2154 skb = tp->lost_skb_hint;
2210 skb = tp->lost_skb_hint; 2155 if (skb) {
2211 cnt = tp->lost_cnt_hint;
2212 /* Head already handled? */ 2156 /* Head already handled? */
2213 if (mark_head && skb != tcp_write_queue_head(sk)) 2157 if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
2214 return; 2158 return;
2159 cnt = tp->lost_cnt_hint;
2215 } else { 2160 } else {
2216 skb = tcp_write_queue_head(sk); 2161 skb = tcp_rtx_queue_head(sk);
2217 cnt = 0; 2162 cnt = 0;
2218 } 2163 }
2219 2164
2220 tcp_for_write_queue_from(skb, sk) { 2165 skb_rbtree_walk_from(skb) {
2221 if (skb == tcp_send_head(sk))
2222 break;
2223 /* TODO: do this better */ 2166 /* TODO: do this better */
2224 /* this is not the most efficient way to do this... */ 2167 /* this is not the most efficient way to do this... */
2225 tp->lost_skb_hint = skb; 2168 tp->lost_skb_hint = skb;
@@ -2229,12 +2172,12 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2229 break; 2172 break;
2230 2173
2231 oldcnt = cnt; 2174 oldcnt = cnt;
2232 if (tcp_is_fack(tp) || tcp_is_reno(tp) || 2175 if (tcp_is_reno(tp) ||
2233 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) 2176 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2234 cnt += tcp_skb_pcount(skb); 2177 cnt += tcp_skb_pcount(skb);
2235 2178
2236 if (cnt > packets) { 2179 if (cnt > packets) {
2237 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || 2180 if (tcp_is_sack(tp) ||
2238 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || 2181 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2239 (oldcnt >= packets)) 2182 (oldcnt >= packets))
2240 break; 2183 break;
@@ -2243,7 +2186,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2243 /* If needed, chop off the prefix to mark as lost. */ 2186 /* If needed, chop off the prefix to mark as lost. */
2244 lost = (packets - oldcnt) * mss; 2187 lost = (packets - oldcnt) * mss;
2245 if (lost < skb->len && 2188 if (lost < skb->len &&
2246 tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0) 2189 tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
2190 lost, mss, GFP_ATOMIC) < 0)
2247 break; 2191 break;
2248 cnt = packets; 2192 cnt = packets;
2249 } 2193 }
@@ -2264,11 +2208,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2264 2208
2265 if (tcp_is_reno(tp)) { 2209 if (tcp_is_reno(tp)) {
2266 tcp_mark_head_lost(sk, 1, 1); 2210 tcp_mark_head_lost(sk, 1, 1);
2267 } else if (tcp_is_fack(tp)) {
2268 int lost = tp->fackets_out - tp->reordering;
2269 if (lost <= 0)
2270 lost = 1;
2271 tcp_mark_head_lost(sk, lost, 0);
2272 } else { 2211 } else {
2273 int sacked_upto = tp->sacked_out - tp->reordering; 2212 int sacked_upto = tp->sacked_out - tp->reordering;
2274 if (sacked_upto >= 0) 2213 if (sacked_upto >= 0)
@@ -2327,16 +2266,16 @@ static bool tcp_any_retrans_done(const struct sock *sk)
2327 if (tp->retrans_out) 2266 if (tp->retrans_out)
2328 return true; 2267 return true;
2329 2268
2330 skb = tcp_write_queue_head(sk); 2269 skb = tcp_rtx_queue_head(sk);
2331 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) 2270 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2332 return true; 2271 return true;
2333 2272
2334 return false; 2273 return false;
2335} 2274}
2336 2275
2337#if FASTRETRANS_DEBUG > 1
2338static void DBGUNDO(struct sock *sk, const char *msg) 2276static void DBGUNDO(struct sock *sk, const char *msg)
2339{ 2277{
2278#if FASTRETRANS_DEBUG > 1
2340 struct tcp_sock *tp = tcp_sk(sk); 2279 struct tcp_sock *tp = tcp_sk(sk);
2341 struct inet_sock *inet = inet_sk(sk); 2280 struct inet_sock *inet = inet_sk(sk);
2342 2281
@@ -2358,10 +2297,8 @@ static void DBGUNDO(struct sock *sk, const char *msg)
2358 tp->packets_out); 2297 tp->packets_out);
2359 } 2298 }
2360#endif 2299#endif
2361}
2362#else
2363#define DBGUNDO(x...) do { } while (0)
2364#endif 2300#endif
2301}
2365 2302
2366static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) 2303static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2367{ 2304{
@@ -2370,9 +2307,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2370 if (unmark_loss) { 2307 if (unmark_loss) {
2371 struct sk_buff *skb; 2308 struct sk_buff *skb;
2372 2309
2373 tcp_for_write_queue(skb, sk) { 2310 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2374 if (skb == tcp_send_head(sk))
2375 break;
2376 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 2311 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
2377 } 2312 }
2378 tp->lost_out = 0; 2313 tp->lost_out = 0;
@@ -2417,6 +2352,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
2417 mib_idx = LINUX_MIB_TCPFULLUNDO; 2352 mib_idx = LINUX_MIB_TCPFULLUNDO;
2418 2353
2419 NET_INC_STATS(sock_net(sk), mib_idx); 2354 NET_INC_STATS(sock_net(sk), mib_idx);
2355 } else if (tp->rack.reo_wnd_persist) {
2356 tp->rack.reo_wnd_persist--;
2420 } 2357 }
2421 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { 2358 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2422 /* Hold old state until something *above* high_seq 2359 /* Hold old state until something *above* high_seq
@@ -2436,6 +2373,8 @@ static bool tcp_try_undo_dsack(struct sock *sk)
2436 struct tcp_sock *tp = tcp_sk(sk); 2373 struct tcp_sock *tp = tcp_sk(sk);
2437 2374
2438 if (tp->undo_marker && !tp->undo_retrans) { 2375 if (tp->undo_marker && !tp->undo_retrans) {
2376 tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
2377 tp->rack.reo_wnd_persist + 1);
2439 DBGUNDO(sk, "D-SACK"); 2378 DBGUNDO(sk, "D-SACK");
2440 tcp_undo_cwnd_reduction(sk, false); 2379 tcp_undo_cwnd_reduction(sk, false);
2441 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); 2380 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
@@ -2616,9 +2555,7 @@ void tcp_simple_retransmit(struct sock *sk)
2616 struct sk_buff *skb; 2555 struct sk_buff *skb;
2617 unsigned int mss = tcp_current_mss(sk); 2556 unsigned int mss = tcp_current_mss(sk);
2618 2557
2619 tcp_for_write_queue(skb, sk) { 2558 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
2620 if (skb == tcp_send_head(sk))
2621 break;
2622 if (tcp_skb_seglen(skb) > mss && 2559 if (tcp_skb_seglen(skb) > mss &&
2623 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { 2560 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2624 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { 2561 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
@@ -2712,7 +2649,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
2712 * is updated in tcp_ack()). Otherwise fall back to 2649 * is updated in tcp_ack()). Otherwise fall back to
2713 * the conventional recovery. 2650 * the conventional recovery.
2714 */ 2651 */
2715 if (tcp_send_head(sk) && 2652 if (!tcp_write_queue_empty(sk) &&
2716 after(tcp_wnd_end(tp), tp->snd_nxt)) { 2653 after(tcp_wnd_end(tp), tp->snd_nxt)) {
2717 *rexmit = REXMIT_NEW; 2654 *rexmit = REXMIT_NEW;
2718 return; 2655 return;
@@ -2739,15 +2676,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
2739} 2676}
2740 2677
2741/* Undo during fast recovery after partial ACK. */ 2678/* Undo during fast recovery after partial ACK. */
2742static bool tcp_try_undo_partial(struct sock *sk, const int acked) 2679static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
2743{ 2680{
2744 struct tcp_sock *tp = tcp_sk(sk); 2681 struct tcp_sock *tp = tcp_sk(sk);
2745 2682
2746 if (tp->undo_marker && tcp_packet_delayed(tp)) { 2683 if (tp->undo_marker && tcp_packet_delayed(tp)) {
2747 /* Plain luck! Hole if filled with delayed 2684 /* Plain luck! Hole if filled with delayed
2748 * packet, rather than with a retransmit. 2685 * packet, rather than with a retransmit. Check reordering.
2749 */ 2686 */
2750 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); 2687 tcp_check_sack_reordering(sk, prior_snd_una, 1);
2751 2688
2752 /* We are getting evidence that the reordering degree is higher 2689 /* We are getting evidence that the reordering degree is higher
2753 * than we realized. If there are no retransmits out then we 2690 * than we realized. If there are no retransmits out then we
@@ -2774,7 +2711,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
2774 struct tcp_sock *tp = tcp_sk(sk); 2711 struct tcp_sock *tp = tcp_sk(sk);
2775 2712
2776 /* Use RACK to detect loss */ 2713 /* Use RACK to detect loss */
2777 if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) { 2714 if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
2778 u32 prior_retrans = tp->retrans_out; 2715 u32 prior_retrans = tp->retrans_out;
2779 2716
2780 tcp_rack_mark_lost(sk); 2717 tcp_rack_mark_lost(sk);
@@ -2783,6 +2720,14 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
2783 } 2720 }
2784} 2721}
2785 2722
2723static bool tcp_force_fast_retransmit(struct sock *sk)
2724{
2725 struct tcp_sock *tp = tcp_sk(sk);
2726
2727 return after(tcp_highest_sack_seq(tp),
2728 tp->snd_una + tp->reordering * tp->mss_cache);
2729}
2730
2786/* Process an event, which can update packets-in-flight not trivially. 2731/* Process an event, which can update packets-in-flight not trivially.
2787 * Main goal of this function is to calculate new estimate for left_out, 2732 * Main goal of this function is to calculate new estimate for left_out,
2788 * taking into account both packets sitting in receiver's buffer and 2733 * taking into account both packets sitting in receiver's buffer and
@@ -2795,19 +2740,17 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
2795 * It does _not_ decide what to send, it is made in function 2740 * It does _not_ decide what to send, it is made in function
2796 * tcp_xmit_retransmit_queue(). 2741 * tcp_xmit_retransmit_queue().
2797 */ 2742 */
2798static void tcp_fastretrans_alert(struct sock *sk, const int acked, 2743static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
2799 bool is_dupack, int *ack_flag, int *rexmit) 2744 bool is_dupack, int *ack_flag, int *rexmit)
2800{ 2745{
2801 struct inet_connection_sock *icsk = inet_csk(sk); 2746 struct inet_connection_sock *icsk = inet_csk(sk);
2802 struct tcp_sock *tp = tcp_sk(sk); 2747 struct tcp_sock *tp = tcp_sk(sk);
2803 int fast_rexmit = 0, flag = *ack_flag; 2748 int fast_rexmit = 0, flag = *ack_flag;
2804 bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && 2749 bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2805 (tcp_fackets_out(tp) > tp->reordering)); 2750 tcp_force_fast_retransmit(sk));
2806 2751
2807 if (WARN_ON(!tp->packets_out && tp->sacked_out)) 2752 if (!tp->packets_out && tp->sacked_out)
2808 tp->sacked_out = 0; 2753 tp->sacked_out = 0;
2809 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2810 tp->fackets_out = 0;
2811 2754
2812 /* Now state machine starts. 2755 /* Now state machine starts.
2813 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */ 2756 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
@@ -2854,11 +2797,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2854 if (tcp_is_reno(tp) && is_dupack) 2797 if (tcp_is_reno(tp) && is_dupack)
2855 tcp_add_reno_sack(sk); 2798 tcp_add_reno_sack(sk);
2856 } else { 2799 } else {
2857 if (tcp_try_undo_partial(sk, acked)) 2800 if (tcp_try_undo_partial(sk, prior_snd_una))
2858 return; 2801 return;
2859 /* Partial ACK arrived. Force fast retransmit. */ 2802 /* Partial ACK arrived. Force fast retransmit. */
2860 do_lost = tcp_is_reno(tp) || 2803 do_lost = tcp_is_reno(tp) ||
2861 tcp_fackets_out(tp) > tp->reordering; 2804 tcp_force_fast_retransmit(sk);
2862 } 2805 }
2863 if (tcp_try_undo_dsack(sk)) { 2806 if (tcp_try_undo_dsack(sk)) {
2864 tcp_try_keep_open(sk); 2807 tcp_try_keep_open(sk);
@@ -2873,6 +2816,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2873 (*ack_flag & FLAG_LOST_RETRANS))) 2816 (*ack_flag & FLAG_LOST_RETRANS)))
2874 return; 2817 return;
2875 /* Change state if cwnd is undone or retransmits are lost */ 2818 /* Change state if cwnd is undone or retransmits are lost */
2819 /* fall through */
2876 default: 2820 default:
2877 if (tcp_is_reno(tp)) { 2821 if (tcp_is_reno(tp)) {
2878 if (flag & FLAG_SND_UNA_ADVANCED) 2822 if (flag & FLAG_SND_UNA_ADVANCED)
@@ -2913,8 +2857,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2913 2857
2914static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) 2858static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
2915{ 2859{
2860 u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
2916 struct tcp_sock *tp = tcp_sk(sk); 2861 struct tcp_sock *tp = tcp_sk(sk);
2917 u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
2918 2862
2919 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32, 2863 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
2920 rtt_us ? : jiffies_to_usecs(1)); 2864 rtt_us ? : jiffies_to_usecs(1));
@@ -3020,7 +2964,7 @@ void tcp_rearm_rto(struct sock *sk)
3020/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */ 2964/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
3021static void tcp_set_xmit_timer(struct sock *sk) 2965static void tcp_set_xmit_timer(struct sock *sk)
3022{ 2966{
3023 if (!tcp_schedule_loss_probe(sk)) 2967 if (!tcp_schedule_loss_probe(sk, true))
3024 tcp_rearm_rto(sk); 2968 tcp_rearm_rto(sk);
3025} 2969}
3026 2970
@@ -3056,28 +3000,31 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3056 3000
3057 shinfo = skb_shinfo(skb); 3001 shinfo = skb_shinfo(skb);
3058 if (!before(shinfo->tskey, prior_snd_una) && 3002 if (!before(shinfo->tskey, prior_snd_una) &&
3059 before(shinfo->tskey, tcp_sk(sk)->snd_una)) 3003 before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
3060 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); 3004 tcp_skb_tsorted_save(skb) {
3005 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3006 } tcp_skb_tsorted_restore(skb);
3007 }
3061} 3008}
3062 3009
3063/* Remove acknowledged frames from the retransmission queue. If our packet 3010/* Remove acknowledged frames from the retransmission queue. If our packet
3064 * is before the ack sequence we can discard it as it's confirmed to have 3011 * is before the ack sequence we can discard it as it's confirmed to have
3065 * arrived at the other end. 3012 * arrived at the other end.
3066 */ 3013 */
3067static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, 3014static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
3068 u32 prior_snd_una, int *acked, 3015 u32 prior_snd_una,
3069 struct tcp_sacktag_state *sack) 3016 struct tcp_sacktag_state *sack)
3070{ 3017{
3071 const struct inet_connection_sock *icsk = inet_csk(sk); 3018 const struct inet_connection_sock *icsk = inet_csk(sk);
3072 u64 first_ackt, last_ackt; 3019 u64 first_ackt, last_ackt;
3073 struct tcp_sock *tp = tcp_sk(sk); 3020 struct tcp_sock *tp = tcp_sk(sk);
3074 u32 prior_sacked = tp->sacked_out; 3021 u32 prior_sacked = tp->sacked_out;
3075 u32 reord = tp->packets_out; 3022 u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
3023 struct sk_buff *skb, *next;
3076 bool fully_acked = true; 3024 bool fully_acked = true;
3077 long sack_rtt_us = -1L; 3025 long sack_rtt_us = -1L;
3078 long seq_rtt_us = -1L; 3026 long seq_rtt_us = -1L;
3079 long ca_rtt_us = -1L; 3027 long ca_rtt_us = -1L;
3080 struct sk_buff *skb;
3081 u32 pkts_acked = 0; 3028 u32 pkts_acked = 0;
3082 u32 last_in_flight = 0; 3029 u32 last_in_flight = 0;
3083 bool rtt_update; 3030 bool rtt_update;
@@ -3085,8 +3032,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3085 3032
3086 first_ackt = 0; 3033 first_ackt = 0;
3087 3034
3088 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { 3035 for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
3089 struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 3036 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3037 const u32 start_seq = scb->seq;
3090 u8 sacked = scb->sacked; 3038 u8 sacked = scb->sacked;
3091 u32 acked_pcount; 3039 u32 acked_pcount;
3092 3040
@@ -3103,8 +3051,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3103 break; 3051 break;
3104 fully_acked = false; 3052 fully_acked = false;
3105 } else { 3053 } else {
3106 /* Speedup tcp_unlink_write_queue() and next loop */
3107 prefetchw(skb->next);
3108 acked_pcount = tcp_skb_pcount(skb); 3054 acked_pcount = tcp_skb_pcount(skb);
3109 } 3055 }
3110 3056
@@ -3119,7 +3065,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3119 first_ackt = last_ackt; 3065 first_ackt = last_ackt;
3120 3066
3121 last_in_flight = TCP_SKB_CB(skb)->tx.in_flight; 3067 last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
3122 reord = min(pkts_acked, reord); 3068 if (before(start_seq, reord))
3069 reord = start_seq;
3123 if (!after(scb->end_seq, tp->high_seq)) 3070 if (!after(scb->end_seq, tp->high_seq))
3124 flag |= FLAG_ORIG_SACK_ACKED; 3071 flag |= FLAG_ORIG_SACK_ACKED;
3125 } 3072 }
@@ -3156,12 +3103,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3156 if (!fully_acked) 3103 if (!fully_acked)
3157 break; 3104 break;
3158 3105
3159 tcp_unlink_write_queue(skb, sk); 3106 next = skb_rb_next(skb);
3160 sk_wmem_free_skb(sk, skb);
3161 if (unlikely(skb == tp->retransmit_skb_hint)) 3107 if (unlikely(skb == tp->retransmit_skb_hint))
3162 tp->retransmit_skb_hint = NULL; 3108 tp->retransmit_skb_hint = NULL;
3163 if (unlikely(skb == tp->lost_skb_hint)) 3109 if (unlikely(skb == tp->lost_skb_hint))
3164 tp->lost_skb_hint = NULL; 3110 tp->lost_skb_hint = NULL;
3111 tcp_rtx_queue_unlink_and_free(skb, sk);
3165 } 3112 }
3166 3113
3167 if (!skb) 3114 if (!skb)
@@ -3197,16 +3144,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3197 int delta; 3144 int delta;
3198 3145
3199 /* Non-retransmitted hole got filled? That's reordering */ 3146 /* Non-retransmitted hole got filled? That's reordering */
3200 if (reord < prior_fackets && reord <= tp->fackets_out) 3147 if (before(reord, prior_fack))
3201 tcp_update_reordering(sk, tp->fackets_out - reord, 0); 3148 tcp_check_sack_reordering(sk, reord, 0);
3202 3149
3203 delta = tcp_is_fack(tp) ? pkts_acked : 3150 delta = prior_sacked - tp->sacked_out;
3204 prior_sacked - tp->sacked_out;
3205 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); 3151 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3206 } 3152 }
3207
3208 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
3209
3210 } else if (skb && rtt_update && sack_rtt_us >= 0 && 3153 } else if (skb && rtt_update && sack_rtt_us >= 0 &&
3211 sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) { 3154 sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
3212 /* Do not re-arm RTO if the sack RTT is measured from data sent 3155 /* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3247,18 +3190,19 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3247 } 3190 }
3248 } 3191 }
3249#endif 3192#endif
3250 *acked = pkts_acked;
3251 return flag; 3193 return flag;
3252} 3194}
3253 3195
3254static void tcp_ack_probe(struct sock *sk) 3196static void tcp_ack_probe(struct sock *sk)
3255{ 3197{
3256 const struct tcp_sock *tp = tcp_sk(sk);
3257 struct inet_connection_sock *icsk = inet_csk(sk); 3198 struct inet_connection_sock *icsk = inet_csk(sk);
3199 struct sk_buff *head = tcp_send_head(sk);
3200 const struct tcp_sock *tp = tcp_sk(sk);
3258 3201
3259 /* Was it a usable window open? */ 3202 /* Was it a usable window open? */
3260 3203 if (!head)
3261 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) { 3204 return;
3205 if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
3262 icsk->icsk_backoff = 0; 3206 icsk->icsk_backoff = 0;
3263 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); 3207 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
3264 /* Socket must be waked up by subsequent tcp_data_snd_check(). 3208 /* Socket must be waked up by subsequent tcp_data_snd_check().
@@ -3378,7 +3322,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
3378 tp->pred_flags = 0; 3322 tp->pred_flags = 0;
3379 tcp_fast_path_check(sk); 3323 tcp_fast_path_check(sk);
3380 3324
3381 if (tcp_send_head(sk)) 3325 if (!tcp_write_queue_empty(sk))
3382 tcp_slow_start_after_idle_check(sk); 3326 tcp_slow_start_after_idle_check(sk);
3383 3327
3384 if (nwin > tp->max_window) { 3328 if (nwin > tp->max_window) {
@@ -3399,7 +3343,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
3399 if (*last_oow_ack_time) { 3343 if (*last_oow_ack_time) {
3400 s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); 3344 s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
3401 3345
3402 if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { 3346 if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
3403 NET_INC_STATS(net, mib_idx); 3347 NET_INC_STATS(net, mib_idx);
3404 return true; /* rate-limited: don't send yet! */ 3348 return true; /* rate-limited: don't send yet! */
3405 } 3349 }
@@ -3435,10 +3379,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3435 static u32 challenge_timestamp; 3379 static u32 challenge_timestamp;
3436 static unsigned int challenge_count; 3380 static unsigned int challenge_count;
3437 struct tcp_sock *tp = tcp_sk(sk); 3381 struct tcp_sock *tp = tcp_sk(sk);
3382 struct net *net = sock_net(sk);
3438 u32 count, now; 3383 u32 count, now;
3439 3384
3440 /* First check our per-socket dupack rate limit. */ 3385 /* First check our per-socket dupack rate limit. */
3441 if (__tcp_oow_rate_limited(sock_net(sk), 3386 if (__tcp_oow_rate_limited(net,
3442 LINUX_MIB_TCPACKSKIPPEDCHALLENGE, 3387 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
3443 &tp->last_oow_ack_time)) 3388 &tp->last_oow_ack_time))
3444 return; 3389 return;
@@ -3446,16 +3391,16 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3446 /* Then check host-wide RFC 5961 rate limit. */ 3391 /* Then check host-wide RFC 5961 rate limit. */
3447 now = jiffies / HZ; 3392 now = jiffies / HZ;
3448 if (now != challenge_timestamp) { 3393 if (now != challenge_timestamp) {
3449 u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1; 3394 u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
3395 u32 half = (ack_limit + 1) >> 1;
3450 3396
3451 challenge_timestamp = now; 3397 challenge_timestamp = now;
3452 WRITE_ONCE(challenge_count, half + 3398 WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
3453 prandom_u32_max(sysctl_tcp_challenge_ack_limit));
3454 } 3399 }
3455 count = READ_ONCE(challenge_count); 3400 count = READ_ONCE(challenge_count);
3456 if (count > 0) { 3401 if (count > 0) {
3457 WRITE_ONCE(challenge_count, count - 1); 3402 WRITE_ONCE(challenge_count, count - 1);
3458 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); 3403 NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
3459 tcp_send_ack(sk); 3404 tcp_send_ack(sk);
3460 } 3405 }
3461} 3406}
@@ -3553,18 +3498,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3553 u32 ack_seq = TCP_SKB_CB(skb)->seq; 3498 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3554 u32 ack = TCP_SKB_CB(skb)->ack_seq; 3499 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3555 bool is_dupack = false; 3500 bool is_dupack = false;
3556 u32 prior_fackets;
3557 int prior_packets = tp->packets_out; 3501 int prior_packets = tp->packets_out;
3558 u32 delivered = tp->delivered; 3502 u32 delivered = tp->delivered;
3559 u32 lost = tp->lost; 3503 u32 lost = tp->lost;
3560 int acked = 0; /* Number of packets newly acked */
3561 int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ 3504 int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
3505 u32 prior_fack;
3562 3506
3563 sack_state.first_sackt = 0; 3507 sack_state.first_sackt = 0;
3564 sack_state.rate = &rs; 3508 sack_state.rate = &rs;
3565 3509
3566 /* We very likely will need to access write queue head. */ 3510 /* We very likely will need to access rtx queue. */
3567 prefetchw(sk->sk_write_queue.next); 3511 prefetch(sk->tcp_rtx_queue.rb_node);
3568 3512
3569 /* If the ack is older than previous acks 3513 /* If the ack is older than previous acks
3570 * then we can probably ignore it. 3514 * then we can probably ignore it.
@@ -3590,7 +3534,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3590 icsk->icsk_retransmits = 0; 3534 icsk->icsk_retransmits = 0;
3591 } 3535 }
3592 3536
3593 prior_fackets = tp->fackets_out; 3537 prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
3594 rs.prior_in_flight = tcp_packets_in_flight(tp); 3538 rs.prior_in_flight = tcp_packets_in_flight(tp);
3595 3539
3596 /* ts_recent update must be made after we are sure that the packet 3540 /* ts_recent update must be made after we are sure that the packet
@@ -3646,8 +3590,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3646 goto no_queue; 3590 goto no_queue;
3647 3591
3648 /* See if we can take anything off of the retransmit queue. */ 3592 /* See if we can take anything off of the retransmit queue. */
3649 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, 3593 flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
3650 &sack_state); 3594
3595 tcp_rack_update_reo_wnd(sk, &rs);
3651 3596
3652 if (tp->tlp_high_seq) 3597 if (tp->tlp_high_seq)
3653 tcp_process_tlp_ack(sk, ack, flag); 3598 tcp_process_tlp_ack(sk, ack, flag);
@@ -3657,7 +3602,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3657 3602
3658 if (tcp_ack_is_dubious(sk, flag)) { 3603 if (tcp_ack_is_dubious(sk, flag)) {
3659 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3604 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3660 tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); 3605 tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
3606 &rexmit);
3661 } 3607 }
3662 3608
3663 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) 3609 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
@@ -3673,13 +3619,13 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3673no_queue: 3619no_queue:
3674 /* If data was DSACKed, see if we can undo a cwnd reduction. */ 3620 /* If data was DSACKed, see if we can undo a cwnd reduction. */
3675 if (flag & FLAG_DSACKING_ACK) 3621 if (flag & FLAG_DSACKING_ACK)
3676 tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); 3622 tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
3623 &rexmit);
3677 /* If this ack opens up a zero window, clear backoff. It was 3624 /* If this ack opens up a zero window, clear backoff. It was
3678 * being used to time the probes, and is probably far higher than 3625 * being used to time the probes, and is probably far higher than
3679 * it needs to be for normal retransmission. 3626 * it needs to be for normal retransmission.
3680 */ 3627 */
3681 if (tcp_send_head(sk)) 3628 tcp_ack_probe(sk);
3682 tcp_ack_probe(sk);
3683 3629
3684 if (tp->tlp_high_seq) 3630 if (tp->tlp_high_seq)
3685 tcp_process_tlp_ack(sk, ack, flag); 3631 tcp_process_tlp_ack(sk, ack, flag);
@@ -3696,7 +3642,8 @@ old_ack:
3696 if (TCP_SKB_CB(skb)->sacked) { 3642 if (TCP_SKB_CB(skb)->sacked) {
3697 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, 3643 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3698 &sack_state); 3644 &sack_state);
3699 tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); 3645 tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
3646 &rexmit);
3700 tcp_xmit_recovery(sk, rexmit); 3647 tcp_xmit_recovery(sk, rexmit);
3701 } 3648 }
3702 3649
@@ -3721,6 +3668,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
3721 foc->exp = exp_opt; 3668 foc->exp = exp_opt;
3722} 3669}
3723 3670
3671static void smc_parse_options(const struct tcphdr *th,
3672 struct tcp_options_received *opt_rx,
3673 const unsigned char *ptr,
3674 int opsize)
3675{
3676#if IS_ENABLED(CONFIG_SMC)
3677 if (static_branch_unlikely(&tcp_have_smc)) {
3678 if (th->syn && !(opsize & 1) &&
3679 opsize >= TCPOLEN_EXP_SMC_BASE &&
3680 get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
3681 opt_rx->smc_ok = 1;
3682 }
3683#endif
3684}
3685
3724/* Look for tcp options. Normally only called on SYN and SYNACK packets. 3686/* Look for tcp options. Normally only called on SYN and SYNACK packets.
3725 * But, this can also be called on packets in the established flow when 3687 * But, this can also be called on packets in the established flow when
3726 * the fast version below fails. 3688 * the fast version below fails.
@@ -3828,6 +3790,9 @@ void tcp_parse_options(const struct net *net,
3828 tcp_parse_fastopen_option(opsize - 3790 tcp_parse_fastopen_option(opsize -
3829 TCPOLEN_EXP_FASTOPEN_BASE, 3791 TCPOLEN_EXP_FASTOPEN_BASE,
3830 ptr + 2, th->syn, foc, true); 3792 ptr + 2, th->syn, foc, true);
3793 else
3794 smc_parse_options(th, opt_rx, ptr,
3795 opsize);
3831 break; 3796 break;
3832 3797
3833 } 3798 }
@@ -3995,6 +3960,8 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
3995/* When we get a reset we do this. */ 3960/* When we get a reset we do this. */
3996void tcp_reset(struct sock *sk) 3961void tcp_reset(struct sock *sk)
3997{ 3962{
3963 trace_tcp_receive_reset(sk);
3964
3998 /* We want the right error as BSD sees it (and indeed as we do). */ 3965 /* We want the right error as BSD sees it (and indeed as we do). */
3999 switch (sk->sk_state) { 3966 switch (sk->sk_state) {
4000 case TCP_SYN_SENT: 3967 case TCP_SYN_SENT:
@@ -4117,7 +4084,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4117{ 4084{
4118 struct tcp_sock *tp = tcp_sk(sk); 4085 struct tcp_sock *tp = tcp_sk(sk);
4119 4086
4120 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 4087 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4121 int mib_idx; 4088 int mib_idx;
4122 4089
4123 if (before(seq, tp->rcv_nxt)) 4090 if (before(seq, tp->rcv_nxt))
@@ -4152,7 +4119,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4152 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); 4119 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4153 tcp_enter_quickack_mode(sk); 4120 tcp_enter_quickack_mode(sk);
4154 4121
4155 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 4122 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4156 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 4123 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4157 4124
4158 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) 4125 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -4268,11 +4235,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
4268 tp->rx_opt.num_sacks = num_sacks; 4235 tp->rx_opt.num_sacks = num_sacks;
4269} 4236}
4270 4237
4271enum tcp_queue {
4272 OOO_QUEUE,
4273 RCV_QUEUE,
4274};
4275
4276/** 4238/**
4277 * tcp_try_coalesce - try to merge skb to prior one 4239 * tcp_try_coalesce - try to merge skb to prior one
4278 * @sk: socket 4240 * @sk: socket
@@ -4288,7 +4250,6 @@ enum tcp_queue {
4288 * Returns true if caller should free @from instead of queueing it 4250 * Returns true if caller should free @from instead of queueing it
4289 */ 4251 */
4290static bool tcp_try_coalesce(struct sock *sk, 4252static bool tcp_try_coalesce(struct sock *sk,
4291 enum tcp_queue dest,
4292 struct sk_buff *to, 4253 struct sk_buff *to,
4293 struct sk_buff *from, 4254 struct sk_buff *from,
4294 bool *fragstolen) 4255 bool *fragstolen)
@@ -4313,10 +4274,7 @@ static bool tcp_try_coalesce(struct sock *sk,
4313 4274
4314 if (TCP_SKB_CB(from)->has_rxtstamp) { 4275 if (TCP_SKB_CB(from)->has_rxtstamp) {
4315 TCP_SKB_CB(to)->has_rxtstamp = true; 4276 TCP_SKB_CB(to)->has_rxtstamp = true;
4316 if (dest == OOO_QUEUE) 4277 to->tstamp = from->tstamp;
4317 TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
4318 else
4319 to->tstamp = from->tstamp;
4320 } 4278 }
4321 4279
4322 return true; 4280 return true;
@@ -4341,7 +4299,7 @@ static void tcp_ofo_queue(struct sock *sk)
4341 4299
4342 p = rb_first(&tp->out_of_order_queue); 4300 p = rb_first(&tp->out_of_order_queue);
4343 while (p) { 4301 while (p) {
4344 skb = rb_entry(p, struct sk_buff, rbnode); 4302 skb = rb_to_skb(p);
4345 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) 4303 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
4346 break; 4304 break;
4347 4305
@@ -4353,9 +4311,6 @@ static void tcp_ofo_queue(struct sock *sk)
4353 } 4311 }
4354 p = rb_next(p); 4312 p = rb_next(p);
4355 rb_erase(&skb->rbnode, &tp->out_of_order_queue); 4313 rb_erase(&skb->rbnode, &tp->out_of_order_queue);
4356 /* Replace tstamp which was stomped by rbnode */
4357 if (TCP_SKB_CB(skb)->has_rxtstamp)
4358 skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
4359 4314
4360 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { 4315 if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
4361 SOCK_DEBUG(sk, "ofo packet was already received\n"); 4316 SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4367,8 +4322,7 @@ static void tcp_ofo_queue(struct sock *sk)
4367 TCP_SKB_CB(skb)->end_seq); 4322 TCP_SKB_CB(skb)->end_seq);
4368 4323
4369 tail = skb_peek_tail(&sk->sk_receive_queue); 4324 tail = skb_peek_tail(&sk->sk_receive_queue);
4370 eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE, 4325 eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
4371 tail, skb, &fragstolen);
4372 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); 4326 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
4373 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; 4327 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
4374 if (!eaten) 4328 if (!eaten)
@@ -4409,7 +4363,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4409static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) 4363static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4410{ 4364{
4411 struct tcp_sock *tp = tcp_sk(sk); 4365 struct tcp_sock *tp = tcp_sk(sk);
4412 struct rb_node **p, *q, *parent; 4366 struct rb_node **p, *parent;
4413 struct sk_buff *skb1; 4367 struct sk_buff *skb1;
4414 u32 seq, end_seq; 4368 u32 seq, end_seq;
4415 bool fragstolen; 4369 bool fragstolen;
@@ -4422,10 +4376,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4422 return; 4376 return;
4423 } 4377 }
4424 4378
4425 /* Stash tstamp to avoid being stomped on by rbnode */
4426 if (TCP_SKB_CB(skb)->has_rxtstamp)
4427 TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
4428
4429 /* Disable header prediction. */ 4379 /* Disable header prediction. */
4430 tp->pred_flags = 0; 4380 tp->pred_flags = 0;
4431 inet_csk_schedule_ack(sk); 4381 inet_csk_schedule_ack(sk);
@@ -4453,7 +4403,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4453 /* In the typical case, we are adding an skb to the end of the list. 4403 /* In the typical case, we are adding an skb to the end of the list.
4454 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. 4404 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
4455 */ 4405 */
4456 if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb, 4406 if (tcp_try_coalesce(sk, tp->ooo_last_skb,
4457 skb, &fragstolen)) { 4407 skb, &fragstolen)) {
4458coalesce_done: 4408coalesce_done:
4459 tcp_grow_window(sk, skb); 4409 tcp_grow_window(sk, skb);
@@ -4472,7 +4422,7 @@ coalesce_done:
4472 parent = NULL; 4422 parent = NULL;
4473 while (*p) { 4423 while (*p) {
4474 parent = *p; 4424 parent = *p;
4475 skb1 = rb_entry(parent, struct sk_buff, rbnode); 4425 skb1 = rb_to_skb(parent);
4476 if (before(seq, TCP_SKB_CB(skb1)->seq)) { 4426 if (before(seq, TCP_SKB_CB(skb1)->seq)) {
4477 p = &parent->rb_left; 4427 p = &parent->rb_left;
4478 continue; 4428 continue;
@@ -4504,7 +4454,7 @@ coalesce_done:
4504 __kfree_skb(skb1); 4454 __kfree_skb(skb1);
4505 goto merge_right; 4455 goto merge_right;
4506 } 4456 }
4507 } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1, 4457 } else if (tcp_try_coalesce(sk, skb1,
4508 skb, &fragstolen)) { 4458 skb, &fragstolen)) {
4509 goto coalesce_done; 4459 goto coalesce_done;
4510 } 4460 }
@@ -4517,9 +4467,7 @@ insert:
4517 4467
4518merge_right: 4468merge_right:
4519 /* Remove other segments covered by skb. */ 4469 /* Remove other segments covered by skb. */
4520 while ((q = rb_next(&skb->rbnode)) != NULL) { 4470 while ((skb1 = skb_rb_next(skb)) != NULL) {
4521 skb1 = rb_entry(q, struct sk_buff, rbnode);
4522
4523 if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) 4471 if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
4524 break; 4472 break;
4525 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { 4473 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
@@ -4534,7 +4482,7 @@ merge_right:
4534 tcp_drop(sk, skb1); 4482 tcp_drop(sk, skb1);
4535 } 4483 }
4536 /* If there is no skb after us, we are the last_skb ! */ 4484 /* If there is no skb after us, we are the last_skb ! */
4537 if (!q) 4485 if (!skb1)
4538 tp->ooo_last_skb = skb; 4486 tp->ooo_last_skb = skb;
4539 4487
4540add_sack: 4488add_sack:
@@ -4556,7 +4504,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
4556 4504
4557 __skb_pull(skb, hdrlen); 4505 __skb_pull(skb, hdrlen);
4558 eaten = (tail && 4506 eaten = (tail &&
4559 tcp_try_coalesce(sk, RCV_QUEUE, tail, 4507 tcp_try_coalesce(sk, tail,
4560 skb, fragstolen)) ? 1 : 0; 4508 skb, fragstolen)) ? 1 : 0;
4561 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); 4509 tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
4562 if (!eaten) { 4510 if (!eaten) {
@@ -4720,7 +4668,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *li
4720 if (list) 4668 if (list)
4721 return !skb_queue_is_last(list, skb) ? skb->next : NULL; 4669 return !skb_queue_is_last(list, skb) ? skb->next : NULL;
4722 4670
4723 return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode); 4671 return skb_rb_next(skb);
4724} 4672}
4725 4673
4726static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, 4674static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -4741,7 +4689,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4741} 4689}
4742 4690
4743/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */ 4691/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
4744static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) 4692void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
4745{ 4693{
4746 struct rb_node **p = &root->rb_node; 4694 struct rb_node **p = &root->rb_node;
4747 struct rb_node *parent = NULL; 4695 struct rb_node *parent = NULL;
@@ -4749,7 +4697,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
4749 4697
4750 while (*p) { 4698 while (*p) {
4751 parent = *p; 4699 parent = *p;
4752 skb1 = rb_entry(parent, struct sk_buff, rbnode); 4700 skb1 = rb_to_skb(parent);
4753 if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) 4701 if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
4754 p = &parent->rb_left; 4702 p = &parent->rb_left;
4755 else 4703 else
@@ -4796,7 +4744,7 @@ restart:
4796 * overlaps to the next one. 4744 * overlaps to the next one.
4797 */ 4745 */
4798 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) && 4746 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
4799 (tcp_win_from_space(skb->truesize) > skb->len || 4747 (tcp_win_from_space(sk, skb->truesize) > skb->len ||
4800 before(TCP_SKB_CB(skb)->seq, start))) { 4748 before(TCP_SKB_CB(skb)->seq, start))) {
4801 end_of_skbs = false; 4749 end_of_skbs = false;
4802 break; 4750 break;
@@ -4868,26 +4816,19 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
4868{ 4816{
4869 struct tcp_sock *tp = tcp_sk(sk); 4817 struct tcp_sock *tp = tcp_sk(sk);
4870 struct sk_buff *skb, *head; 4818 struct sk_buff *skb, *head;
4871 struct rb_node *p;
4872 u32 start, end; 4819 u32 start, end;
4873 4820
4874 p = rb_first(&tp->out_of_order_queue); 4821 skb = skb_rb_first(&tp->out_of_order_queue);
4875 skb = rb_entry_safe(p, struct sk_buff, rbnode);
4876new_range: 4822new_range:
4877 if (!skb) { 4823 if (!skb) {
4878 p = rb_last(&tp->out_of_order_queue); 4824 tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
4879 /* Note: This is possible p is NULL here. We do not
4880 * use rb_entry_safe(), as ooo_last_skb is valid only
4881 * if rbtree is not empty.
4882 */
4883 tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
4884 return; 4825 return;
4885 } 4826 }
4886 start = TCP_SKB_CB(skb)->seq; 4827 start = TCP_SKB_CB(skb)->seq;
4887 end = TCP_SKB_CB(skb)->end_seq; 4828 end = TCP_SKB_CB(skb)->end_seq;
4888 4829
4889 for (head = skb;;) { 4830 for (head = skb;;) {
4890 skb = tcp_skb_next(skb, NULL); 4831 skb = skb_rb_next(skb);
4891 4832
4892 /* Range is terminated when we see a gap or when 4833 /* Range is terminated when we see a gap or when
4893 * we are at the queue end. 4834 * we are at the queue end.
@@ -4930,14 +4871,14 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
4930 do { 4871 do {
4931 prev = rb_prev(node); 4872 prev = rb_prev(node);
4932 rb_erase(node, &tp->out_of_order_queue); 4873 rb_erase(node, &tp->out_of_order_queue);
4933 tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode)); 4874 tcp_drop(sk, rb_to_skb(node));
4934 sk_mem_reclaim(sk); 4875 sk_mem_reclaim(sk);
4935 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && 4876 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
4936 !tcp_under_memory_pressure(sk)) 4877 !tcp_under_memory_pressure(sk))
4937 break; 4878 break;
4938 node = prev; 4879 node = prev;
4939 } while (node); 4880 } while (node);
4940 tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode); 4881 tp->ooo_last_skb = rb_to_skb(prev);
4941 4882
4942 /* Reset SACK state. A conforming SACK implementation will 4883 /* Reset SACK state. A conforming SACK implementation will
4943 * do the same at a timeout based retransmit. When a connection 4884 * do the same at a timeout based retransmit. When a connection
@@ -5112,7 +5053,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5112 struct tcp_sock *tp = tcp_sk(sk); 5053 struct tcp_sock *tp = tcp_sk(sk);
5113 u32 ptr = ntohs(th->urg_ptr); 5054 u32 ptr = ntohs(th->urg_ptr);
5114 5055
5115 if (ptr && !sysctl_tcp_stdurg) 5056 if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
5116 ptr--; 5057 ptr--;
5117 ptr += ntohl(th->seq); 5058 ptr += ntohl(th->seq);
5118 5059
@@ -5532,20 +5473,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5532 security_inet_conn_established(sk, skb); 5473 security_inet_conn_established(sk, skb);
5533 } 5474 }
5534 5475
5535 /* Make sure socket is routed, for correct metrics. */ 5476 tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
5536 icsk->icsk_af_ops->rebuild_header(sk);
5537
5538 tcp_init_metrics(sk);
5539 tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
5540 tcp_init_congestion_control(sk);
5541 5477
5542 /* Prevent spurious tcp_cwnd_restart() on first data 5478 /* Prevent spurious tcp_cwnd_restart() on first data
5543 * packet. 5479 * packet.
5544 */ 5480 */
5545 tp->lsndtime = tcp_jiffies32; 5481 tp->lsndtime = tcp_jiffies32;
5546 5482
5547 tcp_init_buffer_space(sk);
5548
5549 if (sock_flag(sk, SOCK_KEEPOPEN)) 5483 if (sock_flag(sk, SOCK_KEEPOPEN))
5550 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); 5484 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5551 5485
@@ -5559,7 +5493,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5559 struct tcp_fastopen_cookie *cookie) 5493 struct tcp_fastopen_cookie *cookie)
5560{ 5494{
5561 struct tcp_sock *tp = tcp_sk(sk); 5495 struct tcp_sock *tp = tcp_sk(sk);
5562 struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL; 5496 struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
5563 u16 mss = tp->rx_opt.mss_clamp, try_exp = 0; 5497 u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
5564 bool syn_drop = false; 5498 bool syn_drop = false;
5565 5499
@@ -5594,9 +5528,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5594 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp); 5528 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
5595 5529
5596 if (data) { /* Retransmit unacked data in SYN */ 5530 if (data) { /* Retransmit unacked data in SYN */
5597 tcp_for_write_queue_from(data, sk) { 5531 skb_rbtree_walk_from(data) {
5598 if (data == tcp_send_head(sk) || 5532 if (__tcp_retransmit_skb(sk, data, 1))
5599 __tcp_retransmit_skb(sk, data, 1))
5600 break; 5533 break;
5601 } 5534 }
5602 tcp_rearm_rto(sk); 5535 tcp_rearm_rto(sk);
@@ -5614,6 +5547,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5614 return false; 5547 return false;
5615} 5548}
5616 5549
5550static void smc_check_reset_syn(struct tcp_sock *tp)
5551{
5552#if IS_ENABLED(CONFIG_SMC)
5553 if (static_branch_unlikely(&tcp_have_smc)) {
5554 if (tp->syn_smc && !tp->rx_opt.smc_ok)
5555 tp->syn_smc = 0;
5556 }
5557#endif
5558}
5559
5617static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5560static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5618 const struct tcphdr *th) 5561 const struct tcphdr *th)
5619{ 5562{
@@ -5709,10 +5652,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5709 tp->tcp_header_len = sizeof(struct tcphdr); 5652 tp->tcp_header_len = sizeof(struct tcphdr);
5710 } 5653 }
5711 5654
5712 if (tcp_is_sack(tp) && sysctl_tcp_fack)
5713 tcp_enable_fack(tp);
5714
5715 tcp_mtup_init(sk);
5716 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 5655 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5717 tcp_initialize_rcv_mss(sk); 5656 tcp_initialize_rcv_mss(sk);
5718 5657
@@ -5721,6 +5660,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5721 * is initialized. */ 5660 * is initialized. */
5722 tp->copied_seq = tp->rcv_nxt; 5661 tp->copied_seq = tp->rcv_nxt;
5723 5662
5663 smc_check_reset_syn(tp);
5664
5724 smp_mb(); 5665 smp_mb();
5725 5666
5726 tcp_finish_connect(sk, skb); 5667 tcp_finish_connect(sk, skb);
@@ -5938,15 +5879,18 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
5938 if (req) { 5879 if (req) {
5939 inet_csk(sk)->icsk_retransmits = 0; 5880 inet_csk(sk)->icsk_retransmits = 0;
5940 reqsk_fastopen_remove(sk, req, false); 5881 reqsk_fastopen_remove(sk, req, false);
5882 /* Re-arm the timer because data may have been sent out.
5883 * This is similar to the regular data transmission case
5884 * when new data has just been ack'ed.
5885 *
5886 * (TFO) - we could try to be more aggressive and
5887 * retransmitting any data sooner based on when they
5888 * are sent out.
5889 */
5890 tcp_rearm_rto(sk);
5941 } else { 5891 } else {
5942 /* Make sure socket is routed, for correct metrics. */ 5892 tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
5943 icsk->icsk_af_ops->rebuild_header(sk);
5944 tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
5945 tcp_init_congestion_control(sk);
5946
5947 tcp_mtup_init(sk);
5948 tp->copied_seq = tp->rcv_nxt; 5893 tp->copied_seq = tp->rcv_nxt;
5949 tcp_init_buffer_space(sk);
5950 } 5894 }
5951 smp_mb(); 5895 smp_mb();
5952 tcp_set_state(sk, TCP_ESTABLISHED); 5896 tcp_set_state(sk, TCP_ESTABLISHED);
@@ -5966,19 +5910,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
5966 if (tp->rx_opt.tstamp_ok) 5910 if (tp->rx_opt.tstamp_ok)
5967 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; 5911 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5968 5912
5969 if (req) {
5970 /* Re-arm the timer because data may have been sent out.
5971 * This is similar to the regular data transmission case
5972 * when new data has just been ack'ed.
5973 *
5974 * (TFO) - we could try to be more aggressive and
5975 * retransmitting any data sooner based on when they
5976 * are sent out.
5977 */
5978 tcp_rearm_rto(sk);
5979 } else
5980 tcp_init_metrics(sk);
5981
5982 if (!inet_csk(sk)->icsk_ca_ops->cong_control) 5913 if (!inet_csk(sk)->icsk_ca_ops->cong_control)
5983 tcp_update_pacing_rate(sk); 5914 tcp_update_pacing_rate(sk);
5984 5915
@@ -6075,6 +6006,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
6075 case TCP_LAST_ACK: 6006 case TCP_LAST_ACK:
6076 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) 6007 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
6077 break; 6008 break;
6009 /* fall through */
6078 case TCP_FIN_WAIT1: 6010 case TCP_FIN_WAIT1:
6079 case TCP_FIN_WAIT2: 6011 case TCP_FIN_WAIT2:
6080 /* RFC 793 says to queue data in these states, 6012 /* RFC 793 says to queue data in these states,
@@ -6183,6 +6115,9 @@ static void tcp_openreq_init(struct request_sock *req,
6183 ireq->ir_rmt_port = tcp_hdr(skb)->source; 6115 ireq->ir_rmt_port = tcp_hdr(skb)->source;
6184 ireq->ir_num = ntohs(tcp_hdr(skb)->dest); 6116 ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
6185 ireq->ir_mark = inet_request_mark(sk, skb); 6117 ireq->ir_mark = inet_request_mark(sk, skb);
6118#if IS_ENABLED(CONFIG_SMC)
6119 ireq->smc_ok = rx_opt->smc_ok;
6120#endif
6186} 6121}
6187 6122
6188struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, 6123struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
@@ -6195,7 +6130,6 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
6195 if (req) { 6130 if (req) {
6196 struct inet_request_sock *ireq = inet_rsk(req); 6131 struct inet_request_sock *ireq = inet_rsk(req);
6197 6132
6198 kmemcheck_annotate_bitfield(ireq, flags);
6199 ireq->ireq_opt = NULL; 6133 ireq->ireq_opt = NULL;
6200#if IS_ENABLED(CONFIG_IPV6) 6134#if IS_ENABLED(CONFIG_IPV6)
6201 ireq->pktopts = NULL; 6135 ireq->pktopts = NULL;
@@ -6358,7 +6292,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6358 tcp_openreq_init_rwin(req, sk, dst); 6292 tcp_openreq_init_rwin(req, sk, dst);
6359 if (!want_cookie) { 6293 if (!want_cookie) {
6360 tcp_reqsk_record_syn(sk, req, skb); 6294 tcp_reqsk_record_syn(sk, req, skb);
6361 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc); 6295 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
6362 } 6296 }
6363 if (fastopen_sk) { 6297 if (fastopen_sk) {
6364 af_ops->send_synack(fastopen_sk, dst, &fl, req, 6298 af_ops->send_synack(fastopen_sk, dst, &fl, req,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5b027c69cbc5..c6bc0c4d19c6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -85,6 +85,8 @@
85#include <crypto/hash.h> 85#include <crypto/hash.h>
86#include <linux/scatterlist.h> 86#include <linux/scatterlist.h>
87 87
88#include <trace/events/tcp.h>
89
88#ifdef CONFIG_TCP_MD5SIG 90#ifdef CONFIG_TCP_MD5SIG
89static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 91static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
90 __be32 daddr, __be32 saddr, const struct tcphdr *th); 92 __be32 daddr, __be32 saddr, const struct tcphdr *th);
@@ -480,7 +482,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
480 TCP_TIMEOUT_INIT; 482 TCP_TIMEOUT_INIT;
481 icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); 483 icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
482 484
483 skb = tcp_write_queue_head(sk); 485 skb = tcp_rtx_queue_head(sk);
484 BUG_ON(!skb); 486 BUG_ON(!skb);
485 487
486 tcp_mstamp_refresh(tp); 488 tcp_mstamp_refresh(tp);
@@ -701,8 +703,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
701 * routing might fail in this case. No choice here, if we choose to force 703 * routing might fail in this case. No choice here, if we choose to force
702 * input interface, we will misroute in case of asymmetric route. 704 * input interface, we will misroute in case of asymmetric route.
703 */ 705 */
704 if (sk) 706 if (sk) {
705 arg.bound_dev_if = sk->sk_bound_dev_if; 707 arg.bound_dev_if = sk->sk_bound_dev_if;
708 trace_tcp_send_reset(sk, skb);
709 }
706 710
707 BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != 711 BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
708 offsetof(struct inet_timewait_sock, tw_bound_dev_if)); 712 offsetof(struct inet_timewait_sock, tw_bound_dev_if));
@@ -1783,8 +1787,9 @@ do_time_wait:
1783 refcounted = false; 1787 refcounted = false;
1784 goto process; 1788 goto process;
1785 } 1789 }
1786 /* Fall through to ACK */
1787 } 1790 }
1791 /* to ACK */
1792 /* fall through */
1788 case TCP_TW_ACK: 1793 case TCP_TW_ACK:
1789 tcp_v4_timewait_ack(sk, skb); 1794 tcp_v4_timewait_ack(sk, skb);
1790 break; 1795 break;
@@ -1864,6 +1869,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
1864{ 1869{
1865 struct tcp_sock *tp = tcp_sk(sk); 1870 struct tcp_sock *tp = tcp_sk(sk);
1866 1871
1872 trace_tcp_destroy_sock(sk);
1873
1867 tcp_clear_xmit_timers(sk); 1874 tcp_clear_xmit_timers(sk);
1868 1875
1869 tcp_cleanup_congestion_control(sk); 1876 tcp_cleanup_congestion_control(sk);
@@ -1896,6 +1903,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
1896 1903
1897 /* If socket is aborted during connect operation */ 1904 /* If socket is aborted during connect operation */
1898 tcp_free_fastopen_req(tp); 1905 tcp_free_fastopen_req(tp);
1906 tcp_fastopen_destroy_cipher(sk);
1899 tcp_saved_syn_free(tp); 1907 tcp_saved_syn_free(tp);
1900 1908
1901 sk_sockets_allocated_dec(sk); 1909 sk_sockets_allocated_dec(sk);
@@ -2401,8 +2409,8 @@ struct proto tcp_prot = {
2401 .memory_allocated = &tcp_memory_allocated, 2409 .memory_allocated = &tcp_memory_allocated,
2402 .memory_pressure = &tcp_memory_pressure, 2410 .memory_pressure = &tcp_memory_pressure,
2403 .sysctl_mem = sysctl_tcp_mem, 2411 .sysctl_mem = sysctl_tcp_mem,
2404 .sysctl_wmem = sysctl_tcp_wmem, 2412 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2405 .sysctl_rmem = sysctl_tcp_rmem, 2413 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2406 .max_header = MAX_TCP_HEADER, 2414 .max_header = MAX_TCP_HEADER,
2407 .obj_size = sizeof(struct tcp_sock), 2415 .obj_size = sizeof(struct tcp_sock),
2408 .slab_flags = SLAB_TYPESAFE_BY_RCU, 2416 .slab_flags = SLAB_TYPESAFE_BY_RCU,
@@ -2422,6 +2430,8 @@ static void __net_exit tcp_sk_exit(struct net *net)
2422{ 2430{
2423 int cpu; 2431 int cpu;
2424 2432
2433 module_put(net->ipv4.tcp_congestion_control->owner);
2434
2425 for_each_possible_cpu(cpu) 2435 for_each_possible_cpu(cpu)
2426 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); 2436 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2427 free_percpu(net->ipv4.tcp_sk); 2437 free_percpu(net->ipv4.tcp_sk);
@@ -2476,6 +2486,50 @@ static int __net_init tcp_sk_init(struct net *net)
2476 net->ipv4.sysctl_tcp_sack = 1; 2486 net->ipv4.sysctl_tcp_sack = 1;
2477 net->ipv4.sysctl_tcp_window_scaling = 1; 2487 net->ipv4.sysctl_tcp_window_scaling = 1;
2478 net->ipv4.sysctl_tcp_timestamps = 1; 2488 net->ipv4.sysctl_tcp_timestamps = 1;
2489 net->ipv4.sysctl_tcp_early_retrans = 3;
2490 net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
2491 net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */
2492 net->ipv4.sysctl_tcp_retrans_collapse = 1;
2493 net->ipv4.sysctl_tcp_max_reordering = 300;
2494 net->ipv4.sysctl_tcp_dsack = 1;
2495 net->ipv4.sysctl_tcp_app_win = 31;
2496 net->ipv4.sysctl_tcp_adv_win_scale = 1;
2497 net->ipv4.sysctl_tcp_frto = 2;
2498 net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
2499 /* This limits the percentage of the congestion window which we
2500 * will allow a single TSO frame to consume. Building TSO frames
2501 * which are too large can cause TCP streams to be bursty.
2502 */
2503 net->ipv4.sysctl_tcp_tso_win_divisor = 3;
2504 /* Default TSQ limit of four TSO segments */
2505 net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
2506 /* rfc5961 challenge ack rate limiting */
2507 net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
2508 net->ipv4.sysctl_tcp_min_tso_segs = 2;
2509 net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
2510 net->ipv4.sysctl_tcp_autocorking = 1;
2511 net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
2512 net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
2513 net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
2514 if (net != &init_net) {
2515 memcpy(net->ipv4.sysctl_tcp_rmem,
2516 init_net.ipv4.sysctl_tcp_rmem,
2517 sizeof(init_net.ipv4.sysctl_tcp_rmem));
2518 memcpy(net->ipv4.sysctl_tcp_wmem,
2519 init_net.ipv4.sysctl_tcp_wmem,
2520 sizeof(init_net.ipv4.sysctl_tcp_wmem));
2521 }
2522 net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
2523 spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
2524 net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
2525 atomic_set(&net->ipv4.tfo_active_disable_times, 0);
2526
2527 /* Reno is always built in */
2528 if (!net_eq(net, &init_net) &&
2529 try_module_get(init_net.ipv4.tcp_congestion_control->owner))
2530 net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
2531 else
2532 net->ipv4.tcp_congestion_control = &tcp_reno;
2479 2533
2480 return 0; 2534 return 0;
2481fail: 2535fail:
@@ -2486,7 +2540,12 @@ fail:
2486 2540
2487static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 2541static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2488{ 2542{
2543 struct net *net;
2544
2489 inet_twsk_purge(&tcp_hashinfo, AF_INET); 2545 inet_twsk_purge(&tcp_hashinfo, AF_INET);
2546
2547 list_for_each_entry(net, net_exit_list, exit_list)
2548 tcp_fastopen_ctx_destroy(net);
2490} 2549}
2491 2550
2492static struct pernet_operations __net_initdata tcp_sk_ops = { 2551static struct pernet_operations __net_initdata tcp_sk_ops = {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0f0d740f6c8b..7097f92d16e5 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -21,8 +21,6 @@
21#include <net/tcp.h> 21#include <net/tcp.h>
22#include <net/genetlink.h> 22#include <net/genetlink.h>
23 23
24int sysctl_tcp_nometrics_save __read_mostly;
25
26static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr, 24static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
27 const struct inetpeer_addr *daddr, 25 const struct inetpeer_addr *daddr,
28 struct net *net, unsigned int hash); 26 struct net *net, unsigned int hash);
@@ -331,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
331 int m; 329 int m;
332 330
333 sk_dst_confirm(sk); 331 sk_dst_confirm(sk);
334 if (sysctl_tcp_nometrics_save || !dst) 332 if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
335 return; 333 return;
336 334
337 rcu_read_lock(); 335 rcu_read_lock();
@@ -472,10 +470,8 @@ void tcp_init_metrics(struct sock *sk)
472 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 470 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
473 } 471 }
474 val = tcp_metric_get(tm, TCP_METRIC_REORDERING); 472 val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
475 if (val && tp->reordering != val) { 473 if (val && tp->reordering != val)
476 tcp_disable_fack(tp);
477 tp->reordering = val; 474 tp->reordering = val;
478 }
479 475
480 crtt = tcp_metric_get(tm, TCP_METRIC_RTT); 476 crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
481 rcu_read_unlock(); 477 rcu_read_unlock();
@@ -893,10 +889,14 @@ static void tcp_metrics_flush_all(struct net *net)
893 889
894 for (row = 0; row < max_rows; row++, hb++) { 890 for (row = 0; row < max_rows; row++, hb++) {
895 struct tcp_metrics_block __rcu **pp; 891 struct tcp_metrics_block __rcu **pp;
892 bool match;
893
896 spin_lock_bh(&tcp_metrics_lock); 894 spin_lock_bh(&tcp_metrics_lock);
897 pp = &hb->chain; 895 pp = &hb->chain;
898 for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { 896 for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
899 if (net_eq(tm_net(tm), net)) { 897 match = net ? net_eq(tm_net(tm), net) :
898 !atomic_read(&tm_net(tm)->count);
899 if (match) {
900 *pp = tm->tcpm_next; 900 *pp = tm->tcpm_next;
901 kfree_rcu(tm, rcu_head); 901 kfree_rcu(tm, rcu_head);
902 } else { 902 } else {
@@ -1019,14 +1019,14 @@ static int __net_init tcp_net_metrics_init(struct net *net)
1019 return 0; 1019 return 0;
1020} 1020}
1021 1021
1022static void __net_exit tcp_net_metrics_exit(struct net *net) 1022static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list)
1023{ 1023{
1024 tcp_metrics_flush_all(net); 1024 tcp_metrics_flush_all(NULL);
1025} 1025}
1026 1026
1027static __net_initdata struct pernet_operations tcp_net_metrics_ops = { 1027static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
1028 .init = tcp_net_metrics_init, 1028 .init = tcp_net_metrics_init,
1029 .exit = tcp_net_metrics_exit, 1029 .exit_batch = tcp_net_metrics_exit_batch,
1030}; 1030};
1031 1031
1032void __init tcp_metrics_init(void) 1032void __init tcp_metrics_init(void)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 188a6f31356d..e36eff0403f4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -23,13 +23,12 @@
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/sysctl.h> 24#include <linux/sysctl.h>
25#include <linux/workqueue.h> 25#include <linux/workqueue.h>
26#include <linux/static_key.h>
26#include <net/tcp.h> 27#include <net/tcp.h>
27#include <net/inet_common.h> 28#include <net/inet_common.h>
28#include <net/xfrm.h> 29#include <net/xfrm.h>
29#include <net/busy_poll.h> 30#include <net/busy_poll.h>
30 31
31int sysctl_tcp_abort_on_overflow __read_mostly;
32
33static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) 32static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
34{ 33{
35 if (seq == s_win) 34 if (seq == s_win)
@@ -180,7 +179,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
180 * Oh well... nobody has a sufficient solution to this 179 * Oh well... nobody has a sufficient solution to this
181 * protocol bug yet. 180 * protocol bug yet.
182 */ 181 */
183 if (sysctl_tcp_rfc1337 == 0) { 182 if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
184kill: 183kill:
185 inet_twsk_deschedule_put(tw); 184 inet_twsk_deschedule_put(tw);
186 return TCP_TW_SUCCESS; 185 return TCP_TW_SUCCESS;
@@ -298,8 +297,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
298 key = tp->af_specific->md5_lookup(sk, sk); 297 key = tp->af_specific->md5_lookup(sk, sk);
299 if (key) { 298 if (key) {
300 tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); 299 tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
301 if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()) 300 BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
302 BUG();
303 } 301 }
304 } while (0); 302 } while (0);
305#endif 303#endif
@@ -371,7 +369,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
371 full_space = rcv_wnd * mss; 369 full_space = rcv_wnd * mss;
372 370
373 /* tcp_full_space because it is guaranteed to be the first packet */ 371 /* tcp_full_space because it is guaranteed to be the first packet */
374 tcp_select_initial_window(full_space, 372 tcp_select_initial_window(sk_listener, full_space,
375 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), 373 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
376 &req->rsk_rcv_wnd, 374 &req->rsk_rcv_wnd,
377 &req->rsk_window_clamp, 375 &req->rsk_window_clamp,
@@ -417,6 +415,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
417} 415}
418EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); 416EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
419 417
418static void smc_check_reset_syn_req(struct tcp_sock *oldtp,
419 struct request_sock *req,
420 struct tcp_sock *newtp)
421{
422#if IS_ENABLED(CONFIG_SMC)
423 struct inet_request_sock *ireq;
424
425 if (static_branch_unlikely(&tcp_have_smc)) {
426 ireq = inet_rsk(req);
427 if (oldtp->syn_smc && !ireq->smc_ok)
428 newtp->syn_smc = 0;
429 }
430#endif
431}
432
420/* This is not only more efficient than what we used to do, it eliminates 433/* This is not only more efficient than what we used to do, it eliminates
421 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM 434 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
422 * 435 *
@@ -434,6 +447,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
434 struct tcp_request_sock *treq = tcp_rsk(req); 447 struct tcp_request_sock *treq = tcp_rsk(req);
435 struct inet_connection_sock *newicsk = inet_csk(newsk); 448 struct inet_connection_sock *newicsk = inet_csk(newsk);
436 struct tcp_sock *newtp = tcp_sk(newsk); 449 struct tcp_sock *newtp = tcp_sk(newsk);
450 struct tcp_sock *oldtp = tcp_sk(sk);
451
452 smc_check_reset_syn_req(oldtp, req, newtp);
437 453
438 /* Now setup tcp_sock */ 454 /* Now setup tcp_sock */
439 newtp->pred_flags = 0; 455 newtp->pred_flags = 0;
@@ -446,6 +462,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
446 newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; 462 newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
447 463
448 INIT_LIST_HEAD(&newtp->tsq_node); 464 INIT_LIST_HEAD(&newtp->tsq_node);
465 INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
449 466
450 tcp_init_wl(newtp, treq->rcv_isn); 467 tcp_init_wl(newtp, treq->rcv_isn);
451 468
@@ -458,7 +475,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
458 newtp->packets_out = 0; 475 newtp->packets_out = 0;
459 newtp->retrans_out = 0; 476 newtp->retrans_out = 0;
460 newtp->sacked_out = 0; 477 newtp->sacked_out = 0;
461 newtp->fackets_out = 0;
462 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 478 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
463 newtp->tlp_high_seq = 0; 479 newtp->tlp_high_seq = 0;
464 newtp->lsndtime = tcp_jiffies32; 480 newtp->lsndtime = tcp_jiffies32;
@@ -492,10 +508,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
492 keepalive_time_when(newtp)); 508 keepalive_time_when(newtp));
493 509
494 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; 510 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
495 if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { 511 newtp->rx_opt.sack_ok = ireq->sack_ok;
496 if (sysctl_tcp_fack)
497 tcp_enable_fack(newtp);
498 }
499 newtp->window_clamp = req->rsk_window_clamp; 512 newtp->window_clamp = req->rsk_window_clamp;
500 newtp->rcv_ssthresh = req->rsk_rcv_wnd; 513 newtp->rcv_ssthresh = req->rsk_rcv_wnd;
501 newtp->rcv_wnd = req->rsk_rcv_wnd; 514 newtp->rcv_wnd = req->rsk_rcv_wnd;
@@ -534,6 +547,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
534 newtp->syn_data_acked = 0; 547 newtp->syn_data_acked = 0;
535 newtp->rack.mstamp = 0; 548 newtp->rack.mstamp = 0;
536 newtp->rack.advanced = 0; 549 newtp->rack.advanced = 0;
550 newtp->rack.reo_wnd_steps = 1;
551 newtp->rack.last_delivered = 0;
552 newtp->rack.reo_wnd_persist = 0;
553 newtp->rack.dsack_seen = 0;
537 554
538 __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); 555 __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
539 } 556 }
@@ -764,7 +781,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
764 return inet_csk_complete_hashdance(sk, child, req, own_req); 781 return inet_csk_complete_hashdance(sk, child, req, own_req);
765 782
766listen_overflow: 783listen_overflow:
767 if (!sysctl_tcp_abort_on_overflow) { 784 if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
768 inet_rsk(req)->acked = 1; 785 inet_rsk(req)->acked = 1;
769 return NULL; 786 return NULL;
770 } 787 }
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 125fc1450b01..0b5a05bd82e3 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -39,7 +39,7 @@
39 * nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected 39 * nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected
40 * nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8 40 * nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8
41 * nv_rtt_factor RTT averaging factor 41 * nv_rtt_factor RTT averaging factor
42 * nv_loss_dec_factor Decrease cwnd by this (50%) when losses occur 42 * nv_loss_dec_factor Decrease cwnd to this (80%) when losses occur
43 * nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd 43 * nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd
44 * nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd 44 * nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd
45 * nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping 45 * nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping
@@ -61,7 +61,7 @@ static int nv_min_cwnd __read_mostly = 2;
61static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */ 61static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */
62static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */ 62static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */
63static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */ 63static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */
64static int nv_loss_dec_factor __read_mostly = 512; /* => 50% */ 64static int nv_loss_dec_factor __read_mostly = 819; /* => 80% */
65static int nv_cwnd_growth_rate_neg __read_mostly = 8; 65static int nv_cwnd_growth_rate_neg __read_mostly = 8;
66static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */ 66static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */
67static int nv_dec_eval_min_calls __read_mostly = 60; 67static int nv_dec_eval_min_calls __read_mostly = 60;
@@ -101,6 +101,11 @@ struct tcpnv {
101 u32 nv_last_rtt; /* last rtt */ 101 u32 nv_last_rtt; /* last rtt */
102 u32 nv_min_rtt; /* active min rtt. Used to determine slope */ 102 u32 nv_min_rtt; /* active min rtt. Used to determine slope */
103 u32 nv_min_rtt_new; /* min rtt for future use */ 103 u32 nv_min_rtt_new; /* min rtt for future use */
104 u32 nv_base_rtt; /* If non-zero it represents the threshold for
105 * congestion */
106 u32 nv_lower_bound_rtt; /* Used in conjunction with nv_base_rtt. It is
107 * set to 80% of nv_base_rtt. It helps reduce
108 * unfairness between flows */
104 u32 nv_rtt_max_rate; /* max rate seen during current RTT */ 109 u32 nv_rtt_max_rate; /* max rate seen during current RTT */
105 u32 nv_rtt_start_seq; /* current RTT ends when packet arrives 110 u32 nv_rtt_start_seq; /* current RTT ends when packet arrives
106 * acking beyond nv_rtt_start_seq */ 111 * acking beyond nv_rtt_start_seq */
@@ -132,9 +137,24 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
132static void tcpnv_init(struct sock *sk) 137static void tcpnv_init(struct sock *sk)
133{ 138{
134 struct tcpnv *ca = inet_csk_ca(sk); 139 struct tcpnv *ca = inet_csk_ca(sk);
140 int base_rtt;
135 141
136 tcpnv_reset(ca, sk); 142 tcpnv_reset(ca, sk);
137 143
144 /* See if base_rtt is available from socket_ops bpf program.
145 * It is meant to be used in environments, such as communication
146 * within a datacenter, where we have reasonable estimates of
147 * RTTs
148 */
149 base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT);
150 if (base_rtt > 0) {
151 ca->nv_base_rtt = base_rtt;
152 ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
153 } else {
154 ca->nv_base_rtt = 0;
155 ca->nv_lower_bound_rtt = 0;
156 }
157
138 ca->nv_allow_cwnd_growth = 1; 158 ca->nv_allow_cwnd_growth = 1;
139 ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ; 159 ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ;
140 ca->nv_min_rtt = NV_INIT_RTT; 160 ca->nv_min_rtt = NV_INIT_RTT;
@@ -144,6 +164,19 @@ static void tcpnv_init(struct sock *sk)
144 ca->cwnd_growth_factor = 0; 164 ca->cwnd_growth_factor = 0;
145} 165}
146 166
167/* If provided, apply upper (base_rtt) and lower (lower_bound_rtt)
168 * bounds to RTT.
169 */
170inline u32 nv_get_bounded_rtt(struct tcpnv *ca, u32 val)
171{
172 if (ca->nv_lower_bound_rtt > 0 && val < ca->nv_lower_bound_rtt)
173 return ca->nv_lower_bound_rtt;
174 else if (ca->nv_base_rtt > 0 && val > ca->nv_base_rtt)
175 return ca->nv_base_rtt;
176 else
177 return val;
178}
179
147static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked) 180static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
148{ 181{
149 struct tcp_sock *tp = tcp_sk(sk); 182 struct tcp_sock *tp = tcp_sk(sk);
@@ -209,7 +242,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
209 struct tcp_sock *tp = tcp_sk(sk); 242 struct tcp_sock *tp = tcp_sk(sk);
210 struct tcpnv *ca = inet_csk_ca(sk); 243 struct tcpnv *ca = inet_csk_ca(sk);
211 unsigned long now = jiffies; 244 unsigned long now = jiffies;
212 s64 rate64 = 0; 245 u64 rate64;
213 u32 rate, max_win, cwnd_by_slope; 246 u32 rate, max_win, cwnd_by_slope;
214 u32 avg_rtt; 247 u32 avg_rtt;
215 u32 bytes_acked = 0; 248 u32 bytes_acked = 0;
@@ -251,8 +284,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
251 } 284 }
252 285
253 /* rate in 100's bits per second */ 286 /* rate in 100's bits per second */
254 rate64 = ((u64)sample->in_flight) * 8000000; 287 rate64 = ((u64)sample->in_flight) * 80000;
255 rate = (u32)div64_u64(rate64, (u64)(avg_rtt ?: 1) * 100); 288 do_div(rate64, avg_rtt ?: 1);
289 rate = (u32)rate64;
256 290
257 /* Remember the maximum rate seen during this RTT 291 /* Remember the maximum rate seen during this RTT
258 * Note: It may be more than one RTT. This function should be 292 * Note: It may be more than one RTT. This function should be
@@ -265,6 +299,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
265 if (ca->nv_eval_call_cnt < 255) 299 if (ca->nv_eval_call_cnt < 255)
266 ca->nv_eval_call_cnt++; 300 ca->nv_eval_call_cnt++;
267 301
302 /* Apply bounds to rtt. Only used to update min_rtt */
303 avg_rtt = nv_get_bounded_rtt(ca, avg_rtt);
304
268 /* update min rtt if necessary */ 305 /* update min rtt if necessary */
269 if (avg_rtt < ca->nv_min_rtt) 306 if (avg_rtt < ca->nv_min_rtt)
270 ca->nv_min_rtt = avg_rtt; 307 ca->nv_min_rtt = avg_rtt;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5a42e873d44a..a4d214c7b506 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -41,40 +41,25 @@
41#include <linux/compiler.h> 41#include <linux/compiler.h>
42#include <linux/gfp.h> 42#include <linux/gfp.h>
43#include <linux/module.h> 43#include <linux/module.h>
44#include <linux/static_key.h>
44 45
45/* People can turn this off for buggy TCP's found in printers etc. */ 46#include <trace/events/tcp.h>
46int sysctl_tcp_retrans_collapse __read_mostly = 1;
47
48/* People can turn this on to work with those rare, broken TCPs that
49 * interpret the window field as a signed quantity.
50 */
51int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
52
53/* Default TSQ limit of four TSO segments */
54int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
55
56/* This limits the percentage of the congestion window which we
57 * will allow a single TSO frame to consume. Building TSO frames
58 * which are too large can cause TCP streams to be bursty.
59 */
60int sysctl_tcp_tso_win_divisor __read_mostly = 3;
61
62/* By default, RFC2861 behavior. */
63int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
64 47
65static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, 48static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
66 int push_one, gfp_t gfp); 49 int push_one, gfp_t gfp);
67 50
68/* Account for new data that has been sent to the network. */ 51/* Account for new data that has been sent to the network. */
69static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) 52static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
70{ 53{
71 struct inet_connection_sock *icsk = inet_csk(sk); 54 struct inet_connection_sock *icsk = inet_csk(sk);
72 struct tcp_sock *tp = tcp_sk(sk); 55 struct tcp_sock *tp = tcp_sk(sk);
73 unsigned int prior_packets = tp->packets_out; 56 unsigned int prior_packets = tp->packets_out;
74 57
75 tcp_advance_send_head(sk, skb);
76 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; 58 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
77 59
60 __skb_unlink(skb, &sk->sk_write_queue);
61 tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
62
78 tp->packets_out += tcp_skb_pcount(skb); 63 tp->packets_out += tcp_skb_pcount(skb);
79 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) 64 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
80 tcp_rearm_rto(sk); 65 tcp_rearm_rto(sk);
@@ -203,7 +188,7 @@ u32 tcp_default_init_rwnd(u32 mss)
203 * be a multiple of mss if possible. We assume here that mss >= 1. 188 * be a multiple of mss if possible. We assume here that mss >= 1.
204 * This MUST be enforced by all callers. 189 * This MUST be enforced by all callers.
205 */ 190 */
206void tcp_select_initial_window(int __space, __u32 mss, 191void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
207 __u32 *rcv_wnd, __u32 *window_clamp, 192 __u32 *rcv_wnd, __u32 *window_clamp,
208 int wscale_ok, __u8 *rcv_wscale, 193 int wscale_ok, __u8 *rcv_wscale,
209 __u32 init_rcv_wnd) 194 __u32 init_rcv_wnd)
@@ -227,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
227 * which we interpret as a sign the remote TCP is not 212 * which we interpret as a sign the remote TCP is not
228 * misinterpreting the window field as a signed quantity. 213 * misinterpreting the window field as a signed quantity.
229 */ 214 */
230 if (sysctl_tcp_workaround_signed_windows) 215 if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
231 (*rcv_wnd) = min(space, MAX_TCP_WINDOW); 216 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
232 else 217 else
233 (*rcv_wnd) = space; 218 (*rcv_wnd) = space;
@@ -235,7 +220,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
235 (*rcv_wscale) = 0; 220 (*rcv_wscale) = 0;
236 if (wscale_ok) { 221 if (wscale_ok) {
237 /* Set window scaling on max possible window */ 222 /* Set window scaling on max possible window */
238 space = max_t(u32, space, sysctl_tcp_rmem[2]); 223 space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
239 space = max_t(u32, space, sysctl_rmem_max); 224 space = max_t(u32, space, sysctl_rmem_max);
240 space = min_t(u32, space, *window_clamp); 225 space = min_t(u32, space, *window_clamp);
241 while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) { 226 while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
@@ -287,7 +272,8 @@ static u16 tcp_select_window(struct sock *sk)
287 /* Make sure we do not exceed the maximum possible 272 /* Make sure we do not exceed the maximum possible
288 * scaled window. 273 * scaled window.
289 */ 274 */
290 if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows) 275 if (!tp->rx_opt.rcv_wscale &&
276 sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
291 new_win = min(new_win, MAX_TCP_WINDOW); 277 new_win = min(new_win, MAX_TCP_WINDOW);
292 else 278 else
293 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); 279 new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
@@ -395,7 +381,6 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
395static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) 381static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
396{ 382{
397 skb->ip_summed = CHECKSUM_PARTIAL; 383 skb->ip_summed = CHECKSUM_PARTIAL;
398 skb->csum = 0;
399 384
400 TCP_SKB_CB(skb)->tcp_flags = flags; 385 TCP_SKB_CB(skb)->tcp_flags = flags;
401 TCP_SKB_CB(skb)->sacked = 0; 386 TCP_SKB_CB(skb)->sacked = 0;
@@ -418,6 +403,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
418#define OPTION_MD5 (1 << 2) 403#define OPTION_MD5 (1 << 2)
419#define OPTION_WSCALE (1 << 3) 404#define OPTION_WSCALE (1 << 3)
420#define OPTION_FAST_OPEN_COOKIE (1 << 8) 405#define OPTION_FAST_OPEN_COOKIE (1 << 8)
406#define OPTION_SMC (1 << 9)
407
408static void smc_options_write(__be32 *ptr, u16 *options)
409{
410#if IS_ENABLED(CONFIG_SMC)
411 if (static_branch_unlikely(&tcp_have_smc)) {
412 if (unlikely(OPTION_SMC & *options)) {
413 *ptr++ = htonl((TCPOPT_NOP << 24) |
414 (TCPOPT_NOP << 16) |
415 (TCPOPT_EXP << 8) |
416 (TCPOLEN_EXP_SMC_BASE));
417 *ptr++ = htonl(TCPOPT_SMC_MAGIC);
418 }
419 }
420#endif
421}
421 422
422struct tcp_out_options { 423struct tcp_out_options {
423 u16 options; /* bit field of OPTION_* */ 424 u16 options; /* bit field of OPTION_* */
@@ -536,6 +537,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
536 } 537 }
537 ptr += (len + 3) >> 2; 538 ptr += (len + 3) >> 2;
538 } 539 }
540
541 smc_options_write(ptr, &options);
542}
543
544static void smc_set_option(const struct tcp_sock *tp,
545 struct tcp_out_options *opts,
546 unsigned int *remaining)
547{
548#if IS_ENABLED(CONFIG_SMC)
549 if (static_branch_unlikely(&tcp_have_smc)) {
550 if (tp->syn_smc) {
551 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
552 opts->options |= OPTION_SMC;
553 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
554 }
555 }
556 }
557#endif
558}
559
560static void smc_set_option_cond(const struct tcp_sock *tp,
561 const struct inet_request_sock *ireq,
562 struct tcp_out_options *opts,
563 unsigned int *remaining)
564{
565#if IS_ENABLED(CONFIG_SMC)
566 if (static_branch_unlikely(&tcp_have_smc)) {
567 if (tp->syn_smc && ireq->smc_ok) {
568 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
569 opts->options |= OPTION_SMC;
570 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
571 }
572 }
573 }
574#endif
539} 575}
540 576
541/* Compute TCP options for SYN packets. This is not the final 577/* Compute TCP options for SYN packets. This is not the final
@@ -603,11 +639,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
603 } 639 }
604 } 640 }
605 641
642 smc_set_option(tp, opts, &remaining);
643
606 return MAX_TCP_OPTION_SPACE - remaining; 644 return MAX_TCP_OPTION_SPACE - remaining;
607} 645}
608 646
609/* Set up TCP options for SYN-ACKs. */ 647/* Set up TCP options for SYN-ACKs. */
610static unsigned int tcp_synack_options(struct request_sock *req, 648static unsigned int tcp_synack_options(const struct sock *sk,
649 struct request_sock *req,
611 unsigned int mss, struct sk_buff *skb, 650 unsigned int mss, struct sk_buff *skb,
612 struct tcp_out_options *opts, 651 struct tcp_out_options *opts,
613 const struct tcp_md5sig_key *md5, 652 const struct tcp_md5sig_key *md5,
@@ -663,6 +702,8 @@ static unsigned int tcp_synack_options(struct request_sock *req,
663 } 702 }
664 } 703 }
665 704
705 smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
706
666 return MAX_TCP_OPTION_SPACE - remaining; 707 return MAX_TCP_OPTION_SPACE - remaining;
667} 708}
668 709
@@ -973,6 +1014,12 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
973 HRTIMER_MODE_ABS_PINNED); 1014 HRTIMER_MODE_ABS_PINNED);
974} 1015}
975 1016
1017static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
1018{
1019 skb->skb_mstamp = tp->tcp_mstamp;
1020 list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
1021}
1022
976/* This routine actually transmits TCP packets queued in by 1023/* This routine actually transmits TCP packets queued in by
977 * tcp_do_sendmsg(). This is used by both the initial 1024 * tcp_do_sendmsg(). This is used by both the initial
978 * transmission and possible later retransmissions. 1025 * transmission and possible later retransmissions.
@@ -1005,10 +1052,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1005 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq 1052 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
1006 - tp->snd_una; 1053 - tp->snd_una;
1007 oskb = skb; 1054 oskb = skb;
1008 if (unlikely(skb_cloned(skb))) 1055
1009 skb = pskb_copy(skb, gfp_mask); 1056 tcp_skb_tsorted_save(oskb) {
1010 else 1057 if (unlikely(skb_cloned(oskb)))
1011 skb = skb_clone(skb, gfp_mask); 1058 skb = pskb_copy(oskb, gfp_mask);
1059 else
1060 skb = skb_clone(oskb, gfp_mask);
1061 } tcp_skb_tsorted_restore(oskb);
1062
1012 if (unlikely(!skb)) 1063 if (unlikely(!skb))
1013 return -ENOBUFS; 1064 return -ENOBUFS;
1014 } 1065 }
@@ -1129,7 +1180,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1129 err = net_xmit_eval(err); 1180 err = net_xmit_eval(err);
1130 } 1181 }
1131 if (!err && oskb) { 1182 if (!err && oskb) {
1132 oskb->skb_mstamp = tp->tcp_mstamp; 1183 tcp_update_skb_after_send(tp, oskb);
1133 tcp_rate_skb_sent(sk, oskb); 1184 tcp_rate_skb_sent(sk, oskb);
1134 } 1185 }
1135 return err; 1186 return err;
@@ -1167,21 +1218,6 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
1167 } 1218 }
1168} 1219}
1169 1220
1170/* When a modification to fackets out becomes necessary, we need to check
1171 * skb is counted to fackets_out or not.
1172 */
1173static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
1174 int decr)
1175{
1176 struct tcp_sock *tp = tcp_sk(sk);
1177
1178 if (!tp->sacked_out || tcp_is_reno(tp))
1179 return;
1180
1181 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
1182 tp->fackets_out -= decr;
1183}
1184
1185/* Pcount in the middle of the write queue got changed, we need to do various 1221/* Pcount in the middle of the write queue got changed, we need to do various
1186 * tweaks to fix counters 1222 * tweaks to fix counters
1187 */ 1223 */
@@ -1202,11 +1238,9 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
1202 if (tcp_is_reno(tp) && decr > 0) 1238 if (tcp_is_reno(tp) && decr > 0)
1203 tp->sacked_out -= min_t(u32, tp->sacked_out, decr); 1239 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
1204 1240
1205 tcp_adjust_fackets_out(sk, skb, decr);
1206
1207 if (tp->lost_skb_hint && 1241 if (tp->lost_skb_hint &&
1208 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && 1242 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
1209 (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) 1243 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1210 tp->lost_cnt_hint -= decr; 1244 tp->lost_cnt_hint -= decr;
1211 1245
1212 tcp_verify_left_out(tp); 1246 tcp_verify_left_out(tp);
@@ -1241,12 +1275,25 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
1241 TCP_SKB_CB(skb)->eor = 0; 1275 TCP_SKB_CB(skb)->eor = 0;
1242} 1276}
1243 1277
1278/* Insert buff after skb on the write or rtx queue of sk. */
1279static void tcp_insert_write_queue_after(struct sk_buff *skb,
1280 struct sk_buff *buff,
1281 struct sock *sk,
1282 enum tcp_queue tcp_queue)
1283{
1284 if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
1285 __skb_queue_after(&sk->sk_write_queue, skb, buff);
1286 else
1287 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
1288}
1289
1244/* Function to create two new TCP segments. Shrinks the given segment 1290/* Function to create two new TCP segments. Shrinks the given segment
1245 * to the specified size and appends a new segment with the rest of the 1291 * to the specified size and appends a new segment with the rest of the
1246 * packet to the list. This won't be called frequently, I hope. 1292 * packet to the list. This won't be called frequently, I hope.
1247 * Remember, these are still headerless SKBs at this point. 1293 * Remember, these are still headerless SKBs at this point.
1248 */ 1294 */
1249int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, 1295int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1296 struct sk_buff *skb, u32 len,
1250 unsigned int mss_now, gfp_t gfp) 1297 unsigned int mss_now, gfp_t gfp)
1251{ 1298{
1252 struct tcp_sock *tp = tcp_sk(sk); 1299 struct tcp_sock *tp = tcp_sk(sk);
@@ -1329,7 +1376,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1329 1376
1330 /* Link BUFF into the send queue. */ 1377 /* Link BUFF into the send queue. */
1331 __skb_header_release(buff); 1378 __skb_header_release(buff);
1332 tcp_insert_write_queue_after(skb, buff, sk); 1379 tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
1380 if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE)
1381 list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
1333 1382
1334 return 0; 1383 return 0;
1335} 1384}
@@ -1607,7 +1656,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1607 if (tp->packets_out > tp->snd_cwnd_used) 1656 if (tp->packets_out > tp->snd_cwnd_used)
1608 tp->snd_cwnd_used = tp->packets_out; 1657 tp->snd_cwnd_used = tp->packets_out;
1609 1658
1610 if (sysctl_tcp_slow_start_after_idle && 1659 if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
1611 (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && 1660 (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
1612 !ca_ops->cong_control) 1661 !ca_ops->cong_control)
1613 tcp_cwnd_application_limited(sk); 1662 tcp_cwnd_application_limited(sk);
@@ -1616,10 +1665,10 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1616 * is caused by insufficient sender buffer: 1665 * is caused by insufficient sender buffer:
1617 * 1) just sent some data (see tcp_write_xmit) 1666 * 1) just sent some data (see tcp_write_xmit)
1618 * 2) not cwnd limited (this else condition) 1667 * 2) not cwnd limited (this else condition)
1619 * 3) no more data to send (null tcp_send_head ) 1668 * 3) no more data to send (tcp_write_queue_empty())
1620 * 4) application is hitting buffer limit (SOCK_NOSPACE) 1669 * 4) application is hitting buffer limit (SOCK_NOSPACE)
1621 */ 1670 */
1622 if (!tcp_send_head(sk) && sk->sk_socket && 1671 if (tcp_write_queue_empty(sk) && sk->sk_socket &&
1623 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) && 1672 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
1624 (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) 1673 (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
1625 tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED); 1674 tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
@@ -1671,7 +1720,7 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
1671{ 1720{
1672 u32 bytes, segs; 1721 u32 bytes, segs;
1673 1722
1674 bytes = min(sk->sk_pacing_rate >> 10, 1723 bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
1675 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); 1724 sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
1676 1725
1677 /* Goal is to send at least one packet per ms, 1726 /* Goal is to send at least one packet per ms,
@@ -1694,7 +1743,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
1694 u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; 1743 u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
1695 1744
1696 return tso_segs ? : 1745 return tso_segs ? :
1697 tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs); 1746 tcp_tso_autosize(sk, mss_now,
1747 sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
1698} 1748}
1699 1749
1700/* Returns the portion of skb which can be sent right away */ 1750/* Returns the portion of skb which can be sent right away */
@@ -1815,7 +1865,8 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1815 * know that all the data is in scatter-gather pages, and that the 1865 * know that all the data is in scatter-gather pages, and that the
1816 * packet has never been sent out before (and thus is not cloned). 1866 * packet has never been sent out before (and thus is not cloned).
1817 */ 1867 */
1818static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, 1868static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1869 struct sk_buff *skb, unsigned int len,
1819 unsigned int mss_now, gfp_t gfp) 1870 unsigned int mss_now, gfp_t gfp)
1820{ 1871{
1821 struct sk_buff *buff; 1872 struct sk_buff *buff;
@@ -1824,7 +1875,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1824 1875
1825 /* All of a TSO frame must be composed of paged data. */ 1876 /* All of a TSO frame must be composed of paged data. */
1826 if (skb->len != skb->data_len) 1877 if (skb->len != skb->data_len)
1827 return tcp_fragment(sk, skb, len, mss_now, gfp); 1878 return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
1828 1879
1829 buff = sk_stream_alloc_skb(sk, 0, gfp, true); 1880 buff = sk_stream_alloc_skb(sk, 0, gfp, true);
1830 if (unlikely(!buff)) 1881 if (unlikely(!buff))
@@ -1860,7 +1911,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1860 1911
1861 /* Link BUFF into the send queue. */ 1912 /* Link BUFF into the send queue. */
1862 __skb_header_release(buff); 1913 __skb_header_release(buff);
1863 tcp_insert_write_queue_after(skb, buff, sk); 1914 tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
1864 1915
1865 return 0; 1916 return 0;
1866} 1917}
@@ -1910,7 +1961,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1910 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) 1961 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1911 goto send_now; 1962 goto send_now;
1912 1963
1913 win_divisor = READ_ONCE(sysctl_tcp_tso_win_divisor); 1964 win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
1914 if (win_divisor) { 1965 if (win_divisor) {
1915 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); 1966 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1916 1967
@@ -1930,8 +1981,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1930 goto send_now; 1981 goto send_now;
1931 } 1982 }
1932 1983
1933 head = tcp_write_queue_head(sk); 1984 /* TODO : use tsorted_sent_queue ? */
1934 1985 head = tcp_rtx_queue_head(sk);
1986 if (!head)
1987 goto send_now;
1935 age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp); 1988 age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
1936 /* If next ACK is likely to come too late (half srtt), do not defer */ 1989 /* If next ACK is likely to come too late (half srtt), do not defer */
1937 if (age < (tp->srtt_us >> 4)) 1990 if (age < (tp->srtt_us >> 4))
@@ -2145,18 +2198,18 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
2145{ 2198{
2146 unsigned int limit; 2199 unsigned int limit;
2147 2200
2148 limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); 2201 limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
2149 limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); 2202 limit = min_t(u32, limit,
2203 sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
2150 limit <<= factor; 2204 limit <<= factor;
2151 2205
2152 if (refcount_read(&sk->sk_wmem_alloc) > limit) { 2206 if (refcount_read(&sk->sk_wmem_alloc) > limit) {
2153 /* Always send the 1st or 2nd skb in write queue. 2207 /* Always send skb if rtx queue is empty.
2154 * No need to wait for TX completion to call us back, 2208 * No need to wait for TX completion to call us back,
2155 * after softirq/tasklet schedule. 2209 * after softirq/tasklet schedule.
2156 * This helps when TX completions are delayed too much. 2210 * This helps when TX completions are delayed too much.
2157 */ 2211 */
2158 if (skb == sk->sk_write_queue.next || 2212 if (tcp_rtx_queue_empty(sk))
2159 skb->prev == sk->sk_write_queue.next)
2160 return false; 2213 return false;
2161 2214
2162 set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); 2215 set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
@@ -2207,7 +2260,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
2207 * it's the "most interesting" or current chrono we are 2260 * it's the "most interesting" or current chrono we are
2208 * tracking and starts busy chrono if we have pending data. 2261 * tracking and starts busy chrono if we have pending data.
2209 */ 2262 */
2210 if (tcp_write_queue_empty(sk)) 2263 if (tcp_rtx_and_write_queues_empty(sk))
2211 tcp_chrono_set(tp, TCP_CHRONO_UNSPEC); 2264 tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
2212 else if (type == tp->chrono_type) 2265 else if (type == tp->chrono_type)
2213 tcp_chrono_set(tp, TCP_CHRONO_BUSY); 2266 tcp_chrono_set(tp, TCP_CHRONO_BUSY);
@@ -2263,7 +2316,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2263 2316
2264 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { 2317 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
2265 /* "skb_mstamp" is used as a start point for the retransmit timer */ 2318 /* "skb_mstamp" is used as a start point for the retransmit timer */
2266 skb->skb_mstamp = tp->tcp_mstamp; 2319 tcp_update_skb_after_send(tp, skb);
2267 goto repair; /* Skip network transmission */ 2320 goto repair; /* Skip network transmission */
2268 } 2321 }
2269 2322
@@ -2302,7 +2355,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2302 nonagle); 2355 nonagle);
2303 2356
2304 if (skb->len > limit && 2357 if (skb->len > limit &&
2305 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) 2358 unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
2359 skb, limit, mss_now, gfp)))
2306 break; 2360 break;
2307 2361
2308 if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) 2362 if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@ -2337,19 +2391,20 @@ repair:
2337 2391
2338 /* Send one loss probe per tail loss episode. */ 2392 /* Send one loss probe per tail loss episode. */
2339 if (push_one != 2) 2393 if (push_one != 2)
2340 tcp_schedule_loss_probe(sk); 2394 tcp_schedule_loss_probe(sk, false);
2341 is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); 2395 is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
2342 tcp_cwnd_validate(sk, is_cwnd_limited); 2396 tcp_cwnd_validate(sk, is_cwnd_limited);
2343 return false; 2397 return false;
2344 } 2398 }
2345 return !tp->packets_out && tcp_send_head(sk); 2399 return !tp->packets_out && !tcp_write_queue_empty(sk);
2346} 2400}
2347 2401
2348bool tcp_schedule_loss_probe(struct sock *sk) 2402bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
2349{ 2403{
2350 struct inet_connection_sock *icsk = inet_csk(sk); 2404 struct inet_connection_sock *icsk = inet_csk(sk);
2351 struct tcp_sock *tp = tcp_sk(sk); 2405 struct tcp_sock *tp = tcp_sk(sk);
2352 u32 timeout, rto_delta_us; 2406 u32 timeout, rto_delta_us;
2407 int early_retrans;
2353 2408
2354 /* Don't do any loss probe on a Fast Open connection before 3WHS 2409 /* Don't do any loss probe on a Fast Open connection before 3WHS
2355 * finishes. 2410 * finishes.
@@ -2357,16 +2412,17 @@ bool tcp_schedule_loss_probe(struct sock *sk)
2357 if (tp->fastopen_rsk) 2412 if (tp->fastopen_rsk)
2358 return false; 2413 return false;
2359 2414
2415 early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
2360 /* Schedule a loss probe in 2*RTT for SACK capable connections 2416 /* Schedule a loss probe in 2*RTT for SACK capable connections
2361 * in Open state, that are either limited by cwnd or application. 2417 * in Open state, that are either limited by cwnd or application.
2362 */ 2418 */
2363 if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) || 2419 if ((early_retrans != 3 && early_retrans != 4) ||
2364 !tp->packets_out || !tcp_is_sack(tp) || 2420 !tp->packets_out || !tcp_is_sack(tp) ||
2365 icsk->icsk_ca_state != TCP_CA_Open) 2421 icsk->icsk_ca_state != TCP_CA_Open)
2366 return false; 2422 return false;
2367 2423
2368 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && 2424 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
2369 tcp_send_head(sk)) 2425 !tcp_write_queue_empty(sk))
2370 return false; 2426 return false;
2371 2427
2372 /* Probe timeout is 2*rtt. Add minimum RTO to account 2428 /* Probe timeout is 2*rtt. Add minimum RTO to account
@@ -2384,7 +2440,9 @@ bool tcp_schedule_loss_probe(struct sock *sk)
2384 } 2440 }
2385 2441
2386 /* If the RTO formula yields an earlier time, then use that time. */ 2442 /* If the RTO formula yields an earlier time, then use that time. */
2387 rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */ 2443 rto_delta_us = advancing_rto ?
2444 jiffies_to_usecs(inet_csk(sk)->icsk_rto) :
2445 tcp_rto_delta_us(sk); /* How far in future is RTO? */
2388 if (rto_delta_us > 0) 2446 if (rto_delta_us > 0)
2389 timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us)); 2447 timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
2390 2448
@@ -2419,18 +2477,14 @@ void tcp_send_loss_probe(struct sock *sk)
2419 int mss = tcp_current_mss(sk); 2477 int mss = tcp_current_mss(sk);
2420 2478
2421 skb = tcp_send_head(sk); 2479 skb = tcp_send_head(sk);
2422 if (skb) { 2480 if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
2423 if (tcp_snd_wnd_test(tp, skb, mss)) { 2481 pcount = tp->packets_out;
2424 pcount = tp->packets_out; 2482 tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
2425 tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); 2483 if (tp->packets_out > pcount)
2426 if (tp->packets_out > pcount) 2484 goto probe_sent;
2427 goto probe_sent; 2485 goto rearm_timer;
2428 goto rearm_timer;
2429 }
2430 skb = tcp_write_queue_prev(sk, skb);
2431 } else {
2432 skb = tcp_write_queue_tail(sk);
2433 } 2486 }
2487 skb = skb_rb_last(&sk->tcp_rtx_queue);
2434 2488
2435 /* At most one outstanding TLP retransmission. */ 2489 /* At most one outstanding TLP retransmission. */
2436 if (tp->tlp_high_seq) 2490 if (tp->tlp_high_seq)
@@ -2448,10 +2502,11 @@ void tcp_send_loss_probe(struct sock *sk)
2448 goto rearm_timer; 2502 goto rearm_timer;
2449 2503
2450 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { 2504 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2451 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss, 2505 if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
2506 (pcount - 1) * mss, mss,
2452 GFP_ATOMIC))) 2507 GFP_ATOMIC)))
2453 goto rearm_timer; 2508 goto rearm_timer;
2454 skb = tcp_write_queue_next(sk, skb); 2509 skb = skb_rb_next(skb);
2455 } 2510 }
2456 2511
2457 if (WARN_ON(!skb || !tcp_skb_pcount(skb))) 2512 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
@@ -2651,7 +2706,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
2651static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) 2706static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2652{ 2707{
2653 struct tcp_sock *tp = tcp_sk(sk); 2708 struct tcp_sock *tp = tcp_sk(sk);
2654 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); 2709 struct sk_buff *next_skb = skb_rb_next(skb);
2655 int skb_size, next_skb_size; 2710 int skb_size, next_skb_size;
2656 2711
2657 skb_size = skb->len; 2712 skb_size = skb->len;
@@ -2668,8 +2723,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2668 } 2723 }
2669 tcp_highest_sack_replace(sk, next_skb, skb); 2724 tcp_highest_sack_replace(sk, next_skb, skb);
2670 2725
2671 tcp_unlink_write_queue(next_skb, sk);
2672
2673 if (next_skb->ip_summed == CHECKSUM_PARTIAL) 2726 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2674 skb->ip_summed = CHECKSUM_PARTIAL; 2727 skb->ip_summed = CHECKSUM_PARTIAL;
2675 2728
@@ -2697,7 +2750,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2697 2750
2698 tcp_skb_collapse_tstamp(skb, next_skb); 2751 tcp_skb_collapse_tstamp(skb, next_skb);
2699 2752
2700 sk_wmem_free_skb(sk, next_skb); 2753 tcp_rtx_queue_unlink_and_free(next_skb, sk);
2701 return true; 2754 return true;
2702} 2755}
2703 2756
@@ -2708,8 +2761,6 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2708 return false; 2761 return false;
2709 if (skb_cloned(skb)) 2762 if (skb_cloned(skb))
2710 return false; 2763 return false;
2711 if (skb == tcp_send_head(sk))
2712 return false;
2713 /* Some heuristics for collapsing over SACK'd could be invented */ 2764 /* Some heuristics for collapsing over SACK'd could be invented */
2714 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) 2765 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2715 return false; 2766 return false;
@@ -2727,12 +2778,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2727 struct sk_buff *skb = to, *tmp; 2778 struct sk_buff *skb = to, *tmp;
2728 bool first = true; 2779 bool first = true;
2729 2780
2730 if (!sysctl_tcp_retrans_collapse) 2781 if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
2731 return; 2782 return;
2732 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) 2783 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2733 return; 2784 return;
2734 2785
2735 tcp_for_write_queue_from_safe(skb, tmp, sk) { 2786 skb_rbtree_walk_from_safe(skb, tmp) {
2736 if (!tcp_can_collapse(sk, skb)) 2787 if (!tcp_can_collapse(sk, skb))
2737 break; 2788 break;
2738 2789
@@ -2807,7 +2858,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2807 2858
2808 len = cur_mss * segs; 2859 len = cur_mss * segs;
2809 if (skb->len > len) { 2860 if (skb->len > len) {
2810 if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC)) 2861 if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
2862 cur_mss, GFP_ATOMIC))
2811 return -ENOMEM; /* We'll try again later. */ 2863 return -ENOMEM; /* We'll try again later. */
2812 } else { 2864 } else {
2813 if (skb_unclone(skb, GFP_ATOMIC)) 2865 if (skb_unclone(skb, GFP_ATOMIC))
@@ -2841,11 +2893,14 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2841 skb_headroom(skb) >= 0xFFFF)) { 2893 skb_headroom(skb) >= 0xFFFF)) {
2842 struct sk_buff *nskb; 2894 struct sk_buff *nskb;
2843 2895
2844 nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); 2896 tcp_skb_tsorted_save(skb) {
2845 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : 2897 nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
2846 -ENOBUFS; 2898 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2899 -ENOBUFS;
2900 } tcp_skb_tsorted_restore(skb);
2901
2847 if (!err) { 2902 if (!err) {
2848 skb->skb_mstamp = tp->tcp_mstamp; 2903 tcp_update_skb_after_send(tp, skb);
2849 tcp_rate_skb_sent(sk, skb); 2904 tcp_rate_skb_sent(sk, skb);
2850 } 2905 }
2851 } else { 2906 } else {
@@ -2854,6 +2909,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2854 2909
2855 if (likely(!err)) { 2910 if (likely(!err)) {
2856 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; 2911 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2912 trace_tcp_retransmit_skb(sk, skb);
2857 } else if (err != -EBUSY) { 2913 } else if (err != -EBUSY) {
2858 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); 2914 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2859 } 2915 }
@@ -2890,36 +2946,25 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2890 * retransmitted data is acknowledged. It tries to continue 2946 * retransmitted data is acknowledged. It tries to continue
2891 * resending the rest of the retransmit queue, until either 2947 * resending the rest of the retransmit queue, until either
2892 * we've sent it all or the congestion window limit is reached. 2948 * we've sent it all or the congestion window limit is reached.
2893 * If doing SACK, the first ACK which comes back for a timeout
2894 * based retransmit packet might feed us FACK information again.
2895 * If so, we use it to avoid unnecessarily retransmissions.
2896 */ 2949 */
2897void tcp_xmit_retransmit_queue(struct sock *sk) 2950void tcp_xmit_retransmit_queue(struct sock *sk)
2898{ 2951{
2899 const struct inet_connection_sock *icsk = inet_csk(sk); 2952 const struct inet_connection_sock *icsk = inet_csk(sk);
2953 struct sk_buff *skb, *rtx_head, *hole = NULL;
2900 struct tcp_sock *tp = tcp_sk(sk); 2954 struct tcp_sock *tp = tcp_sk(sk);
2901 struct sk_buff *skb;
2902 struct sk_buff *hole = NULL;
2903 u32 max_segs; 2955 u32 max_segs;
2904 int mib_idx; 2956 int mib_idx;
2905 2957
2906 if (!tp->packets_out) 2958 if (!tp->packets_out)
2907 return; 2959 return;
2908 2960
2909 if (tp->retransmit_skb_hint) { 2961 rtx_head = tcp_rtx_queue_head(sk);
2910 skb = tp->retransmit_skb_hint; 2962 skb = tp->retransmit_skb_hint ?: rtx_head;
2911 } else {
2912 skb = tcp_write_queue_head(sk);
2913 }
2914
2915 max_segs = tcp_tso_segs(sk, tcp_current_mss(sk)); 2963 max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
2916 tcp_for_write_queue_from(skb, sk) { 2964 skb_rbtree_walk_from(skb) {
2917 __u8 sacked; 2965 __u8 sacked;
2918 int segs; 2966 int segs;
2919 2967
2920 if (skb == tcp_send_head(sk))
2921 break;
2922
2923 if (tcp_pacing_check(sk)) 2968 if (tcp_pacing_check(sk))
2924 break; 2969 break;
2925 2970
@@ -2964,7 +3009,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
2964 if (tcp_in_cwnd_reduction(sk)) 3009 if (tcp_in_cwnd_reduction(sk))
2965 tp->prr_out += tcp_skb_pcount(skb); 3010 tp->prr_out += tcp_skb_pcount(skb);
2966 3011
2967 if (skb == tcp_write_queue_head(sk) && 3012 if (skb == rtx_head &&
2968 icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT) 3013 icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
2969 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 3014 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2970 inet_csk(sk)->icsk_rto, 3015 inet_csk(sk)->icsk_rto,
@@ -3006,12 +3051,15 @@ void tcp_send_fin(struct sock *sk)
3006 * Note: in the latter case, FIN packet will be sent after a timeout, 3051 * Note: in the latter case, FIN packet will be sent after a timeout,
3007 * as TCP stack thinks it has already been transmitted. 3052 * as TCP stack thinks it has already been transmitted.
3008 */ 3053 */
3009 if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) { 3054 if (!tskb && tcp_under_memory_pressure(sk))
3055 tskb = skb_rb_last(&sk->tcp_rtx_queue);
3056
3057 if (tskb) {
3010coalesce: 3058coalesce:
3011 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; 3059 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
3012 TCP_SKB_CB(tskb)->end_seq++; 3060 TCP_SKB_CB(tskb)->end_seq++;
3013 tp->write_seq++; 3061 tp->write_seq++;
3014 if (!tcp_send_head(sk)) { 3062 if (tcp_write_queue_empty(sk)) {
3015 /* This means tskb was already sent. 3063 /* This means tskb was already sent.
3016 * Pretend we included the FIN on previous transmit. 3064 * Pretend we included the FIN on previous transmit.
3017 * We need to set tp->snd_nxt to the value it would have 3065 * We need to set tp->snd_nxt to the value it would have
@@ -3028,6 +3076,7 @@ coalesce:
3028 goto coalesce; 3076 goto coalesce;
3029 return; 3077 return;
3030 } 3078 }
3079 INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
3031 skb_reserve(skb, MAX_TCP_HEADER); 3080 skb_reserve(skb, MAX_TCP_HEADER);
3032 sk_forced_mem_schedule(sk, skb->truesize); 3081 sk_forced_mem_schedule(sk, skb->truesize);
3033 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ 3082 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
@@ -3064,6 +3113,11 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
3064 /* Send it off. */ 3113 /* Send it off. */
3065 if (tcp_transmit_skb(sk, skb, 0, priority)) 3114 if (tcp_transmit_skb(sk, skb, 0, priority))
3066 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); 3115 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
3116
3117 /* skb of trace_tcp_send_reset() keeps the skb that caused RST,
3118 * skb here is different to the troublesome skb, so use NULL
3119 */
3120 trace_tcp_send_reset(sk, NULL);
3067} 3121}
3068 3122
3069/* Send a crossed SYN-ACK during socket establishment. 3123/* Send a crossed SYN-ACK during socket establishment.
@@ -3076,20 +3130,24 @@ int tcp_send_synack(struct sock *sk)
3076{ 3130{
3077 struct sk_buff *skb; 3131 struct sk_buff *skb;
3078 3132
3079 skb = tcp_write_queue_head(sk); 3133 skb = tcp_rtx_queue_head(sk);
3080 if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { 3134 if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
3081 pr_debug("%s: wrong queue state\n", __func__); 3135 pr_err("%s: wrong queue state\n", __func__);
3082 return -EFAULT; 3136 return -EFAULT;
3083 } 3137 }
3084 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { 3138 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
3085 if (skb_cloned(skb)) { 3139 if (skb_cloned(skb)) {
3086 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 3140 struct sk_buff *nskb;
3141
3142 tcp_skb_tsorted_save(skb) {
3143 nskb = skb_copy(skb, GFP_ATOMIC);
3144 } tcp_skb_tsorted_restore(skb);
3087 if (!nskb) 3145 if (!nskb)
3088 return -ENOMEM; 3146 return -ENOMEM;
3089 tcp_unlink_write_queue(skb, sk); 3147 INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
3148 tcp_rtx_queue_unlink_and_free(skb, sk);
3090 __skb_header_release(nskb); 3149 __skb_header_release(nskb);
3091 __tcp_add_write_queue_head(sk, nskb); 3150 tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
3092 sk_wmem_free_skb(sk, skb);
3093 sk->sk_wmem_queued += nskb->truesize; 3151 sk->sk_wmem_queued += nskb->truesize;
3094 sk_mem_charge(sk, nskb->truesize); 3152 sk_mem_charge(sk, nskb->truesize);
3095 skb = nskb; 3153 skb = nskb;
@@ -3166,8 +3224,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
3166 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); 3224 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
3167#endif 3225#endif
3168 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); 3226 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
3169 tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) + 3227 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
3170 sizeof(*th); 3228 foc) + sizeof(*th);
3171 3229
3172 skb_push(skb, tcp_header_size); 3230 skb_push(skb, tcp_header_size);
3173 skb_reset_transport_header(skb); 3231 skb_reset_transport_header(skb);
@@ -3268,7 +3326,7 @@ static void tcp_connect_init(struct sock *sk)
3268 if (rcv_wnd == 0) 3326 if (rcv_wnd == 0)
3269 rcv_wnd = dst_metric(dst, RTAX_INITRWND); 3327 rcv_wnd = dst_metric(dst, RTAX_INITRWND);
3270 3328
3271 tcp_select_initial_window(tcp_full_space(sk), 3329 tcp_select_initial_window(sk, tcp_full_space(sk),
3272 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), 3330 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
3273 &tp->rcv_wnd, 3331 &tp->rcv_wnd,
3274 &tp->window_clamp, 3332 &tp->window_clamp,
@@ -3307,7 +3365,6 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
3307 3365
3308 tcb->end_seq += skb->len; 3366 tcb->end_seq += skb->len;
3309 __skb_header_release(skb); 3367 __skb_header_release(skb);
3310 __tcp_add_write_queue_tail(sk, skb);
3311 sk->sk_wmem_queued += skb->truesize; 3368 sk->sk_wmem_queued += skb->truesize;
3312 sk_mem_charge(sk, skb->truesize); 3369 sk_mem_charge(sk, skb->truesize);
3313 tp->write_seq = tcb->end_seq; 3370 tp->write_seq = tcb->end_seq;
@@ -3355,6 +3412,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3355 int copied = copy_from_iter(skb_put(syn_data, space), space, 3412 int copied = copy_from_iter(skb_put(syn_data, space), space,
3356 &fo->data->msg_iter); 3413 &fo->data->msg_iter);
3357 if (unlikely(!copied)) { 3414 if (unlikely(!copied)) {
3415 tcp_skb_tsorted_anchor_cleanup(syn_data);
3358 kfree_skb(syn_data); 3416 kfree_skb(syn_data);
3359 goto fallback; 3417 goto fallback;
3360 } 3418 }
@@ -3385,12 +3443,13 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3385 TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH; 3443 TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
3386 if (!err) { 3444 if (!err) {
3387 tp->syn_data = (fo->copied > 0); 3445 tp->syn_data = (fo->copied > 0);
3446 tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
3388 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); 3447 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
3389 goto done; 3448 goto done;
3390 } 3449 }
3391 3450
3392 /* data was not sent, this is our new send_head */ 3451 /* data was not sent, put it in write_queue */
3393 sk->sk_send_head = syn_data; 3452 __skb_queue_tail(&sk->sk_write_queue, syn_data);
3394 tp->packets_out -= tcp_skb_pcount(syn_data); 3453 tp->packets_out -= tcp_skb_pcount(syn_data);
3395 3454
3396fallback: 3455fallback:
@@ -3433,6 +3492,7 @@ int tcp_connect(struct sock *sk)
3433 tp->retrans_stamp = tcp_time_stamp(tp); 3492 tp->retrans_stamp = tcp_time_stamp(tp);
3434 tcp_connect_queue_skb(sk, buff); 3493 tcp_connect_queue_skb(sk, buff);
3435 tcp_ecn_send_syn(sk, buff); 3494 tcp_ecn_send_syn(sk, buff);
3495 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
3436 3496
3437 /* Send off SYN; include data in Fast Open. */ 3497 /* Send off SYN; include data in Fast Open. */
3438 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : 3498 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
@@ -3627,7 +3687,8 @@ int tcp_write_wakeup(struct sock *sk, int mib)
3627 skb->len > mss) { 3687 skb->len > mss) {
3628 seg_size = min(seg_size, mss); 3688 seg_size = min(seg_size, mss);
3629 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; 3689 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3630 if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC)) 3690 if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
3691 skb, seg_size, mss, GFP_ATOMIC))
3631 return -1; 3692 return -1;
3632 } else if (!tcp_skb_pcount(skb)) 3693 } else if (!tcp_skb_pcount(skb))
3633 tcp_set_skb_tso_segs(skb, mss); 3694 tcp_set_skb_tso_segs(skb, mss);
@@ -3657,7 +3718,7 @@ void tcp_send_probe0(struct sock *sk)
3657 3718
3658 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); 3719 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
3659 3720
3660 if (tp->packets_out || !tcp_send_head(sk)) { 3721 if (tp->packets_out || tcp_write_queue_empty(sk)) {
3661 /* Cancel probe timer, if it is not required. */ 3722 /* Cancel probe timer, if it is not required. */
3662 icsk->icsk_probes_out = 0; 3723 icsk->icsk_probes_out = 0;
3663 icsk->icsk_backoff = 0; 3724 icsk->icsk_backoff = 0;
@@ -3698,6 +3759,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
3698 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); 3759 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3699 if (unlikely(tcp_passive_fastopen(sk))) 3760 if (unlikely(tcp_passive_fastopen(sk)))
3700 tcp_sk(sk)->total_retrans++; 3761 tcp_sk(sk)->total_retrans++;
3762 trace_tcp_retransmit_synack(sk, req);
3701 } 3763 }
3702 return res; 3764 return res;
3703} 3765}
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index be8ef1e5dfef..d3ea89020c69 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -2,8 +2,6 @@
2#include <linux/tcp.h> 2#include <linux/tcp.h>
3#include <net/tcp.h> 3#include <net/tcp.h>
4 4
5int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION;
6
7static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) 5static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
8{ 6{
9 struct tcp_sock *tp = tcp_sk(sk); 7 struct tcp_sock *tp = tcp_sk(sk);
@@ -46,7 +44,8 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
46static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) 44static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
47{ 45{
48 struct tcp_sock *tp = tcp_sk(sk); 46 struct tcp_sock *tp = tcp_sk(sk);
49 struct sk_buff *skb; 47 u32 min_rtt = tcp_min_rtt(tp);
48 struct sk_buff *skb, *n;
50 u32 reo_wnd; 49 u32 reo_wnd;
51 50
52 *reo_timeout = 0; 51 *reo_timeout = 0;
@@ -56,48 +55,36 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
56 * to queuing or delayed ACKs. 55 * to queuing or delayed ACKs.
57 */ 56 */
58 reo_wnd = 1000; 57 reo_wnd = 1000;
59 if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U) 58 if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) {
60 reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd); 59 reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd);
60 reo_wnd = min(reo_wnd, tp->srtt_us >> 3);
61 }
61 62
62 tcp_for_write_queue(skb, sk) { 63 list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
64 tcp_tsorted_anchor) {
63 struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 65 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
66 s32 remaining;
64 67
65 if (skb == tcp_send_head(sk)) 68 /* Skip ones marked lost but not yet retransmitted */
66 break; 69 if ((scb->sacked & TCPCB_LOST) &&
67 70 !(scb->sacked & TCPCB_SACKED_RETRANS))
68 /* Skip ones already (s)acked */
69 if (!after(scb->end_seq, tp->snd_una) ||
70 scb->sacked & TCPCB_SACKED_ACKED)
71 continue; 71 continue;
72 72
73 if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp, 73 if (!tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
74 tp->rack.end_seq, scb->end_seq)) { 74 tp->rack.end_seq, scb->end_seq))
75 /* Step 3 in draft-cheng-tcpm-rack-00.txt: 75 break;
76 * A packet is lost if its elapsed time is beyond
77 * the recent RTT plus the reordering window.
78 */
79 u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp,
80 skb->skb_mstamp);
81 s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed;
82
83 if (remaining < 0) {
84 tcp_rack_mark_skb_lost(sk, skb);
85 continue;
86 }
87
88 /* Skip ones marked lost but not yet retransmitted */
89 if ((scb->sacked & TCPCB_LOST) &&
90 !(scb->sacked & TCPCB_SACKED_RETRANS))
91 continue;
92 76
77 /* A packet is lost if it has not been s/acked beyond
78 * the recent RTT plus the reordering window.
79 */
80 remaining = tp->rack.rtt_us + reo_wnd -
81 tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
82 if (remaining < 0) {
83 tcp_rack_mark_skb_lost(sk, skb);
84 list_del_init(&skb->tcp_tsorted_anchor);
85 } else {
93 /* Record maximum wait time (+1 to avoid 0) */ 86 /* Record maximum wait time (+1 to avoid 0) */
94 *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining); 87 *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining);
95
96 } else if (!(scb->sacked & TCPCB_RETRANS)) {
97 /* Original data are sent sequentially so stop early
98 * b/c the rest are all sent after rack_sent
99 */
100 break;
101 } 88 }
102 } 89 }
103} 90}
@@ -176,3 +163,44 @@ void tcp_rack_reo_timeout(struct sock *sk)
176 if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS) 163 if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS)
177 tcp_rearm_rto(sk); 164 tcp_rearm_rto(sk);
178} 165}
166
167/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
168 *
169 * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded
170 * by srtt), since there is possibility that spurious retransmission was
171 * due to reordering delay longer than reo_wnd.
172 *
173 * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
174 * no. of successful recoveries (accounts for full DSACK-based loss
175 * recovery undo). After that, reset it to default (min_rtt/4).
176 *
177 * At max, reo_wnd is incremented only once per rtt. So that the new
178 * DSACK on which we are reacting, is due to the spurious retx (approx)
179 * after the reo_wnd has been updated last time.
180 *
181 * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than
182 * absolute value to account for change in rtt.
183 */
184void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
185{
186 struct tcp_sock *tp = tcp_sk(sk);
187
188 if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
189 !rs->prior_delivered)
190 return;
191
192 /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
193 if (before(rs->prior_delivered, tp->rack.last_delivered))
194 tp->rack.dsack_seen = 0;
195
196 /* Adjust the reo_wnd if update is pending */
197 if (tp->rack.dsack_seen) {
198 tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
199 tp->rack.reo_wnd_steps + 1);
200 tp->rack.dsack_seen = 0;
201 tp->rack.last_delivered = tp->delivered;
202 tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
203 } else if (!tp->rack.reo_wnd_persist) {
204 tp->rack.reo_wnd_steps = 1;
205 }
206}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 655dd8d7f064..16df6dd44b98 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,8 +22,6 @@
22#include <linux/gfp.h> 22#include <linux/gfp.h>
23#include <net/tcp.h> 23#include <net/tcp.h>
24 24
25int sysctl_tcp_thin_linear_timeouts __read_mostly;
26
27/** 25/**
28 * tcp_write_err() - close socket and save error info 26 * tcp_write_err() - close socket and save error info
29 * @sk: The socket the error has appeared on. 27 * @sk: The socket the error has appeared on.
@@ -109,26 +107,23 @@ static int tcp_orphan_retries(struct sock *sk, bool alive)
109 107
110static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) 108static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
111{ 109{
112 struct net *net = sock_net(sk); 110 const struct net *net = sock_net(sk);
111 int mss;
113 112
114 /* Black hole detection */ 113 /* Black hole detection */
115 if (net->ipv4.sysctl_tcp_mtu_probing) { 114 if (!net->ipv4.sysctl_tcp_mtu_probing)
116 if (!icsk->icsk_mtup.enabled) { 115 return;
117 icsk->icsk_mtup.enabled = 1; 116
118 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; 117 if (!icsk->icsk_mtup.enabled) {
119 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 118 icsk->icsk_mtup.enabled = 1;
120 } else { 119 icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
121 struct net *net = sock_net(sk); 120 } else {
122 struct tcp_sock *tp = tcp_sk(sk); 121 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
123 int mss; 122 mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
124 123 mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
125 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; 124 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
126 mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
127 mss = max(mss, 68 - tp->tcp_header_len);
128 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
129 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
130 }
131 } 125 }
126 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
132} 127}
133 128
134 129
@@ -156,8 +151,13 @@ static bool retransmits_timed_out(struct sock *sk,
156 return false; 151 return false;
157 152
158 start_ts = tcp_sk(sk)->retrans_stamp; 153 start_ts = tcp_sk(sk)->retrans_stamp;
159 if (unlikely(!start_ts)) 154 if (unlikely(!start_ts)) {
160 start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk)); 155 struct sk_buff *head = tcp_rtx_queue_head(sk);
156
157 if (!head)
158 return false;
159 start_ts = tcp_skb_timestamp(head);
160 }
161 161
162 if (likely(timeout == 0)) { 162 if (likely(timeout == 0)) {
163 linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); 163 linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
@@ -283,15 +283,17 @@ out:
283 * 283 *
284 * Returns: Nothing (void) 284 * Returns: Nothing (void)
285 */ 285 */
286static void tcp_delack_timer(unsigned long data) 286static void tcp_delack_timer(struct timer_list *t)
287{ 287{
288 struct sock *sk = (struct sock *)data; 288 struct inet_connection_sock *icsk =
289 from_timer(icsk, t, icsk_delack_timer);
290 struct sock *sk = &icsk->icsk_inet.sk;
289 291
290 bh_lock_sock(sk); 292 bh_lock_sock(sk);
291 if (!sock_owned_by_user(sk)) { 293 if (!sock_owned_by_user(sk)) {
292 tcp_delack_timer_handler(sk); 294 tcp_delack_timer_handler(sk);
293 } else { 295 } else {
294 inet_csk(sk)->icsk_ack.blocked = 1; 296 icsk->icsk_ack.blocked = 1;
295 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); 297 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
296 /* deleguate our work to tcp_release_cb() */ 298 /* deleguate our work to tcp_release_cb() */
297 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags)) 299 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
@@ -304,11 +306,12 @@ static void tcp_delack_timer(unsigned long data)
304static void tcp_probe_timer(struct sock *sk) 306static void tcp_probe_timer(struct sock *sk)
305{ 307{
306 struct inet_connection_sock *icsk = inet_csk(sk); 308 struct inet_connection_sock *icsk = inet_csk(sk);
309 struct sk_buff *skb = tcp_send_head(sk);
307 struct tcp_sock *tp = tcp_sk(sk); 310 struct tcp_sock *tp = tcp_sk(sk);
308 int max_probes; 311 int max_probes;
309 u32 start_ts; 312 u32 start_ts;
310 313
311 if (tp->packets_out || !tcp_send_head(sk)) { 314 if (tp->packets_out || !skb) {
312 icsk->icsk_probes_out = 0; 315 icsk->icsk_probes_out = 0;
313 return; 316 return;
314 } 317 }
@@ -321,9 +324,9 @@ static void tcp_probe_timer(struct sock *sk)
321 * corresponding system limit. We also implement similar policy when 324 * corresponding system limit. We also implement similar policy when
322 * we use RTO to probe window in tcp_retransmit_timer(). 325 * we use RTO to probe window in tcp_retransmit_timer().
323 */ 326 */
324 start_ts = tcp_skb_timestamp(tcp_send_head(sk)); 327 start_ts = tcp_skb_timestamp(skb);
325 if (!start_ts) 328 if (!start_ts)
326 tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp; 329 skb->skb_mstamp = tp->tcp_mstamp;
327 else if (icsk->icsk_user_timeout && 330 else if (icsk->icsk_user_timeout &&
328 (s32)(tcp_time_stamp(tp) - start_ts) > 331 (s32)(tcp_time_stamp(tp) - start_ts) >
329 jiffies_to_msecs(icsk->icsk_user_timeout)) 332 jiffies_to_msecs(icsk->icsk_user_timeout))
@@ -408,7 +411,7 @@ void tcp_retransmit_timer(struct sock *sk)
408 if (!tp->packets_out) 411 if (!tp->packets_out)
409 goto out; 412 goto out;
410 413
411 WARN_ON(tcp_write_queue_empty(sk)); 414 WARN_ON(tcp_rtx_queue_empty(sk));
412 415
413 tp->tlp_high_seq = 0; 416 tp->tlp_high_seq = 0;
414 417
@@ -441,7 +444,7 @@ void tcp_retransmit_timer(struct sock *sk)
441 goto out; 444 goto out;
442 } 445 }
443 tcp_enter_loss(sk); 446 tcp_enter_loss(sk);
444 tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1); 447 tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1);
445 __sk_dst_reset(sk); 448 __sk_dst_reset(sk);
446 goto out_reset_timer; 449 goto out_reset_timer;
447 } 450 }
@@ -473,7 +476,7 @@ void tcp_retransmit_timer(struct sock *sk)
473 476
474 tcp_enter_loss(sk); 477 tcp_enter_loss(sk);
475 478
476 if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) { 479 if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
477 /* Retransmission failed because of local congestion, 480 /* Retransmission failed because of local congestion,
478 * do not backoff. 481 * do not backoff.
479 */ 482 */
@@ -514,7 +517,7 @@ out_reset_timer:
514 * linear-timeout retransmissions into a black hole 517 * linear-timeout retransmissions into a black hole
515 */ 518 */
516 if (sk->sk_state == TCP_ESTABLISHED && 519 if (sk->sk_state == TCP_ESTABLISHED &&
517 (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && 520 (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
518 tcp_stream_is_thin(tp) && 521 tcp_stream_is_thin(tp) &&
519 icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { 522 icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
520 icsk->icsk_backoff = 0; 523 icsk->icsk_backoff = 0;
@@ -570,9 +573,11 @@ out:
570 sk_mem_reclaim(sk); 573 sk_mem_reclaim(sk);
571} 574}
572 575
573static void tcp_write_timer(unsigned long data) 576static void tcp_write_timer(struct timer_list *t)
574{ 577{
575 struct sock *sk = (struct sock *)data; 578 struct inet_connection_sock *icsk =
579 from_timer(icsk, t, icsk_retransmit_timer);
580 struct sock *sk = &icsk->icsk_inet.sk;
576 581
577 bh_lock_sock(sk); 582 bh_lock_sock(sk);
578 if (!sock_owned_by_user(sk)) { 583 if (!sock_owned_by_user(sk)) {
@@ -607,9 +612,9 @@ void tcp_set_keepalive(struct sock *sk, int val)
607EXPORT_SYMBOL_GPL(tcp_set_keepalive); 612EXPORT_SYMBOL_GPL(tcp_set_keepalive);
608 613
609 614
610static void tcp_keepalive_timer (unsigned long data) 615static void tcp_keepalive_timer (struct timer_list *t)
611{ 616{
612 struct sock *sk = (struct sock *) data; 617 struct sock *sk = from_timer(sk, t, sk_timer);
613 struct inet_connection_sock *icsk = inet_csk(sk); 618 struct inet_connection_sock *icsk = inet_csk(sk);
614 struct tcp_sock *tp = tcp_sk(sk); 619 struct tcp_sock *tp = tcp_sk(sk);
615 u32 elapsed; 620 u32 elapsed;
@@ -647,7 +652,7 @@ static void tcp_keepalive_timer (unsigned long data)
647 elapsed = keepalive_time_when(tp); 652 elapsed = keepalive_time_when(tp);
648 653
649 /* It is alive without keepalive 8) */ 654 /* It is alive without keepalive 8) */
650 if (tp->packets_out || tcp_send_head(sk)) 655 if (tp->packets_out || !tcp_write_queue_empty(sk))
651 goto resched; 656 goto resched;
652 657
653 elapsed = keepalive_time_elapsed(tp); 658 elapsed = keepalive_time_elapsed(tp);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 218cfcc77650..ee113ff15fd0 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
158 158
159static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) 159static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
160{ 160{
161 return min(tp->snd_ssthresh, tp->snd_cwnd-1); 161 return min(tp->snd_ssthresh, tp->snd_cwnd);
162} 162}
163 163
164static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) 164static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 02ec9a349303..e4ff25c947c5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1209,8 +1209,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
1209 if (likely(partial)) { 1209 if (likely(partial)) {
1210 up->forward_deficit += size; 1210 up->forward_deficit += size;
1211 size = up->forward_deficit; 1211 size = up->forward_deficit;
1212 if (size < (sk->sk_rcvbuf >> 2) && 1212 if (size < (sk->sk_rcvbuf >> 2))
1213 !skb_queue_empty(&up->reader_queue))
1214 return; 1213 return;
1215 } else { 1214 } else {
1216 size += up->forward_deficit; 1215 size += up->forward_deficit;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8a1c846d3df9..f49bd7897e95 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -152,11 +152,13 @@ static void ipv6_regen_rndid(struct inet6_dev *idev);
152static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); 152static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
153 153
154static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); 154static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
155static int ipv6_count_addresses(struct inet6_dev *idev); 155static int ipv6_count_addresses(const struct inet6_dev *idev);
156static int ipv6_generate_stable_address(struct in6_addr *addr, 156static int ipv6_generate_stable_address(struct in6_addr *addr,
157 u8 dad_count, 157 u8 dad_count,
158 const struct inet6_dev *idev); 158 const struct inet6_dev *idev);
159 159
160#define IN6_ADDR_HSIZE_SHIFT 8
161#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT)
160/* 162/*
161 * Configured unicast address hash table 163 * Configured unicast address hash table
162 */ 164 */
@@ -186,14 +188,12 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp);
186static void addrconf_dad_work(struct work_struct *w); 188static void addrconf_dad_work(struct work_struct *w);
187static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id); 189static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id);
188static void addrconf_dad_run(struct inet6_dev *idev); 190static void addrconf_dad_run(struct inet6_dev *idev);
189static void addrconf_rs_timer(unsigned long data); 191static void addrconf_rs_timer(struct timer_list *t);
190static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); 192static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
191static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); 193static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
192 194
193static void inet6_prefix_notify(int event, struct inet6_dev *idev, 195static void inet6_prefix_notify(int event, struct inet6_dev *idev,
194 struct prefix_info *pinfo); 196 struct prefix_info *pinfo);
195static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
196 struct net_device *dev);
197 197
198static struct ipv6_devconf ipv6_devconf __read_mostly = { 198static struct ipv6_devconf ipv6_devconf __read_mostly = {
199 .forwarding = 0, 199 .forwarding = 0,
@@ -231,7 +231,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
231 .proxy_ndp = 0, 231 .proxy_ndp = 0,
232 .accept_source_route = 0, /* we do not accept RH0 by default. */ 232 .accept_source_route = 0, /* we do not accept RH0 by default. */
233 .disable_ipv6 = 0, 233 .disable_ipv6 = 0,
234 .accept_dad = 1, 234 .accept_dad = 0,
235 .suppress_frag_ndisc = 1, 235 .suppress_frag_ndisc = 1,
236 .accept_ra_mtu = 1, 236 .accept_ra_mtu = 1,
237 .stable_secret = { 237 .stable_secret = {
@@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
303 .disable_policy = 0, 303 .disable_policy = 0,
304}; 304};
305 305
306/* Check if a valid qdisc is available */ 306/* Check if link is ready: is it up and is a valid qdisc available */
307static inline bool addrconf_qdisc_ok(const struct net_device *dev) 307static inline bool addrconf_link_ready(const struct net_device *dev)
308{ 308{
309 return !qdisc_tx_is_noop(dev); 309 return netif_oper_up(dev) && !qdisc_tx_is_noop(dev);
310} 310}
311 311
312static void addrconf_del_rs_timer(struct inet6_dev *idev) 312static void addrconf_del_rs_timer(struct inet6_dev *idev)
@@ -388,8 +388,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
388 rwlock_init(&ndev->lock); 388 rwlock_init(&ndev->lock);
389 ndev->dev = dev; 389 ndev->dev = dev;
390 INIT_LIST_HEAD(&ndev->addr_list); 390 INIT_LIST_HEAD(&ndev->addr_list);
391 setup_timer(&ndev->rs_timer, addrconf_rs_timer, 391 timer_setup(&ndev->rs_timer, addrconf_rs_timer, 0);
392 (unsigned long)ndev);
393 memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); 392 memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
394 393
395 if (ndev->cnf.stable_secret.initialized) 394 if (ndev->cnf.stable_secret.initialized)
@@ -451,7 +450,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
451 450
452 ndev->token = in6addr_any; 451 ndev->token = in6addr_any;
453 452
454 if (netif_running(dev) && addrconf_qdisc_ok(dev)) 453 if (netif_running(dev) && addrconf_link_ready(dev))
455 ndev->if_flags |= IF_READY; 454 ndev->if_flags |= IF_READY;
456 455
457 ipv6_mc_init_dev(ndev); 456 ipv6_mc_init_dev(ndev);
@@ -616,23 +615,23 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
616{ 615{
617 struct net *net = sock_net(in_skb->sk); 616 struct net *net = sock_net(in_skb->sk);
618 struct nlattr *tb[NETCONFA_MAX+1]; 617 struct nlattr *tb[NETCONFA_MAX+1];
618 struct inet6_dev *in6_dev = NULL;
619 struct net_device *dev = NULL;
619 struct netconfmsg *ncm; 620 struct netconfmsg *ncm;
620 struct sk_buff *skb; 621 struct sk_buff *skb;
621 struct ipv6_devconf *devconf; 622 struct ipv6_devconf *devconf;
622 struct inet6_dev *in6_dev;
623 struct net_device *dev;
624 int ifindex; 623 int ifindex;
625 int err; 624 int err;
626 625
627 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, 626 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
628 devconf_ipv6_policy, extack); 627 devconf_ipv6_policy, extack);
629 if (err < 0) 628 if (err < 0)
630 goto errout; 629 return err;
631 630
632 err = -EINVAL;
633 if (!tb[NETCONFA_IFINDEX]) 631 if (!tb[NETCONFA_IFINDEX])
634 goto errout; 632 return -EINVAL;
635 633
634 err = -EINVAL;
636 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); 635 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
637 switch (ifindex) { 636 switch (ifindex) {
638 case NETCONFA_IFINDEX_ALL: 637 case NETCONFA_IFINDEX_ALL:
@@ -642,10 +641,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
642 devconf = net->ipv6.devconf_dflt; 641 devconf = net->ipv6.devconf_dflt;
643 break; 642 break;
644 default: 643 default:
645 dev = __dev_get_by_index(net, ifindex); 644 dev = dev_get_by_index(net, ifindex);
646 if (!dev) 645 if (!dev)
647 goto errout; 646 return -EINVAL;
648 in6_dev = __in6_dev_get(dev); 647 in6_dev = in6_dev_get(dev);
649 if (!in6_dev) 648 if (!in6_dev)
650 goto errout; 649 goto errout;
651 devconf = &in6_dev->cnf; 650 devconf = &in6_dev->cnf;
@@ -653,7 +652,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
653 } 652 }
654 653
655 err = -ENOBUFS; 654 err = -ENOBUFS;
656 skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC); 655 skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
657 if (!skb) 656 if (!skb)
658 goto errout; 657 goto errout;
659 658
@@ -669,6 +668,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
669 } 668 }
670 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 669 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
671errout: 670errout:
671 if (in6_dev)
672 in6_dev_put(in6_dev);
673 if (dev)
674 dev_put(dev);
672 return err; 675 return err;
673} 676}
674 677
@@ -945,12 +948,50 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
945 break; 948 break;
946 } 949 }
947 950
948 list_add_tail(&ifp->if_list, p); 951 list_add_tail_rcu(&ifp->if_list, p);
949} 952}
950 953
951static u32 inet6_addr_hash(const struct in6_addr *addr) 954static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr)
952{ 955{
953 return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT); 956 u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
957
958 return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
959}
960
961static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
962 struct net_device *dev, unsigned int hash)
963{
964 struct inet6_ifaddr *ifp;
965
966 hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
967 if (!net_eq(dev_net(ifp->idev->dev), net))
968 continue;
969 if (ipv6_addr_equal(&ifp->addr, addr)) {
970 if (!dev || ifp->idev->dev == dev)
971 return true;
972 }
973 }
974 return false;
975}
976
977static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
978{
979 unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr);
980 int err = 0;
981
982 spin_lock(&addrconf_hash_lock);
983
984 /* Ignore adding duplicate addresses on an interface */
985 if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
986 ADBG("ipv6_add_addr: already assigned\n");
987 err = -EEXIST;
988 } else {
989 hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
990 }
991
992 spin_unlock(&addrconf_hash_lock);
993
994 return err;
954} 995}
955 996
956/* On success it returns ifp with increased reference count */ 997/* On success it returns ifp with increased reference count */
@@ -958,13 +999,13 @@ static u32 inet6_addr_hash(const struct in6_addr *addr)
958static struct inet6_ifaddr * 999static struct inet6_ifaddr *
959ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, 1000ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
960 const struct in6_addr *peer_addr, int pfxlen, 1001 const struct in6_addr *peer_addr, int pfxlen,
961 int scope, u32 flags, u32 valid_lft, u32 prefered_lft) 1002 int scope, u32 flags, u32 valid_lft, u32 prefered_lft,
1003 bool can_block, struct netlink_ext_ack *extack)
962{ 1004{
1005 gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC;
963 struct net *net = dev_net(idev->dev); 1006 struct net *net = dev_net(idev->dev);
964 struct inet6_ifaddr *ifa = NULL; 1007 struct inet6_ifaddr *ifa = NULL;
965 struct rt6_info *rt; 1008 struct rt6_info *rt = NULL;
966 struct in6_validator_info i6vi;
967 unsigned int hash;
968 int err = 0; 1009 int err = 0;
969 int addr_type = ipv6_addr_type(addr); 1010 int addr_type = ipv6_addr_type(addr);
970 1011
@@ -974,42 +1015,33 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
974 addr_type & IPV6_ADDR_LOOPBACK)) 1015 addr_type & IPV6_ADDR_LOOPBACK))
975 return ERR_PTR(-EADDRNOTAVAIL); 1016 return ERR_PTR(-EADDRNOTAVAIL);
976 1017
977 rcu_read_lock_bh();
978
979 in6_dev_hold(idev);
980
981 if (idev->dead) { 1018 if (idev->dead) {
982 err = -ENODEV; /*XXX*/ 1019 err = -ENODEV; /*XXX*/
983 goto out2; 1020 goto out;
984 } 1021 }
985 1022
986 if (idev->cnf.disable_ipv6) { 1023 if (idev->cnf.disable_ipv6) {
987 err = -EACCES; 1024 err = -EACCES;
988 goto out2;
989 }
990
991 i6vi.i6vi_addr = *addr;
992 i6vi.i6vi_dev = idev;
993 rcu_read_unlock_bh();
994
995 err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
996
997 rcu_read_lock_bh();
998 err = notifier_to_errno(err);
999 if (err)
1000 goto out2;
1001
1002 spin_lock(&addrconf_hash_lock);
1003
1004 /* Ignore adding duplicate addresses on an interface */
1005 if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
1006 ADBG("ipv6_add_addr: already assigned\n");
1007 err = -EEXIST;
1008 goto out; 1025 goto out;
1009 } 1026 }
1010 1027
1011 ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); 1028 /* validator notifier needs to be blocking;
1029 * do not call in atomic context
1030 */
1031 if (can_block) {
1032 struct in6_validator_info i6vi = {
1033 .i6vi_addr = *addr,
1034 .i6vi_dev = idev,
1035 .extack = extack,
1036 };
1037
1038 err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
1039 err = notifier_to_errno(err);
1040 if (err < 0)
1041 goto out;
1042 }
1012 1043
1044 ifa = kzalloc(sizeof(*ifa), gfp_flags);
1013 if (!ifa) { 1045 if (!ifa) {
1014 ADBG("ipv6_add_addr: malloc failed\n"); 1046 ADBG("ipv6_add_addr: malloc failed\n");
1015 err = -ENOBUFS; 1047 err = -ENOBUFS;
@@ -1019,6 +1051,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
1019 rt = addrconf_dst_alloc(idev, addr, false); 1051 rt = addrconf_dst_alloc(idev, addr, false);
1020 if (IS_ERR(rt)) { 1052 if (IS_ERR(rt)) {
1021 err = PTR_ERR(rt); 1053 err = PTR_ERR(rt);
1054 rt = NULL;
1022 goto out; 1055 goto out;
1023 } 1056 }
1024 1057
@@ -1049,16 +1082,21 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
1049 ifa->rt = rt; 1082 ifa->rt = rt;
1050 1083
1051 ifa->idev = idev; 1084 ifa->idev = idev;
1085 in6_dev_hold(idev);
1086
1052 /* For caller */ 1087 /* For caller */
1053 refcount_set(&ifa->refcnt, 1); 1088 refcount_set(&ifa->refcnt, 1);
1054 1089
1055 /* Add to big hash table */ 1090 rcu_read_lock_bh();
1056 hash = inet6_addr_hash(addr);
1057 1091
1058 hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); 1092 err = ipv6_add_addr_hash(idev->dev, ifa);
1059 spin_unlock(&addrconf_hash_lock); 1093 if (err < 0) {
1094 rcu_read_unlock_bh();
1095 goto out;
1096 }
1060 1097
1061 write_lock(&idev->lock); 1098 write_lock(&idev->lock);
1099
1062 /* Add to inet6_dev unicast addr list. */ 1100 /* Add to inet6_dev unicast addr list. */
1063 ipv6_link_dev_addr(idev, ifa); 1101 ipv6_link_dev_addr(idev, ifa);
1064 1102
@@ -1069,21 +1107,23 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
1069 1107
1070 in6_ifa_hold(ifa); 1108 in6_ifa_hold(ifa);
1071 write_unlock(&idev->lock); 1109 write_unlock(&idev->lock);
1072out2: 1110
1073 rcu_read_unlock_bh(); 1111 rcu_read_unlock_bh();
1074 1112
1075 if (likely(err == 0)) 1113 inet6addr_notifier_call_chain(NETDEV_UP, ifa);
1076 inet6addr_notifier_call_chain(NETDEV_UP, ifa); 1114out:
1077 else { 1115 if (unlikely(err < 0)) {
1078 kfree(ifa); 1116 if (rt)
1079 in6_dev_put(idev); 1117 ip6_rt_put(rt);
1118 if (ifa) {
1119 if (ifa->idev)
1120 in6_dev_put(ifa->idev);
1121 kfree(ifa);
1122 }
1080 ifa = ERR_PTR(err); 1123 ifa = ERR_PTR(err);
1081 } 1124 }
1082 1125
1083 return ifa; 1126 return ifa;
1084out:
1085 spin_unlock(&addrconf_hash_lock);
1086 goto out2;
1087} 1127}
1088 1128
1089enum cleanup_prefix_rt_t { 1129enum cleanup_prefix_rt_t {
@@ -1204,7 +1244,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
1204 if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE)) 1244 if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE))
1205 action = check_cleanup_prefix_route(ifp, &expires); 1245 action = check_cleanup_prefix_route(ifp, &expires);
1206 1246
1207 list_del_init(&ifp->if_list); 1247 list_del_rcu(&ifp->if_list);
1208 __in6_ifa_put(ifp); 1248 __in6_ifa_put(ifp);
1209 1249
1210 write_unlock_bh(&ifp->idev->lock); 1250 write_unlock_bh(&ifp->idev->lock);
@@ -1226,7 +1266,9 @@ out:
1226 in6_ifa_put(ifp); 1266 in6_ifa_put(ifp);
1227} 1267}
1228 1268
1229static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift) 1269static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
1270 struct inet6_ifaddr *ift,
1271 bool block)
1230{ 1272{
1231 struct inet6_dev *idev = ifp->idev; 1273 struct inet6_dev *idev = ifp->idev;
1232 struct in6_addr addr, *tmpaddr; 1274 struct in6_addr addr, *tmpaddr;
@@ -1330,7 +1372,7 @@ retry:
1330 1372
1331 ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen, 1373 ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
1332 ipv6_addr_scope(&addr), addr_flags, 1374 ipv6_addr_scope(&addr), addr_flags,
1333 tmp_valid_lft, tmp_prefered_lft); 1375 tmp_valid_lft, tmp_prefered_lft, block, NULL);
1334 if (IS_ERR(ift)) { 1376 if (IS_ERR(ift)) {
1335 in6_ifa_put(ifp); 1377 in6_ifa_put(ifp);
1336 in6_dev_put(idev); 1378 in6_dev_put(idev);
@@ -1558,8 +1600,7 @@ static int __ipv6_dev_get_saddr(struct net *net,
1558{ 1600{
1559 struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx]; 1601 struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
1560 1602
1561 read_lock_bh(&idev->lock); 1603 list_for_each_entry_rcu(score->ifa, &idev->addr_list, if_list) {
1562 list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
1563 int i; 1604 int i;
1564 1605
1565 /* 1606 /*
@@ -1609,11 +1650,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
1609 } 1650 }
1610 break; 1651 break;
1611 } else if (minihiscore < miniscore) { 1652 } else if (minihiscore < miniscore) {
1612 if (hiscore->ifa)
1613 in6_ifa_put(hiscore->ifa);
1614
1615 in6_ifa_hold(score->ifa);
1616
1617 swap(hiscore, score); 1653 swap(hiscore, score);
1618 hiscore_idx = 1 - hiscore_idx; 1654 hiscore_idx = 1 - hiscore_idx;
1619 1655
@@ -1625,7 +1661,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
1625 } 1661 }
1626 } 1662 }
1627out: 1663out:
1628 read_unlock_bh(&idev->lock);
1629 return hiscore_idx; 1664 return hiscore_idx;
1630} 1665}
1631 1666
@@ -1662,6 +1697,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
1662 int dst_type; 1697 int dst_type;
1663 bool use_oif_addr = false; 1698 bool use_oif_addr = false;
1664 int hiscore_idx = 0; 1699 int hiscore_idx = 0;
1700 int ret = 0;
1665 1701
1666 dst_type = __ipv6_addr_type(daddr); 1702 dst_type = __ipv6_addr_type(daddr);
1667 dst.addr = daddr; 1703 dst.addr = daddr;
@@ -1737,15 +1773,14 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
1737 } 1773 }
1738 1774
1739out: 1775out:
1740 rcu_read_unlock();
1741
1742 hiscore = &scores[hiscore_idx]; 1776 hiscore = &scores[hiscore_idx];
1743 if (!hiscore->ifa) 1777 if (!hiscore->ifa)
1744 return -EADDRNOTAVAIL; 1778 ret = -EADDRNOTAVAIL;
1779 else
1780 *saddr = hiscore->ifa->addr;
1745 1781
1746 *saddr = hiscore->ifa->addr; 1782 rcu_read_unlock();
1747 in6_ifa_put(hiscore->ifa); 1783 return ret;
1748 return 0;
1749} 1784}
1750EXPORT_SYMBOL(ipv6_dev_get_saddr); 1785EXPORT_SYMBOL(ipv6_dev_get_saddr);
1751 1786
@@ -1785,15 +1820,15 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
1785 return err; 1820 return err;
1786} 1821}
1787 1822
1788static int ipv6_count_addresses(struct inet6_dev *idev) 1823static int ipv6_count_addresses(const struct inet6_dev *idev)
1789{ 1824{
1825 const struct inet6_ifaddr *ifp;
1790 int cnt = 0; 1826 int cnt = 0;
1791 struct inet6_ifaddr *ifp;
1792 1827
1793 read_lock_bh(&idev->lock); 1828 rcu_read_lock();
1794 list_for_each_entry(ifp, &idev->addr_list, if_list) 1829 list_for_each_entry_rcu(ifp, &idev->addr_list, if_list)
1795 cnt++; 1830 cnt++;
1796 read_unlock_bh(&idev->lock); 1831 rcu_read_unlock();
1797 return cnt; 1832 return cnt;
1798} 1833}
1799 1834
@@ -1808,11 +1843,11 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
1808 const struct net_device *dev, int strict, 1843 const struct net_device *dev, int strict,
1809 u32 banned_flags) 1844 u32 banned_flags)
1810{ 1845{
1846 unsigned int hash = inet6_addr_hash(net, addr);
1811 struct inet6_ifaddr *ifp; 1847 struct inet6_ifaddr *ifp;
1812 unsigned int hash = inet6_addr_hash(addr);
1813 u32 ifp_flags; 1848 u32 ifp_flags;
1814 1849
1815 rcu_read_lock_bh(); 1850 rcu_read_lock();
1816 hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { 1851 hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
1817 if (!net_eq(dev_net(ifp->idev->dev), net)) 1852 if (!net_eq(dev_net(ifp->idev->dev), net))
1818 continue; 1853 continue;
@@ -1826,32 +1861,16 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
1826 !(ifp_flags&banned_flags) && 1861 !(ifp_flags&banned_flags) &&
1827 (!dev || ifp->idev->dev == dev || 1862 (!dev || ifp->idev->dev == dev ||
1828 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { 1863 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
1829 rcu_read_unlock_bh(); 1864 rcu_read_unlock();
1830 return 1; 1865 return 1;
1831 } 1866 }
1832 } 1867 }
1833 1868
1834 rcu_read_unlock_bh(); 1869 rcu_read_unlock();
1835 return 0; 1870 return 0;
1836} 1871}
1837EXPORT_SYMBOL(ipv6_chk_addr_and_flags); 1872EXPORT_SYMBOL(ipv6_chk_addr_and_flags);
1838 1873
1839static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
1840 struct net_device *dev)
1841{
1842 unsigned int hash = inet6_addr_hash(addr);
1843 struct inet6_ifaddr *ifp;
1844
1845 hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
1846 if (!net_eq(dev_net(ifp->idev->dev), net))
1847 continue;
1848 if (ipv6_addr_equal(&ifp->addr, addr)) {
1849 if (!dev || ifp->idev->dev == dev)
1850 return true;
1851 }
1852 }
1853 return false;
1854}
1855 1874
1856/* Compares an address/prefix_len with addresses on device @dev. 1875/* Compares an address/prefix_len with addresses on device @dev.
1857 * If one is found it returns true. 1876 * If one is found it returns true.
@@ -1859,20 +1878,18 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
1859bool ipv6_chk_custom_prefix(const struct in6_addr *addr, 1878bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
1860 const unsigned int prefix_len, struct net_device *dev) 1879 const unsigned int prefix_len, struct net_device *dev)
1861{ 1880{
1862 struct inet6_dev *idev; 1881 const struct inet6_ifaddr *ifa;
1863 struct inet6_ifaddr *ifa; 1882 const struct inet6_dev *idev;
1864 bool ret = false; 1883 bool ret = false;
1865 1884
1866 rcu_read_lock(); 1885 rcu_read_lock();
1867 idev = __in6_dev_get(dev); 1886 idev = __in6_dev_get(dev);
1868 if (idev) { 1887 if (idev) {
1869 read_lock_bh(&idev->lock); 1888 list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
1870 list_for_each_entry(ifa, &idev->addr_list, if_list) {
1871 ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); 1889 ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
1872 if (ret) 1890 if (ret)
1873 break; 1891 break;
1874 } 1892 }
1875 read_unlock_bh(&idev->lock);
1876 } 1893 }
1877 rcu_read_unlock(); 1894 rcu_read_unlock();
1878 1895
@@ -1882,22 +1899,20 @@ EXPORT_SYMBOL(ipv6_chk_custom_prefix);
1882 1899
1883int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) 1900int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
1884{ 1901{
1885 struct inet6_dev *idev; 1902 const struct inet6_ifaddr *ifa;
1886 struct inet6_ifaddr *ifa; 1903 const struct inet6_dev *idev;
1887 int onlink; 1904 int onlink;
1888 1905
1889 onlink = 0; 1906 onlink = 0;
1890 rcu_read_lock(); 1907 rcu_read_lock();
1891 idev = __in6_dev_get(dev); 1908 idev = __in6_dev_get(dev);
1892 if (idev) { 1909 if (idev) {
1893 read_lock_bh(&idev->lock); 1910 list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
1894 list_for_each_entry(ifa, &idev->addr_list, if_list) {
1895 onlink = ipv6_prefix_equal(addr, &ifa->addr, 1911 onlink = ipv6_prefix_equal(addr, &ifa->addr,
1896 ifa->prefix_len); 1912 ifa->prefix_len);
1897 if (onlink) 1913 if (onlink)
1898 break; 1914 break;
1899 } 1915 }
1900 read_unlock_bh(&idev->lock);
1901 } 1916 }
1902 rcu_read_unlock(); 1917 rcu_read_unlock();
1903 return onlink; 1918 return onlink;
@@ -1907,11 +1922,11 @@ EXPORT_SYMBOL(ipv6_chk_prefix);
1907struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, 1922struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
1908 struct net_device *dev, int strict) 1923 struct net_device *dev, int strict)
1909{ 1924{
1925 unsigned int hash = inet6_addr_hash(net, addr);
1910 struct inet6_ifaddr *ifp, *result = NULL; 1926 struct inet6_ifaddr *ifp, *result = NULL;
1911 unsigned int hash = inet6_addr_hash(addr);
1912 1927
1913 rcu_read_lock_bh(); 1928 rcu_read_lock();
1914 hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { 1929 hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
1915 if (!net_eq(dev_net(ifp->idev->dev), net)) 1930 if (!net_eq(dev_net(ifp->idev->dev), net))
1916 continue; 1931 continue;
1917 if (ipv6_addr_equal(&ifp->addr, addr)) { 1932 if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1923,7 +1938,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
1923 } 1938 }
1924 } 1939 }
1925 } 1940 }
1926 rcu_read_unlock_bh(); 1941 rcu_read_unlock();
1927 1942
1928 return result; 1943 return result;
1929} 1944}
@@ -1942,7 +1957,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
1942 if (ifpub) { 1957 if (ifpub) {
1943 in6_ifa_hold(ifpub); 1958 in6_ifa_hold(ifpub);
1944 spin_unlock_bh(&ifp->lock); 1959 spin_unlock_bh(&ifp->lock);
1945 ipv6_create_tempaddr(ifpub, ifp); 1960 ipv6_create_tempaddr(ifpub, ifp, true);
1946 in6_ifa_put(ifpub); 1961 in6_ifa_put(ifpub);
1947 } else { 1962 } else {
1948 spin_unlock_bh(&ifp->lock); 1963 spin_unlock_bh(&ifp->lock);
@@ -1975,7 +1990,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
1975 return err; 1990 return err;
1976} 1991}
1977 1992
1978void addrconf_dad_failure(struct inet6_ifaddr *ifp) 1993void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
1979{ 1994{
1980 struct inet6_dev *idev = ifp->idev; 1995 struct inet6_dev *idev = ifp->idev;
1981 struct net *net = dev_net(ifp->idev->dev); 1996 struct net *net = dev_net(ifp->idev->dev);
@@ -1985,8 +2000,8 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
1985 return; 2000 return;
1986 } 2001 }
1987 2002
1988 net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n", 2003 net_info_ratelimited("%s: IPv6 duplicate address %pI6c used by %pM detected!\n",
1989 ifp->idev->dev->name, &ifp->addr); 2004 ifp->idev->dev->name, &ifp->addr, eth_hdr(skb)->h_source);
1990 2005
1991 spin_lock_bh(&ifp->lock); 2006 spin_lock_bh(&ifp->lock);
1992 2007
@@ -2025,7 +2040,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
2025 2040
2026 ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen, 2041 ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen,
2027 scope, flags, valid_lft, 2042 scope, flags, valid_lft,
2028 preferred_lft); 2043 preferred_lft, false, NULL);
2029 if (IS_ERR(ifp2)) 2044 if (IS_ERR(ifp2))
2030 goto lock_errdad; 2045 goto lock_errdad;
2031 2046
@@ -2321,24 +2336,24 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
2321 if (!table) 2336 if (!table)
2322 return NULL; 2337 return NULL;
2323 2338
2324 read_lock_bh(&table->tb6_lock); 2339 rcu_read_lock();
2325 fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); 2340 fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true);
2326 if (!fn) 2341 if (!fn)
2327 goto out; 2342 goto out;
2328 2343
2329 noflags |= RTF_CACHE; 2344 for_each_fib6_node_rt_rcu(fn) {
2330 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2331 if (rt->dst.dev->ifindex != dev->ifindex) 2345 if (rt->dst.dev->ifindex != dev->ifindex)
2332 continue; 2346 continue;
2333 if ((rt->rt6i_flags & flags) != flags) 2347 if ((rt->rt6i_flags & flags) != flags)
2334 continue; 2348 continue;
2335 if ((rt->rt6i_flags & noflags) != 0) 2349 if ((rt->rt6i_flags & noflags) != 0)
2336 continue; 2350 continue;
2337 dst_hold(&rt->dst); 2351 if (!dst_hold_safe(&rt->dst))
2352 rt = NULL;
2338 break; 2353 break;
2339 } 2354 }
2340out: 2355out:
2341 read_unlock_bh(&table->tb6_lock); 2356 rcu_read_unlock();
2342 return rt; 2357 return rt;
2343} 2358}
2344 2359
@@ -2442,7 +2457,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
2442 * no temporary address currently exists. 2457 * no temporary address currently exists.
2443 */ 2458 */
2444 read_unlock_bh(&idev->lock); 2459 read_unlock_bh(&idev->lock);
2445 ipv6_create_tempaddr(ifp, NULL); 2460 ipv6_create_tempaddr(ifp, NULL, false);
2446 } else { 2461 } else {
2447 read_unlock_bh(&idev->lock); 2462 read_unlock_bh(&idev->lock);
2448 } 2463 }
@@ -2483,7 +2498,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
2483 pinfo->prefix_len, 2498 pinfo->prefix_len,
2484 addr_type&IPV6_ADDR_SCOPE_MASK, 2499 addr_type&IPV6_ADDR_SCOPE_MASK,
2485 addr_flags, valid_lft, 2500 addr_flags, valid_lft,
2486 prefered_lft); 2501 prefered_lft, false, NULL);
2487 2502
2488 if (IS_ERR_OR_NULL(ifp)) 2503 if (IS_ERR_OR_NULL(ifp))
2489 return -1; 2504 return -1;
@@ -2793,7 +2808,8 @@ static int inet6_addr_add(struct net *net, int ifindex,
2793 const struct in6_addr *pfx, 2808 const struct in6_addr *pfx,
2794 const struct in6_addr *peer_pfx, 2809 const struct in6_addr *peer_pfx,
2795 unsigned int plen, __u32 ifa_flags, 2810 unsigned int plen, __u32 ifa_flags,
2796 __u32 prefered_lft, __u32 valid_lft) 2811 __u32 prefered_lft, __u32 valid_lft,
2812 struct netlink_ext_ack *extack)
2797{ 2813{
2798 struct inet6_ifaddr *ifp; 2814 struct inet6_ifaddr *ifp;
2799 struct inet6_dev *idev; 2815 struct inet6_dev *idev;
@@ -2852,7 +2868,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
2852 } 2868 }
2853 2869
2854 ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags, 2870 ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
2855 valid_lft, prefered_lft); 2871 valid_lft, prefered_lft, true, extack);
2856 2872
2857 if (!IS_ERR(ifp)) { 2873 if (!IS_ERR(ifp)) {
2858 if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { 2874 if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
@@ -2937,7 +2953,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
2937 rtnl_lock(); 2953 rtnl_lock();
2938 err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL, 2954 err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
2939 ireq.ifr6_prefixlen, IFA_F_PERMANENT, 2955 ireq.ifr6_prefixlen, IFA_F_PERMANENT,
2940 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); 2956 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, NULL);
2941 rtnl_unlock(); 2957 rtnl_unlock();
2942 return err; 2958 return err;
2943} 2959}
@@ -2967,7 +2983,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
2967 2983
2968 ifp = ipv6_add_addr(idev, addr, NULL, plen, 2984 ifp = ipv6_add_addr(idev, addr, NULL, plen,
2969 scope, IFA_F_PERMANENT, 2985 scope, IFA_F_PERMANENT,
2970 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); 2986 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME,
2987 true, NULL);
2971 if (!IS_ERR(ifp)) { 2988 if (!IS_ERR(ifp)) {
2972 spin_lock_bh(&ifp->lock); 2989 spin_lock_bh(&ifp->lock);
2973 ifp->flags &= ~IFA_F_TENTATIVE; 2990 ifp->flags &= ~IFA_F_TENTATIVE;
@@ -3067,7 +3084,7 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
3067#endif 3084#endif
3068 3085
3069 ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 3086 ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
3070 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); 3087 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL);
3071 if (!IS_ERR(ifp)) { 3088 if (!IS_ERR(ifp)) {
3072 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); 3089 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
3073 addrconf_dad_start(ifp); 3090 addrconf_dad_start(ifp);
@@ -3297,7 +3314,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
3297 struct rt6_info *rt, *prev; 3314 struct rt6_info *rt, *prev;
3298 3315
3299 rt = addrconf_dst_alloc(idev, &ifp->addr, false); 3316 rt = addrconf_dst_alloc(idev, &ifp->addr, false);
3300 if (unlikely(IS_ERR(rt))) 3317 if (IS_ERR(rt))
3301 return PTR_ERR(rt); 3318 return PTR_ERR(rt);
3302 3319
3303 /* ifp->rt can be accessed outside of rtnl */ 3320 /* ifp->rt can be accessed outside of rtnl */
@@ -3404,7 +3421,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3404 /* restore routes for permanent addresses */ 3421 /* restore routes for permanent addresses */
3405 addrconf_permanent_addr(dev); 3422 addrconf_permanent_addr(dev);
3406 3423
3407 if (!addrconf_qdisc_ok(dev)) { 3424 if (!addrconf_link_ready(dev)) {
3408 /* device is not ready yet. */ 3425 /* device is not ready yet. */
3409 pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", 3426 pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
3410 dev->name); 3427 dev->name);
@@ -3419,7 +3436,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3419 run_pending = 1; 3436 run_pending = 1;
3420 } 3437 }
3421 } else if (event == NETDEV_CHANGE) { 3438 } else if (event == NETDEV_CHANGE) {
3422 if (!addrconf_qdisc_ok(dev)) { 3439 if (!addrconf_link_ready(dev)) {
3423 /* device is still not ready. */ 3440 /* device is still not ready. */
3424 break; 3441 break;
3425 } 3442 }
@@ -3563,7 +3580,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3563 struct net *net = dev_net(dev); 3580 struct net *net = dev_net(dev);
3564 struct inet6_dev *idev; 3581 struct inet6_dev *idev;
3565 struct inet6_ifaddr *ifa, *tmp; 3582 struct inet6_ifaddr *ifa, *tmp;
3566 struct list_head del_list;
3567 int _keep_addr; 3583 int _keep_addr;
3568 bool keep_addr; 3584 bool keep_addr;
3569 int state, i; 3585 int state, i;
@@ -3655,7 +3671,6 @@ restart:
3655 */ 3671 */
3656 keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6); 3672 keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
3657 3673
3658 INIT_LIST_HEAD(&del_list);
3659 list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { 3674 list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
3660 struct rt6_info *rt = NULL; 3675 struct rt6_info *rt = NULL;
3661 bool keep; 3676 bool keep;
@@ -3664,8 +3679,6 @@ restart:
3664 3679
3665 keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && 3680 keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
3666 !addr_is_local(&ifa->addr); 3681 !addr_is_local(&ifa->addr);
3667 if (!keep)
3668 list_move(&ifa->if_list, &del_list);
3669 3682
3670 write_unlock_bh(&idev->lock); 3683 write_unlock_bh(&idev->lock);
3671 spin_lock_bh(&ifa->lock); 3684 spin_lock_bh(&ifa->lock);
@@ -3699,19 +3712,14 @@ restart:
3699 } 3712 }
3700 3713
3701 write_lock_bh(&idev->lock); 3714 write_lock_bh(&idev->lock);
3715 if (!keep) {
3716 list_del_rcu(&ifa->if_list);
3717 in6_ifa_put(ifa);
3718 }
3702 } 3719 }
3703 3720
3704 write_unlock_bh(&idev->lock); 3721 write_unlock_bh(&idev->lock);
3705 3722
3706 /* now clean up addresses to be removed */
3707 while (!list_empty(&del_list)) {
3708 ifa = list_first_entry(&del_list,
3709 struct inet6_ifaddr, if_list);
3710 list_del(&ifa->if_list);
3711
3712 in6_ifa_put(ifa);
3713 }
3714
3715 /* Step 5: Discard anycast and multicast list */ 3723 /* Step 5: Discard anycast and multicast list */
3716 if (how) { 3724 if (how) {
3717 ipv6_ac_destroy_dev(idev); 3725 ipv6_ac_destroy_dev(idev);
@@ -3732,9 +3740,9 @@ restart:
3732 return 0; 3740 return 0;
3733} 3741}
3734 3742
3735static void addrconf_rs_timer(unsigned long data) 3743static void addrconf_rs_timer(struct timer_list *t)
3736{ 3744{
3737 struct inet6_dev *idev = (struct inet6_dev *)data; 3745 struct inet6_dev *idev = from_timer(idev, t, rs_timer);
3738 struct net_device *dev = idev->dev; 3746 struct net_device *dev = idev->dev;
3739 struct in6_addr lladdr; 3747 struct in6_addr lladdr;
3740 3748
@@ -4093,9 +4101,9 @@ struct if6_iter_state {
4093 4101
4094static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) 4102static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
4095{ 4103{
4096 struct inet6_ifaddr *ifa = NULL;
4097 struct if6_iter_state *state = seq->private; 4104 struct if6_iter_state *state = seq->private;
4098 struct net *net = seq_file_net(seq); 4105 struct net *net = seq_file_net(seq);
4106 struct inet6_ifaddr *ifa = NULL;
4099 int p = 0; 4107 int p = 0;
4100 4108
4101 /* initial bucket if pos is 0 */ 4109 /* initial bucket if pos is 0 */
@@ -4105,7 +4113,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
4105 } 4113 }
4106 4114
4107 for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { 4115 for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
4108 hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], 4116 hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket],
4109 addr_lst) { 4117 addr_lst) {
4110 if (!net_eq(dev_net(ifa->idev->dev), net)) 4118 if (!net_eq(dev_net(ifa->idev->dev), net))
4111 continue; 4119 continue;
@@ -4131,7 +4139,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
4131 struct if6_iter_state *state = seq->private; 4139 struct if6_iter_state *state = seq->private;
4132 struct net *net = seq_file_net(seq); 4140 struct net *net = seq_file_net(seq);
4133 4141
4134 hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) { 4142 hlist_for_each_entry_continue_rcu(ifa, addr_lst) {
4135 if (!net_eq(dev_net(ifa->idev->dev), net)) 4143 if (!net_eq(dev_net(ifa->idev->dev), net))
4136 continue; 4144 continue;
4137 state->offset++; 4145 state->offset++;
@@ -4140,7 +4148,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
4140 4148
4141 while (++state->bucket < IN6_ADDR_HSIZE) { 4149 while (++state->bucket < IN6_ADDR_HSIZE) {
4142 state->offset = 0; 4150 state->offset = 0;
4143 hlist_for_each_entry_rcu_bh(ifa, 4151 hlist_for_each_entry_rcu(ifa,
4144 &inet6_addr_lst[state->bucket], addr_lst) { 4152 &inet6_addr_lst[state->bucket], addr_lst) {
4145 if (!net_eq(dev_net(ifa->idev->dev), net)) 4153 if (!net_eq(dev_net(ifa->idev->dev), net))
4146 continue; 4154 continue;
@@ -4153,9 +4161,9 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
4153} 4161}
4154 4162
4155static void *if6_seq_start(struct seq_file *seq, loff_t *pos) 4163static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
4156 __acquires(rcu_bh) 4164 __acquires(rcu)
4157{ 4165{
4158 rcu_read_lock_bh(); 4166 rcu_read_lock();
4159 return if6_get_first(seq, *pos); 4167 return if6_get_first(seq, *pos);
4160} 4168}
4161 4169
@@ -4169,9 +4177,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4169} 4177}
4170 4178
4171static void if6_seq_stop(struct seq_file *seq, void *v) 4179static void if6_seq_stop(struct seq_file *seq, void *v)
4172 __releases(rcu_bh) 4180 __releases(rcu)
4173{ 4181{
4174 rcu_read_unlock_bh(); 4182 rcu_read_unlock();
4175} 4183}
4176 4184
4177static int if6_seq_show(struct seq_file *seq, void *v) 4185static int if6_seq_show(struct seq_file *seq, void *v)
@@ -4240,12 +4248,12 @@ void if6_proc_exit(void)
4240/* Check if address is a home address configured on any interface. */ 4248/* Check if address is a home address configured on any interface. */
4241int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) 4249int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
4242{ 4250{
4243 int ret = 0; 4251 unsigned int hash = inet6_addr_hash(net, addr);
4244 struct inet6_ifaddr *ifp = NULL; 4252 struct inet6_ifaddr *ifp = NULL;
4245 unsigned int hash = inet6_addr_hash(addr); 4253 int ret = 0;
4246 4254
4247 rcu_read_lock_bh(); 4255 rcu_read_lock();
4248 hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { 4256 hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
4249 if (!net_eq(dev_net(ifp->idev->dev), net)) 4257 if (!net_eq(dev_net(ifp->idev->dev), net))
4250 continue; 4258 continue;
4251 if (ipv6_addr_equal(&ifp->addr, addr) && 4259 if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -4254,7 +4262,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
4254 break; 4262 break;
4255 } 4263 }
4256 } 4264 }
4257 rcu_read_unlock_bh(); 4265 rcu_read_unlock();
4258 return ret; 4266 return ret;
4259} 4267}
4260#endif 4268#endif
@@ -4344,7 +4352,7 @@ restart:
4344 spin_lock(&ifpub->lock); 4352 spin_lock(&ifpub->lock);
4345 ifpub->regen_count = 0; 4353 ifpub->regen_count = 0;
4346 spin_unlock(&ifpub->lock); 4354 spin_unlock(&ifpub->lock);
4347 ipv6_create_tempaddr(ifpub, ifp); 4355 ipv6_create_tempaddr(ifpub, ifp, true);
4348 in6_ifa_put(ifpub); 4356 in6_ifa_put(ifpub);
4349 in6_ifa_put(ifp); 4357 in6_ifa_put(ifp);
4350 goto restart; 4358 goto restart;
@@ -4580,7 +4588,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
4580 */ 4588 */
4581 return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx, 4589 return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
4582 ifm->ifa_prefixlen, ifa_flags, 4590 ifm->ifa_prefixlen, ifa_flags,
4583 preferred_lft, valid_lft); 4591 preferred_lft, valid_lft, extack);
4584 } 4592 }
4585 4593
4586 if (nlh->nlmsg_flags & NLM_F_EXCL || 4594 if (nlh->nlmsg_flags & NLM_F_EXCL ||
@@ -4907,17 +4915,15 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4907 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, 4915 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
4908 extack); 4916 extack);
4909 if (err < 0) 4917 if (err < 0)
4910 goto errout; 4918 return err;
4911 4919
4912 addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); 4920 addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
4913 if (!addr) { 4921 if (!addr)
4914 err = -EINVAL; 4922 return -EINVAL;
4915 goto errout;
4916 }
4917 4923
4918 ifm = nlmsg_data(nlh); 4924 ifm = nlmsg_data(nlh);
4919 if (ifm->ifa_index) 4925 if (ifm->ifa_index)
4920 dev = __dev_get_by_index(net, ifm->ifa_index); 4926 dev = dev_get_by_index(net, ifm->ifa_index);
4921 4927
4922 ifa = ipv6_get_ifaddr(net, addr, dev, 1); 4928 ifa = ipv6_get_ifaddr(net, addr, dev, 1);
4923 if (!ifa) { 4929 if (!ifa) {
@@ -4943,6 +4949,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4943errout_ifa: 4949errout_ifa:
4944 in6_ifa_put(ifa); 4950 in6_ifa_put(ifa);
4945errout: 4951errout:
4952 if (dev)
4953 dev_put(dev);
4946 return err; 4954 return err;
4947} 4955}
4948 4956
@@ -5050,6 +5058,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
5050 array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; 5058 array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
5051 array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; 5059 array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
5052 array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; 5060 array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
5061 array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
5053} 5062}
5054 5063
5055static inline size_t inet6_ifla6_size(void) 5064static inline size_t inet6_ifla6_size(void)
@@ -5899,10 +5908,9 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
5899 spin_lock(&ifa->lock); 5908 spin_lock(&ifa->lock);
5900 if (ifa->rt) { 5909 if (ifa->rt) {
5901 struct rt6_info *rt = ifa->rt; 5910 struct rt6_info *rt = ifa->rt;
5902 struct fib6_table *table = rt->rt6i_table;
5903 int cpu; 5911 int cpu;
5904 5912
5905 read_lock(&table->tb6_lock); 5913 rcu_read_lock();
5906 addrconf_set_nopolicy(ifa->rt, val); 5914 addrconf_set_nopolicy(ifa->rt, val);
5907 if (rt->rt6i_pcpu) { 5915 if (rt->rt6i_pcpu) {
5908 for_each_possible_cpu(cpu) { 5916 for_each_possible_cpu(cpu) {
@@ -5912,7 +5920,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
5912 addrconf_set_nopolicy(*rtp, val); 5920 addrconf_set_nopolicy(*rtp, val);
5913 } 5921 }
5914 } 5922 }
5915 read_unlock(&table->tb6_lock); 5923 rcu_read_unlock();
5916 } 5924 }
5917 spin_unlock(&ifa->lock); 5925 spin_unlock(&ifa->lock);
5918 } 5926 }
@@ -5978,6 +5986,7 @@ int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
5978} 5986}
5979 5987
5980static int minus_one = -1; 5988static int minus_one = -1;
5989static const int zero = 0;
5981static const int one = 1; 5990static const int one = 1;
5982static const int two_five_five = 255; 5991static const int two_five_five = 255;
5983 5992
@@ -6349,6 +6358,15 @@ static const struct ctl_table addrconf_sysctl[] = {
6349 .proc_handler = addrconf_sysctl_disable_policy, 6358 .proc_handler = addrconf_sysctl_disable_policy,
6350 }, 6359 },
6351 { 6360 {
6361 .procname = "ndisc_tclass",
6362 .data = &ipv6_devconf.ndisc_tclass,
6363 .maxlen = sizeof(int),
6364 .mode = 0644,
6365 .proc_handler = proc_dointvec_minmax,
6366 .extra1 = (void *)&zero,
6367 .extra2 = (void *)&two_five_five,
6368 },
6369 {
6352 /* sentinel */ 6370 /* sentinel */
6353 } 6371 }
6354}; 6372};
@@ -6586,13 +6604,13 @@ int __init addrconf_init(void)
6586 __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0); 6604 __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
6587 __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0); 6605 __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
6588 __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, 6606 __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
6589 inet6_dump_ifaddr, 0); 6607 inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED);
6590 __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, 6608 __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
6591 inet6_dump_ifmcaddr, 0); 6609 inet6_dump_ifmcaddr, 0);
6592 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, 6610 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
6593 inet6_dump_ifacaddr, 0); 6611 inet6_dump_ifacaddr, 0);
6594 __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, 6612 __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
6595 inet6_netconf_dump_devconf, 0); 6613 inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED);
6596 6614
6597 ipv6_addr_label_rtnl_register(); 6615 ipv6_addr_label_rtnl_register();
6598 6616
@@ -6619,9 +6637,9 @@ void addrconf_cleanup(void)
6619 unregister_pernet_subsys(&addrconf_ops); 6637 unregister_pernet_subsys(&addrconf_ops);
6620 ipv6_addr_label_cleanup(); 6638 ipv6_addr_label_cleanup();
6621 6639
6622 rtnl_lock(); 6640 rtnl_af_unregister(&inet6_ops);
6623 6641
6624 __rtnl_af_unregister(&inet6_ops); 6642 rtnl_lock();
6625 6643
6626 /* clean dev list */ 6644 /* clean dev list */
6627 for_each_netdev(&init_net, dev) { 6645 for_each_netdev(&init_net, dev) {
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 9e3488d50b15..32b564dfd02a 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -88,7 +88,7 @@ int __ipv6_addr_type(const struct in6_addr *addr)
88EXPORT_SYMBOL(__ipv6_addr_type); 88EXPORT_SYMBOL(__ipv6_addr_type);
89 89
90static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); 90static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
91static ATOMIC_NOTIFIER_HEAD(inet6addr_validator_chain); 91static BLOCKING_NOTIFIER_HEAD(inet6addr_validator_chain);
92 92
93int register_inet6addr_notifier(struct notifier_block *nb) 93int register_inet6addr_notifier(struct notifier_block *nb)
94{ 94{
@@ -110,19 +110,20 @@ EXPORT_SYMBOL(inet6addr_notifier_call_chain);
110 110
111int register_inet6addr_validator_notifier(struct notifier_block *nb) 111int register_inet6addr_validator_notifier(struct notifier_block *nb)
112{ 112{
113 return atomic_notifier_chain_register(&inet6addr_validator_chain, nb); 113 return blocking_notifier_chain_register(&inet6addr_validator_chain, nb);
114} 114}
115EXPORT_SYMBOL(register_inet6addr_validator_notifier); 115EXPORT_SYMBOL(register_inet6addr_validator_notifier);
116 116
117int unregister_inet6addr_validator_notifier(struct notifier_block *nb) 117int unregister_inet6addr_validator_notifier(struct notifier_block *nb)
118{ 118{
119 return atomic_notifier_chain_unregister(&inet6addr_validator_chain, nb); 119 return blocking_notifier_chain_unregister(&inet6addr_validator_chain,
120 nb);
120} 121}
121EXPORT_SYMBOL(unregister_inet6addr_validator_notifier); 122EXPORT_SYMBOL(unregister_inet6addr_validator_notifier);
122 123
123int inet6addr_validator_notifier_call_chain(unsigned long val, void *v) 124int inet6addr_validator_notifier_call_chain(unsigned long val, void *v)
124{ 125{
125 return atomic_notifier_call_chain(&inet6addr_validator_chain, val, v); 126 return blocking_notifier_call_chain(&inet6addr_validator_chain, val, v);
126} 127}
127EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain); 128EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain);
128 129
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index f664871feca6..00e1f8ee08f8 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -19,7 +19,6 @@
19#include <linux/if_addrlabel.h> 19#include <linux/if_addrlabel.h>
20#include <linux/netlink.h> 20#include <linux/netlink.h>
21#include <linux/rtnetlink.h> 21#include <linux/rtnetlink.h>
22#include <linux/refcount.h>
23 22
24#if 0 23#if 0
25#define ADDRLABEL(x...) printk(x) 24#define ADDRLABEL(x...) printk(x)
@@ -31,30 +30,15 @@
31 * Policy Table 30 * Policy Table
32 */ 31 */
33struct ip6addrlbl_entry { 32struct ip6addrlbl_entry {
34 possible_net_t lbl_net;
35 struct in6_addr prefix; 33 struct in6_addr prefix;
36 int prefixlen; 34 int prefixlen;
37 int ifindex; 35 int ifindex;
38 int addrtype; 36 int addrtype;
39 u32 label; 37 u32 label;
40 struct hlist_node list; 38 struct hlist_node list;
41 refcount_t refcnt;
42 struct rcu_head rcu; 39 struct rcu_head rcu;
43}; 40};
44 41
45static struct ip6addrlbl_table
46{
47 struct hlist_head head;
48 spinlock_t lock;
49 u32 seq;
50} ip6addrlbl_table;
51
52static inline
53struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
54{
55 return read_pnet(&lbl->lbl_net);
56}
57
58/* 42/*
59 * Default policy table (RFC6724 + extensions) 43 * Default policy table (RFC6724 + extensions)
60 * 44 *
@@ -126,36 +110,11 @@ static const __net_initconst struct ip6addrlbl_init_table
126 } 110 }
127}; 111};
128 112
129/* Object management */
130static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
131{
132 kfree(p);
133}
134
135static void ip6addrlbl_free_rcu(struct rcu_head *h)
136{
137 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
138}
139
140static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p)
141{
142 return refcount_inc_not_zero(&p->refcnt);
143}
144
145static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
146{
147 if (refcount_dec_and_test(&p->refcnt))
148 call_rcu(&p->rcu, ip6addrlbl_free_rcu);
149}
150
151/* Find label */ 113/* Find label */
152static bool __ip6addrlbl_match(struct net *net, 114static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p,
153 const struct ip6addrlbl_entry *p,
154 const struct in6_addr *addr, 115 const struct in6_addr *addr,
155 int addrtype, int ifindex) 116 int addrtype, int ifindex)
156{ 117{
157 if (!net_eq(ip6addrlbl_net(p), net))
158 return false;
159 if (p->ifindex && p->ifindex != ifindex) 118 if (p->ifindex && p->ifindex != ifindex)
160 return false; 119 return false;
161 if (p->addrtype && p->addrtype != addrtype) 120 if (p->addrtype && p->addrtype != addrtype)
@@ -170,8 +129,9 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
170 int type, int ifindex) 129 int type, int ifindex)
171{ 130{
172 struct ip6addrlbl_entry *p; 131 struct ip6addrlbl_entry *p;
173 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 132
174 if (__ip6addrlbl_match(net, p, addr, type, ifindex)) 133 hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
134 if (__ip6addrlbl_match(p, addr, type, ifindex))
175 return p; 135 return p;
176 } 136 }
177 return NULL; 137 return NULL;
@@ -197,8 +157,7 @@ u32 ipv6_addr_label(struct net *net,
197} 157}
198 158
199/* allocate one entry */ 159/* allocate one entry */
200static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, 160static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
201 const struct in6_addr *prefix,
202 int prefixlen, int ifindex, 161 int prefixlen, int ifindex,
203 u32 label) 162 u32 label)
204{ 163{
@@ -237,24 +196,22 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
237 newp->addrtype = addrtype; 196 newp->addrtype = addrtype;
238 newp->label = label; 197 newp->label = label;
239 INIT_HLIST_NODE(&newp->list); 198 INIT_HLIST_NODE(&newp->list);
240 write_pnet(&newp->lbl_net, net);
241 refcount_set(&newp->refcnt, 1);
242 return newp; 199 return newp;
243} 200}
244 201
245/* add a label */ 202/* add a label */
246static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 203static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
204 int replace)
247{ 205{
248 struct hlist_node *n;
249 struct ip6addrlbl_entry *last = NULL, *p = NULL; 206 struct ip6addrlbl_entry *last = NULL, *p = NULL;
207 struct hlist_node *n;
250 int ret = 0; 208 int ret = 0;
251 209
252 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, 210 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
253 replace); 211 replace);
254 212
255 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 213 hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
256 if (p->prefixlen == newp->prefixlen && 214 if (p->prefixlen == newp->prefixlen &&
257 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
258 p->ifindex == newp->ifindex && 215 p->ifindex == newp->ifindex &&
259 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 216 ipv6_addr_equal(&p->prefix, &newp->prefix)) {
260 if (!replace) { 217 if (!replace) {
@@ -262,7 +219,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
262 goto out; 219 goto out;
263 } 220 }
264 hlist_replace_rcu(&p->list, &newp->list); 221 hlist_replace_rcu(&p->list, &newp->list);
265 ip6addrlbl_put(p); 222 kfree_rcu(p, rcu);
266 goto out; 223 goto out;
267 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 224 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
268 (p->prefixlen < newp->prefixlen)) { 225 (p->prefixlen < newp->prefixlen)) {
@@ -274,10 +231,10 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
274 if (last) 231 if (last)
275 hlist_add_behind_rcu(&newp->list, &last->list); 232 hlist_add_behind_rcu(&newp->list, &last->list);
276 else 233 else
277 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 234 hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
278out: 235out:
279 if (!ret) 236 if (!ret)
280 ip6addrlbl_table.seq++; 237 net->ipv6.ip6addrlbl_table.seq++;
281 return ret; 238 return ret;
282} 239}
283 240
@@ -293,14 +250,14 @@ static int ip6addrlbl_add(struct net *net,
293 __func__, prefix, prefixlen, ifindex, (unsigned int)label, 250 __func__, prefix, prefixlen, ifindex, (unsigned int)label,
294 replace); 251 replace);
295 252
296 newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); 253 newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
297 if (IS_ERR(newp)) 254 if (IS_ERR(newp))
298 return PTR_ERR(newp); 255 return PTR_ERR(newp);
299 spin_lock(&ip6addrlbl_table.lock); 256 spin_lock(&net->ipv6.ip6addrlbl_table.lock);
300 ret = __ip6addrlbl_add(newp, replace); 257 ret = __ip6addrlbl_add(net, newp, replace);
301 spin_unlock(&ip6addrlbl_table.lock); 258 spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
302 if (ret) 259 if (ret)
303 ip6addrlbl_free(newp); 260 kfree(newp);
304 return ret; 261 return ret;
305} 262}
306 263
@@ -316,13 +273,12 @@ static int __ip6addrlbl_del(struct net *net,
316 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 273 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
317 __func__, prefix, prefixlen, ifindex); 274 __func__, prefix, prefixlen, ifindex);
318 275
319 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 276 hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
320 if (p->prefixlen == prefixlen && 277 if (p->prefixlen == prefixlen &&
321 net_eq(ip6addrlbl_net(p), net) &&
322 p->ifindex == ifindex && 278 p->ifindex == ifindex &&
323 ipv6_addr_equal(&p->prefix, prefix)) { 279 ipv6_addr_equal(&p->prefix, prefix)) {
324 hlist_del_rcu(&p->list); 280 hlist_del_rcu(&p->list);
325 ip6addrlbl_put(p); 281 kfree_rcu(p, rcu);
326 ret = 0; 282 ret = 0;
327 break; 283 break;
328 } 284 }
@@ -341,9 +297,9 @@ static int ip6addrlbl_del(struct net *net,
341 __func__, prefix, prefixlen, ifindex); 297 __func__, prefix, prefixlen, ifindex);
342 298
343 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 299 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
344 spin_lock(&ip6addrlbl_table.lock); 300 spin_lock(&net->ipv6.ip6addrlbl_table.lock);
345 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 301 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
346 spin_unlock(&ip6addrlbl_table.lock); 302 spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
347 return ret; 303 return ret;
348} 304}
349 305
@@ -355,6 +311,9 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
355 311
356 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 312 ADDRLABEL(KERN_DEBUG "%s\n", __func__);
357 313
314 spin_lock_init(&net->ipv6.ip6addrlbl_table.lock);
315 INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head);
316
358 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 317 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
359 int ret = ip6addrlbl_add(net, 318 int ret = ip6addrlbl_add(net,
360 ip6addrlbl_init_table[i].prefix, 319 ip6addrlbl_init_table[i].prefix,
@@ -374,14 +333,12 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
374 struct hlist_node *n; 333 struct hlist_node *n;
375 334
376 /* Remove all labels belonging to the exiting net */ 335 /* Remove all labels belonging to the exiting net */
377 spin_lock(&ip6addrlbl_table.lock); 336 spin_lock(&net->ipv6.ip6addrlbl_table.lock);
378 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 337 hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
379 if (net_eq(ip6addrlbl_net(p), net)) { 338 hlist_del_rcu(&p->list);
380 hlist_del_rcu(&p->list); 339 kfree_rcu(p, rcu);
381 ip6addrlbl_put(p);
382 }
383 } 340 }
384 spin_unlock(&ip6addrlbl_table.lock); 341 spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
385} 342}
386 343
387static struct pernet_operations ipv6_addr_label_ops = { 344static struct pernet_operations ipv6_addr_label_ops = {
@@ -391,8 +348,6 @@ static struct pernet_operations ipv6_addr_label_ops = {
391 348
392int __init ipv6_addr_label_init(void) 349int __init ipv6_addr_label_init(void)
393{ 350{
394 spin_lock_init(&ip6addrlbl_table.lock);
395
396 return register_pernet_subsys(&ipv6_addr_label_ops); 351 return register_pernet_subsys(&ipv6_addr_label_ops);
397} 352}
398 353
@@ -511,11 +466,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
511 int err; 466 int err;
512 467
513 rcu_read_lock(); 468 rcu_read_lock();
514 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 469 hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
515 if (idx >= s_idx && 470 if (idx >= s_idx) {
516 net_eq(ip6addrlbl_net(p), net)) {
517 err = ip6addrlbl_fill(skb, p, 471 err = ip6addrlbl_fill(skb, p,
518 ip6addrlbl_table.seq, 472 net->ipv6.ip6addrlbl_table.seq,
519 NETLINK_CB(cb->skb).portid, 473 NETLINK_CB(cb->skb).portid,
520 cb->nlh->nlmsg_seq, 474 cb->nlh->nlmsg_seq,
521 RTM_NEWADDRLABEL, 475 RTM_NEWADDRLABEL,
@@ -568,38 +522,28 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
568 return -EINVAL; 522 return -EINVAL;
569 addr = nla_data(tb[IFAL_ADDRESS]); 523 addr = nla_data(tb[IFAL_ADDRESS]);
570 524
571 rcu_read_lock();
572 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
573 if (p && !ip6addrlbl_hold(p))
574 p = NULL;
575 lseq = ip6addrlbl_table.seq;
576 rcu_read_unlock();
577
578 if (!p) {
579 err = -ESRCH;
580 goto out;
581 }
582
583 skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); 525 skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
584 if (!skb) { 526 if (!skb)
585 ip6addrlbl_put(p);
586 return -ENOBUFS; 527 return -ENOBUFS;
587 }
588 528
589 err = ip6addrlbl_fill(skb, p, lseq, 529 err = -ESRCH;
590 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
591 RTM_NEWADDRLABEL, 0);
592 530
593 ip6addrlbl_put(p); 531 rcu_read_lock();
532 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
533 lseq = net->ipv6.ip6addrlbl_table.seq;
534 if (p)
535 err = ip6addrlbl_fill(skb, p, lseq,
536 NETLINK_CB(in_skb).portid,
537 nlh->nlmsg_seq,
538 RTM_NEWADDRLABEL, 0);
539 rcu_read_unlock();
594 540
595 if (err < 0) { 541 if (err < 0) {
596 WARN_ON(err == -EMSGSIZE); 542 WARN_ON(err == -EMSGSIZE);
597 kfree_skb(skb); 543 kfree_skb(skb);
598 goto out; 544 } else {
545 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
599 } 546 }
600
601 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
602out:
603 return err; 547 return err;
604} 548}
605 549
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index fe5262fd6aa5..c26f71234b9c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -810,6 +810,10 @@ static int __net_init inet6_net_init(struct net *net)
810 net->ipv6.sysctl.idgen_retries = 3; 810 net->ipv6.sysctl.idgen_retries = 3;
811 net->ipv6.sysctl.idgen_delay = 1 * HZ; 811 net->ipv6.sysctl.idgen_delay = 1 * HZ;
812 net->ipv6.sysctl.flowlabel_state_ranges = 0; 812 net->ipv6.sysctl.flowlabel_state_ranges = 0;
813 net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT;
814 net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
815 net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
816 net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
813 atomic_set(&net->ipv6.fib6_sernum, 1); 817 atomic_set(&net->ipv6.fib6_sernum, 1);
814 818
815 err = ipv6_init_mibs(net); 819 err = ipv6_init_mibs(net);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 7802b72196f3..78c974391567 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -271,6 +271,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
271 case NEXTHDR_DEST: 271 case NEXTHDR_DEST:
272 if (dir == XFRM_POLICY_OUT) 272 if (dir == XFRM_POLICY_OUT)
273 ipv6_rearrange_destopt(iph, exthdr.opth); 273 ipv6_rearrange_destopt(iph, exthdr.opth);
274 /* fall through */
274 case NEXTHDR_HOP: 275 case NEXTHDR_HOP:
275 if (!zero_out_mutable_opts(exthdr.opth)) { 276 if (!zero_out_mutable_opts(exthdr.opth)) {
276 net_dbg_ratelimited("overrun %sopts\n", 277 net_dbg_ratelimited("overrun %sopts\n",
@@ -443,7 +444,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
443 if (err == -EINPROGRESS) 444 if (err == -EINPROGRESS)
444 goto out; 445 goto out;
445 446
446 if (err == -EBUSY) 447 if (err == -ENOSPC)
447 err = NET_XMIT_DROP; 448 err = NET_XMIT_DROP;
448 goto out_free; 449 goto out_free;
449 } 450 }
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 89910e2c10f4..a902ff8f59be 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -396,7 +396,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
396 case -EINPROGRESS: 396 case -EINPROGRESS:
397 goto error; 397 goto error;
398 398
399 case -EBUSY: 399 case -ENOSPC:
400 err = NET_XMIT_DROP; 400 err = NET_XMIT_DROP;
401 break; 401 break;
402 402
@@ -483,8 +483,8 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
483 goto out; 483 goto out;
484 } 484 }
485 485
486 if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2)) 486 ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2);
487 BUG(); 487 BUG_ON(ret);
488 488
489 ret = -EINVAL; 489 ret = -EINVAL;
490 padlen = nexthdr[0]; 490 padlen = nexthdr[0];
@@ -559,14 +559,14 @@ static void esp_input_restore_header(struct sk_buff *skb)
559static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) 559static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
560{ 560{
561 struct xfrm_state *x = xfrm_input_state(skb); 561 struct xfrm_state *x = xfrm_input_state(skb);
562 struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
563 562
564 /* For ESN we move the header forward by 4 bytes to 563 /* For ESN we move the header forward by 4 bytes to
565 * accomodate the high bits. We will move it back after 564 * accomodate the high bits. We will move it back after
566 * decryption. 565 * decryption.
567 */ 566 */
568 if ((x->props.flags & XFRM_STATE_ESN)) { 567 if ((x->props.flags & XFRM_STATE_ESN)) {
569 esph = skb_push(skb, 4); 568 struct ip_esp_hdr *esph = skb_push(skb, 4);
569
570 *seqhi = esph->spi; 570 *seqhi = esph->spi;
571 esph->spi = esph->seq_no; 571 esph->spi = esph->seq_no;
572 esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; 572 esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 95516138e861..83bd75713535 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -74,8 +74,20 @@ struct tlvtype_proc {
74 74
75/* An unknown option is detected, decide what to do */ 75/* An unknown option is detected, decide what to do */
76 76
77static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) 77static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff,
78 bool disallow_unknowns)
78{ 79{
80 if (disallow_unknowns) {
81 /* If unknown TLVs are disallowed by configuration
82 * then always silently drop packet. Note this also
83 * means no ICMP parameter problem is sent which
84 * could be a good property to mitigate a reflection DOS
85 * attack.
86 */
87
88 goto drop;
89 }
90
79 switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) { 91 switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
80 case 0: /* ignore */ 92 case 0: /* ignore */
81 return true; 93 return true;
@@ -89,25 +101,36 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
89 */ 101 */
90 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) 102 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
91 break; 103 break;
104 /* fall through */
92 case 2: /* send ICMP PARM PROB regardless and drop packet */ 105 case 2: /* send ICMP PARM PROB regardless and drop packet */
93 icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff); 106 icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
94 return false; 107 return false;
95 } 108 }
96 109
110drop:
97 kfree_skb(skb); 111 kfree_skb(skb);
98 return false; 112 return false;
99} 113}
100 114
101/* Parse tlv encoded option header (hop-by-hop or destination) */ 115/* Parse tlv encoded option header (hop-by-hop or destination) */
102 116
103static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) 117static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
118 struct sk_buff *skb,
119 int max_count)
104{ 120{
105 const struct tlvtype_proc *curr; 121 int len = (skb_transport_header(skb)[1] + 1) << 3;
106 const unsigned char *nh = skb_network_header(skb); 122 const unsigned char *nh = skb_network_header(skb);
107 int off = skb_network_header_len(skb); 123 int off = skb_network_header_len(skb);
108 int len = (skb_transport_header(skb)[1] + 1) << 3; 124 const struct tlvtype_proc *curr;
125 bool disallow_unknowns = false;
126 int tlv_count = 0;
109 int padlen = 0; 127 int padlen = 0;
110 128
129 if (unlikely(max_count < 0)) {
130 disallow_unknowns = true;
131 max_count = -max_count;
132 }
133
111 if (skb_transport_offset(skb) + len > skb_headlen(skb)) 134 if (skb_transport_offset(skb) + len > skb_headlen(skb))
112 goto bad; 135 goto bad;
113 136
@@ -148,6 +171,11 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
148 default: /* Other TLV code so scan list */ 171 default: /* Other TLV code so scan list */
149 if (optlen > len) 172 if (optlen > len)
150 goto bad; 173 goto bad;
174
175 tlv_count++;
176 if (tlv_count > max_count)
177 goto bad;
178
151 for (curr = procs; curr->type >= 0; curr++) { 179 for (curr = procs; curr->type >= 0; curr++) {
152 if (curr->type == nh[off]) { 180 if (curr->type == nh[off]) {
153 /* type specific length/alignment 181 /* type specific length/alignment
@@ -158,10 +186,10 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
158 break; 186 break;
159 } 187 }
160 } 188 }
161 if (curr->type < 0) { 189 if (curr->type < 0 &&
162 if (ip6_tlvopt_unknown(skb, off) == 0) 190 !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
163 return false; 191 return false;
164 } 192
165 padlen = 0; 193 padlen = 0;
166 break; 194 break;
167 } 195 }
@@ -186,7 +214,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
186 struct ipv6_destopt_hao *hao; 214 struct ipv6_destopt_hao *hao;
187 struct inet6_skb_parm *opt = IP6CB(skb); 215 struct inet6_skb_parm *opt = IP6CB(skb);
188 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 216 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
189 struct in6_addr tmp_addr;
190 int ret; 217 int ret;
191 218
192 if (opt->dsthao) { 219 if (opt->dsthao) {
@@ -228,9 +255,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
228 if (skb->ip_summed == CHECKSUM_COMPLETE) 255 if (skb->ip_summed == CHECKSUM_COMPLETE)
229 skb->ip_summed = CHECKSUM_NONE; 256 skb->ip_summed = CHECKSUM_NONE;
230 257
231 tmp_addr = ipv6h->saddr; 258 swap(ipv6h->saddr, hao->addr);
232 ipv6h->saddr = hao->addr;
233 hao->addr = tmp_addr;
234 259
235 if (skb->tstamp == 0) 260 if (skb->tstamp == 0)
236 __net_timestamp(skb); 261 __net_timestamp(skb);
@@ -260,23 +285,31 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
260 __u16 dstbuf; 285 __u16 dstbuf;
261#endif 286#endif
262 struct dst_entry *dst = skb_dst(skb); 287 struct dst_entry *dst = skb_dst(skb);
288 struct net *net = dev_net(skb->dev);
289 int extlen;
263 290
264 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || 291 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
265 !pskb_may_pull(skb, (skb_transport_offset(skb) + 292 !pskb_may_pull(skb, (skb_transport_offset(skb) +
266 ((skb_transport_header(skb)[1] + 1) << 3)))) { 293 ((skb_transport_header(skb)[1] + 1) << 3)))) {
267 __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 294 __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
268 IPSTATS_MIB_INHDRERRORS); 295 IPSTATS_MIB_INHDRERRORS);
296fail_and_free:
269 kfree_skb(skb); 297 kfree_skb(skb);
270 return -1; 298 return -1;
271 } 299 }
272 300
301 extlen = (skb_transport_header(skb)[1] + 1) << 3;
302 if (extlen > net->ipv6.sysctl.max_dst_opts_len)
303 goto fail_and_free;
304
273 opt->lastopt = opt->dst1 = skb_network_header_len(skb); 305 opt->lastopt = opt->dst1 = skb_network_header_len(skb);
274#if IS_ENABLED(CONFIG_IPV6_MIP6) 306#if IS_ENABLED(CONFIG_IPV6_MIP6)
275 dstbuf = opt->dst1; 307 dstbuf = opt->dst1;
276#endif 308#endif
277 309
278 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { 310 if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
279 skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; 311 init_net.ipv6.sysctl.max_dst_opts_cnt)) {
312 skb->transport_header += extlen;
280 opt = IP6CB(skb); 313 opt = IP6CB(skb);
281#if IS_ENABLED(CONFIG_IPV6_MIP6) 314#if IS_ENABLED(CONFIG_IPV6_MIP6)
282 opt->nhoff = dstbuf; 315 opt->nhoff = dstbuf;
@@ -805,6 +838,8 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = {
805int ipv6_parse_hopopts(struct sk_buff *skb) 838int ipv6_parse_hopopts(struct sk_buff *skb)
806{ 839{
807 struct inet6_skb_parm *opt = IP6CB(skb); 840 struct inet6_skb_parm *opt = IP6CB(skb);
841 struct net *net = dev_net(skb->dev);
842 int extlen;
808 843
809 /* 844 /*
810 * skb_network_header(skb) is equal to skb->data, and 845 * skb_network_header(skb) is equal to skb->data, and
@@ -815,13 +850,19 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
815 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) || 850 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
816 !pskb_may_pull(skb, (sizeof(struct ipv6hdr) + 851 !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
817 ((skb_transport_header(skb)[1] + 1) << 3)))) { 852 ((skb_transport_header(skb)[1] + 1) << 3)))) {
853fail_and_free:
818 kfree_skb(skb); 854 kfree_skb(skb);
819 return -1; 855 return -1;
820 } 856 }
821 857
858 extlen = (skb_transport_header(skb)[1] + 1) << 3;
859 if (extlen > net->ipv6.sysctl.max_hbh_opts_len)
860 goto fail_and_free;
861
822 opt->flags |= IP6SKB_HOPBYHOP; 862 opt->flags |= IP6SKB_HOPBYHOP;
823 if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { 863 if (ip6_parse_tlv(tlvprochopopt_lst, skb,
824 skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; 864 init_net.ipv6.sysctl.max_hbh_opts_cnt)) {
865 skb->transport_header += extlen;
825 opt = IP6CB(skb); 866 opt = IP6CB(skb);
826 opt->nhoff = sizeof(struct ipv6hdr); 867 opt->nhoff = sizeof(struct ipv6hdr);
827 return 1; 868 return 1;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 305e2ed730bf..11025f8d124b 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -99,7 +99,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
99 break; 99 break;
100 hdrlen = 8; 100 hdrlen = 8;
101 } else if (nexthdr == NEXTHDR_AUTH) 101 } else if (nexthdr == NEXTHDR_AUTH)
102 hdrlen = (hp->hdrlen+2)<<2; 102 hdrlen = ipv6_authlen(hp);
103 else 103 else
104 hdrlen = ipv6_optlen(hp); 104 hdrlen = ipv6_optlen(hp);
105 105
@@ -187,7 +187,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
187{ 187{
188 unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); 188 unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
189 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 189 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
190 unsigned int len;
191 bool found; 190 bool found;
192 191
193 if (fragoff) 192 if (fragoff)
@@ -204,7 +203,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
204 start = *offset + sizeof(struct ipv6hdr); 203 start = *offset + sizeof(struct ipv6hdr);
205 nexthdr = ip6->nexthdr; 204 nexthdr = ip6->nexthdr;
206 } 205 }
207 len = skb->len - start;
208 206
209 do { 207 do {
210 struct ipv6_opt_hdr _hdr, *hp; 208 struct ipv6_opt_hdr _hdr, *hp;
@@ -273,7 +271,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
273 271
274 if (!found) { 272 if (!found) {
275 nexthdr = hp->nexthdr; 273 nexthdr = hp->nexthdr;
276 len -= hdrlen;
277 start += hdrlen; 274 start += hdrlen;
278 } 275 }
279 } while (!found); 276 } while (!found);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 5acb54405b10..6ae5dd3f4d0d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -250,16 +250,15 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset)
250 return (*op & 0xC0) == 0x80; 250 return (*op & 0xC0) == 0x80;
251} 251}
252 252
253int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, 253void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
254 struct icmp6hdr *thdr, int len) 254 struct icmp6hdr *thdr, int len)
255{ 255{
256 struct sk_buff *skb; 256 struct sk_buff *skb;
257 struct icmp6hdr *icmp6h; 257 struct icmp6hdr *icmp6h;
258 int err = 0;
259 258
260 skb = skb_peek(&sk->sk_write_queue); 259 skb = skb_peek(&sk->sk_write_queue);
261 if (!skb) 260 if (!skb)
262 goto out; 261 return;
263 262
264 icmp6h = icmp6_hdr(skb); 263 icmp6h = icmp6_hdr(skb);
265 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr)); 264 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
@@ -287,8 +286,6 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
287 tmp_csum); 286 tmp_csum);
288 } 287 }
289 ip6_push_pending_frames(sk); 288 ip6_push_pending_frames(sk);
290out:
291 return err;
292} 289}
293 290
294struct icmpv6_msg { 291struct icmpv6_msg {
@@ -438,7 +435,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
438 int iif = 0; 435 int iif = 0;
439 int addr_type = 0; 436 int addr_type = 0;
440 int len; 437 int len;
441 int err = 0;
442 u32 mark = IP6_REPLY_MARK(net, skb->mark); 438 u32 mark = IP6_REPLY_MARK(net, skb->mark);
443 439
444 if ((u8 *)hdr < skb->head || 440 if ((u8 *)hdr < skb->head ||
@@ -575,17 +571,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
575 rcu_read_lock(); 571 rcu_read_lock();
576 idev = __in6_dev_get(skb->dev); 572 idev = __in6_dev_get(skb->dev);
577 573
578 err = ip6_append_data(sk, icmpv6_getfrag, &msg, 574 if (ip6_append_data(sk, icmpv6_getfrag, &msg,
579 len + sizeof(struct icmp6hdr), 575 len + sizeof(struct icmp6hdr),
580 sizeof(struct icmp6hdr), 576 sizeof(struct icmp6hdr),
581 &ipc6, &fl6, (struct rt6_info *)dst, 577 &ipc6, &fl6, (struct rt6_info *)dst,
582 MSG_DONTWAIT, &sockc_unused); 578 MSG_DONTWAIT, &sockc_unused)) {
583 if (err) {
584 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); 579 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
585 ip6_flush_pending_frames(sk); 580 ip6_flush_pending_frames(sk);
586 } else { 581 } else {
587 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, 582 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
588 len + sizeof(struct icmp6hdr)); 583 len + sizeof(struct icmp6hdr));
589 } 584 }
590 rcu_read_unlock(); 585 rcu_read_unlock();
591out_dst_release: 586out_dst_release:
@@ -682,7 +677,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
682 struct icmpv6_msg msg; 677 struct icmpv6_msg msg;
683 struct dst_entry *dst; 678 struct dst_entry *dst;
684 struct ipcm6_cookie ipc6; 679 struct ipcm6_cookie ipc6;
685 int err = 0;
686 u32 mark = IP6_REPLY_MARK(net, skb->mark); 680 u32 mark = IP6_REPLY_MARK(net, skb->mark);
687 struct sockcm_cookie sockc_unused = {0}; 681 struct sockcm_cookie sockc_unused = {0};
688 682
@@ -719,8 +713,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
719 else if (!fl6.flowi6_oif) 713 else if (!fl6.flowi6_oif)
720 fl6.flowi6_oif = np->ucast_oif; 714 fl6.flowi6_oif = np->ucast_oif;
721 715
722 err = ip6_dst_lookup(net, sk, &dst, &fl6); 716 if (ip6_dst_lookup(net, sk, &dst, &fl6))
723 if (err)
724 goto out; 717 goto out;
725 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); 718 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
726 if (IS_ERR(dst)) 719 if (IS_ERR(dst))
@@ -737,17 +730,16 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
737 ipc6.dontfrag = np->dontfrag; 730 ipc6.dontfrag = np->dontfrag;
738 ipc6.opt = NULL; 731 ipc6.opt = NULL;
739 732
740 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), 733 if (ip6_append_data(sk, icmpv6_getfrag, &msg,
741 sizeof(struct icmp6hdr), &ipc6, &fl6, 734 skb->len + sizeof(struct icmp6hdr),
742 (struct rt6_info *)dst, MSG_DONTWAIT, 735 sizeof(struct icmp6hdr), &ipc6, &fl6,
743 &sockc_unused); 736 (struct rt6_info *)dst, MSG_DONTWAIT,
744 737 &sockc_unused)) {
745 if (err) {
746 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); 738 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
747 ip6_flush_pending_frames(sk); 739 ip6_flush_pending_frames(sk);
748 } else { 740 } else {
749 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, 741 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
750 skb->len + sizeof(struct icmp6hdr)); 742 skb->len + sizeof(struct icmp6hdr));
751 } 743 }
752 dst_release(dst); 744 dst_release(dst);
753out: 745out:
@@ -872,10 +864,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
872 goto discard_it; 864 goto discard_it;
873 hdr = icmp6_hdr(skb); 865 hdr = icmp6_hdr(skb);
874 866
875 /* 867 /* to notify */
876 * Drop through to notify 868 /* fall through */
877 */
878
879 case ICMPV6_DEST_UNREACH: 869 case ICMPV6_DEST_UNREACH:
880 case ICMPV6_TIME_EXCEED: 870 case ICMPV6_TIME_EXCEED:
881 case ICMPV6_PARAMPROB: 871 case ICMPV6_PARAMPROB:
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index e0170f62bc39..3c7a11b62334 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -55,17 +55,6 @@ struct ila_identifier {
55 }; 55 };
56}; 56};
57 57
58enum {
59 ILA_ATYPE_IID = 0,
60 ILA_ATYPE_LUID,
61 ILA_ATYPE_VIRT_V4,
62 ILA_ATYPE_VIRT_UNI_V6,
63 ILA_ATYPE_VIRT_MULTI_V6,
64 ILA_ATYPE_RSVD_1,
65 ILA_ATYPE_RSVD_2,
66 ILA_ATYPE_RSVD_3,
67};
68
69#define CSUM_NEUTRAL_FLAG htonl(0x10000000) 58#define CSUM_NEUTRAL_FLAG htonl(0x10000000)
70 59
71struct ila_addr { 60struct ila_addr {
@@ -93,6 +82,7 @@ struct ila_params {
93 struct ila_locator locator_match; 82 struct ila_locator locator_match;
94 __wsum csum_diff; 83 __wsum csum_diff;
95 u8 csum_mode; 84 u8 csum_mode;
85 u8 ident_type;
96}; 86};
97 87
98static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) 88static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index aba0998ddbfb..8c88ecf29b93 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -13,30 +13,37 @@
13#include <uapi/linux/ila.h> 13#include <uapi/linux/ila.h>
14#include "ila.h" 14#include "ila.h"
15 15
16static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) 16void ila_init_saved_csum(struct ila_params *p)
17{ 17{
18 struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); 18 if (!p->locator_match.v64)
19 return;
19 20
21 p->csum_diff = compute_csum_diff8(
22 (__be32 *)&p->locator,
23 (__be32 *)&p->locator_match);
24}
25
26static __wsum get_csum_diff_iaddr(struct ila_addr *iaddr, struct ila_params *p)
27{
20 if (p->locator_match.v64) 28 if (p->locator_match.v64)
21 return p->csum_diff; 29 return p->csum_diff;
22 else 30 else
23 return compute_csum_diff8((__be32 *)&iaddr->loc, 31 return compute_csum_diff8((__be32 *)&p->locator,
24 (__be32 *)&p->locator); 32 (__be32 *)&iaddr->loc);
25} 33}
26 34
27static void ila_csum_do_neutral(struct ila_addr *iaddr, 35static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
28 struct ila_params *p) 36{
37 return get_csum_diff_iaddr(ila_a2i(&ip6h->daddr), p);
38}
39
40static void ila_csum_do_neutral_fmt(struct ila_addr *iaddr,
41 struct ila_params *p)
29{ 42{
30 __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; 43 __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
31 __wsum diff, fval; 44 __wsum diff, fval;
32 45
33 /* Check if checksum adjust value has been cached */ 46 diff = get_csum_diff_iaddr(iaddr, p);
34 if (p->locator_match.v64) {
35 diff = p->csum_diff;
36 } else {
37 diff = compute_csum_diff8((__be32 *)&p->locator,
38 (__be32 *)iaddr);
39 }
40 47
41 fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ? 48 fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
42 CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG); 49 CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG);
@@ -53,13 +60,23 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr,
53 iaddr->ident.csum_neutral ^= 1; 60 iaddr->ident.csum_neutral ^= 1;
54} 61}
55 62
56static void ila_csum_adjust_transport(struct sk_buff *skb, 63static void ila_csum_do_neutral_nofmt(struct ila_addr *iaddr,
57 struct ila_params *p) 64 struct ila_params *p)
58{ 65{
66 __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
59 __wsum diff; 67 __wsum diff;
60 struct ipv6hdr *ip6h = ipv6_hdr(skb); 68
61 struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); 69 diff = get_csum_diff_iaddr(iaddr, p);
70
71 *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust)));
72}
73
74static void ila_csum_adjust_transport(struct sk_buff *skb,
75 struct ila_params *p)
76{
62 size_t nhoff = sizeof(struct ipv6hdr); 77 size_t nhoff = sizeof(struct ipv6hdr);
78 struct ipv6hdr *ip6h = ipv6_hdr(skb);
79 __wsum diff;
63 80
64 switch (ip6h->nexthdr) { 81 switch (ip6h->nexthdr) {
65 case NEXTHDR_TCP: 82 case NEXTHDR_TCP:
@@ -98,52 +115,45 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
98 } 115 }
99 break; 116 break;
100 } 117 }
101
102 /* Now change destination address */
103 iaddr->loc = p->locator;
104} 118}
105 119
106void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p, 120void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
107 bool set_csum_neutral) 121 bool sir2ila)
108{ 122{
109 struct ipv6hdr *ip6h = ipv6_hdr(skb); 123 struct ipv6hdr *ip6h = ipv6_hdr(skb);
110 struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); 124 struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
111 125
112 /* First deal with the transport checksum */ 126 switch (p->csum_mode) {
113 if (ila_csum_neutral_set(iaddr->ident)) { 127 case ILA_CSUM_ADJUST_TRANSPORT:
114 /* C-bit is set in the locator indicating that this 128 ila_csum_adjust_transport(skb, p);
115 * is a locator being translated to a SIR address. 129 break;
116 * Perform (receiver) checksum-neutral translation. 130 case ILA_CSUM_NEUTRAL_MAP:
117 */ 131 if (sir2ila) {
118 if (!set_csum_neutral) 132 if (WARN_ON(ila_csum_neutral_set(iaddr->ident))) {
119 ila_csum_do_neutral(iaddr, p); 133 /* Checksum flag should never be
120 } else { 134 * set in a formatted SIR address.
121 switch (p->csum_mode) { 135 */
122 case ILA_CSUM_ADJUST_TRANSPORT: 136 break;
123 ila_csum_adjust_transport(skb, p); 137 }
124 break; 138 } else if (!ila_csum_neutral_set(iaddr->ident)) {
125 case ILA_CSUM_NEUTRAL_MAP: 139 /* ILA to SIR translation and C-bit isn't
126 ila_csum_do_neutral(iaddr, p); 140 * set so we're good.
127 break; 141 */
128 case ILA_CSUM_NO_ACTION:
129 break; 142 break;
130 } 143 }
144 ila_csum_do_neutral_fmt(iaddr, p);
145 break;
146 case ILA_CSUM_NEUTRAL_MAP_AUTO:
147 ila_csum_do_neutral_nofmt(iaddr, p);
148 break;
149 case ILA_CSUM_NO_ACTION:
150 break;
131 } 151 }
132 152
133 /* Now change destination address */ 153 /* Now change destination address */
134 iaddr->loc = p->locator; 154 iaddr->loc = p->locator;
135} 155}
136 156
137void ila_init_saved_csum(struct ila_params *p)
138{
139 if (!p->locator_match.v64)
140 return;
141
142 p->csum_diff = compute_csum_diff8(
143 (__be32 *)&p->locator,
144 (__be32 *)&p->locator_match);
145}
146
147static int __init ila_init(void) 157static int __init ila_init(void)
148{ 158{
149 int ret; 159 int ret;
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 696281b4bca2..3d56a2fb6f86 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -20,6 +20,7 @@ struct ila_lwt {
20 struct ila_params p; 20 struct ila_params p;
21 struct dst_cache dst_cache; 21 struct dst_cache dst_cache;
22 u32 connected : 1; 22 u32 connected : 1;
23 u32 lwt_output : 1;
23}; 24};
24 25
25static inline struct ila_lwt *ila_lwt_lwtunnel( 26static inline struct ila_lwt *ila_lwt_lwtunnel(
@@ -45,8 +46,10 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
45 if (skb->protocol != htons(ETH_P_IPV6)) 46 if (skb->protocol != htons(ETH_P_IPV6))
46 goto drop; 47 goto drop;
47 48
48 ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate), 49 if (ilwt->lwt_output)
49 true); 50 ila_update_ipv6_locator(skb,
51 ila_params_lwtunnel(orig_dst->lwtstate),
52 true);
50 53
51 if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) { 54 if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) {
52 /* Already have a next hop address in route, no need for 55 /* Already have a next hop address in route, no need for
@@ -98,11 +101,15 @@ drop:
98static int ila_input(struct sk_buff *skb) 101static int ila_input(struct sk_buff *skb)
99{ 102{
100 struct dst_entry *dst = skb_dst(skb); 103 struct dst_entry *dst = skb_dst(skb);
104 struct ila_lwt *ilwt = ila_lwt_lwtunnel(dst->lwtstate);
101 105
102 if (skb->protocol != htons(ETH_P_IPV6)) 106 if (skb->protocol != htons(ETH_P_IPV6))
103 goto drop; 107 goto drop;
104 108
105 ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false); 109 if (!ilwt->lwt_output)
110 ila_update_ipv6_locator(skb,
111 ila_params_lwtunnel(dst->lwtstate),
112 false);
106 113
107 return dst->lwtstate->orig_input(skb); 114 return dst->lwtstate->orig_input(skb);
108 115
@@ -114,6 +121,8 @@ drop:
114static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { 121static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
115 [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, 122 [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
116 [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, 123 [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
124 [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
125 [ILA_ATTR_HOOK_TYPE] = { .type = NLA_U8, },
117}; 126};
118 127
119static int ila_build_state(struct nlattr *nla, 128static int ila_build_state(struct nlattr *nla,
@@ -127,33 +136,84 @@ static int ila_build_state(struct nlattr *nla,
127 struct lwtunnel_state *newts; 136 struct lwtunnel_state *newts;
128 const struct fib6_config *cfg6 = cfg; 137 const struct fib6_config *cfg6 = cfg;
129 struct ila_addr *iaddr; 138 struct ila_addr *iaddr;
139 u8 ident_type = ILA_ATYPE_USE_FORMAT;
140 u8 hook_type = ILA_HOOK_ROUTE_OUTPUT;
141 u8 csum_mode = ILA_CSUM_NO_ACTION;
142 bool lwt_output = true;
143 u8 eff_ident_type;
130 int ret; 144 int ret;
131 145
132 if (family != AF_INET6) 146 if (family != AF_INET6)
133 return -EINVAL; 147 return -EINVAL;
134 148
135 if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) { 149 ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
136 /* Need to have full locator and at least type field 150 if (ret < 0)
137 * included in destination 151 return ret;
138 */ 152
153 if (!tb[ILA_ATTR_LOCATOR])
139 return -EINVAL; 154 return -EINVAL;
140 }
141 155
142 iaddr = (struct ila_addr *)&cfg6->fc_dst; 156 iaddr = (struct ila_addr *)&cfg6->fc_dst;
143 157
144 if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) { 158 if (tb[ILA_ATTR_IDENT_TYPE])
145 /* Don't allow translation for a non-ILA address or checksum 159 ident_type = nla_get_u8(tb[ILA_ATTR_IDENT_TYPE]);
146 * neutral flag to be set. 160
161 if (ident_type == ILA_ATYPE_USE_FORMAT) {
162 /* Infer identifier type from type field in formatted
163 * identifier.
147 */ 164 */
165
166 if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
167 /* Need to have full locator and at least type field
168 * included in destination
169 */
170 return -EINVAL;
171 }
172
173 eff_ident_type = iaddr->ident.type;
174 } else {
175 eff_ident_type = ident_type;
176 }
177
178 switch (eff_ident_type) {
179 case ILA_ATYPE_IID:
180 /* Don't allow ILA for IID type */
181 return -EINVAL;
182 case ILA_ATYPE_LUID:
183 break;
184 case ILA_ATYPE_VIRT_V4:
185 case ILA_ATYPE_VIRT_UNI_V6:
186 case ILA_ATYPE_VIRT_MULTI_V6:
187 case ILA_ATYPE_NONLOCAL_ADDR:
188 /* These ILA formats are not supported yet. */
189 default:
148 return -EINVAL; 190 return -EINVAL;
149 } 191 }
150 192
151 ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack); 193 if (tb[ILA_ATTR_HOOK_TYPE])
152 if (ret < 0) 194 hook_type = nla_get_u8(tb[ILA_ATTR_HOOK_TYPE]);
153 return ret; 195
196 switch (hook_type) {
197 case ILA_HOOK_ROUTE_OUTPUT:
198 lwt_output = true;
199 break;
200 case ILA_HOOK_ROUTE_INPUT:
201 lwt_output = false;
202 break;
203 default:
204 return -EINVAL;
205 }
154 206
155 if (!tb[ILA_ATTR_LOCATOR]) 207 if (tb[ILA_ATTR_CSUM_MODE])
208 csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
209
210 if (csum_mode == ILA_CSUM_NEUTRAL_MAP &&
211 ila_csum_neutral_set(iaddr->ident)) {
212 /* Don't allow translation if checksum neutral bit is
213 * configured and it's set in the SIR address.
214 */
156 return -EINVAL; 215 return -EINVAL;
216 }
157 217
158 newts = lwtunnel_state_alloc(sizeof(*ilwt)); 218 newts = lwtunnel_state_alloc(sizeof(*ilwt));
159 if (!newts) 219 if (!newts)
@@ -166,19 +226,18 @@ static int ila_build_state(struct nlattr *nla,
166 return ret; 226 return ret;
167 } 227 }
168 228
229 ilwt->lwt_output = !!lwt_output;
230
169 p = ila_params_lwtunnel(newts); 231 p = ila_params_lwtunnel(newts);
170 232
233 p->csum_mode = csum_mode;
234 p->ident_type = ident_type;
171 p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); 235 p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
172 236
173 /* Precompute checksum difference for translation since we 237 /* Precompute checksum difference for translation since we
174 * know both the old locator and the new one. 238 * know both the old locator and the new one.
175 */ 239 */
176 p->locator_match = iaddr->loc; 240 p->locator_match = iaddr->loc;
177 p->csum_diff = compute_csum_diff8(
178 (__be32 *)&p->locator_match, (__be32 *)&p->locator);
179
180 if (tb[ILA_ATTR_CSUM_MODE])
181 p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
182 241
183 ila_init_saved_csum(p); 242 ila_init_saved_csum(p);
184 243
@@ -203,13 +262,23 @@ static int ila_fill_encap_info(struct sk_buff *skb,
203 struct lwtunnel_state *lwtstate) 262 struct lwtunnel_state *lwtstate)
204{ 263{
205 struct ila_params *p = ila_params_lwtunnel(lwtstate); 264 struct ila_params *p = ila_params_lwtunnel(lwtstate);
265 struct ila_lwt *ilwt = ila_lwt_lwtunnel(lwtstate);
206 266
207 if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64, 267 if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64,
208 ILA_ATTR_PAD)) 268 ILA_ATTR_PAD))
209 goto nla_put_failure; 269 goto nla_put_failure;
270
210 if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode)) 271 if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode))
211 goto nla_put_failure; 272 goto nla_put_failure;
212 273
274 if (nla_put_u8(skb, ILA_ATTR_IDENT_TYPE, (__force u8)p->ident_type))
275 goto nla_put_failure;
276
277 if (nla_put_u8(skb, ILA_ATTR_HOOK_TYPE,
278 ilwt->lwt_output ? ILA_HOOK_ROUTE_OUTPUT :
279 ILA_HOOK_ROUTE_INPUT))
280 goto nla_put_failure;
281
213 return 0; 282 return 0;
214 283
215nla_put_failure: 284nla_put_failure:
@@ -220,6 +289,8 @@ static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
220{ 289{
221 return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */ 290 return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */
222 nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */ 291 nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */
292 nla_total_size(sizeof(u8)) + /* ILA_ATTR_IDENT_TYPE */
293 nla_total_size(sizeof(u8)) + /* ILA_ATTR_HOOK_TYPE */
223 0; 294 0;
224} 295}
225 296
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 3123b9de91b5..6eb5e68f112a 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -121,6 +121,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
121 [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, 121 [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
122 [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, 122 [ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
123 [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, 123 [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
124 [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
124}; 125};
125 126
126static int parse_nl_config(struct genl_info *info, 127static int parse_nl_config(struct genl_info *info,
@@ -138,6 +139,14 @@ static int parse_nl_config(struct genl_info *info,
138 139
139 if (info->attrs[ILA_ATTR_CSUM_MODE]) 140 if (info->attrs[ILA_ATTR_CSUM_MODE])
140 xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]); 141 xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
142 else
143 xp->ip.csum_mode = ILA_CSUM_NO_ACTION;
144
145 if (info->attrs[ILA_ATTR_IDENT_TYPE])
146 xp->ip.ident_type = nla_get_u8(
147 info->attrs[ILA_ATTR_IDENT_TYPE]);
148 else
149 xp->ip.ident_type = ILA_ATYPE_USE_FORMAT;
141 150
142 if (info->attrs[ILA_ATTR_IFINDEX]) 151 if (info->attrs[ILA_ATTR_IFINDEX])
143 xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); 152 xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
@@ -198,7 +207,7 @@ static void ila_free_cb(void *ptr, void *arg)
198 } 207 }
199} 208}
200 209
201static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral); 210static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila);
202 211
203static unsigned int 212static unsigned int
204ila_nf_input(void *priv, 213ila_nf_input(void *priv,
@@ -396,7 +405,8 @@ static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
396 (__force u64)ila->xp.ip.locator_match.v64, 405 (__force u64)ila->xp.ip.locator_match.v64,
397 ILA_ATTR_PAD) || 406 ILA_ATTR_PAD) ||
398 nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) || 407 nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
399 nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode)) 408 nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode) ||
409 nla_put_u8(msg, ILA_ATTR_IDENT_TYPE, ila->xp.ip.ident_type))
400 return -1; 410 return -1;
401 411
402 return 0; 412 return 0;
@@ -607,7 +617,7 @@ static struct pernet_operations ila_net_ops = {
607 .size = sizeof(struct ila_net), 617 .size = sizeof(struct ila_net),
608}; 618};
609 619
610static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) 620static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
611{ 621{
612 struct ila_map *ila; 622 struct ila_map *ila;
613 struct ipv6hdr *ip6h = ipv6_hdr(skb); 623 struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -617,16 +627,16 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
617 627
618 /* Assumes skb contains a valid IPv6 header that is pulled */ 628 /* Assumes skb contains a valid IPv6 header that is pulled */
619 629
620 if (!ila_addr_is_ila(iaddr)) { 630 /* No check here that ILA type in the mapping matches what is in the
621 /* Type indicates this is not an ILA address */ 631 * address. We assume that whatever sender gaves us can be translated.
622 return 0; 632 * The checksum mode however is relevant.
623 } 633 */
624 634
625 rcu_read_lock(); 635 rcu_read_lock();
626 636
627 ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan); 637 ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
628 if (ila) 638 if (ila)
629 ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral); 639 ila_update_ipv6_locator(skb, &ila->xp.ip, sir2ila);
630 640
631 rcu_read_unlock(); 641 rcu_read_unlock();
632 642
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e5308d7cbd75..f5285f4e1d08 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -38,14 +38,6 @@
38#include <net/ip6_fib.h> 38#include <net/ip6_fib.h>
39#include <net/ip6_route.h> 39#include <net/ip6_route.h>
40 40
41#define RT6_DEBUG 2
42
43#if RT6_DEBUG >= 3
44#define RT6_TRACE(x...) pr_debug(x)
45#else
46#define RT6_TRACE(x...) do { ; } while (0)
47#endif
48
49static struct kmem_cache *fib6_node_kmem __read_mostly; 41static struct kmem_cache *fib6_node_kmem __read_mostly;
50 42
51struct fib6_cleaner { 43struct fib6_cleaner {
@@ -62,9 +54,12 @@ struct fib6_cleaner {
62#define FWS_INIT FWS_L 54#define FWS_INIT FWS_L
63#endif 55#endif
64 56
65static void fib6_prune_clones(struct net *net, struct fib6_node *fn); 57static struct rt6_info *fib6_find_prefix(struct net *net,
66static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); 58 struct fib6_table *table,
67static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); 59 struct fib6_node *fn);
60static struct fib6_node *fib6_repair_tree(struct net *net,
61 struct fib6_table *table,
62 struct fib6_node *fn);
68static int fib6_walk(struct net *net, struct fib6_walker *w); 63static int fib6_walk(struct net *net, struct fib6_walker *w);
69static int fib6_walk_continue(struct fib6_walker *w); 64static int fib6_walk_continue(struct fib6_walker *w);
70 65
@@ -75,7 +70,7 @@ static int fib6_walk_continue(struct fib6_walker *w);
75 * result of redirects, path MTU changes, etc. 70 * result of redirects, path MTU changes, etc.
76 */ 71 */
77 72
78static void fib6_gc_timer_cb(unsigned long arg); 73static void fib6_gc_timer_cb(struct timer_list *t);
79 74
80#define FOR_WALKERS(net, w) \ 75#define FOR_WALKERS(net, w) \
81 list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh) 76 list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh)
@@ -110,6 +105,20 @@ enum {
110 FIB6_NO_SERNUM_CHANGE = 0, 105 FIB6_NO_SERNUM_CHANGE = 0,
111}; 106};
112 107
108void fib6_update_sernum(struct rt6_info *rt)
109{
110 struct fib6_table *table = rt->rt6i_table;
111 struct net *net = dev_net(rt->dst.dev);
112 struct fib6_node *fn;
113
114 spin_lock_bh(&table->tb6_lock);
115 fn = rcu_dereference_protected(rt->rt6i_node,
116 lockdep_is_held(&table->tb6_lock));
117 if (fn)
118 fn->fn_sernum = fib6_new_sernum(net);
119 spin_unlock_bh(&table->tb6_lock);
120}
121
113/* 122/*
114 * Auxiliary address test functions for the radix tree. 123 * Auxiliary address test functions for the radix tree.
115 * 124 *
@@ -140,18 +149,21 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
140 addr[fn_bit >> 5]; 149 addr[fn_bit >> 5];
141} 150}
142 151
143static struct fib6_node *node_alloc(void) 152static struct fib6_node *node_alloc(struct net *net)
144{ 153{
145 struct fib6_node *fn; 154 struct fib6_node *fn;
146 155
147 fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC); 156 fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
157 if (fn)
158 net->ipv6.rt6_stats->fib_nodes++;
148 159
149 return fn; 160 return fn;
150} 161}
151 162
152static void node_free_immediate(struct fib6_node *fn) 163static void node_free_immediate(struct net *net, struct fib6_node *fn)
153{ 164{
154 kmem_cache_free(fib6_node_kmem, fn); 165 kmem_cache_free(fib6_node_kmem, fn);
166 net->ipv6.rt6_stats->fib_nodes--;
155} 167}
156 168
157static void node_free_rcu(struct rcu_head *head) 169static void node_free_rcu(struct rcu_head *head)
@@ -161,9 +173,10 @@ static void node_free_rcu(struct rcu_head *head)
161 kmem_cache_free(fib6_node_kmem, fn); 173 kmem_cache_free(fib6_node_kmem, fn);
162} 174}
163 175
164static void node_free(struct fib6_node *fn) 176static void node_free(struct net *net, struct fib6_node *fn)
165{ 177{
166 call_rcu(&fn->rcu, node_free_rcu); 178 call_rcu(&fn->rcu, node_free_rcu);
179 net->ipv6.rt6_stats->fib_nodes--;
167} 180}
168 181
169void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) 182void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
@@ -185,9 +198,6 @@ void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
185 *ppcpu_rt = NULL; 198 *ppcpu_rt = NULL;
186 } 199 }
187 } 200 }
188
189 free_percpu(non_pcpu_rt->rt6i_pcpu);
190 non_pcpu_rt->rt6i_pcpu = NULL;
191} 201}
192EXPORT_SYMBOL_GPL(rt6_free_pcpu); 202EXPORT_SYMBOL_GPL(rt6_free_pcpu);
193 203
@@ -205,8 +215,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb)
205 * Initialize table lock at a single place to give lockdep a key, 215 * Initialize table lock at a single place to give lockdep a key,
206 * tables aren't visible prior to being linked to the list. 216 * tables aren't visible prior to being linked to the list.
207 */ 217 */
208 rwlock_init(&tb->tb6_lock); 218 spin_lock_init(&tb->tb6_lock);
209
210 h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); 219 h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
211 220
212 /* 221 /*
@@ -225,7 +234,8 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
225 table = kzalloc(sizeof(*table), GFP_ATOMIC); 234 table = kzalloc(sizeof(*table), GFP_ATOMIC);
226 if (table) { 235 if (table) {
227 table->tb6_id = id; 236 table->tb6_id = id;
228 table->tb6_root.leaf = net->ipv6.ip6_null_entry; 237 rcu_assign_pointer(table->tb6_root.leaf,
238 net->ipv6.ip6_null_entry);
229 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 239 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
230 inet_peer_base_init(&table->tb6_peers); 240 inet_peer_base_init(&table->tb6_peers);
231 } 241 }
@@ -322,11 +332,8 @@ unsigned int fib6_tables_seq_read(struct net *net)
322 struct hlist_head *head = &net->ipv6.fib_table_hash[h]; 332 struct hlist_head *head = &net->ipv6.fib_table_hash[h];
323 struct fib6_table *tb; 333 struct fib6_table *tb;
324 334
325 hlist_for_each_entry_rcu(tb, head, tb6_hlist) { 335 hlist_for_each_entry_rcu(tb, head, tb6_hlist)
326 read_lock_bh(&tb->tb6_lock);
327 fib_seq += tb->fib_seq; 336 fib_seq += tb->fib_seq;
328 read_unlock_bh(&tb->tb6_lock);
329 }
330 } 337 }
331 rcu_read_unlock(); 338 rcu_read_unlock();
332 339
@@ -346,9 +353,11 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
346 353
347static int call_fib6_entry_notifiers(struct net *net, 354static int call_fib6_entry_notifiers(struct net *net,
348 enum fib_event_type event_type, 355 enum fib_event_type event_type,
349 struct rt6_info *rt) 356 struct rt6_info *rt,
357 struct netlink_ext_ack *extack)
350{ 358{
351 struct fib6_entry_notifier_info info = { 359 struct fib6_entry_notifier_info info = {
360 .info.extack = extack,
352 .rt = rt, 361 .rt = rt,
353 }; 362 };
354 363
@@ -372,7 +381,7 @@ static int fib6_node_dump(struct fib6_walker *w)
372{ 381{
373 struct rt6_info *rt; 382 struct rt6_info *rt;
374 383
375 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) 384 for_each_fib6_walker_rt(w)
376 fib6_rt_dump(rt, w->args); 385 fib6_rt_dump(rt, w->args);
377 w->leaf = NULL; 386 w->leaf = NULL;
378 return 0; 387 return 0;
@@ -382,9 +391,9 @@ static void fib6_table_dump(struct net *net, struct fib6_table *tb,
382 struct fib6_walker *w) 391 struct fib6_walker *w)
383{ 392{
384 w->root = &tb->tb6_root; 393 w->root = &tb->tb6_root;
385 read_lock_bh(&tb->tb6_lock); 394 spin_lock_bh(&tb->tb6_lock);
386 fib6_walk(net, w); 395 fib6_walk(net, w);
387 read_unlock_bh(&tb->tb6_lock); 396 spin_unlock_bh(&tb->tb6_lock);
388} 397}
389 398
390/* Called with rcu_read_lock() */ 399/* Called with rcu_read_lock() */
@@ -421,7 +430,7 @@ static int fib6_dump_node(struct fib6_walker *w)
421 int res; 430 int res;
422 struct rt6_info *rt; 431 struct rt6_info *rt;
423 432
424 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { 433 for_each_fib6_walker_rt(w) {
425 res = rt6_dump_route(rt, w->args); 434 res = rt6_dump_route(rt, w->args);
426 if (res < 0) { 435 if (res < 0) {
427 /* Frame is full, suspend walking */ 436 /* Frame is full, suspend walking */
@@ -480,9 +489,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
480 w->count = 0; 489 w->count = 0;
481 w->skip = 0; 490 w->skip = 0;
482 491
483 read_lock_bh(&table->tb6_lock); 492 spin_lock_bh(&table->tb6_lock);
484 res = fib6_walk(net, w); 493 res = fib6_walk(net, w);
485 read_unlock_bh(&table->tb6_lock); 494 spin_unlock_bh(&table->tb6_lock);
486 if (res > 0) { 495 if (res > 0) {
487 cb->args[4] = 1; 496 cb->args[4] = 1;
488 cb->args[5] = w->root->fn_sernum; 497 cb->args[5] = w->root->fn_sernum;
@@ -497,9 +506,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
497 } else 506 } else
498 w->skip = 0; 507 w->skip = 0;
499 508
500 read_lock_bh(&table->tb6_lock); 509 spin_lock_bh(&table->tb6_lock);
501 res = fib6_walk_continue(w); 510 res = fib6_walk_continue(w);
502 read_unlock_bh(&table->tb6_lock); 511 spin_unlock_bh(&table->tb6_lock);
503 if (res <= 0) { 512 if (res <= 0) {
504 fib6_walker_unlink(net, w); 513 fib6_walker_unlink(net, w);
505 cb->args[4] = 0; 514 cb->args[4] = 0;
@@ -580,11 +589,13 @@ out:
580 * node. 589 * node.
581 */ 590 */
582 591
583static struct fib6_node *fib6_add_1(struct fib6_node *root, 592static struct fib6_node *fib6_add_1(struct net *net,
584 struct in6_addr *addr, int plen, 593 struct fib6_table *table,
585 int offset, int allow_create, 594 struct fib6_node *root,
586 int replace_required, int sernum, 595 struct in6_addr *addr, int plen,
587 struct netlink_ext_ack *extack) 596 int offset, int allow_create,
597 int replace_required,
598 struct netlink_ext_ack *extack)
588{ 599{
589 struct fib6_node *fn, *in, *ln; 600 struct fib6_node *fn, *in, *ln;
590 struct fib6_node *pn = NULL; 601 struct fib6_node *pn = NULL;
@@ -599,7 +610,9 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
599 fn = root; 610 fn = root;
600 611
601 do { 612 do {
602 key = (struct rt6key *)((u8 *)fn->leaf + offset); 613 struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
614 lockdep_is_held(&table->tb6_lock));
615 key = (struct rt6key *)((u8 *)leaf + offset);
603 616
604 /* 617 /*
605 * Prefix match 618 * Prefix match
@@ -625,12 +638,10 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
625 if (plen == fn->fn_bit) { 638 if (plen == fn->fn_bit) {
626 /* clean up an intermediate node */ 639 /* clean up an intermediate node */
627 if (!(fn->fn_flags & RTN_RTINFO)) { 640 if (!(fn->fn_flags & RTN_RTINFO)) {
628 rt6_release(fn->leaf); 641 RCU_INIT_POINTER(fn->leaf, NULL);
629 fn->leaf = NULL; 642 rt6_release(leaf);
630 } 643 }
631 644
632 fn->fn_sernum = sernum;
633
634 return fn; 645 return fn;
635 } 646 }
636 647
@@ -639,10 +650,13 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
639 */ 650 */
640 651
641 /* Try to walk down on tree. */ 652 /* Try to walk down on tree. */
642 fn->fn_sernum = sernum;
643 dir = addr_bit_set(addr, fn->fn_bit); 653 dir = addr_bit_set(addr, fn->fn_bit);
644 pn = fn; 654 pn = fn;
645 fn = dir ? fn->right : fn->left; 655 fn = dir ?
656 rcu_dereference_protected(fn->right,
657 lockdep_is_held(&table->tb6_lock)) :
658 rcu_dereference_protected(fn->left,
659 lockdep_is_held(&table->tb6_lock));
646 } while (fn); 660 } while (fn);
647 661
648 if (!allow_create) { 662 if (!allow_create) {
@@ -668,19 +682,17 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
668 * Create new leaf node without children. 682 * Create new leaf node without children.
669 */ 683 */
670 684
671 ln = node_alloc(); 685 ln = node_alloc(net);
672 686
673 if (!ln) 687 if (!ln)
674 return ERR_PTR(-ENOMEM); 688 return ERR_PTR(-ENOMEM);
675 ln->fn_bit = plen; 689 ln->fn_bit = plen;
676 690 RCU_INIT_POINTER(ln->parent, pn);
677 ln->parent = pn;
678 ln->fn_sernum = sernum;
679 691
680 if (dir) 692 if (dir)
681 pn->right = ln; 693 rcu_assign_pointer(pn->right, ln);
682 else 694 else
683 pn->left = ln; 695 rcu_assign_pointer(pn->left, ln);
684 696
685 return ln; 697 return ln;
686 698
@@ -694,7 +706,8 @@ insert_above:
694 * and the current 706 * and the current
695 */ 707 */
696 708
697 pn = fn->parent; 709 pn = rcu_dereference_protected(fn->parent,
710 lockdep_is_held(&table->tb6_lock));
698 711
699 /* find 1st bit in difference between the 2 addrs. 712 /* find 1st bit in difference between the 2 addrs.
700 713
@@ -710,14 +723,14 @@ insert_above:
710 * (new leaf node)[ln] (old node)[fn] 723 * (new leaf node)[ln] (old node)[fn]
711 */ 724 */
712 if (plen > bit) { 725 if (plen > bit) {
713 in = node_alloc(); 726 in = node_alloc(net);
714 ln = node_alloc(); 727 ln = node_alloc(net);
715 728
716 if (!in || !ln) { 729 if (!in || !ln) {
717 if (in) 730 if (in)
718 node_free_immediate(in); 731 node_free_immediate(net, in);
719 if (ln) 732 if (ln)
720 node_free_immediate(ln); 733 node_free_immediate(net, ln);
721 return ERR_PTR(-ENOMEM); 734 return ERR_PTR(-ENOMEM);
722 } 735 }
723 736
@@ -731,31 +744,28 @@ insert_above:
731 744
732 in->fn_bit = bit; 745 in->fn_bit = bit;
733 746
734 in->parent = pn; 747 RCU_INIT_POINTER(in->parent, pn);
735 in->leaf = fn->leaf; 748 in->leaf = fn->leaf;
736 atomic_inc(&in->leaf->rt6i_ref); 749 atomic_inc(&rcu_dereference_protected(in->leaf,
737 750 lockdep_is_held(&table->tb6_lock))->rt6i_ref);
738 in->fn_sernum = sernum;
739 751
740 /* update parent pointer */ 752 /* update parent pointer */
741 if (dir) 753 if (dir)
742 pn->right = in; 754 rcu_assign_pointer(pn->right, in);
743 else 755 else
744 pn->left = in; 756 rcu_assign_pointer(pn->left, in);
745 757
746 ln->fn_bit = plen; 758 ln->fn_bit = plen;
747 759
748 ln->parent = in; 760 RCU_INIT_POINTER(ln->parent, in);
749 fn->parent = in; 761 rcu_assign_pointer(fn->parent, in);
750
751 ln->fn_sernum = sernum;
752 762
753 if (addr_bit_set(addr, bit)) { 763 if (addr_bit_set(addr, bit)) {
754 in->right = ln; 764 rcu_assign_pointer(in->right, ln);
755 in->left = fn; 765 rcu_assign_pointer(in->left, fn);
756 } else { 766 } else {
757 in->left = ln; 767 rcu_assign_pointer(in->left, ln);
758 in->right = fn; 768 rcu_assign_pointer(in->right, fn);
759 } 769 }
760 } else { /* plen <= bit */ 770 } else { /* plen <= bit */
761 771
@@ -765,28 +775,26 @@ insert_above:
765 * (old node)[fn] NULL 775 * (old node)[fn] NULL
766 */ 776 */
767 777
768 ln = node_alloc(); 778 ln = node_alloc(net);
769 779
770 if (!ln) 780 if (!ln)
771 return ERR_PTR(-ENOMEM); 781 return ERR_PTR(-ENOMEM);
772 782
773 ln->fn_bit = plen; 783 ln->fn_bit = plen;
774 784
775 ln->parent = pn; 785 RCU_INIT_POINTER(ln->parent, pn);
776
777 ln->fn_sernum = sernum;
778
779 if (dir)
780 pn->right = ln;
781 else
782 pn->left = ln;
783 786
784 if (addr_bit_set(&key->addr, plen)) 787 if (addr_bit_set(&key->addr, plen))
785 ln->right = fn; 788 RCU_INIT_POINTER(ln->right, fn);
786 else 789 else
787 ln->left = fn; 790 RCU_INIT_POINTER(ln->left, fn);
791
792 rcu_assign_pointer(fn->parent, ln);
788 793
789 fn->parent = ln; 794 if (dir)
795 rcu_assign_pointer(pn->right, ln);
796 else
797 rcu_assign_pointer(pn->left, ln);
790 } 798 }
791 return ln; 799 return ln;
792} 800}
@@ -832,6 +840,8 @@ static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc)
832static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, 840static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
833 struct net *net) 841 struct net *net)
834{ 842{
843 struct fib6_table *table = rt->rt6i_table;
844
835 if (atomic_read(&rt->rt6i_ref) != 1) { 845 if (atomic_read(&rt->rt6i_ref) != 1) {
836 /* This route is used as dummy address holder in some split 846 /* This route is used as dummy address holder in some split
837 * nodes. It is not leaked, but it still holds other resources, 847 * nodes. It is not leaked, but it still holds other resources,
@@ -840,12 +850,17 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
840 * to still alive ones. 850 * to still alive ones.
841 */ 851 */
842 while (fn) { 852 while (fn) {
843 if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { 853 struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
844 fn->leaf = fib6_find_prefix(net, fn); 854 lockdep_is_held(&table->tb6_lock));
845 atomic_inc(&fn->leaf->rt6i_ref); 855 struct rt6_info *new_leaf;
856 if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
857 new_leaf = fib6_find_prefix(net, table, fn);
858 atomic_inc(&new_leaf->rt6i_ref);
859 rcu_assign_pointer(fn->leaf, new_leaf);
846 rt6_release(rt); 860 rt6_release(rt);
847 } 861 }
848 fn = fn->parent; 862 fn = rcu_dereference_protected(fn->parent,
863 lockdep_is_held(&table->tb6_lock));
849 } 864 }
850 } 865 }
851} 866}
@@ -855,11 +870,14 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
855 */ 870 */
856 871
857static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, 872static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
858 struct nl_info *info, struct mx6_config *mxc) 873 struct nl_info *info, struct mx6_config *mxc,
874 struct netlink_ext_ack *extack)
859{ 875{
876 struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
877 lockdep_is_held(&rt->rt6i_table->tb6_lock));
860 struct rt6_info *iter = NULL; 878 struct rt6_info *iter = NULL;
861 struct rt6_info **ins; 879 struct rt6_info __rcu **ins;
862 struct rt6_info **fallback_ins = NULL; 880 struct rt6_info __rcu **fallback_ins = NULL;
863 int replace = (info->nlh && 881 int replace = (info->nlh &&
864 (info->nlh->nlmsg_flags & NLM_F_REPLACE)); 882 (info->nlh->nlmsg_flags & NLM_F_REPLACE));
865 int add = (!info->nlh || 883 int add = (!info->nlh ||
@@ -874,7 +892,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
874 892
875 ins = &fn->leaf; 893 ins = &fn->leaf;
876 894
877 for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) { 895 for (iter = leaf; iter;
896 iter = rcu_dereference_protected(iter->dst.rt6_next,
897 lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
878 /* 898 /*
879 * Search for duplicates 899 * Search for duplicates
880 */ 900 */
@@ -936,7 +956,8 @@ next_iter:
936 if (fallback_ins && !found) { 956 if (fallback_ins && !found) {
937 /* No ECMP-able route found, replace first non-ECMP one */ 957 /* No ECMP-able route found, replace first non-ECMP one */
938 ins = fallback_ins; 958 ins = fallback_ins;
939 iter = *ins; 959 iter = rcu_dereference_protected(*ins,
960 lockdep_is_held(&rt->rt6i_table->tb6_lock));
940 found++; 961 found++;
941 } 962 }
942 963
@@ -950,7 +971,7 @@ next_iter:
950 struct rt6_info *sibling, *temp_sibling; 971 struct rt6_info *sibling, *temp_sibling;
951 972
952 /* Find the first route that have the same metric */ 973 /* Find the first route that have the same metric */
953 sibling = fn->leaf; 974 sibling = leaf;
954 while (sibling) { 975 while (sibling) {
955 if (sibling->rt6i_metric == rt->rt6i_metric && 976 if (sibling->rt6i_metric == rt->rt6i_metric &&
956 rt6_qualify_for_ecmp(sibling)) { 977 rt6_qualify_for_ecmp(sibling)) {
@@ -958,7 +979,8 @@ next_iter:
958 &sibling->rt6i_siblings); 979 &sibling->rt6i_siblings);
959 break; 980 break;
960 } 981 }
961 sibling = sibling->dst.rt6_next; 982 sibling = rcu_dereference_protected(sibling->dst.rt6_next,
983 lockdep_is_held(&rt->rt6i_table->tb6_lock));
962 } 984 }
963 /* For each sibling in the list, increment the counter of 985 /* For each sibling in the list, increment the counter of
964 * siblings. BUG() if counters does not match, list of siblings 986 * siblings. BUG() if counters does not match, list of siblings
@@ -987,12 +1009,12 @@ add:
987 if (err) 1009 if (err)
988 return err; 1010 return err;
989 1011
990 rt->dst.rt6_next = iter; 1012 rcu_assign_pointer(rt->dst.rt6_next, iter);
991 *ins = rt;
992 rcu_assign_pointer(rt->rt6i_node, fn);
993 atomic_inc(&rt->rt6i_ref); 1013 atomic_inc(&rt->rt6i_ref);
1014 rcu_assign_pointer(rt->rt6i_node, fn);
1015 rcu_assign_pointer(*ins, rt);
994 call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD, 1016 call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
995 rt); 1017 rt, extack);
996 if (!info->skip_notify) 1018 if (!info->skip_notify)
997 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); 1019 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
998 info->nl_net->ipv6.rt6_stats->fib_rt_entries++; 1020 info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -1016,12 +1038,12 @@ add:
1016 if (err) 1038 if (err)
1017 return err; 1039 return err;
1018 1040
1019 *ins = rt; 1041 atomic_inc(&rt->rt6i_ref);
1020 rcu_assign_pointer(rt->rt6i_node, fn); 1042 rcu_assign_pointer(rt->rt6i_node, fn);
1021 rt->dst.rt6_next = iter->dst.rt6_next; 1043 rt->dst.rt6_next = iter->dst.rt6_next;
1022 atomic_inc(&rt->rt6i_ref); 1044 rcu_assign_pointer(*ins, rt);
1023 call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, 1045 call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
1024 rt); 1046 rt, extack);
1025 if (!info->skip_notify) 1047 if (!info->skip_notify)
1026 inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); 1048 inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
1027 if (!(fn->fn_flags & RTN_RTINFO)) { 1049 if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -1031,14 +1053,15 @@ add:
1031 nsiblings = iter->rt6i_nsiblings; 1053 nsiblings = iter->rt6i_nsiblings;
1032 iter->rt6i_node = NULL; 1054 iter->rt6i_node = NULL;
1033 fib6_purge_rt(iter, fn, info->nl_net); 1055 fib6_purge_rt(iter, fn, info->nl_net);
1034 if (fn->rr_ptr == iter) 1056 if (rcu_access_pointer(fn->rr_ptr) == iter)
1035 fn->rr_ptr = NULL; 1057 fn->rr_ptr = NULL;
1036 rt6_release(iter); 1058 rt6_release(iter);
1037 1059
1038 if (nsiblings) { 1060 if (nsiblings) {
1039 /* Replacing an ECMP route, remove all siblings */ 1061 /* Replacing an ECMP route, remove all siblings */
1040 ins = &rt->dst.rt6_next; 1062 ins = &rt->dst.rt6_next;
1041 iter = *ins; 1063 iter = rcu_dereference_protected(*ins,
1064 lockdep_is_held(&rt->rt6i_table->tb6_lock));
1042 while (iter) { 1065 while (iter) {
1043 if (iter->rt6i_metric > rt->rt6i_metric) 1066 if (iter->rt6i_metric > rt->rt6i_metric)
1044 break; 1067 break;
@@ -1046,14 +1069,16 @@ add:
1046 *ins = iter->dst.rt6_next; 1069 *ins = iter->dst.rt6_next;
1047 iter->rt6i_node = NULL; 1070 iter->rt6i_node = NULL;
1048 fib6_purge_rt(iter, fn, info->nl_net); 1071 fib6_purge_rt(iter, fn, info->nl_net);
1049 if (fn->rr_ptr == iter) 1072 if (rcu_access_pointer(fn->rr_ptr) == iter)
1050 fn->rr_ptr = NULL; 1073 fn->rr_ptr = NULL;
1051 rt6_release(iter); 1074 rt6_release(iter);
1052 nsiblings--; 1075 nsiblings--;
1076 info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
1053 } else { 1077 } else {
1054 ins = &iter->dst.rt6_next; 1078 ins = &iter->dst.rt6_next;
1055 } 1079 }
1056 iter = *ins; 1080 iter = rcu_dereference_protected(*ins,
1081 lockdep_is_held(&rt->rt6i_table->tb6_lock));
1057 } 1082 }
1058 WARN_ON(nsiblings != 0); 1083 WARN_ON(nsiblings != 0);
1059 } 1084 }
@@ -1077,16 +1102,33 @@ void fib6_force_start_gc(struct net *net)
1077 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); 1102 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1078} 1103}
1079 1104
1105static void fib6_update_sernum_upto_root(struct rt6_info *rt,
1106 int sernum)
1107{
1108 struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
1109 lockdep_is_held(&rt->rt6i_table->tb6_lock));
1110
1111 /* paired with smp_rmb() in rt6_get_cookie_safe() */
1112 smp_wmb();
1113 while (fn) {
1114 fn->fn_sernum = sernum;
1115 fn = rcu_dereference_protected(fn->parent,
1116 lockdep_is_held(&rt->rt6i_table->tb6_lock));
1117 }
1118}
1119
1080/* 1120/*
1081 * Add routing information to the routing tree. 1121 * Add routing information to the routing tree.
1082 * <destination addr>/<source addr> 1122 * <destination addr>/<source addr>
1083 * with source addr info in sub-trees 1123 * with source addr info in sub-trees
1124 * Need to own table->tb6_lock
1084 */ 1125 */
1085 1126
1086int fib6_add(struct fib6_node *root, struct rt6_info *rt, 1127int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1087 struct nl_info *info, struct mx6_config *mxc, 1128 struct nl_info *info, struct mx6_config *mxc,
1088 struct netlink_ext_ack *extack) 1129 struct netlink_ext_ack *extack)
1089{ 1130{
1131 struct fib6_table *table = rt->rt6i_table;
1090 struct fib6_node *fn, *pn = NULL; 1132 struct fib6_node *fn, *pn = NULL;
1091 int err = -ENOMEM; 1133 int err = -ENOMEM;
1092 int allow_create = 1; 1134 int allow_create = 1;
@@ -1095,6 +1137,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1095 1137
1096 if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt))) 1138 if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
1097 return -EINVAL; 1139 return -EINVAL;
1140 if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
1141 return -EINVAL;
1098 1142
1099 if (info->nlh) { 1143 if (info->nlh) {
1100 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) 1144 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -1105,9 +1149,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1105 if (!allow_create && !replace_required) 1149 if (!allow_create && !replace_required)
1106 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); 1150 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
1107 1151
1108 fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen, 1152 fn = fib6_add_1(info->nl_net, table, root,
1153 &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
1109 offsetof(struct rt6_info, rt6i_dst), allow_create, 1154 offsetof(struct rt6_info, rt6i_dst), allow_create,
1110 replace_required, sernum, extack); 1155 replace_required, extack);
1111 if (IS_ERR(fn)) { 1156 if (IS_ERR(fn)) {
1112 err = PTR_ERR(fn); 1157 err = PTR_ERR(fn);
1113 fn = NULL; 1158 fn = NULL;
@@ -1120,7 +1165,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1120 if (rt->rt6i_src.plen) { 1165 if (rt->rt6i_src.plen) {
1121 struct fib6_node *sn; 1166 struct fib6_node *sn;
1122 1167
1123 if (!fn->subtree) { 1168 if (!rcu_access_pointer(fn->subtree)) {
1124 struct fib6_node *sfn; 1169 struct fib6_node *sfn;
1125 1170
1126 /* 1171 /*
@@ -1134,42 +1179,40 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1134 */ 1179 */
1135 1180
1136 /* Create subtree root node */ 1181 /* Create subtree root node */
1137 sfn = node_alloc(); 1182 sfn = node_alloc(info->nl_net);
1138 if (!sfn) 1183 if (!sfn)
1139 goto failure; 1184 goto failure;
1140 1185
1141 sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
1142 atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); 1186 atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
1187 rcu_assign_pointer(sfn->leaf,
1188 info->nl_net->ipv6.ip6_null_entry);
1143 sfn->fn_flags = RTN_ROOT; 1189 sfn->fn_flags = RTN_ROOT;
1144 sfn->fn_sernum = sernum;
1145 1190
1146 /* Now add the first leaf node to new subtree */ 1191 /* Now add the first leaf node to new subtree */
1147 1192
1148 sn = fib6_add_1(sfn, &rt->rt6i_src.addr, 1193 sn = fib6_add_1(info->nl_net, table, sfn,
1149 rt->rt6i_src.plen, 1194 &rt->rt6i_src.addr, rt->rt6i_src.plen,
1150 offsetof(struct rt6_info, rt6i_src), 1195 offsetof(struct rt6_info, rt6i_src),
1151 allow_create, replace_required, sernum, 1196 allow_create, replace_required, extack);
1152 extack);
1153 1197
1154 if (IS_ERR(sn)) { 1198 if (IS_ERR(sn)) {
1155 /* If it is failed, discard just allocated 1199 /* If it is failed, discard just allocated
1156 root, and then (in failure) stale node 1200 root, and then (in failure) stale node
1157 in main tree. 1201 in main tree.
1158 */ 1202 */
1159 node_free_immediate(sfn); 1203 node_free_immediate(info->nl_net, sfn);
1160 err = PTR_ERR(sn); 1204 err = PTR_ERR(sn);
1161 goto failure; 1205 goto failure;
1162 } 1206 }
1163 1207
1164 /* Now link new subtree to main tree */ 1208 /* Now link new subtree to main tree */
1165 sfn->parent = fn; 1209 rcu_assign_pointer(sfn->parent, fn);
1166 fn->subtree = sfn; 1210 rcu_assign_pointer(fn->subtree, sfn);
1167 } else { 1211 } else {
1168 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, 1212 sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
1169 rt->rt6i_src.plen, 1213 &rt->rt6i_src.addr, rt->rt6i_src.plen,
1170 offsetof(struct rt6_info, rt6i_src), 1214 offsetof(struct rt6_info, rt6i_src),
1171 allow_create, replace_required, sernum, 1215 allow_create, replace_required, extack);
1172 extack);
1173 1216
1174 if (IS_ERR(sn)) { 1217 if (IS_ERR(sn)) {
1175 err = PTR_ERR(sn); 1218 err = PTR_ERR(sn);
@@ -1177,19 +1220,18 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1177 } 1220 }
1178 } 1221 }
1179 1222
1180 if (!fn->leaf) { 1223 if (!rcu_access_pointer(fn->leaf)) {
1181 fn->leaf = rt;
1182 atomic_inc(&rt->rt6i_ref); 1224 atomic_inc(&rt->rt6i_ref);
1225 rcu_assign_pointer(fn->leaf, rt);
1183 } 1226 }
1184 fn = sn; 1227 fn = sn;
1185 } 1228 }
1186#endif 1229#endif
1187 1230
1188 err = fib6_add_rt2node(fn, rt, info, mxc); 1231 err = fib6_add_rt2node(fn, rt, info, mxc, extack);
1189 if (!err) { 1232 if (!err) {
1233 fib6_update_sernum_upto_root(rt, sernum);
1190 fib6_start_gc(info->nl_net, rt); 1234 fib6_start_gc(info->nl_net, rt);
1191 if (!(rt->rt6i_flags & RTF_CACHE))
1192 fib6_prune_clones(info->nl_net, pn);
1193 } 1235 }
1194 1236
1195out: 1237out:
@@ -1199,19 +1241,23 @@ out:
1199 * If fib6_add_1 has cleared the old leaf pointer in the 1241 * If fib6_add_1 has cleared the old leaf pointer in the
1200 * super-tree leaf node we have to find a new one for it. 1242 * super-tree leaf node we have to find a new one for it.
1201 */ 1243 */
1202 if (pn != fn && pn->leaf == rt) { 1244 struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
1203 pn->leaf = NULL; 1245 lockdep_is_held(&table->tb6_lock));
1246 if (pn != fn && pn_leaf == rt) {
1247 pn_leaf = NULL;
1248 RCU_INIT_POINTER(pn->leaf, NULL);
1204 atomic_dec(&rt->rt6i_ref); 1249 atomic_dec(&rt->rt6i_ref);
1205 } 1250 }
1206 if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { 1251 if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
1207 pn->leaf = fib6_find_prefix(info->nl_net, pn); 1252 pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
1208#if RT6_DEBUG >= 2 1253#if RT6_DEBUG >= 2
1209 if (!pn->leaf) { 1254 if (!pn_leaf) {
1210 WARN_ON(pn->leaf == NULL); 1255 WARN_ON(!pn_leaf);
1211 pn->leaf = info->nl_net->ipv6.ip6_null_entry; 1256 pn_leaf = info->nl_net->ipv6.ip6_null_entry;
1212 } 1257 }
1213#endif 1258#endif
1214 atomic_inc(&pn->leaf->rt6i_ref); 1259 atomic_inc(&pn_leaf->rt6i_ref);
1260 rcu_assign_pointer(pn->leaf, pn_leaf);
1215 } 1261 }
1216#endif 1262#endif
1217 goto failure; 1263 goto failure;
@@ -1226,7 +1272,7 @@ failure:
1226 * fn->leaf. 1272 * fn->leaf.
1227 */ 1273 */
1228 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) 1274 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
1229 fib6_repair_tree(info->nl_net, fn); 1275 fib6_repair_tree(info->nl_net, table, fn);
1230 /* Always release dst as dst->__refcnt is guaranteed 1276 /* Always release dst as dst->__refcnt is guaranteed
1231 * to be taken before entering this function 1277 * to be taken before entering this function
1232 */ 1278 */
@@ -1264,7 +1310,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
1264 1310
1265 dir = addr_bit_set(args->addr, fn->fn_bit); 1311 dir = addr_bit_set(args->addr, fn->fn_bit);
1266 1312
1267 next = dir ? fn->right : fn->left; 1313 next = dir ? rcu_dereference(fn->right) :
1314 rcu_dereference(fn->left);
1268 1315
1269 if (next) { 1316 if (next) {
1270 fn = next; 1317 fn = next;
@@ -1274,18 +1321,22 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
1274 } 1321 }
1275 1322
1276 while (fn) { 1323 while (fn) {
1277 if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { 1324 struct fib6_node *subtree = FIB6_SUBTREE(fn);
1325
1326 if (subtree || fn->fn_flags & RTN_RTINFO) {
1327 struct rt6_info *leaf = rcu_dereference(fn->leaf);
1278 struct rt6key *key; 1328 struct rt6key *key;
1279 1329
1280 key = (struct rt6key *) ((u8 *) fn->leaf + 1330 if (!leaf)
1281 args->offset); 1331 goto backtrack;
1332
1333 key = (struct rt6key *) ((u8 *)leaf + args->offset);
1282 1334
1283 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { 1335 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
1284#ifdef CONFIG_IPV6_SUBTREES 1336#ifdef CONFIG_IPV6_SUBTREES
1285 if (fn->subtree) { 1337 if (subtree) {
1286 struct fib6_node *sfn; 1338 struct fib6_node *sfn;
1287 sfn = fib6_lookup_1(fn->subtree, 1339 sfn = fib6_lookup_1(subtree, args + 1);
1288 args + 1);
1289 if (!sfn) 1340 if (!sfn)
1290 goto backtrack; 1341 goto backtrack;
1291 fn = sfn; 1342 fn = sfn;
@@ -1295,18 +1346,18 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
1295 return fn; 1346 return fn;
1296 } 1347 }
1297 } 1348 }
1298#ifdef CONFIG_IPV6_SUBTREES
1299backtrack: 1349backtrack:
1300#endif
1301 if (fn->fn_flags & RTN_ROOT) 1350 if (fn->fn_flags & RTN_ROOT)
1302 break; 1351 break;
1303 1352
1304 fn = fn->parent; 1353 fn = rcu_dereference(fn->parent);
1305 } 1354 }
1306 1355
1307 return NULL; 1356 return NULL;
1308} 1357}
1309 1358
1359/* called with rcu_read_lock() held
1360 */
1310struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, 1361struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
1311 const struct in6_addr *saddr) 1362 const struct in6_addr *saddr)
1312{ 1363{
@@ -1337,54 +1388,87 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
1337/* 1388/*
1338 * Get node with specified destination prefix (and source prefix, 1389 * Get node with specified destination prefix (and source prefix,
1339 * if subtrees are used) 1390 * if subtrees are used)
1391 * exact_match == true means we try to find fn with exact match of
1392 * the passed in prefix addr
1393 * exact_match == false means we try to find fn with longest prefix
1394 * match of the passed in prefix addr. This is useful for finding fn
1395 * for cached route as it will be stored in the exception table under
1396 * the node with longest prefix length.
1340 */ 1397 */
1341 1398
1342 1399
1343static struct fib6_node *fib6_locate_1(struct fib6_node *root, 1400static struct fib6_node *fib6_locate_1(struct fib6_node *root,
1344 const struct in6_addr *addr, 1401 const struct in6_addr *addr,
1345 int plen, int offset) 1402 int plen, int offset,
1403 bool exact_match)
1346{ 1404{
1347 struct fib6_node *fn; 1405 struct fib6_node *fn, *prev = NULL;
1348 1406
1349 for (fn = root; fn ; ) { 1407 for (fn = root; fn ; ) {
1350 struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset); 1408 struct rt6_info *leaf = rcu_dereference(fn->leaf);
1409 struct rt6key *key;
1410
1411 /* This node is being deleted */
1412 if (!leaf) {
1413 if (plen <= fn->fn_bit)
1414 goto out;
1415 else
1416 goto next;
1417 }
1418
1419 key = (struct rt6key *)((u8 *)leaf + offset);
1351 1420
1352 /* 1421 /*
1353 * Prefix match 1422 * Prefix match
1354 */ 1423 */
1355 if (plen < fn->fn_bit || 1424 if (plen < fn->fn_bit ||
1356 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) 1425 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
1357 return NULL; 1426 goto out;
1358 1427
1359 if (plen == fn->fn_bit) 1428 if (plen == fn->fn_bit)
1360 return fn; 1429 return fn;
1361 1430
1431 prev = fn;
1432
1433next:
1362 /* 1434 /*
1363 * We have more bits to go 1435 * We have more bits to go
1364 */ 1436 */
1365 if (addr_bit_set(addr, fn->fn_bit)) 1437 if (addr_bit_set(addr, fn->fn_bit))
1366 fn = fn->right; 1438 fn = rcu_dereference(fn->right);
1367 else 1439 else
1368 fn = fn->left; 1440 fn = rcu_dereference(fn->left);
1369 } 1441 }
1370 return NULL; 1442out:
1443 if (exact_match)
1444 return NULL;
1445 else
1446 return prev;
1371} 1447}
1372 1448
1373struct fib6_node *fib6_locate(struct fib6_node *root, 1449struct fib6_node *fib6_locate(struct fib6_node *root,
1374 const struct in6_addr *daddr, int dst_len, 1450 const struct in6_addr *daddr, int dst_len,
1375 const struct in6_addr *saddr, int src_len) 1451 const struct in6_addr *saddr, int src_len,
1452 bool exact_match)
1376{ 1453{
1377 struct fib6_node *fn; 1454 struct fib6_node *fn;
1378 1455
1379 fn = fib6_locate_1(root, daddr, dst_len, 1456 fn = fib6_locate_1(root, daddr, dst_len,
1380 offsetof(struct rt6_info, rt6i_dst)); 1457 offsetof(struct rt6_info, rt6i_dst),
1458 exact_match);
1381 1459
1382#ifdef CONFIG_IPV6_SUBTREES 1460#ifdef CONFIG_IPV6_SUBTREES
1383 if (src_len) { 1461 if (src_len) {
1384 WARN_ON(saddr == NULL); 1462 WARN_ON(saddr == NULL);
1385 if (fn && fn->subtree) 1463 if (fn) {
1386 fn = fib6_locate_1(fn->subtree, saddr, src_len, 1464 struct fib6_node *subtree = FIB6_SUBTREE(fn);
1387 offsetof(struct rt6_info, rt6i_src)); 1465
1466 if (subtree) {
1467 fn = fib6_locate_1(subtree, saddr, src_len,
1468 offsetof(struct rt6_info, rt6i_src),
1469 exact_match);
1470 }
1471 }
1388 } 1472 }
1389#endif 1473#endif
1390 1474
@@ -1400,16 +1484,26 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
1400 * 1484 *
1401 */ 1485 */
1402 1486
1403static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) 1487static struct rt6_info *fib6_find_prefix(struct net *net,
1488 struct fib6_table *table,
1489 struct fib6_node *fn)
1404{ 1490{
1491 struct fib6_node *child_left, *child_right;
1492
1405 if (fn->fn_flags & RTN_ROOT) 1493 if (fn->fn_flags & RTN_ROOT)
1406 return net->ipv6.ip6_null_entry; 1494 return net->ipv6.ip6_null_entry;
1407 1495
1408 while (fn) { 1496 while (fn) {
1409 if (fn->left) 1497 child_left = rcu_dereference_protected(fn->left,
1410 return fn->left->leaf; 1498 lockdep_is_held(&table->tb6_lock));
1411 if (fn->right) 1499 child_right = rcu_dereference_protected(fn->right,
1412 return fn->right->leaf; 1500 lockdep_is_held(&table->tb6_lock));
1501 if (child_left)
1502 return rcu_dereference_protected(child_left->leaf,
1503 lockdep_is_held(&table->tb6_lock));
1504 if (child_right)
1505 return rcu_dereference_protected(child_right->leaf,
1506 lockdep_is_held(&table->tb6_lock));
1413 1507
1414 fn = FIB6_SUBTREE(fn); 1508 fn = FIB6_SUBTREE(fn);
1415 } 1509 }
@@ -1419,31 +1513,49 @@ static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
1419/* 1513/*
1420 * Called to trim the tree of intermediate nodes when possible. "fn" 1514 * Called to trim the tree of intermediate nodes when possible. "fn"
1421 * is the node we want to try and remove. 1515 * is the node we want to try and remove.
1516 * Need to own table->tb6_lock
1422 */ 1517 */
1423 1518
1424static struct fib6_node *fib6_repair_tree(struct net *net, 1519static struct fib6_node *fib6_repair_tree(struct net *net,
1425 struct fib6_node *fn) 1520 struct fib6_table *table,
1521 struct fib6_node *fn)
1426{ 1522{
1427 int children; 1523 int children;
1428 int nstate; 1524 int nstate;
1429 struct fib6_node *child, *pn; 1525 struct fib6_node *child;
1430 struct fib6_walker *w; 1526 struct fib6_walker *w;
1431 int iter = 0; 1527 int iter = 0;
1432 1528
1433 for (;;) { 1529 for (;;) {
1530 struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
1531 lockdep_is_held(&table->tb6_lock));
1532 struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
1533 lockdep_is_held(&table->tb6_lock));
1534 struct fib6_node *pn = rcu_dereference_protected(fn->parent,
1535 lockdep_is_held(&table->tb6_lock));
1536 struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
1537 lockdep_is_held(&table->tb6_lock));
1538 struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
1539 lockdep_is_held(&table->tb6_lock));
1540 struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
1541 lockdep_is_held(&table->tb6_lock));
1542 struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
1543 lockdep_is_held(&table->tb6_lock));
1544 struct rt6_info *new_fn_leaf;
1545
1434 RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); 1546 RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
1435 iter++; 1547 iter++;
1436 1548
1437 WARN_ON(fn->fn_flags & RTN_RTINFO); 1549 WARN_ON(fn->fn_flags & RTN_RTINFO);
1438 WARN_ON(fn->fn_flags & RTN_TL_ROOT); 1550 WARN_ON(fn->fn_flags & RTN_TL_ROOT);
1439 WARN_ON(fn->leaf); 1551 WARN_ON(fn_leaf);
1440 1552
1441 children = 0; 1553 children = 0;
1442 child = NULL; 1554 child = NULL;
1443 if (fn->right) 1555 if (fn_r)
1444 child = fn->right, children |= 1; 1556 child = fn_r, children |= 1;
1445 if (fn->left) 1557 if (fn_l)
1446 child = fn->left, children |= 2; 1558 child = fn_l, children |= 2;
1447 1559
1448 if (children == 3 || FIB6_SUBTREE(fn) 1560 if (children == 3 || FIB6_SUBTREE(fn)
1449#ifdef CONFIG_IPV6_SUBTREES 1561#ifdef CONFIG_IPV6_SUBTREES
@@ -1451,36 +1563,36 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1451 || (children && fn->fn_flags & RTN_ROOT) 1563 || (children && fn->fn_flags & RTN_ROOT)
1452#endif 1564#endif
1453 ) { 1565 ) {
1454 fn->leaf = fib6_find_prefix(net, fn); 1566 new_fn_leaf = fib6_find_prefix(net, table, fn);
1455#if RT6_DEBUG >= 2 1567#if RT6_DEBUG >= 2
1456 if (!fn->leaf) { 1568 if (!new_fn_leaf) {
1457 WARN_ON(!fn->leaf); 1569 WARN_ON(!new_fn_leaf);
1458 fn->leaf = net->ipv6.ip6_null_entry; 1570 new_fn_leaf = net->ipv6.ip6_null_entry;
1459 } 1571 }
1460#endif 1572#endif
1461 atomic_inc(&fn->leaf->rt6i_ref); 1573 atomic_inc(&new_fn_leaf->rt6i_ref);
1462 return fn->parent; 1574 rcu_assign_pointer(fn->leaf, new_fn_leaf);
1575 return pn;
1463 } 1576 }
1464 1577
1465 pn = fn->parent;
1466#ifdef CONFIG_IPV6_SUBTREES 1578#ifdef CONFIG_IPV6_SUBTREES
1467 if (FIB6_SUBTREE(pn) == fn) { 1579 if (FIB6_SUBTREE(pn) == fn) {
1468 WARN_ON(!(fn->fn_flags & RTN_ROOT)); 1580 WARN_ON(!(fn->fn_flags & RTN_ROOT));
1469 FIB6_SUBTREE(pn) = NULL; 1581 RCU_INIT_POINTER(pn->subtree, NULL);
1470 nstate = FWS_L; 1582 nstate = FWS_L;
1471 } else { 1583 } else {
1472 WARN_ON(fn->fn_flags & RTN_ROOT); 1584 WARN_ON(fn->fn_flags & RTN_ROOT);
1473#endif 1585#endif
1474 if (pn->right == fn) 1586 if (pn_r == fn)
1475 pn->right = child; 1587 rcu_assign_pointer(pn->right, child);
1476 else if (pn->left == fn) 1588 else if (pn_l == fn)
1477 pn->left = child; 1589 rcu_assign_pointer(pn->left, child);
1478#if RT6_DEBUG >= 2 1590#if RT6_DEBUG >= 2
1479 else 1591 else
1480 WARN_ON(1); 1592 WARN_ON(1);
1481#endif 1593#endif
1482 if (child) 1594 if (child)
1483 child->parent = pn; 1595 rcu_assign_pointer(child->parent, pn);
1484 nstate = FWS_R; 1596 nstate = FWS_R;
1485#ifdef CONFIG_IPV6_SUBTREES 1597#ifdef CONFIG_IPV6_SUBTREES
1486 } 1598 }
@@ -1489,19 +1601,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1489 read_lock(&net->ipv6.fib6_walker_lock); 1601 read_lock(&net->ipv6.fib6_walker_lock);
1490 FOR_WALKERS(net, w) { 1602 FOR_WALKERS(net, w) {
1491 if (!child) { 1603 if (!child) {
1492 if (w->root == fn) { 1604 if (w->node == fn) {
1493 w->root = w->node = NULL;
1494 RT6_TRACE("W %p adjusted by delroot 1\n", w);
1495 } else if (w->node == fn) {
1496 RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate); 1605 RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
1497 w->node = pn; 1606 w->node = pn;
1498 w->state = nstate; 1607 w->state = nstate;
1499 } 1608 }
1500 } else { 1609 } else {
1501 if (w->root == fn) {
1502 w->root = child;
1503 RT6_TRACE("W %p adjusted by delroot 2\n", w);
1504 }
1505 if (w->node == fn) { 1610 if (w->node == fn) {
1506 w->node = child; 1611 w->node = child;
1507 if (children&2) { 1612 if (children&2) {
@@ -1516,33 +1621,39 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1516 } 1621 }
1517 read_unlock(&net->ipv6.fib6_walker_lock); 1622 read_unlock(&net->ipv6.fib6_walker_lock);
1518 1623
1519 node_free(fn); 1624 node_free(net, fn);
1520 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) 1625 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
1521 return pn; 1626 return pn;
1522 1627
1523 rt6_release(pn->leaf); 1628 RCU_INIT_POINTER(pn->leaf, NULL);
1524 pn->leaf = NULL; 1629 rt6_release(pn_leaf);
1525 fn = pn; 1630 fn = pn;
1526 } 1631 }
1527} 1632}
1528 1633
1529static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, 1634static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
1530 struct nl_info *info) 1635 struct rt6_info __rcu **rtp, struct nl_info *info)
1531{ 1636{
1532 struct fib6_walker *w; 1637 struct fib6_walker *w;
1533 struct rt6_info *rt = *rtp; 1638 struct rt6_info *rt = rcu_dereference_protected(*rtp,
1639 lockdep_is_held(&table->tb6_lock));
1534 struct net *net = info->nl_net; 1640 struct net *net = info->nl_net;
1535 1641
1536 RT6_TRACE("fib6_del_route\n"); 1642 RT6_TRACE("fib6_del_route\n");
1537 1643
1644 WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
1645
1538 /* Unlink it */ 1646 /* Unlink it */
1539 *rtp = rt->dst.rt6_next; 1647 *rtp = rt->dst.rt6_next;
1540 rt->rt6i_node = NULL; 1648 rt->rt6i_node = NULL;
1541 net->ipv6.rt6_stats->fib_rt_entries--; 1649 net->ipv6.rt6_stats->fib_rt_entries--;
1542 net->ipv6.rt6_stats->fib_discarded_routes++; 1650 net->ipv6.rt6_stats->fib_discarded_routes++;
1543 1651
1652 /* Flush all cached dst in exception table */
1653 rt6_flush_exceptions(rt);
1654
1544 /* Reset round-robin state, if necessary */ 1655 /* Reset round-robin state, if necessary */
1545 if (fn->rr_ptr == rt) 1656 if (rcu_access_pointer(fn->rr_ptr) == rt)
1546 fn->rr_ptr = NULL; 1657 fn->rr_ptr = NULL;
1547 1658
1548 /* Remove this entry from other siblings */ 1659 /* Remove this entry from other siblings */
@@ -1561,36 +1672,38 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1561 FOR_WALKERS(net, w) { 1672 FOR_WALKERS(net, w) {
1562 if (w->state == FWS_C && w->leaf == rt) { 1673 if (w->state == FWS_C && w->leaf == rt) {
1563 RT6_TRACE("walker %p adjusted by delroute\n", w); 1674 RT6_TRACE("walker %p adjusted by delroute\n", w);
1564 w->leaf = rt->dst.rt6_next; 1675 w->leaf = rcu_dereference_protected(rt->dst.rt6_next,
1676 lockdep_is_held(&table->tb6_lock));
1565 if (!w->leaf) 1677 if (!w->leaf)
1566 w->state = FWS_U; 1678 w->state = FWS_U;
1567 } 1679 }
1568 } 1680 }
1569 read_unlock(&net->ipv6.fib6_walker_lock); 1681 read_unlock(&net->ipv6.fib6_walker_lock);
1570 1682
1571 rt->dst.rt6_next = NULL;
1572
1573 /* If it was last route, expunge its radix tree node */ 1683 /* If it was last route, expunge its radix tree node */
1574 if (!fn->leaf) { 1684 if (!rcu_access_pointer(fn->leaf)) {
1575 fn->fn_flags &= ~RTN_RTINFO; 1685 fn->fn_flags &= ~RTN_RTINFO;
1576 net->ipv6.rt6_stats->fib_route_nodes--; 1686 net->ipv6.rt6_stats->fib_route_nodes--;
1577 fn = fib6_repair_tree(net, fn); 1687 fn = fib6_repair_tree(net, table, fn);
1578 } 1688 }
1579 1689
1580 fib6_purge_rt(rt, fn, net); 1690 fib6_purge_rt(rt, fn, net);
1581 1691
1582 call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt); 1692 call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
1583 if (!info->skip_notify) 1693 if (!info->skip_notify)
1584 inet6_rt_notify(RTM_DELROUTE, rt, info, 0); 1694 inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
1585 rt6_release(rt); 1695 rt6_release(rt);
1586} 1696}
1587 1697
1698/* Need to own table->tb6_lock */
1588int fib6_del(struct rt6_info *rt, struct nl_info *info) 1699int fib6_del(struct rt6_info *rt, struct nl_info *info)
1589{ 1700{
1590 struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, 1701 struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
1591 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 1702 lockdep_is_held(&rt->rt6i_table->tb6_lock));
1703 struct fib6_table *table = rt->rt6i_table;
1592 struct net *net = info->nl_net; 1704 struct net *net = info->nl_net;
1593 struct rt6_info **rtp; 1705 struct rt6_info __rcu **rtp;
1706 struct rt6_info __rcu **rtp_next;
1594 1707
1595#if RT6_DEBUG >= 2 1708#if RT6_DEBUG >= 2
1596 if (rt->dst.obsolete > 0) { 1709 if (rt->dst.obsolete > 0) {
@@ -1603,28 +1716,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
1603 1716
1604 WARN_ON(!(fn->fn_flags & RTN_RTINFO)); 1717 WARN_ON(!(fn->fn_flags & RTN_RTINFO));
1605 1718
1606 if (!(rt->rt6i_flags & RTF_CACHE)) { 1719 /* remove cached dst from exception table */
1607 struct fib6_node *pn = fn; 1720 if (rt->rt6i_flags & RTF_CACHE)
1608#ifdef CONFIG_IPV6_SUBTREES 1721 return rt6_remove_exception_rt(rt);
1609 /* clones of this route might be in another subtree */
1610 if (rt->rt6i_src.plen) {
1611 while (!(pn->fn_flags & RTN_ROOT))
1612 pn = pn->parent;
1613 pn = pn->parent;
1614 }
1615#endif
1616 fib6_prune_clones(info->nl_net, pn);
1617 }
1618 1722
1619 /* 1723 /*
1620 * Walk the leaf entries looking for ourself 1724 * Walk the leaf entries looking for ourself
1621 */ 1725 */
1622 1726
1623 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) { 1727 for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
1624 if (*rtp == rt) { 1728 struct rt6_info *cur = rcu_dereference_protected(*rtp,
1625 fib6_del_route(fn, rtp, info); 1729 lockdep_is_held(&table->tb6_lock));
1730 if (rt == cur) {
1731 fib6_del_route(table, fn, rtp, info);
1626 return 0; 1732 return 0;
1627 } 1733 }
1734 rtp_next = &cur->dst.rt6_next;
1628 } 1735 }
1629 return -ENOENT; 1736 return -ENOENT;
1630} 1737}
@@ -1651,22 +1758,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
1651 * 0 -> walk is complete. 1758 * 0 -> walk is complete.
1652 * >0 -> walk is incomplete (i.e. suspended) 1759 * >0 -> walk is incomplete (i.e. suspended)
1653 * <0 -> walk is terminated by an error. 1760 * <0 -> walk is terminated by an error.
1761 *
1762 * This function is called with tb6_lock held.
1654 */ 1763 */
1655 1764
1656static int fib6_walk_continue(struct fib6_walker *w) 1765static int fib6_walk_continue(struct fib6_walker *w)
1657{ 1766{
1658 struct fib6_node *fn, *pn; 1767 struct fib6_node *fn, *pn, *left, *right;
1768
1769 /* w->root should always be table->tb6_root */
1770 WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
1659 1771
1660 for (;;) { 1772 for (;;) {
1661 fn = w->node; 1773 fn = w->node;
1662 if (!fn) 1774 if (!fn)
1663 return 0; 1775 return 0;
1664 1776
1665 if (w->prune && fn != w->root &&
1666 fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
1667 w->state = FWS_C;
1668 w->leaf = fn->leaf;
1669 }
1670 switch (w->state) { 1777 switch (w->state) {
1671#ifdef CONFIG_IPV6_SUBTREES 1778#ifdef CONFIG_IPV6_SUBTREES
1672 case FWS_S: 1779 case FWS_S:
@@ -1676,21 +1783,26 @@ static int fib6_walk_continue(struct fib6_walker *w)
1676 } 1783 }
1677 w->state = FWS_L; 1784 w->state = FWS_L;
1678#endif 1785#endif
1786 /* fall through */
1679 case FWS_L: 1787 case FWS_L:
1680 if (fn->left) { 1788 left = rcu_dereference_protected(fn->left, 1);
1681 w->node = fn->left; 1789 if (left) {
1790 w->node = left;
1682 w->state = FWS_INIT; 1791 w->state = FWS_INIT;
1683 continue; 1792 continue;
1684 } 1793 }
1685 w->state = FWS_R; 1794 w->state = FWS_R;
1795 /* fall through */
1686 case FWS_R: 1796 case FWS_R:
1687 if (fn->right) { 1797 right = rcu_dereference_protected(fn->right, 1);
1688 w->node = fn->right; 1798 if (right) {
1799 w->node = right;
1689 w->state = FWS_INIT; 1800 w->state = FWS_INIT;
1690 continue; 1801 continue;
1691 } 1802 }
1692 w->state = FWS_C; 1803 w->state = FWS_C;
1693 w->leaf = fn->leaf; 1804 w->leaf = rcu_dereference_protected(fn->leaf, 1);
1805 /* fall through */
1694 case FWS_C: 1806 case FWS_C:
1695 if (w->leaf && fn->fn_flags & RTN_RTINFO) { 1807 if (w->leaf && fn->fn_flags & RTN_RTINFO) {
1696 int err; 1808 int err;
@@ -1709,10 +1821,13 @@ static int fib6_walk_continue(struct fib6_walker *w)
1709 } 1821 }
1710skip: 1822skip:
1711 w->state = FWS_U; 1823 w->state = FWS_U;
1824 /* fall through */
1712 case FWS_U: 1825 case FWS_U:
1713 if (fn == w->root) 1826 if (fn == w->root)
1714 return 0; 1827 return 0;
1715 pn = fn->parent; 1828 pn = rcu_dereference_protected(fn->parent, 1);
1829 left = rcu_dereference_protected(pn->left, 1);
1830 right = rcu_dereference_protected(pn->right, 1);
1716 w->node = pn; 1831 w->node = pn;
1717#ifdef CONFIG_IPV6_SUBTREES 1832#ifdef CONFIG_IPV6_SUBTREES
1718 if (FIB6_SUBTREE(pn) == fn) { 1833 if (FIB6_SUBTREE(pn) == fn) {
@@ -1721,13 +1836,13 @@ skip:
1721 continue; 1836 continue;
1722 } 1837 }
1723#endif 1838#endif
1724 if (pn->left == fn) { 1839 if (left == fn) {
1725 w->state = FWS_R; 1840 w->state = FWS_R;
1726 continue; 1841 continue;
1727 } 1842 }
1728 if (pn->right == fn) { 1843 if (right == fn) {
1729 w->state = FWS_C; 1844 w->state = FWS_C;
1730 w->leaf = w->node->leaf; 1845 w->leaf = rcu_dereference_protected(w->node->leaf, 1);
1731 continue; 1846 continue;
1732 } 1847 }
1733#if RT6_DEBUG >= 2 1848#if RT6_DEBUG >= 2
@@ -1770,7 +1885,7 @@ static int fib6_clean_node(struct fib6_walker *w)
1770 return 0; 1885 return 0;
1771 } 1886 }
1772 1887
1773 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { 1888 for_each_fib6_walker_rt(w) {
1774 res = c->func(rt, c->arg); 1889 res = c->func(rt, c->arg);
1775 if (res < 0) { 1890 if (res < 0) {
1776 w->leaf = rt; 1891 w->leaf = rt;
@@ -1798,20 +1913,16 @@ static int fib6_clean_node(struct fib6_walker *w)
1798 * func is called on each route. 1913 * func is called on each route.
1799 * It may return -1 -> delete this route. 1914 * It may return -1 -> delete this route.
1800 * 0 -> continue walking 1915 * 0 -> continue walking
1801 *
1802 * prune==1 -> only immediate children of node (certainly,
1803 * ignoring pure split nodes) will be scanned.
1804 */ 1916 */
1805 1917
1806static void fib6_clean_tree(struct net *net, struct fib6_node *root, 1918static void fib6_clean_tree(struct net *net, struct fib6_node *root,
1807 int (*func)(struct rt6_info *, void *arg), 1919 int (*func)(struct rt6_info *, void *arg),
1808 bool prune, int sernum, void *arg) 1920 int sernum, void *arg)
1809{ 1921{
1810 struct fib6_cleaner c; 1922 struct fib6_cleaner c;
1811 1923
1812 c.w.root = root; 1924 c.w.root = root;
1813 c.w.func = fib6_clean_node; 1925 c.w.func = fib6_clean_node;
1814 c.w.prune = prune;
1815 c.w.count = 0; 1926 c.w.count = 0;
1816 c.w.skip = 0; 1927 c.w.skip = 0;
1817 c.func = func; 1928 c.func = func;
@@ -1834,10 +1945,10 @@ static void __fib6_clean_all(struct net *net,
1834 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { 1945 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
1835 head = &net->ipv6.fib_table_hash[h]; 1946 head = &net->ipv6.fib_table_hash[h];
1836 hlist_for_each_entry_rcu(table, head, tb6_hlist) { 1947 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
1837 write_lock_bh(&table->tb6_lock); 1948 spin_lock_bh(&table->tb6_lock);
1838 fib6_clean_tree(net, &table->tb6_root, 1949 fib6_clean_tree(net, &table->tb6_root,
1839 func, false, sernum, arg); 1950 func, sernum, arg);
1840 write_unlock_bh(&table->tb6_lock); 1951 spin_unlock_bh(&table->tb6_lock);
1841 } 1952 }
1842 } 1953 }
1843 rcu_read_unlock(); 1954 rcu_read_unlock();
@@ -1849,22 +1960,6 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *),
1849 __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg); 1960 __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
1850} 1961}
1851 1962
1852static int fib6_prune_clone(struct rt6_info *rt, void *arg)
1853{
1854 if (rt->rt6i_flags & RTF_CACHE) {
1855 RT6_TRACE("pruning clone %p\n", rt);
1856 return -1;
1857 }
1858
1859 return 0;
1860}
1861
1862static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
1863{
1864 fib6_clean_tree(net, fn, fib6_prune_clone, true,
1865 FIB6_NO_SERNUM_CHANGE, NULL);
1866}
1867
1868static void fib6_flush_trees(struct net *net) 1963static void fib6_flush_trees(struct net *net)
1869{ 1964{
1870 int new_sernum = fib6_new_sernum(net); 1965 int new_sernum = fib6_new_sernum(net);
@@ -1876,12 +1971,6 @@ static void fib6_flush_trees(struct net *net)
1876 * Garbage collection 1971 * Garbage collection
1877 */ 1972 */
1878 1973
1879struct fib6_gc_args
1880{
1881 int timeout;
1882 int more;
1883};
1884
1885static int fib6_age(struct rt6_info *rt, void *arg) 1974static int fib6_age(struct rt6_info *rt, void *arg)
1886{ 1975{
1887 struct fib6_gc_args *gc_args = arg; 1976 struct fib6_gc_args *gc_args = arg;
@@ -1890,9 +1979,6 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1890 /* 1979 /*
1891 * check addrconf expiration here. 1980 * check addrconf expiration here.
1892 * Routes are expired even if they are in use. 1981 * Routes are expired even if they are in use.
1893 *
1894 * Also age clones. Note, that clones are aged out
1895 * only if they are not in use now.
1896 */ 1982 */
1897 1983
1898 if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { 1984 if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
@@ -1901,31 +1987,14 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1901 return -1; 1987 return -1;
1902 } 1988 }
1903 gc_args->more++; 1989 gc_args->more++;
1904 } else if (rt->rt6i_flags & RTF_CACHE) {
1905 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout))
1906 rt->dst.obsolete = DST_OBSOLETE_KILL;
1907 if (atomic_read(&rt->dst.__refcnt) == 1 &&
1908 rt->dst.obsolete == DST_OBSOLETE_KILL) {
1909 RT6_TRACE("aging clone %p\n", rt);
1910 return -1;
1911 } else if (rt->rt6i_flags & RTF_GATEWAY) {
1912 struct neighbour *neigh;
1913 __u8 neigh_flags = 0;
1914
1915 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1916 if (neigh) {
1917 neigh_flags = neigh->flags;
1918 neigh_release(neigh);
1919 }
1920 if (!(neigh_flags & NTF_ROUTER)) {
1921 RT6_TRACE("purging route %p via non-router but gateway\n",
1922 rt);
1923 return -1;
1924 }
1925 }
1926 gc_args->more++;
1927 } 1990 }
1928 1991
1992 /* Also age clones in the exception table.
1993 * Note, that clones are aged out
1994 * only if they are not in use now.
1995 */
1996 rt6_age_exceptions(rt, gc_args, now);
1997
1929 return 0; 1998 return 0;
1930} 1999}
1931 2000
@@ -1957,9 +2026,11 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1957 spin_unlock_bh(&net->ipv6.fib6_gc_lock); 2026 spin_unlock_bh(&net->ipv6.fib6_gc_lock);
1958} 2027}
1959 2028
1960static void fib6_gc_timer_cb(unsigned long arg) 2029static void fib6_gc_timer_cb(struct timer_list *t)
1961{ 2030{
1962 fib6_run_gc(0, (struct net *)arg, true); 2031 struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer);
2032
2033 fib6_run_gc(0, arg, true);
1963} 2034}
1964 2035
1965static int __net_init fib6_net_init(struct net *net) 2036static int __net_init fib6_net_init(struct net *net)
@@ -1974,7 +2045,7 @@ static int __net_init fib6_net_init(struct net *net)
1974 spin_lock_init(&net->ipv6.fib6_gc_lock); 2045 spin_lock_init(&net->ipv6.fib6_gc_lock);
1975 rwlock_init(&net->ipv6.fib6_walker_lock); 2046 rwlock_init(&net->ipv6.fib6_walker_lock);
1976 INIT_LIST_HEAD(&net->ipv6.fib6_walkers); 2047 INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
1977 setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); 2048 timer_setup(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, 0);
1978 2049
1979 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); 2050 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
1980 if (!net->ipv6.rt6_stats) 2051 if (!net->ipv6.rt6_stats)
@@ -1993,7 +2064,8 @@ static int __net_init fib6_net_init(struct net *net)
1993 goto out_fib_table_hash; 2064 goto out_fib_table_hash;
1994 2065
1995 net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; 2066 net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
1996 net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; 2067 rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
2068 net->ipv6.ip6_null_entry);
1997 net->ipv6.fib6_main_tbl->tb6_root.fn_flags = 2069 net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
1998 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 2070 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
1999 inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); 2071 inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
@@ -2004,7 +2076,8 @@ static int __net_init fib6_net_init(struct net *net)
2004 if (!net->ipv6.fib6_local_tbl) 2076 if (!net->ipv6.fib6_local_tbl)
2005 goto out_fib6_main_tbl; 2077 goto out_fib6_main_tbl;
2006 net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; 2078 net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
2007 net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; 2079 rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
2080 net->ipv6.ip6_null_entry);
2008 net->ipv6.fib6_local_tbl->tb6_root.fn_flags = 2081 net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
2009 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 2082 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
2010 inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); 2083 inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
@@ -2134,7 +2207,9 @@ static int ipv6_route_yield(struct fib6_walker *w)
2134 return 1; 2207 return 1;
2135 2208
2136 do { 2209 do {
2137 iter->w.leaf = iter->w.leaf->dst.rt6_next; 2210 iter->w.leaf = rcu_dereference_protected(
2211 iter->w.leaf->dst.rt6_next,
2212 lockdep_is_held(&iter->tbl->tb6_lock));
2138 iter->skip--; 2213 iter->skip--;
2139 if (!iter->skip && iter->w.leaf) 2214 if (!iter->skip && iter->w.leaf)
2140 return 1; 2215 return 1;
@@ -2199,7 +2274,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2199 if (!v) 2274 if (!v)
2200 goto iter_table; 2275 goto iter_table;
2201 2276
2202 n = ((struct rt6_info *)v)->dst.rt6_next; 2277 n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next);
2203 if (n) { 2278 if (n) {
2204 ++*pos; 2279 ++*pos;
2205 return n; 2280 return n;
@@ -2207,9 +2282,9 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2207 2282
2208iter_table: 2283iter_table:
2209 ipv6_route_check_sernum(iter); 2284 ipv6_route_check_sernum(iter);
2210 read_lock(&iter->tbl->tb6_lock); 2285 spin_lock_bh(&iter->tbl->tb6_lock);
2211 r = fib6_walk_continue(&iter->w); 2286 r = fib6_walk_continue(&iter->w);
2212 read_unlock(&iter->tbl->tb6_lock); 2287 spin_unlock_bh(&iter->tbl->tb6_lock);
2213 if (r > 0) { 2288 if (r > 0) {
2214 if (v) 2289 if (v)
2215 ++*pos; 2290 ++*pos;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 9f2e73c71768..7f59c8fabeeb 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -46,7 +46,7 @@
46static atomic_t fl_size = ATOMIC_INIT(0); 46static atomic_t fl_size = ATOMIC_INIT(0);
47static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; 47static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
48 48
49static void ip6_fl_gc(unsigned long dummy); 49static void ip6_fl_gc(struct timer_list *unused);
50static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc); 50static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);
51 51
52/* FL hash table lock: it protects only of GC */ 52/* FL hash table lock: it protects only of GC */
@@ -127,7 +127,7 @@ static void fl_release(struct ip6_flowlabel *fl)
127 spin_unlock_bh(&ip6_fl_lock); 127 spin_unlock_bh(&ip6_fl_lock);
128} 128}
129 129
130static void ip6_fl_gc(unsigned long dummy) 130static void ip6_fl_gc(struct timer_list *unused)
131{ 131{
132 int i; 132 int i;
133 unsigned long now = jiffies; 133 unsigned long now = jiffies;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 59c121b932ac..4cfd8e0696fe 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -369,6 +369,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
369static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 369static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
370 u8 type, u8 code, int offset, __be32 info) 370 u8 type, u8 code, int offset, __be32 info)
371{ 371{
372 struct net *net = dev_net(skb->dev);
372 const struct gre_base_hdr *greh; 373 const struct gre_base_hdr *greh;
373 const struct ipv6hdr *ipv6h; 374 const struct ipv6hdr *ipv6h;
374 int grehlen = sizeof(*greh); 375 int grehlen = sizeof(*greh);
@@ -402,9 +403,8 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
402 return; 403 return;
403 404
404 switch (type) { 405 switch (type) {
405 __u32 teli;
406 struct ipv6_tlv_tnl_enc_lim *tel; 406 struct ipv6_tlv_tnl_enc_lim *tel;
407 __u32 mtu; 407 __u32 teli;
408 case ICMPV6_DEST_UNREACH: 408 case ICMPV6_DEST_UNREACH:
409 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", 409 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
410 t->parms.name); 410 t->parms.name);
@@ -435,12 +435,11 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
435 } 435 }
436 return; 436 return;
437 case ICMPV6_PKT_TOOBIG: 437 case ICMPV6_PKT_TOOBIG:
438 mtu = be32_to_cpu(info) - offset - t->tun_hlen; 438 ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
439 if (t->dev->type == ARPHRD_ETHER) 439 return;
440 mtu -= ETH_HLEN; 440 case NDISC_REDIRECT:
441 if (mtu < IPV6_MIN_MTU) 441 ip6_redirect(skb, net, skb->dev->ifindex, 0,
442 mtu = IPV6_MIN_MTU; 442 sock_net_uid(net, NULL));
443 t->dev->mtu = mtu;
444 return; 443 return;
445 } 444 }
446 445
@@ -461,7 +460,7 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
461 &ipv6h->saddr, &ipv6h->daddr, tpi->key, 460 &ipv6h->saddr, &ipv6h->daddr, tpi->key,
462 tpi->proto); 461 tpi->proto);
463 if (tunnel) { 462 if (tunnel) {
464 ip6_tnl_rcv(tunnel, skb, tpi, NULL, false); 463 ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
465 464
466 return PACKET_RCVD; 465 return PACKET_RCVD;
467 } 466 }
@@ -503,7 +502,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
503 __u32 *pmtu, __be16 proto) 502 __u32 *pmtu, __be16 proto)
504{ 503{
505 struct ip6_tnl *tunnel = netdev_priv(dev); 504 struct ip6_tnl *tunnel = netdev_priv(dev);
506 struct dst_entry *dst = skb_dst(skb);
507 __be16 protocol; 505 __be16 protocol;
508 506
509 if (dev->type == ARPHRD_ETHER) 507 if (dev->type == ARPHRD_ETHER)
@@ -522,10 +520,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
522 gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, 520 gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
523 protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); 521 protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
524 522
525 /* TooBig packet may have updated dst->dev's mtu */
526 if (dst && dst_mtu(dst) > dst->dev->mtu)
527 dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
528
529 return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, 523 return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
530 NEXTHDR_GRE); 524 NEXTHDR_GRE);
531} 525}
@@ -1164,19 +1158,21 @@ err_alloc_dev:
1164 return err; 1158 return err;
1165} 1159}
1166 1160
1167static void __net_exit ip6gre_exit_net(struct net *net) 1161static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
1168{ 1162{
1163 struct net *net;
1169 LIST_HEAD(list); 1164 LIST_HEAD(list);
1170 1165
1171 rtnl_lock(); 1166 rtnl_lock();
1172 ip6gre_destroy_tunnels(net, &list); 1167 list_for_each_entry(net, net_list, exit_list)
1168 ip6gre_destroy_tunnels(net, &list);
1173 unregister_netdevice_many(&list); 1169 unregister_netdevice_many(&list);
1174 rtnl_unlock(); 1170 rtnl_unlock();
1175} 1171}
1176 1172
1177static struct pernet_operations ip6gre_net_ops = { 1173static struct pernet_operations ip6gre_net_ops = {
1178 .init = ip6gre_init_net, 1174 .init = ip6gre_init_net,
1179 .exit = ip6gre_exit_net, 1175 .exit_batch = ip6gre_exit_batch_net,
1180 .id = &ip6gre_net_id, 1176 .id = &ip6gre_net_id,
1181 .size = sizeof(struct ip6gre_net), 1177 .size = sizeof(struct ip6gre_net),
1182}; 1178};
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index dab946554157..3d3092adf1d2 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -471,15 +471,16 @@ static int
471ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, 471ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
472 u8 *type, u8 *code, int *msg, __u32 *info, int offset) 472 u8 *type, u8 *code, int *msg, __u32 *info, int offset)
473{ 473{
474 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data; 474 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
475 struct ip6_tnl *t; 475 struct net *net = dev_net(skb->dev);
476 int rel_msg = 0;
477 u8 rel_type = ICMPV6_DEST_UNREACH; 476 u8 rel_type = ICMPV6_DEST_UNREACH;
478 u8 rel_code = ICMPV6_ADDR_UNREACH; 477 u8 rel_code = ICMPV6_ADDR_UNREACH;
479 u8 tproto;
480 __u32 rel_info = 0; 478 __u32 rel_info = 0;
481 __u16 len; 479 struct ip6_tnl *t;
482 int err = -ENOENT; 480 int err = -ENOENT;
481 int rel_msg = 0;
482 u8 tproto;
483 __u16 len;
483 484
484 /* If the packet doesn't contain the original IPv6 header we are 485 /* If the packet doesn't contain the original IPv6 header we are
485 in trouble since we might need the source address for further 486 in trouble since we might need the source address for further
@@ -497,9 +498,8 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
497 err = 0; 498 err = 0;
498 499
499 switch (*type) { 500 switch (*type) {
500 __u32 teli;
501 struct ipv6_tlv_tnl_enc_lim *tel; 501 struct ipv6_tlv_tnl_enc_lim *tel;
502 __u32 mtu; 502 __u32 mtu, teli;
503 case ICMPV6_DEST_UNREACH: 503 case ICMPV6_DEST_UNREACH:
504 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", 504 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
505 t->parms.name); 505 t->parms.name);
@@ -530,11 +530,11 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
530 } 530 }
531 break; 531 break;
532 case ICMPV6_PKT_TOOBIG: 532 case ICMPV6_PKT_TOOBIG:
533 ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
534 sock_net_uid(net, NULL));
533 mtu = *info - offset; 535 mtu = *info - offset;
534 if (mtu < IPV6_MIN_MTU) 536 if (mtu < IPV6_MIN_MTU)
535 mtu = IPV6_MIN_MTU; 537 mtu = IPV6_MIN_MTU;
536 t->dev->mtu = mtu;
537
538 len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len); 538 len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
539 if (len > mtu) { 539 if (len > mtu) {
540 rel_type = ICMPV6_PKT_TOOBIG; 540 rel_type = ICMPV6_PKT_TOOBIG;
@@ -543,6 +543,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
543 rel_msg = 1; 543 rel_msg = 1;
544 } 544 }
545 break; 545 break;
546 case NDISC_REDIRECT:
547 ip6_redirect(skb, net, skb->dev->ifindex, 0,
548 sock_net_uid(net, NULL));
549 break;
546 } 550 }
547 551
548 *type = rel_type; 552 *type = rel_type;
@@ -559,13 +563,12 @@ static int
559ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 563ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
560 u8 type, u8 code, int offset, __be32 info) 564 u8 type, u8 code, int offset, __be32 info)
561{ 565{
562 int rel_msg = 0;
563 u8 rel_type = type;
564 u8 rel_code = code;
565 __u32 rel_info = ntohl(info); 566 __u32 rel_info = ntohl(info);
566 int err;
567 struct sk_buff *skb2;
568 const struct iphdr *eiph; 567 const struct iphdr *eiph;
568 struct sk_buff *skb2;
569 int err, rel_msg = 0;
570 u8 rel_type = type;
571 u8 rel_code = code;
569 struct rtable *rt; 572 struct rtable *rt;
570 struct flowi4 fl4; 573 struct flowi4 fl4;
571 574
@@ -590,9 +593,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
590 rel_type = ICMP_DEST_UNREACH; 593 rel_type = ICMP_DEST_UNREACH;
591 rel_code = ICMP_FRAG_NEEDED; 594 rel_code = ICMP_FRAG_NEEDED;
592 break; 595 break;
593 case NDISC_REDIRECT:
594 rel_type = ICMP_REDIRECT;
595 rel_code = ICMP_REDIR_HOST;
596 default: 596 default:
597 return 0; 597 return 0;
598 } 598 }
@@ -611,33 +611,26 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
611 eiph = ip_hdr(skb2); 611 eiph = ip_hdr(skb2);
612 612
613 /* Try to guess incoming interface */ 613 /* Try to guess incoming interface */
614 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, 614 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
615 eiph->saddr, 0, 615 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
616 0, 0,
617 IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
618 if (IS_ERR(rt)) 616 if (IS_ERR(rt))
619 goto out; 617 goto out;
620 618
621 skb2->dev = rt->dst.dev; 619 skb2->dev = rt->dst.dev;
620 ip_rt_put(rt);
622 621
623 /* route "incoming" packet */ 622 /* route "incoming" packet */
624 if (rt->rt_flags & RTCF_LOCAL) { 623 if (rt->rt_flags & RTCF_LOCAL) {
625 ip_rt_put(rt);
626 rt = NULL;
627 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, 624 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
628 eiph->daddr, eiph->saddr, 625 eiph->daddr, eiph->saddr, 0, 0,
629 0, 0, 626 IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
630 IPPROTO_IPIP, 627 if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
631 RT_TOS(eiph->tos), 0);
632 if (IS_ERR(rt) ||
633 rt->dst.dev->type != ARPHRD_TUNNEL) {
634 if (!IS_ERR(rt)) 628 if (!IS_ERR(rt))
635 ip_rt_put(rt); 629 ip_rt_put(rt);
636 goto out; 630 goto out;
637 } 631 }
638 skb_dst_set(skb2, &rt->dst); 632 skb_dst_set(skb2, &rt->dst);
639 } else { 633 } else {
640 ip_rt_put(rt);
641 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, 634 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
642 skb2->dev) || 635 skb2->dev) ||
643 skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) 636 skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
@@ -649,10 +642,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
649 if (rel_info > dst_mtu(skb_dst(skb2))) 642 if (rel_info > dst_mtu(skb_dst(skb2)))
650 goto out; 643 goto out;
651 644
652 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); 645 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2,
646 rel_info);
653 } 647 }
654 if (rel_type == ICMP_REDIRECT)
655 skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
656 648
657 icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); 649 icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
658 650
@@ -665,11 +657,10 @@ static int
665ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 657ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
666 u8 type, u8 code, int offset, __be32 info) 658 u8 type, u8 code, int offset, __be32 info)
667{ 659{
668 int rel_msg = 0; 660 __u32 rel_info = ntohl(info);
661 int err, rel_msg = 0;
669 u8 rel_type = type; 662 u8 rel_type = type;
670 u8 rel_code = code; 663 u8 rel_code = code;
671 __u32 rel_info = ntohl(info);
672 int err;
673 664
674 err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code, 665 err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
675 &rel_msg, &rel_info, offset); 666 &rel_msg, &rel_info, offset);
@@ -769,7 +760,8 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
769 760
770 if ((ipv6_addr_is_multicast(laddr) || 761 if ((ipv6_addr_is_multicast(laddr) ||
771 likely(ipv6_chk_addr(net, laddr, ldev, 0))) && 762 likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
772 likely(!ipv6_chk_addr(net, raddr, NULL, 0))) 763 ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
764 likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
773 ret = 1; 765 ret = 1;
774 } 766 }
775 return ret; 767 return ret;
@@ -999,7 +991,8 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
999 if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0))) 991 if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
1000 pr_warn("%s xmit: Local address not yet configured!\n", 992 pr_warn("%s xmit: Local address not yet configured!\n",
1001 p->name); 993 p->name);
1002 else if (!ipv6_addr_is_multicast(raddr) && 994 else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
995 !ipv6_addr_is_multicast(raddr) &&
1003 unlikely(ipv6_chk_addr(net, raddr, NULL, 0))) 996 unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
1004 pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", 997 pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
1005 p->name); 998 p->name);
@@ -2168,17 +2161,16 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
2168 .priority = 1, 2161 .priority = 1,
2169}; 2162};
2170 2163
2171static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) 2164static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
2172{ 2165{
2173 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 2166 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
2174 struct net_device *dev, *aux; 2167 struct net_device *dev, *aux;
2175 int h; 2168 int h;
2176 struct ip6_tnl *t; 2169 struct ip6_tnl *t;
2177 LIST_HEAD(list);
2178 2170
2179 for_each_netdev_safe(net, dev, aux) 2171 for_each_netdev_safe(net, dev, aux)
2180 if (dev->rtnl_link_ops == &ip6_link_ops) 2172 if (dev->rtnl_link_ops == &ip6_link_ops)
2181 unregister_netdevice_queue(dev, &list); 2173 unregister_netdevice_queue(dev, list);
2182 2174
2183 for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) { 2175 for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
2184 t = rtnl_dereference(ip6n->tnls_r_l[h]); 2176 t = rtnl_dereference(ip6n->tnls_r_l[h]);
@@ -2187,12 +2179,10 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
2187 * been added to the list by the previous loop. 2179 * been added to the list by the previous loop.
2188 */ 2180 */
2189 if (!net_eq(dev_net(t->dev), net)) 2181 if (!net_eq(dev_net(t->dev), net))
2190 unregister_netdevice_queue(t->dev, &list); 2182 unregister_netdevice_queue(t->dev, list);
2191 t = rtnl_dereference(t->next); 2183 t = rtnl_dereference(t->next);
2192 } 2184 }
2193 } 2185 }
2194
2195 unregister_netdevice_many(&list);
2196} 2186}
2197 2187
2198static int __net_init ip6_tnl_init_net(struct net *net) 2188static int __net_init ip6_tnl_init_net(struct net *net)
@@ -2236,16 +2226,21 @@ err_alloc_dev:
2236 return err; 2226 return err;
2237} 2227}
2238 2228
2239static void __net_exit ip6_tnl_exit_net(struct net *net) 2229static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
2240{ 2230{
2231 struct net *net;
2232 LIST_HEAD(list);
2233
2241 rtnl_lock(); 2234 rtnl_lock();
2242 ip6_tnl_destroy_tunnels(net); 2235 list_for_each_entry(net, net_list, exit_list)
2236 ip6_tnl_destroy_tunnels(net, &list);
2237 unregister_netdevice_many(&list);
2243 rtnl_unlock(); 2238 rtnl_unlock();
2244} 2239}
2245 2240
2246static struct pernet_operations ip6_tnl_net_ops = { 2241static struct pernet_operations ip6_tnl_net_ops = {
2247 .init = ip6_tnl_init_net, 2242 .init = ip6_tnl_init_net,
2248 .exit = ip6_tnl_exit_net, 2243 .exit_batch = ip6_tnl_exit_batch_net,
2249 .id = &ip6_tnl_net_id, 2244 .id = &ip6_tnl_net_id,
2250 .size = sizeof(struct ip6_tnl_net), 2245 .size = sizeof(struct ip6_tnl_net),
2251}; 2246};
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index bcdc2d557de1..dbb74f3c57a7 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1053,23 +1053,22 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = {
1053 .get_link_net = ip6_tnl_get_link_net, 1053 .get_link_net = ip6_tnl_get_link_net,
1054}; 1054};
1055 1055
1056static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) 1056static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n,
1057 struct list_head *list)
1057{ 1058{
1058 int h; 1059 int h;
1059 struct ip6_tnl *t; 1060 struct ip6_tnl *t;
1060 LIST_HEAD(list);
1061 1061
1062 for (h = 0; h < IP6_VTI_HASH_SIZE; h++) { 1062 for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
1063 t = rtnl_dereference(ip6n->tnls_r_l[h]); 1063 t = rtnl_dereference(ip6n->tnls_r_l[h]);
1064 while (t) { 1064 while (t) {
1065 unregister_netdevice_queue(t->dev, &list); 1065 unregister_netdevice_queue(t->dev, list);
1066 t = rtnl_dereference(t->next); 1066 t = rtnl_dereference(t->next);
1067 } 1067 }
1068 } 1068 }
1069 1069
1070 t = rtnl_dereference(ip6n->tnls_wc[0]); 1070 t = rtnl_dereference(ip6n->tnls_wc[0]);
1071 unregister_netdevice_queue(t->dev, &list); 1071 unregister_netdevice_queue(t->dev, list);
1072 unregister_netdevice_many(&list);
1073} 1072}
1074 1073
1075static int __net_init vti6_init_net(struct net *net) 1074static int __net_init vti6_init_net(struct net *net)
@@ -1109,18 +1108,24 @@ err_alloc_dev:
1109 return err; 1108 return err;
1110} 1109}
1111 1110
1112static void __net_exit vti6_exit_net(struct net *net) 1111static void __net_exit vti6_exit_batch_net(struct list_head *net_list)
1113{ 1112{
1114 struct vti6_net *ip6n = net_generic(net, vti6_net_id); 1113 struct vti6_net *ip6n;
1114 struct net *net;
1115 LIST_HEAD(list);
1115 1116
1116 rtnl_lock(); 1117 rtnl_lock();
1117 vti6_destroy_tunnels(ip6n); 1118 list_for_each_entry(net, net_list, exit_list) {
1119 ip6n = net_generic(net, vti6_net_id);
1120 vti6_destroy_tunnels(ip6n, &list);
1121 }
1122 unregister_netdevice_many(&list);
1118 rtnl_unlock(); 1123 rtnl_unlock();
1119} 1124}
1120 1125
1121static struct pernet_operations vti6_net_ops = { 1126static struct pernet_operations vti6_net_ops = {
1122 .init = vti6_init_net, 1127 .init = vti6_init_net,
1123 .exit = vti6_exit_net, 1128 .exit_batch = vti6_exit_batch_net,
1124 .id = &vti6_net_id, 1129 .id = &vti6_net_id,
1125 .size = sizeof(struct vti6_net), 1130 .size = sizeof(struct vti6_net),
1126}; 1131};
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index f5500f5444e9..a2e1a864eb46 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -120,7 +120,7 @@ static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
120static int ip6mr_rtm_dumproute(struct sk_buff *skb, 120static int ip6mr_rtm_dumproute(struct sk_buff *skb,
121 struct netlink_callback *cb); 121 struct netlink_callback *cb);
122static void mroute_clean_tables(struct mr6_table *mrt, bool all); 122static void mroute_clean_tables(struct mr6_table *mrt, bool all);
123static void ipmr_expire_process(unsigned long arg); 123static void ipmr_expire_process(struct timer_list *t);
124 124
125#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 125#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
126#define ip6mr_for_each_table(mrt, net) \ 126#define ip6mr_for_each_table(mrt, net) \
@@ -320,8 +320,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
320 320
321 INIT_LIST_HEAD(&mrt->mfc6_unres_queue); 321 INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
322 322
323 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, 323 timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
324 (unsigned long)mrt);
325 324
326#ifdef CONFIG_IPV6_PIMSM_V2 325#ifdef CONFIG_IPV6_PIMSM_V2
327 mrt->mroute_reg_vif_num = -1; 326 mrt->mroute_reg_vif_num = -1;
@@ -888,9 +887,9 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
888 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); 887 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
889} 888}
890 889
891static void ipmr_expire_process(unsigned long arg) 890static void ipmr_expire_process(struct timer_list *t)
892{ 891{
893 struct mr6_table *mrt = (struct mr6_table *)arg; 892 struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
894 893
895 if (!spin_trylock(&mfc_unres_lock)) { 894 if (!spin_trylock(&mfc_unres_lock)) {
896 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); 895 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
@@ -1617,6 +1616,10 @@ int ip6mr_sk_done(struct sock *sk)
1617 struct net *net = sock_net(sk); 1616 struct net *net = sock_net(sk);
1618 struct mr6_table *mrt; 1617 struct mr6_table *mrt;
1619 1618
1619 if (sk->sk_type != SOCK_RAW ||
1620 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1621 return err;
1622
1620 rtnl_lock(); 1623 rtnl_lock();
1621 ip6mr_for_each_table(mrt, net) { 1624 ip6mr_for_each_table(mrt, net) {
1622 if (sk == mrt->mroute6_sk) { 1625 if (sk == mrt->mroute6_sk) {
@@ -1722,6 +1725,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
1722 case MRT6_ADD_MFC: 1725 case MRT6_ADD_MFC:
1723 case MRT6_DEL_MFC: 1726 case MRT6_DEL_MFC:
1724 parent = -1; 1727 parent = -1;
1728 /* fall through */
1725 case MRT6_ADD_MFC_PROXY: 1729 case MRT6_ADD_MFC_PROXY:
1726 case MRT6_DEL_MFC_PROXY: 1730 case MRT6_DEL_MFC_PROXY:
1727 if (optlen < sizeof(mfc)) 1731 if (optlen < sizeof(mfc))
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a5e466d4e093..b9404feabd78 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -377,6 +377,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
377 retv = 0; 377 retv = 0;
378 break; 378 break;
379 379
380 case IPV6_FREEBIND:
381 if (optlen < sizeof(int))
382 goto e_inval;
383 /* we also don't have a separate freebind bit for IPV6 */
384 inet_sk(sk)->freebind = valbool;
385 retv = 0;
386 break;
387
380 case IPV6_RECVORIGDSTADDR: 388 case IPV6_RECVORIGDSTADDR:
381 if (optlen < sizeof(int)) 389 if (optlen < sizeof(int))
382 goto e_inval; 390 goto e_inval;
@@ -1214,6 +1222,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1214 val = inet_sk(sk)->transparent; 1222 val = inet_sk(sk)->transparent;
1215 break; 1223 break;
1216 1224
1225 case IPV6_FREEBIND:
1226 val = inet_sk(sk)->freebind;
1227 break;
1228
1217 case IPV6_RECVORIGDSTADDR: 1229 case IPV6_RECVORIGDSTADDR:
1218 val = np->rxopt.bits.rxorigdstaddr; 1230 val = np->rxopt.bits.rxorigdstaddr;
1219 break; 1231 break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 12b7c27ce5ce..fc6d7d143f2c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -75,10 +75,10 @@ static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
75 75
76static void igmp6_join_group(struct ifmcaddr6 *ma); 76static void igmp6_join_group(struct ifmcaddr6 *ma);
77static void igmp6_leave_group(struct ifmcaddr6 *ma); 77static void igmp6_leave_group(struct ifmcaddr6 *ma);
78static void igmp6_timer_handler(unsigned long data); 78static void igmp6_timer_handler(struct timer_list *t);
79 79
80static void mld_gq_timer_expire(unsigned long data); 80static void mld_gq_timer_expire(struct timer_list *t);
81static void mld_ifc_timer_expire(unsigned long data); 81static void mld_ifc_timer_expire(struct timer_list *t);
82static void mld_ifc_event(struct inet6_dev *idev); 82static void mld_ifc_event(struct inet6_dev *idev);
83static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); 83static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
84static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); 84static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
@@ -839,7 +839,7 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
839 if (!mc) 839 if (!mc)
840 return NULL; 840 return NULL;
841 841
842 setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); 842 timer_setup(&mc->mca_timer, igmp6_timer_handler, 0);
843 843
844 mc->mca_addr = *addr; 844 mc->mca_addr = *addr;
845 mc->idev = idev; /* reference taken by caller */ 845 mc->idev = idev; /* reference taken by caller */
@@ -2083,9 +2083,9 @@ void ipv6_mc_dad_complete(struct inet6_dev *idev)
2083 } 2083 }
2084} 2084}
2085 2085
2086static void mld_dad_timer_expire(unsigned long data) 2086static void mld_dad_timer_expire(struct timer_list *t)
2087{ 2087{
2088 struct inet6_dev *idev = (struct inet6_dev *)data; 2088 struct inet6_dev *idev = from_timer(idev, t, mc_dad_timer);
2089 2089
2090 mld_send_initial_cr(idev); 2090 mld_send_initial_cr(idev);
2091 if (idev->mc_dad_count) { 2091 if (idev->mc_dad_count) {
@@ -2432,18 +2432,18 @@ static void igmp6_leave_group(struct ifmcaddr6 *ma)
2432 } 2432 }
2433} 2433}
2434 2434
2435static void mld_gq_timer_expire(unsigned long data) 2435static void mld_gq_timer_expire(struct timer_list *t)
2436{ 2436{
2437 struct inet6_dev *idev = (struct inet6_dev *)data; 2437 struct inet6_dev *idev = from_timer(idev, t, mc_gq_timer);
2438 2438
2439 idev->mc_gq_running = 0; 2439 idev->mc_gq_running = 0;
2440 mld_send_report(idev, NULL); 2440 mld_send_report(idev, NULL);
2441 in6_dev_put(idev); 2441 in6_dev_put(idev);
2442} 2442}
2443 2443
2444static void mld_ifc_timer_expire(unsigned long data) 2444static void mld_ifc_timer_expire(struct timer_list *t)
2445{ 2445{
2446 struct inet6_dev *idev = (struct inet6_dev *)data; 2446 struct inet6_dev *idev = from_timer(idev, t, mc_ifc_timer);
2447 2447
2448 mld_send_cr(idev); 2448 mld_send_cr(idev);
2449 if (idev->mc_ifc_count) { 2449 if (idev->mc_ifc_count) {
@@ -2462,9 +2462,9 @@ static void mld_ifc_event(struct inet6_dev *idev)
2462 mld_ifc_start_timer(idev, 1); 2462 mld_ifc_start_timer(idev, 1);
2463} 2463}
2464 2464
2465static void igmp6_timer_handler(unsigned long data) 2465static void igmp6_timer_handler(struct timer_list *t)
2466{ 2466{
2467 struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data; 2467 struct ifmcaddr6 *ma = from_timer(ma, t, mca_timer);
2468 2468
2469 if (mld_in_v1_mode(ma->idev)) 2469 if (mld_in_v1_mode(ma->idev))
2470 igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); 2470 igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
@@ -2552,14 +2552,11 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
2552 write_lock_bh(&idev->lock); 2552 write_lock_bh(&idev->lock);
2553 spin_lock_init(&idev->mc_lock); 2553 spin_lock_init(&idev->mc_lock);
2554 idev->mc_gq_running = 0; 2554 idev->mc_gq_running = 0;
2555 setup_timer(&idev->mc_gq_timer, mld_gq_timer_expire, 2555 timer_setup(&idev->mc_gq_timer, mld_gq_timer_expire, 0);
2556 (unsigned long)idev);
2557 idev->mc_tomb = NULL; 2556 idev->mc_tomb = NULL;
2558 idev->mc_ifc_count = 0; 2557 idev->mc_ifc_count = 0;
2559 setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire, 2558 timer_setup(&idev->mc_ifc_timer, mld_ifc_timer_expire, 0);
2560 (unsigned long)idev); 2559 timer_setup(&idev->mc_dad_timer, mld_dad_timer_expire, 0);
2561 setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire,
2562 (unsigned long)idev);
2563 ipv6_mc_reset(idev); 2560 ipv6_mc_reset(idev);
2564 write_unlock_bh(&idev->lock); 2561 write_unlock_bh(&idev->lock);
2565} 2562}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 266a530414d7..b3cea200c85e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -46,6 +46,7 @@
46#endif 46#endif
47 47
48#include <linux/if_addr.h> 48#include <linux/if_addr.h>
49#include <linux/if_ether.h>
49#include <linux/if_arp.h> 50#include <linux/if_arp.h>
50#include <linux/ipv6.h> 51#include <linux/ipv6.h>
51#include <linux/icmpv6.h> 52#include <linux/icmpv6.h>
@@ -426,12 +427,19 @@ static void ip6_nd_hdr(struct sk_buff *skb,
426 int hop_limit, int len) 427 int hop_limit, int len)
427{ 428{
428 struct ipv6hdr *hdr; 429 struct ipv6hdr *hdr;
430 struct inet6_dev *idev;
431 unsigned tclass;
432
433 rcu_read_lock();
434 idev = __in6_dev_get(skb->dev);
435 tclass = idev ? idev->cnf.ndisc_tclass : 0;
436 rcu_read_unlock();
429 437
430 skb_push(skb, sizeof(*hdr)); 438 skb_push(skb, sizeof(*hdr));
431 skb_reset_network_header(skb); 439 skb_reset_network_header(skb);
432 hdr = ipv6_hdr(skb); 440 hdr = ipv6_hdr(skb);
433 441
434 ip6_flow_hdr(hdr, 0, 0); 442 ip6_flow_hdr(hdr, tclass, 0);
435 443
436 hdr->payload_len = htons(len); 444 hdr->payload_len = htons(len);
437 hdr->nexthdr = IPPROTO_ICMPV6; 445 hdr->nexthdr = IPPROTO_ICMPV6;
@@ -822,7 +830,7 @@ have_ifp:
822 * who is doing DAD 830 * who is doing DAD
823 * so fail our DAD process 831 * so fail our DAD process
824 */ 832 */
825 addrconf_dad_failure(ifp); 833 addrconf_dad_failure(skb, ifp);
826 return; 834 return;
827 } else { 835 } else {
828 /* 836 /*
@@ -975,7 +983,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
975 if (ifp) { 983 if (ifp) {
976 if (skb->pkt_type != PACKET_LOOPBACK 984 if (skb->pkt_type != PACKET_LOOPBACK
977 && (ifp->flags & IFA_F_TENTATIVE)) { 985 && (ifp->flags & IFA_F_TENTATIVE)) {
978 addrconf_dad_failure(ifp); 986 addrconf_dad_failure(skb, ifp);
979 return; 987 return;
980 } 988 }
981 /* What should we make now? The advertisement 989 /* What should we make now? The advertisement
@@ -989,8 +997,8 @@ static void ndisc_recv_na(struct sk_buff *skb)
989 */ 997 */
990 if (skb->pkt_type != PACKET_LOOPBACK) 998 if (skb->pkt_type != PACKET_LOOPBACK)
991 ND_PRINTK(1, warn, 999 ND_PRINTK(1, warn,
992 "NA: someone advertises our address %pI6 on %s!\n", 1000 "NA: %pM advertised our address %pI6c on %s!\n",
993 &ifp->addr, ifp->idev->dev->name); 1001 eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name);
994 in6_ifa_put(ifp); 1002 in6_ifa_put(ifp);
995 return; 1003 return;
996 } 1004 }
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 01bd3ee5ebc6..f06e25065a34 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -800,6 +800,25 @@ get_counters(const struct xt_table_info *t,
800 } 800 }
801} 801}
802 802
803static void get_old_counters(const struct xt_table_info *t,
804 struct xt_counters counters[])
805{
806 struct ip6t_entry *iter;
807 unsigned int cpu, i;
808
809 for_each_possible_cpu(cpu) {
810 i = 0;
811 xt_entry_foreach(iter, t->entries, t->size) {
812 const struct xt_counters *tmp;
813
814 tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
815 ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
816 ++i;
817 }
818 cond_resched();
819 }
820}
821
803static struct xt_counters *alloc_counters(const struct xt_table *table) 822static struct xt_counters *alloc_counters(const struct xt_table *table)
804{ 823{
805 unsigned int countersize; 824 unsigned int countersize;
@@ -1090,8 +1109,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1090 (newinfo->number <= oldinfo->initial_entries)) 1109 (newinfo->number <= oldinfo->initial_entries))
1091 module_put(t->me); 1110 module_put(t->me);
1092 1111
1093 /* Get the old counters, and synchronize with replace */ 1112 get_old_counters(oldinfo, counters);
1094 get_counters(oldinfo, counters);
1095 1113
1096 /* Decrease module usage counts and free resource */ 1114 /* Decrease module usage counts and free resource */
1097 xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) 1115 xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index fe01dc953c56..3b80a38f62b8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -339,7 +339,7 @@ static void ipv6_hooks_unregister(struct net *net)
339 mutex_unlock(&register_ipv6_hooks); 339 mutex_unlock(&register_ipv6_hooks);
340} 340}
341 341
342struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { 342const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
343 .l3proto = PF_INET6, 343 .l3proto = PF_INET6,
344 .pkt_to_tuple = ipv6_pkt_to_tuple, 344 .pkt_to_tuple = ipv6_pkt_to_tuple,
345 .invert_tuple = ipv6_invert_tuple, 345 .invert_tuple = ipv6_invert_tuple,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index a9e1fd1a8536..3ac0d826afc4 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -94,7 +94,6 @@ static int icmpv6_packet(struct nf_conn *ct,
94 const struct sk_buff *skb, 94 const struct sk_buff *skb,
95 unsigned int dataoff, 95 unsigned int dataoff,
96 enum ip_conntrack_info ctinfo, 96 enum ip_conntrack_info ctinfo,
97 u_int8_t pf,
98 unsigned int *timeout) 97 unsigned int *timeout)
99{ 98{
100 /* Do not immediately delete the connection after the first 99 /* Do not immediately delete the connection after the first
@@ -176,6 +175,12 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
176 return NF_ACCEPT; 175 return NF_ACCEPT;
177} 176}
178 177
178static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
179 u8 pf, const char *msg)
180{
181 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
182}
183
179static int 184static int
180icmpv6_error(struct net *net, struct nf_conn *tmpl, 185icmpv6_error(struct net *net, struct nf_conn *tmpl,
181 struct sk_buff *skb, unsigned int dataoff, 186 struct sk_buff *skb, unsigned int dataoff,
@@ -187,17 +192,13 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
187 192
188 icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); 193 icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
189 if (icmp6h == NULL) { 194 if (icmp6h == NULL) {
190 if (LOG_INVALID(net, IPPROTO_ICMPV6)) 195 icmpv6_error_log(skb, net, pf, "short packet");
191 nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
192 "nf_ct_icmpv6: short packet ");
193 return -NF_ACCEPT; 196 return -NF_ACCEPT;
194 } 197 }
195 198
196 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 199 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
197 nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { 200 nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
198 if (LOG_INVALID(net, IPPROTO_ICMPV6)) 201 icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
199 nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
200 "nf_ct_icmpv6: ICMPv6 checksum failed ");
201 return -NF_ACCEPT; 202 return -NF_ACCEPT;
202 } 203 }
203 204
@@ -258,9 +259,14 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
258 return 0; 259 return 0;
259} 260}
260 261
261static int icmpv6_nlattr_tuple_size(void) 262static unsigned int icmpv6_nlattr_tuple_size(void)
262{ 263{
263 return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); 264 static unsigned int size __read_mostly;
265
266 if (!size)
267 size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
268
269 return size;
264} 270}
265#endif 271#endif
266 272
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b263bf3a19f7..977d8900cfd1 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -169,12 +169,13 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q)
169 return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); 169 return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
170} 170}
171 171
172static void nf_ct_frag6_expire(unsigned long data) 172static void nf_ct_frag6_expire(struct timer_list *t)
173{ 173{
174 struct inet_frag_queue *frag = from_timer(frag, t, timer);
174 struct frag_queue *fq; 175 struct frag_queue *fq;
175 struct net *net; 176 struct net *net;
176 177
177 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); 178 fq = container_of(frag, struct frag_queue, q);
178 net = container_of(fq->q.net, struct net, nf_frag.frags); 179 net = container_of(fq->q.net, struct net, nf_frag.frags);
179 180
180 ip6_expire_frag_queue(net, fq, &nf_frags); 181 ip6_expire_frag_queue(net, fq, &nf_frags);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 46d6dba50698..1d2fb9267d6f 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -290,7 +290,8 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
290 else 290 else
291 return NF_ACCEPT; 291 return NF_ACCEPT;
292 } 292 }
293 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ 293 /* Only ICMPs can be IP_CT_IS_REPLY: */
294 /* fall through */
294 case IP_CT_NEW: 295 case IP_CT_NEW:
295 /* Seen it before? This can happen for loopback, retrans, 296 /* Seen it before? This can happen for loopback, retrans,
296 * or local packets. 297 * or local packets.
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index a338bbc33cf3..4a7e5ffa5108 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -31,37 +31,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
31 return id; 31 return id;
32} 32}
33 33
34/* This function exists only for tap drivers that must support broken
35 * clients requesting UFO without specifying an IPv6 fragment ID.
36 *
37 * This is similar to ipv6_select_ident() but we use an independent hash
38 * seed to limit information leakage.
39 *
40 * The network header must be set before calling this.
41 */
42void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
43{
44 static u32 ip6_proxy_idents_hashrnd __read_mostly;
45 struct in6_addr buf[2];
46 struct in6_addr *addrs;
47 u32 id;
48
49 addrs = skb_header_pointer(skb,
50 skb_network_offset(skb) +
51 offsetof(struct ipv6hdr, saddr),
52 sizeof(buf), buf);
53 if (!addrs)
54 return;
55
56 net_get_random_once(&ip6_proxy_idents_hashrnd,
57 sizeof(ip6_proxy_idents_hashrnd));
58
59 id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
60 &addrs[1], &addrs[0]);
61 skb_shinfo(skb)->ip6_frag_id = htonl(id);
62}
63EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
64
65__be32 ipv6_select_ident(struct net *net, 34__be32 ipv6_select_ident(struct net *net,
66 const struct in6_addr *daddr, 35 const struct in6_addr *daddr,
67 const struct in6_addr *saddr) 36 const struct in6_addr *saddr)
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ac826dd338ff..d12c55dad7d1 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -154,9 +154,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
154 ICMP6_MIB_OUTERRORS); 154 ICMP6_MIB_OUTERRORS);
155 ip6_flush_pending_frames(sk); 155 ip6_flush_pending_frames(sk);
156 } else { 156 } else {
157 err = icmpv6_push_pending_frames(sk, &fl6, 157 icmpv6_push_pending_frames(sk, &fl6,
158 (struct icmp6hdr *) &pfh.icmph, 158 (struct icmp6hdr *)&pfh.icmph, len);
159 len);
160 } 159 }
161 release_sock(sk); 160 release_sock(sk);
162 161
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index e4462b0ff801..761a473a07c5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1055,6 +1055,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
1055 if (optname == IPV6_CHECKSUM || 1055 if (optname == IPV6_CHECKSUM ||
1056 optname == IPV6_HDRINCL) 1056 optname == IPV6_HDRINCL)
1057 break; 1057 break;
1058 /* fall through */
1058 default: 1059 default:
1059 return ipv6_setsockopt(sk, level, optname, optval, optlen); 1060 return ipv6_setsockopt(sk, level, optname, optval, optlen);
1060 } 1061 }
@@ -1077,6 +1078,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
1077 if (optname == IPV6_CHECKSUM || 1078 if (optname == IPV6_CHECKSUM ||
1078 optname == IPV6_HDRINCL) 1079 optname == IPV6_HDRINCL)
1079 break; 1080 break;
1081 /* fall through */
1080 default: 1082 default:
1081 return compat_ipv6_setsockopt(sk, level, optname, 1083 return compat_ipv6_setsockopt(sk, level, optname,
1082 optval, optlen); 1084 optval, optlen);
@@ -1138,6 +1140,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
1138 if (optname == IPV6_CHECKSUM || 1140 if (optname == IPV6_CHECKSUM ||
1139 optname == IPV6_HDRINCL) 1141 optname == IPV6_HDRINCL)
1140 break; 1142 break;
1143 /* fall through */
1141 default: 1144 default:
1142 return ipv6_getsockopt(sk, level, optname, optval, optlen); 1145 return ipv6_getsockopt(sk, level, optname, optval, optlen);
1143 } 1146 }
@@ -1160,6 +1163,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
1160 if (optname == IPV6_CHECKSUM || 1163 if (optname == IPV6_CHECKSUM ||
1161 optname == IPV6_HDRINCL) 1164 optname == IPV6_HDRINCL)
1162 break; 1165 break;
1166 /* fall through */
1163 default: 1167 default:
1164 return compat_ipv6_getsockopt(sk, level, optname, 1168 return compat_ipv6_getsockopt(sk, level, optname,
1165 optval, optlen); 1169 optval, optlen);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 846012eae526..afbc000ad4f2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -170,12 +170,13 @@ out:
170} 170}
171EXPORT_SYMBOL(ip6_expire_frag_queue); 171EXPORT_SYMBOL(ip6_expire_frag_queue);
172 172
173static void ip6_frag_expire(unsigned long data) 173static void ip6_frag_expire(struct timer_list *t)
174{ 174{
175 struct inet_frag_queue *frag = from_timer(frag, t, timer);
175 struct frag_queue *fq; 176 struct frag_queue *fq;
176 struct net *net; 177 struct net *net;
177 178
178 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); 179 fq = container_of(frag, struct frag_queue, q);
179 net = container_of(fq->q.net, struct net, ipv6.frags); 180 net = container_of(fq->q.net, struct net, ipv6.frags);
180 181
181 ip6_expire_frag_queue(net, fq, &ip6_frags); 182 ip6_expire_frag_queue(net, fq, &ip6_frags);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a96d5b385d8f..05eb7bc36156 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -44,6 +44,7 @@
44#include <linux/seq_file.h> 44#include <linux/seq_file.h>
45#include <linux/nsproxy.h> 45#include <linux/nsproxy.h>
46#include <linux/slab.h> 46#include <linux/slab.h>
47#include <linux/jhash.h>
47#include <net/net_namespace.h> 48#include <net/net_namespace.h>
48#include <net/snmp.h> 49#include <net/snmp.h>
49#include <net/ipv6.h> 50#include <net/ipv6.h>
@@ -104,6 +105,9 @@ static int rt6_fill_node(struct net *net,
104 struct in6_addr *dst, struct in6_addr *src, 105 struct in6_addr *dst, struct in6_addr *src,
105 int iif, int type, u32 portid, u32 seq, 106 int iif, int type, u32 portid, u32 seq,
106 unsigned int flags); 107 unsigned int flags);
108static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 struct in6_addr *daddr,
110 struct in6_addr *saddr);
107 111
108#ifdef CONFIG_IPV6_ROUTE_INFO 112#ifdef CONFIG_IPV6_ROUTE_INFO
109static struct rt6_info *rt6_add_route_info(struct net *net, 113static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -139,9 +143,11 @@ static void rt6_uncached_list_del(struct rt6_info *rt)
139{ 143{
140 if (!list_empty(&rt->rt6i_uncached)) { 144 if (!list_empty(&rt->rt6i_uncached)) {
141 struct uncached_list *ul = rt->rt6i_uncached_list; 145 struct uncached_list *ul = rt->rt6i_uncached_list;
146 struct net *net = dev_net(rt->dst.dev);
142 147
143 spin_lock_bh(&ul->lock); 148 spin_lock_bh(&ul->lock);
144 list_del(&rt->rt6i_uncached); 149 list_del(&rt->rt6i_uncached);
150 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
145 spin_unlock_bh(&ul->lock); 151 spin_unlock_bh(&ul->lock);
146 } 152 }
147} 153}
@@ -355,8 +361,10 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
355 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 361 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
356 1, DST_OBSOLETE_FORCE_CHK, flags); 362 1, DST_OBSOLETE_FORCE_CHK, flags);
357 363
358 if (rt) 364 if (rt) {
359 rt6_info_init(rt); 365 rt6_info_init(rt);
366 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
367 }
360 368
361 return rt; 369 return rt;
362} 370}
@@ -369,17 +377,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net,
369 377
370 if (rt) { 378 if (rt) {
371 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); 379 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
372 if (rt->rt6i_pcpu) { 380 if (!rt->rt6i_pcpu) {
373 int cpu;
374
375 for_each_possible_cpu(cpu) {
376 struct rt6_info **p;
377
378 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
379 /* no one shares rt */
380 *p = NULL;
381 }
382 } else {
383 dst_release_immediate(&rt->dst); 381 dst_release_immediate(&rt->dst);
384 return NULL; 382 return NULL;
385 } 383 }
@@ -392,6 +390,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
392static void ip6_dst_destroy(struct dst_entry *dst) 390static void ip6_dst_destroy(struct dst_entry *dst)
393{ 391{
394 struct rt6_info *rt = (struct rt6_info *)dst; 392 struct rt6_info *rt = (struct rt6_info *)dst;
393 struct rt6_exception_bucket *bucket;
395 struct dst_entry *from = dst->from; 394 struct dst_entry *from = dst->from;
396 struct inet6_dev *idev; 395 struct inet6_dev *idev;
397 396
@@ -404,6 +403,11 @@ static void ip6_dst_destroy(struct dst_entry *dst)
404 rt->rt6i_idev = NULL; 403 rt->rt6i_idev = NULL;
405 in6_dev_put(idev); 404 in6_dev_put(idev);
406 } 405 }
406 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
407 if (bucket) {
408 rt->rt6i_exception_bucket = NULL;
409 kfree(bucket);
410 }
407 411
408 dst->from = NULL; 412 dst->from = NULL;
409 dst_release(from); 413 dst_release(from);
@@ -478,7 +482,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
478} 482}
479 483
480/* 484/*
481 * Route lookup. Any table->tb6_lock is implied. 485 * Route lookup. rcu_read_lock() should be held.
482 */ 486 */
483 487
484static inline struct rt6_info *rt6_device_match(struct net *net, 488static inline struct rt6_info *rt6_device_match(struct net *net,
@@ -493,7 +497,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
493 if (!oif && ipv6_addr_any(saddr)) 497 if (!oif && ipv6_addr_any(saddr))
494 goto out; 498 goto out;
495 499
496 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { 500 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
497 struct net_device *dev = sprt->dst.dev; 501 struct net_device *dev = sprt->dst.dev;
498 502
499 if (oif) { 503 if (oif) {
@@ -702,6 +706,7 @@ out:
702} 706}
703 707
704static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 708static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
709 struct rt6_info *leaf,
705 struct rt6_info *rr_head, 710 struct rt6_info *rr_head,
706 u32 metric, int oif, int strict, 711 u32 metric, int oif, int strict,
707 bool *do_rr) 712 bool *do_rr)
@@ -711,7 +716,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
711 716
712 match = NULL; 717 match = NULL;
713 cont = NULL; 718 cont = NULL;
714 for (rt = rr_head; rt; rt = rt->dst.rt6_next) { 719 for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
715 if (rt->rt6i_metric != metric) { 720 if (rt->rt6i_metric != metric) {
716 cont = rt; 721 cont = rt;
717 break; 722 break;
@@ -720,7 +725,8 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
720 match = find_match(rt, oif, strict, &mpri, match, do_rr); 725 match = find_match(rt, oif, strict, &mpri, match, do_rr);
721 } 726 }
722 727
723 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) { 728 for (rt = leaf; rt && rt != rr_head;
729 rt = rcu_dereference(rt->dst.rt6_next)) {
724 if (rt->rt6i_metric != metric) { 730 if (rt->rt6i_metric != metric) {
725 cont = rt; 731 cont = rt;
726 break; 732 break;
@@ -732,37 +738,59 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
732 if (match || !cont) 738 if (match || !cont)
733 return match; 739 return match;
734 740
735 for (rt = cont; rt; rt = rt->dst.rt6_next) 741 for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
736 match = find_match(rt, oif, strict, &mpri, match, do_rr); 742 match = find_match(rt, oif, strict, &mpri, match, do_rr);
737 743
738 return match; 744 return match;
739} 745}
740 746
741static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) 747static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
748 int oif, int strict)
742{ 749{
750 struct rt6_info *leaf = rcu_dereference(fn->leaf);
743 struct rt6_info *match, *rt0; 751 struct rt6_info *match, *rt0;
744 struct net *net;
745 bool do_rr = false; 752 bool do_rr = false;
753 int key_plen;
746 754
747 rt0 = fn->rr_ptr; 755 if (!leaf || leaf == net->ipv6.ip6_null_entry)
756 return net->ipv6.ip6_null_entry;
757
758 rt0 = rcu_dereference(fn->rr_ptr);
748 if (!rt0) 759 if (!rt0)
749 fn->rr_ptr = rt0 = fn->leaf; 760 rt0 = leaf;
750 761
751 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict, 762 /* Double check to make sure fn is not an intermediate node
763 * and fn->leaf does not points to its child's leaf
764 * (This might happen if all routes under fn are deleted from
765 * the tree and fib6_repair_tree() is called on the node.)
766 */
767 key_plen = rt0->rt6i_dst.plen;
768#ifdef CONFIG_IPV6_SUBTREES
769 if (rt0->rt6i_src.plen)
770 key_plen = rt0->rt6i_src.plen;
771#endif
772 if (fn->fn_bit != key_plen)
773 return net->ipv6.ip6_null_entry;
774
775 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
752 &do_rr); 776 &do_rr);
753 777
754 if (do_rr) { 778 if (do_rr) {
755 struct rt6_info *next = rt0->dst.rt6_next; 779 struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
756 780
757 /* no entries matched; do round-robin */ 781 /* no entries matched; do round-robin */
758 if (!next || next->rt6i_metric != rt0->rt6i_metric) 782 if (!next || next->rt6i_metric != rt0->rt6i_metric)
759 next = fn->leaf; 783 next = leaf;
760 784
761 if (next != rt0) 785 if (next != rt0) {
762 fn->rr_ptr = next; 786 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
787 /* make sure next is not being deleted from the tree */
788 if (next->rt6i_node)
789 rcu_assign_pointer(fn->rr_ptr, next);
790 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
791 }
763 } 792 }
764 793
765 net = dev_net(rt0->dst.dev);
766 return match ? match : net->ipv6.ip6_null_entry; 794 return match ? match : net->ipv6.ip6_null_entry;
767} 795}
768 796
@@ -850,13 +878,14 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
850static struct fib6_node* fib6_backtrack(struct fib6_node *fn, 878static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
851 struct in6_addr *saddr) 879 struct in6_addr *saddr)
852{ 880{
853 struct fib6_node *pn; 881 struct fib6_node *pn, *sn;
854 while (1) { 882 while (1) {
855 if (fn->fn_flags & RTN_TL_ROOT) 883 if (fn->fn_flags & RTN_TL_ROOT)
856 return NULL; 884 return NULL;
857 pn = fn->parent; 885 pn = rcu_dereference(fn->parent);
858 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) 886 sn = FIB6_SUBTREE(pn);
859 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); 887 if (sn && sn != fn)
888 fn = fib6_lookup(sn, NULL, saddr);
860 else 889 else
861 fn = pn; 890 fn = pn;
862 if (fn->fn_flags & RTN_RTINFO) 891 if (fn->fn_flags & RTN_RTINFO)
@@ -864,29 +893,59 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
864 } 893 }
865} 894}
866 895
896static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
897 bool null_fallback)
898{
899 struct rt6_info *rt = *prt;
900
901 if (dst_hold_safe(&rt->dst))
902 return true;
903 if (null_fallback) {
904 rt = net->ipv6.ip6_null_entry;
905 dst_hold(&rt->dst);
906 } else {
907 rt = NULL;
908 }
909 *prt = rt;
910 return false;
911}
912
867static struct rt6_info *ip6_pol_route_lookup(struct net *net, 913static struct rt6_info *ip6_pol_route_lookup(struct net *net,
868 struct fib6_table *table, 914 struct fib6_table *table,
869 struct flowi6 *fl6, int flags) 915 struct flowi6 *fl6, int flags)
870{ 916{
917 struct rt6_info *rt, *rt_cache;
871 struct fib6_node *fn; 918 struct fib6_node *fn;
872 struct rt6_info *rt;
873 919
874 read_lock_bh(&table->tb6_lock); 920 rcu_read_lock();
875 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 921 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
876restart: 922restart:
877 rt = fn->leaf; 923 rt = rcu_dereference(fn->leaf);
878 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); 924 if (!rt) {
879 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) 925 rt = net->ipv6.ip6_null_entry;
880 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); 926 } else {
927 rt = rt6_device_match(net, rt, &fl6->saddr,
928 fl6->flowi6_oif, flags);
929 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
930 rt = rt6_multipath_select(rt, fl6,
931 fl6->flowi6_oif, flags);
932 }
881 if (rt == net->ipv6.ip6_null_entry) { 933 if (rt == net->ipv6.ip6_null_entry) {
882 fn = fib6_backtrack(fn, &fl6->saddr); 934 fn = fib6_backtrack(fn, &fl6->saddr);
883 if (fn) 935 if (fn)
884 goto restart; 936 goto restart;
885 } 937 }
886 dst_use(&rt->dst, jiffies); 938 /* Search through exception table */
887 read_unlock_bh(&table->tb6_lock); 939 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
940 if (rt_cache)
941 rt = rt_cache;
942
943 if (ip6_hold_safe(net, &rt, true))
944 dst_use_noref(&rt->dst, jiffies);
945
946 rcu_read_unlock();
888 947
889 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); 948 trace_fib6_table_lookup(net, rt, table, fl6);
890 949
891 return rt; 950 return rt;
892 951
@@ -938,9 +997,9 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
938 struct fib6_table *table; 997 struct fib6_table *table;
939 998
940 table = rt->rt6i_table; 999 table = rt->rt6i_table;
941 write_lock_bh(&table->tb6_lock); 1000 spin_lock_bh(&table->tb6_lock);
942 err = fib6_add(&table->tb6_root, rt, info, mxc, extack); 1001 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
943 write_unlock_bh(&table->tb6_lock); 1002 spin_unlock_bh(&table->tb6_lock);
944 1003
945 return err; 1004 return err;
946} 1005}
@@ -1038,7 +1097,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1038 return pcpu_rt; 1097 return pcpu_rt;
1039} 1098}
1040 1099
1041/* It should be called with read_lock_bh(&tb6_lock) acquired */ 1100/* It should be called with rcu_read_lock() acquired */
1042static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) 1101static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1043{ 1102{
1044 struct rt6_info *pcpu_rt, **p; 1103 struct rt6_info *pcpu_rt, **p;
@@ -1046,16 +1105,14 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1046 p = this_cpu_ptr(rt->rt6i_pcpu); 1105 p = this_cpu_ptr(rt->rt6i_pcpu);
1047 pcpu_rt = *p; 1106 pcpu_rt = *p;
1048 1107
1049 if (pcpu_rt) { 1108 if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
1050 dst_hold(&pcpu_rt->dst);
1051 rt6_dst_from_metrics_check(pcpu_rt); 1109 rt6_dst_from_metrics_check(pcpu_rt);
1052 } 1110
1053 return pcpu_rt; 1111 return pcpu_rt;
1054} 1112}
1055 1113
1056static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) 1114static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1057{ 1115{
1058 struct fib6_table *table = rt->rt6i_table;
1059 struct rt6_info *pcpu_rt, *prev, **p; 1116 struct rt6_info *pcpu_rt, *prev, **p;
1060 1117
1061 pcpu_rt = ip6_rt_pcpu_alloc(rt); 1118 pcpu_rt = ip6_rt_pcpu_alloc(rt);
@@ -1066,36 +1123,526 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1066 return net->ipv6.ip6_null_entry; 1123 return net->ipv6.ip6_null_entry;
1067 } 1124 }
1068 1125
1069 read_lock_bh(&table->tb6_lock);
1070 if (rt->rt6i_pcpu) {
1071 p = this_cpu_ptr(rt->rt6i_pcpu);
1072 prev = cmpxchg(p, NULL, pcpu_rt);
1073 if (prev) {
1074 /* If someone did it before us, return prev instead */
1075 dst_release_immediate(&pcpu_rt->dst);
1076 pcpu_rt = prev;
1077 }
1078 } else {
1079 /* rt has been removed from the fib6 tree
1080 * before we have a chance to acquire the read_lock.
1081 * In this case, don't brother to create a pcpu rt
1082 * since rt is going away anyway. The next
1083 * dst_check() will trigger a re-lookup.
1084 */
1085 dst_release_immediate(&pcpu_rt->dst);
1086 pcpu_rt = rt;
1087 }
1088 dst_hold(&pcpu_rt->dst); 1126 dst_hold(&pcpu_rt->dst);
1127 p = this_cpu_ptr(rt->rt6i_pcpu);
1128 prev = cmpxchg(p, NULL, pcpu_rt);
1129 BUG_ON(prev);
1130
1089 rt6_dst_from_metrics_check(pcpu_rt); 1131 rt6_dst_from_metrics_check(pcpu_rt);
1090 read_unlock_bh(&table->tb6_lock);
1091 return pcpu_rt; 1132 return pcpu_rt;
1092} 1133}
1093 1134
1135/* exception hash table implementation
1136 */
1137static DEFINE_SPINLOCK(rt6_exception_lock);
1138
1139/* Remove rt6_ex from hash table and free the memory
1140 * Caller must hold rt6_exception_lock
1141 */
1142static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1143 struct rt6_exception *rt6_ex)
1144{
1145 struct net *net;
1146
1147 if (!bucket || !rt6_ex)
1148 return;
1149
1150 net = dev_net(rt6_ex->rt6i->dst.dev);
1151 rt6_ex->rt6i->rt6i_node = NULL;
1152 hlist_del_rcu(&rt6_ex->hlist);
1153 rt6_release(rt6_ex->rt6i);
1154 kfree_rcu(rt6_ex, rcu);
1155 WARN_ON_ONCE(!bucket->depth);
1156 bucket->depth--;
1157 net->ipv6.rt6_stats->fib_rt_cache--;
1158}
1159
1160/* Remove oldest rt6_ex in bucket and free the memory
1161 * Caller must hold rt6_exception_lock
1162 */
1163static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1164{
1165 struct rt6_exception *rt6_ex, *oldest = NULL;
1166
1167 if (!bucket)
1168 return;
1169
1170 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1171 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1172 oldest = rt6_ex;
1173 }
1174 rt6_remove_exception(bucket, oldest);
1175}
1176
1177static u32 rt6_exception_hash(const struct in6_addr *dst,
1178 const struct in6_addr *src)
1179{
1180 static u32 seed __read_mostly;
1181 u32 val;
1182
1183 net_get_random_once(&seed, sizeof(seed));
1184 val = jhash(dst, sizeof(*dst), seed);
1185
1186#ifdef CONFIG_IPV6_SUBTREES
1187 if (src)
1188 val = jhash(src, sizeof(*src), val);
1189#endif
1190 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1191}
1192
1193/* Helper function to find the cached rt in the hash table
1194 * and update bucket pointer to point to the bucket for this
1195 * (daddr, saddr) pair
1196 * Caller must hold rt6_exception_lock
1197 */
1198static struct rt6_exception *
1199__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1200 const struct in6_addr *daddr,
1201 const struct in6_addr *saddr)
1202{
1203 struct rt6_exception *rt6_ex;
1204 u32 hval;
1205
1206 if (!(*bucket) || !daddr)
1207 return NULL;
1208
1209 hval = rt6_exception_hash(daddr, saddr);
1210 *bucket += hval;
1211
1212 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1213 struct rt6_info *rt6 = rt6_ex->rt6i;
1214 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1215
1216#ifdef CONFIG_IPV6_SUBTREES
1217 if (matched && saddr)
1218 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1219#endif
1220 if (matched)
1221 return rt6_ex;
1222 }
1223 return NULL;
1224}
1225
1226/* Helper function to find the cached rt in the hash table
1227 * and update bucket pointer to point to the bucket for this
1228 * (daddr, saddr) pair
1229 * Caller must hold rcu_read_lock()
1230 */
1231static struct rt6_exception *
1232__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1233 const struct in6_addr *daddr,
1234 const struct in6_addr *saddr)
1235{
1236 struct rt6_exception *rt6_ex;
1237 u32 hval;
1238
1239 WARN_ON_ONCE(!rcu_read_lock_held());
1240
1241 if (!(*bucket) || !daddr)
1242 return NULL;
1243
1244 hval = rt6_exception_hash(daddr, saddr);
1245 *bucket += hval;
1246
1247 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1248 struct rt6_info *rt6 = rt6_ex->rt6i;
1249 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1250
1251#ifdef CONFIG_IPV6_SUBTREES
1252 if (matched && saddr)
1253 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1254#endif
1255 if (matched)
1256 return rt6_ex;
1257 }
1258 return NULL;
1259}
1260
/* Insert the cached route @nrt into the exception table hanging off its
 * "parent" fib entry @ort, replacing any existing exception for the same
 * (daddr, saddr) pair.
 *
 * Takes rt6_exception_lock; the bucket array is allocated lazily on
 * first insertion.  Returns 0 on success, -EINVAL if the table is being
 * flushed or nrt's pmtu is not smaller than ort's mtu, -ENOMEM on
 * allocation failure.
 */
static int rt6_insert_exception(struct rt6_info *nrt,
				struct rt6_info *ort)
{
	struct net *net = dev_net(ort->dst.dev);
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	int err = 0;

	/* ort can't be a cache or pcpu route; if it is one, hop to its
	 * real parent via dst.from and sanity-check the result.
	 */
	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;
	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));

	spin_lock_bh(&rt6_exception_lock);

	/* rt6_flush_exceptions() has torn this table down; refuse to
	 * recreate the bucket list for a dying route.
	 */
	if (ort->exception_bucket_flushed) {
		err = -EINVAL;
		goto out;
	}

	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));
	if (!bucket) {
		/* First exception for ort: allocate the bucket array. */
		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
				 GFP_ATOMIC);
		if (!bucket) {
			err = -ENOMEM;
			goto out;
		}
		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
	}

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates ort is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (ort->rt6i_src.plen)
		src_key = &nrt->rt6i_src.addr;
#endif

	/* Update rt6i_prefsrc as it could be changed
	 * in rt6_remove_prefsrc()
	 */
	nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
	/* rt6_mtu_change() might lower mtu on ort.
	 * Only insert this exception route if its mtu
	 * is less than ort's mtu value.
	 */
	if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
		err = -EINVAL;
		goto out;
	}

	/* Drop any stale exception for the same (daddr, saddr) pair. */
	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex)
		rt6_remove_exception(bucket, rt6_ex);

	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
	if (!rt6_ex) {
		err = -ENOMEM;
		goto out;
	}
	rt6_ex->rt6i = nrt;
	rt6_ex->stamp = jiffies;
	atomic_inc(&nrt->rt6i_ref);
	nrt->rt6i_node = ort->rt6i_node;
	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
	bucket->depth++;
	net->ipv6.rt6_stats->fib_rt_cache++;

	/* Bound the per-bucket chain length by evicting the oldest entry. */
	if (bucket->depth > FIB6_MAX_DEPTH)
		rt6_exception_remove_oldest(bucket);

out:
	spin_unlock_bh(&rt6_exception_lock);

	/* Update fn->fn_sernum to invalidate all cached dst */
	if (!err) {
		fib6_update_sernum(ort);
		fib6_force_start_gc(net);
	}

	return err;
}
1350
/* Remove every cached route in rt's exception table.
 *
 * Sets exception_bucket_flushed under rt6_exception_lock so a racing
 * rt6_insert_exception() cannot repopulate the table afterwards (it
 * bails out with -EINVAL when the flag is set).
 */
void rt6_flush_exceptions(struct rt6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	spin_lock_bh(&rt6_exception_lock);
	/* Prevent rt6_insert_exception() to recreate the bucket list */
	rt->exception_bucket_flushed = 1;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));
	if (!bucket)
		goto out;

	/* Empty every bucket; _safe iteration because entries are removed
	 * while walking.  depth must be zero once a chain is drained.
	 */
	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
			rt6_remove_exception(bucket, rt6_ex);
		WARN_ON_ONCE(bucket->depth);
		bucket++;
	}

out:
	spin_unlock_bh(&rt6_exception_lock);
}
1377
1378/* Find cached rt in the hash table inside passed in rt
1379 * Caller has to hold rcu_read_lock()
1380 */
1381static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1382 struct in6_addr *daddr,
1383 struct in6_addr *saddr)
1384{
1385 struct rt6_exception_bucket *bucket;
1386 struct in6_addr *src_key = NULL;
1387 struct rt6_exception *rt6_ex;
1388 struct rt6_info *res = NULL;
1389
1390 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1391
1392#ifdef CONFIG_IPV6_SUBTREES
1393 /* rt6i_src.plen != 0 indicates rt is in subtree
1394 * and exception table is indexed by a hash of
1395 * both rt6i_dst and rt6i_src.
1396 * Otherwise, the exception table is indexed by
1397 * a hash of only rt6i_dst.
1398 */
1399 if (rt->rt6i_src.plen)
1400 src_key = saddr;
1401#endif
1402 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1403
1404 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1405 res = rt6_ex->rt6i;
1406
1407 return res;
1408}
1409
/* Remove the passed in cached rt from the hash table that contains it.
 *
 * rt must be an RTF_CACHE route; its parent is reached via dst.from.
 * Returns 0 on success, -EINVAL if rt is not a cached route or has no
 * parent, -ENOENT if the parent has no matching exception.
 */
int rt6_remove_exception_rt(struct rt6_info *rt)
{
	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	int err;

	if (!from ||
	    !(rt->rt6i_flags & RTF_CACHE))
		return -EINVAL;

	/* Cheap lockless check before taking the lock. */
	if (!rcu_access_pointer(from->rt6i_exception_bucket))
		return -ENOENT;

	spin_lock_bh(&rt6_exception_lock);
	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));
#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates 'from' is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (from->rt6i_src.plen)
		src_key = &rt->rt6i_src.addr;
#endif
	rt6_ex = __rt6_find_exception_spinlock(&bucket,
					       &rt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex) {
		rt6_remove_exception(bucket, rt6_ex);
		err = 0;
	} else {
		err = -ENOENT;
	}

	spin_unlock_bh(&rt6_exception_lock);
	return err;
}
1452
1453/* Find rt6_ex which contains the passed in rt cache and
1454 * refresh its stamp
1455 */
1456static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1457{
1458 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1459 struct rt6_exception_bucket *bucket;
1460 struct in6_addr *src_key = NULL;
1461 struct rt6_exception *rt6_ex;
1462
1463 if (!from ||
1464 !(rt->rt6i_flags & RTF_CACHE))
1465 return;
1466
1467 rcu_read_lock();
1468 bucket = rcu_dereference(from->rt6i_exception_bucket);
1469
1470#ifdef CONFIG_IPV6_SUBTREES
1471 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1472 * and exception table is indexed by a hash of
1473 * both rt6i_dst and rt6i_src.
1474 * Otherwise, the exception table is indexed by
1475 * a hash of only rt6i_dst.
1476 */
1477 if (from->rt6i_src.plen)
1478 src_key = &rt->rt6i_src.addr;
1479#endif
1480 rt6_ex = __rt6_find_exception_rcu(&bucket,
1481 &rt->rt6i_dst.addr,
1482 src_key);
1483 if (rt6_ex)
1484 rt6_ex->stamp = jiffies;
1485
1486 rcu_read_unlock();
1487}
1488
1489static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1490{
1491 struct rt6_exception_bucket *bucket;
1492 struct rt6_exception *rt6_ex;
1493 int i;
1494
1495 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1496 lockdep_is_held(&rt6_exception_lock));
1497
1498 if (bucket) {
1499 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1500 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1501 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1502 }
1503 bucket++;
1504 }
1505 }
1506}
1507
/* Lower the pmtu of every cached route in rt's exception table to at
 * most @mtu.  Caller must hold rt6_exception_lock.
 */
static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	int i;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
				struct rt6_info *entry = rt6_ex->rt6i;
				/* For RTF_CACHE with rt6i_pmtu == 0
				 * (i.e. a redirected route),
				 * the metrics of its rt->dst.from has already
				 * been updated.
				 */
				if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
					entry->rt6i_pmtu = mtu;
			}
			bucket++;
		}
	}
}
1533
1534#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1535
/* Drop every cached gateway route in rt's exception table whose gateway
 * equals @gateway.  Needed when a router turns into a host: a cached
 * route may carry a different gateway than its parent (ip redirect), so
 * cleaning the fib entry alone is not enough.
 */
static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
					struct in6_addr *gateway)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	/* Cheap lockless check before taking the lock. */
	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
		return;

	spin_lock_bh(&rt6_exception_lock);
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				     lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			/* _safe iteration: entries may be removed mid-walk */
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				struct rt6_info *entry = rt6_ex->rt6i;

				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
				    RTF_CACHE_GATEWAY &&
				    ipv6_addr_equal(gateway,
						    &entry->rt6i_gateway)) {
					rt6_remove_exception(bucket, rt6_ex);
				}
			}
			bucket++;
		}
	}

	spin_unlock_bh(&rt6_exception_lock);
}
1570
/* GC policy for a single exception entry: remove it when aged out,
 * expired, or pointing at a gateway that is no longer a router;
 * otherwise count it in gc_args->more so GC keeps running.
 * Caller must hold rt6_exception_lock.
 */
static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
				      struct rt6_exception *rt6_ex,
				      struct fib6_gc_args *gc_args,
				      unsigned long now)
{
	struct rt6_info *rt = rt6_ex->rt6i;

	/* we are pruning and obsoleting aged-out and non gateway exceptions
	 * even if others have still references to them, so that on next
	 * dst_check() such references can be dropped.
	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
	 * expired, independently from their aging, as per RFC 8201 section 4
	 */
	if (!(rt->rt6i_flags & RTF_EXPIRES) &&
	    time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
		RT6_TRACE("aging clone %p\n", rt);
		rt6_remove_exception(bucket, rt6_ex);
		return;
	} else if (rt->rt6i_flags & RTF_GATEWAY) {
		struct neighbour *neigh;
		__u8 neigh_flags = 0;

		/* Purge gateway exceptions whose next hop does not
		 * advertise itself as a router (NTF_ROUTER unset; also the
		 * case when no neighbour entry exists at all).
		 */
		neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
		if (neigh) {
			neigh_flags = neigh->flags;
			neigh_release(neigh);
		}
		if (!(neigh_flags & NTF_ROUTER)) {
			RT6_TRACE("purging route %p via non-router but gateway\n",
				  rt);
			rt6_remove_exception(bucket, rt6_ex);
			return;
		}
	} else if (__rt6_check_expired(rt)) {
		RT6_TRACE("purging expired route %p\n", rt);
		rt6_remove_exception(bucket, rt6_ex);
		return;
	}
	/* Entry survives this pass; signal GC there is still work. */
	gc_args->more++;
}
1611
1612void rt6_age_exceptions(struct rt6_info *rt,
1613 struct fib6_gc_args *gc_args,
1614 unsigned long now)
1615{
1616 struct rt6_exception_bucket *bucket;
1617 struct rt6_exception *rt6_ex;
1618 struct hlist_node *tmp;
1619 int i;
1620
1621 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1622 return;
1623
1624 spin_lock_bh(&rt6_exception_lock);
1625 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1626 lockdep_is_held(&rt6_exception_lock));
1627
1628 if (bucket) {
1629 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1630 hlist_for_each_entry_safe(rt6_ex, tmp,
1631 &bucket->chain, hlist) {
1632 rt6_age_examine_exception(bucket, rt6_ex,
1633 gc_args, now);
1634 }
1635 bucket++;
1636 }
1637 }
1638 spin_unlock_bh(&rt6_exception_lock);
1639}
1640
1094struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, 1641struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1095 int oif, struct flowi6 *fl6, int flags) 1642 int oif, struct flowi6 *fl6, int flags)
1096{ 1643{
1097 struct fib6_node *fn, *saved_fn; 1644 struct fib6_node *fn, *saved_fn;
1098 struct rt6_info *rt; 1645 struct rt6_info *rt, *rt_cache;
1099 int strict = 0; 1646 int strict = 0;
1100 1647
1101 strict |= flags & RT6_LOOKUP_F_IFACE; 1648 strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -1103,7 +1650,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1103 if (net->ipv6.devconf_all->forwarding == 0) 1650 if (net->ipv6.devconf_all->forwarding == 0)
1104 strict |= RT6_LOOKUP_F_REACHABLE; 1651 strict |= RT6_LOOKUP_F_REACHABLE;
1105 1652
1106 read_lock_bh(&table->tb6_lock); 1653 rcu_read_lock();
1107 1654
1108 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 1655 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1109 saved_fn = fn; 1656 saved_fn = fn;
@@ -1112,7 +1659,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1112 oif = 0; 1659 oif = 0;
1113 1660
1114redo_rt6_select: 1661redo_rt6_select:
1115 rt = rt6_select(fn, oif, strict); 1662 rt = rt6_select(net, fn, oif, strict);
1116 if (rt->rt6i_nsiblings) 1663 if (rt->rt6i_nsiblings)
1117 rt = rt6_multipath_select(rt, fl6, oif, strict); 1664 rt = rt6_multipath_select(rt, fl6, oif, strict);
1118 if (rt == net->ipv6.ip6_null_entry) { 1665 if (rt == net->ipv6.ip6_null_entry) {
@@ -1127,14 +1674,23 @@ redo_rt6_select:
1127 } 1674 }
1128 } 1675 }
1129 1676
1677 /*Search through exception table */
1678 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1679 if (rt_cache)
1680 rt = rt_cache;
1130 1681
1131 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) { 1682 if (rt == net->ipv6.ip6_null_entry) {
1132 dst_use(&rt->dst, jiffies); 1683 rcu_read_unlock();
1133 read_unlock_bh(&table->tb6_lock); 1684 dst_hold(&rt->dst);
1134 1685 trace_fib6_table_lookup(net, rt, table, fl6);
1135 rt6_dst_from_metrics_check(rt); 1686 return rt;
1136 1687 } else if (rt->rt6i_flags & RTF_CACHE) {
1137 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); 1688 if (ip6_hold_safe(net, &rt, true)) {
1689 dst_use_noref(&rt->dst, jiffies);
1690 rt6_dst_from_metrics_check(rt);
1691 }
1692 rcu_read_unlock();
1693 trace_fib6_table_lookup(net, rt, table, fl6);
1138 return rt; 1694 return rt;
1139 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && 1695 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1140 !(rt->rt6i_flags & RTF_GATEWAY))) { 1696 !(rt->rt6i_flags & RTF_GATEWAY))) {
@@ -1146,8 +1702,14 @@ redo_rt6_select:
1146 1702
1147 struct rt6_info *uncached_rt; 1703 struct rt6_info *uncached_rt;
1148 1704
1149 dst_use(&rt->dst, jiffies); 1705 if (ip6_hold_safe(net, &rt, true)) {
1150 read_unlock_bh(&table->tb6_lock); 1706 dst_use_noref(&rt->dst, jiffies);
1707 } else {
1708 rcu_read_unlock();
1709 uncached_rt = rt;
1710 goto uncached_rt_out;
1711 }
1712 rcu_read_unlock();
1151 1713
1152 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); 1714 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1153 dst_release(&rt->dst); 1715 dst_release(&rt->dst);
@@ -1157,12 +1719,14 @@ redo_rt6_select:
1157 * No need for another dst_hold() 1719 * No need for another dst_hold()
1158 */ 1720 */
1159 rt6_uncached_list_add(uncached_rt); 1721 rt6_uncached_list_add(uncached_rt);
1722 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1160 } else { 1723 } else {
1161 uncached_rt = net->ipv6.ip6_null_entry; 1724 uncached_rt = net->ipv6.ip6_null_entry;
1162 dst_hold(&uncached_rt->dst); 1725 dst_hold(&uncached_rt->dst);
1163 } 1726 }
1164 1727
1165 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6); 1728uncached_rt_out:
1729 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
1166 return uncached_rt; 1730 return uncached_rt;
1167 1731
1168 } else { 1732 } else {
@@ -1170,26 +1734,28 @@ redo_rt6_select:
1170 1734
1171 struct rt6_info *pcpu_rt; 1735 struct rt6_info *pcpu_rt;
1172 1736
1173 rt->dst.lastuse = jiffies; 1737 dst_use_noref(&rt->dst, jiffies);
1174 rt->dst.__use++; 1738 local_bh_disable();
1175 pcpu_rt = rt6_get_pcpu_route(rt); 1739 pcpu_rt = rt6_get_pcpu_route(rt);
1176 1740
1177 if (pcpu_rt) { 1741 if (!pcpu_rt) {
1178 read_unlock_bh(&table->tb6_lock); 1742 /* atomic_inc_not_zero() is needed when using rcu */
1179 } else { 1743 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
1180 /* We have to do the read_unlock first 1744 /* No dst_hold() on rt is needed because grabbing
1181 * because rt6_make_pcpu_route() may trigger 1745 * rt->rt6i_ref makes sure rt can't be released.
1182 * ip6_dst_gc() which will take the write_lock. 1746 */
1183 */ 1747 pcpu_rt = rt6_make_pcpu_route(rt);
1184 dst_hold(&rt->dst); 1748 rt6_release(rt);
1185 read_unlock_bh(&table->tb6_lock); 1749 } else {
1186 pcpu_rt = rt6_make_pcpu_route(rt); 1750 /* rt is already removed from tree */
1187 dst_release(&rt->dst); 1751 pcpu_rt = net->ipv6.ip6_null_entry;
1752 dst_hold(&pcpu_rt->dst);
1753 }
1188 } 1754 }
1189 1755 local_bh_enable();
1190 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); 1756 rcu_read_unlock();
1757 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
1191 return pcpu_rt; 1758 return pcpu_rt;
1192
1193 } 1759 }
1194} 1760}
1195EXPORT_SYMBOL_GPL(ip6_pol_route); 1761EXPORT_SYMBOL_GPL(ip6_pol_route);
@@ -1328,6 +1894,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
1328 DST_OBSOLETE_DEAD, 0); 1894 DST_OBSOLETE_DEAD, 0);
1329 if (rt) { 1895 if (rt) {
1330 rt6_info_init(rt); 1896 rt6_info_init(rt);
1897 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
1331 1898
1332 new = &rt->dst; 1899 new = &rt->dst;
1333 new->__use = 1; 1900 new->__use = 1;
@@ -1491,23 +2058,17 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1491 2058
1492 if (!rt6_cache_allowed_for_pmtu(rt6)) { 2059 if (!rt6_cache_allowed_for_pmtu(rt6)) {
1493 rt6_do_update_pmtu(rt6, mtu); 2060 rt6_do_update_pmtu(rt6, mtu);
2061 /* update rt6_ex->stamp for cache */
2062 if (rt6->rt6i_flags & RTF_CACHE)
2063 rt6_update_exception_stamp_rt(rt6);
1494 } else if (daddr) { 2064 } else if (daddr) {
1495 struct rt6_info *nrt6; 2065 struct rt6_info *nrt6;
1496 2066
1497 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); 2067 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1498 if (nrt6) { 2068 if (nrt6) {
1499 rt6_do_update_pmtu(nrt6, mtu); 2069 rt6_do_update_pmtu(nrt6, mtu);
1500 2070 if (rt6_insert_exception(nrt6, rt6))
1501 /* ip6_ins_rt(nrt6) will bump the 2071 dst_release_immediate(&nrt6->dst);
1502 * rt6->rt6i_node->fn_sernum
1503 * which will fail the next rt6_check() and
1504 * invalidate the sk->sk_dst_cache.
1505 */
1506 ip6_ins_rt(nrt6);
1507 /* Release the reference taken in
1508 * ip6_rt_cache_alloc()
1509 */
1510 dst_release(&nrt6->dst);
1511 } 2072 }
1512 } 2073 }
1513} 2074}
@@ -1571,7 +2132,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
1571 int flags) 2132 int flags)
1572{ 2133{
1573 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; 2134 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1574 struct rt6_info *rt; 2135 struct rt6_info *rt, *rt_cache;
1575 struct fib6_node *fn; 2136 struct fib6_node *fn;
1576 2137
1577 /* Get the "current" route for this destination and 2138 /* Get the "current" route for this destination and
@@ -1584,10 +2145,10 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
1584 * routes. 2145 * routes.
1585 */ 2146 */
1586 2147
1587 read_lock_bh(&table->tb6_lock); 2148 rcu_read_lock();
1588 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 2149 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1589restart: 2150restart:
1590 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 2151 for_each_fib6_node_rt_rcu(fn) {
1591 if (rt6_check_expired(rt)) 2152 if (rt6_check_expired(rt))
1592 continue; 2153 continue;
1593 if (rt->dst.error) 2154 if (rt->dst.error)
@@ -1596,8 +2157,23 @@ restart:
1596 continue; 2157 continue;
1597 if (fl6->flowi6_oif != rt->dst.dev->ifindex) 2158 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1598 continue; 2159 continue;
1599 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 2160 /* rt_cache's gateway might be different from its 'parent'
2161 * in the case of an ip redirect.
2162 * So we keep searching in the exception table if the gateway
2163 * is different.
2164 */
2165 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2166 rt_cache = rt6_find_cached_rt(rt,
2167 &fl6->daddr,
2168 &fl6->saddr);
2169 if (rt_cache &&
2170 ipv6_addr_equal(&rdfl->gateway,
2171 &rt_cache->rt6i_gateway)) {
2172 rt = rt_cache;
2173 break;
2174 }
1600 continue; 2175 continue;
2176 }
1601 break; 2177 break;
1602 } 2178 }
1603 2179
@@ -1615,11 +2191,11 @@ restart:
1615 } 2191 }
1616 2192
1617out: 2193out:
1618 dst_hold(&rt->dst); 2194 ip6_hold_safe(net, &rt, true);
1619 2195
1620 read_unlock_bh(&table->tb6_lock); 2196 rcu_read_unlock();
1621 2197
1622 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); 2198 trace_fib6_table_lookup(net, rt, table, fl6);
1623 return rt; 2199 return rt;
1624}; 2200};
1625 2201
@@ -1766,6 +2342,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1766 * do proper release of the net_device 2342 * do proper release of the net_device
1767 */ 2343 */
1768 rt6_uncached_list_add(rt); 2344 rt6_uncached_list_add(rt);
2345 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1769 2346
1770 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); 2347 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1771 2348
@@ -1801,6 +2378,7 @@ out:
1801static int ip6_convert_metrics(struct mx6_config *mxc, 2378static int ip6_convert_metrics(struct mx6_config *mxc,
1802 const struct fib6_config *cfg) 2379 const struct fib6_config *cfg)
1803{ 2380{
2381 struct net *net = cfg->fc_nlinfo.nl_net;
1804 bool ecn_ca = false; 2382 bool ecn_ca = false;
1805 struct nlattr *nla; 2383 struct nlattr *nla;
1806 int remaining; 2384 int remaining;
@@ -1826,7 +2404,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
1826 char tmp[TCP_CA_NAME_MAX]; 2404 char tmp[TCP_CA_NAME_MAX];
1827 2405
1828 nla_strlcpy(tmp, nla, sizeof(tmp)); 2406 nla_strlcpy(tmp, nla, sizeof(tmp));
1829 val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 2407 val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
1830 if (val == TCP_CA_UNSPEC) 2408 if (val == TCP_CA_UNSPEC)
1831 goto err; 2409 goto err;
1832 } else { 2410 } else {
@@ -1901,6 +2479,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
1901 goto out; 2479 goto out;
1902 } 2480 }
1903 2481
2482 /* RTF_CACHE is an internal flag; can not be set by userspace */
2483 if (cfg->fc_flags & RTF_CACHE) {
2484 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2485 goto out;
2486 }
2487
1904 if (cfg->fc_dst_len > 128) { 2488 if (cfg->fc_dst_len > 128) {
1905 NL_SET_ERR_MSG(extack, "Invalid prefix length"); 2489 NL_SET_ERR_MSG(extack, "Invalid prefix length");
1906 goto out; 2490 goto out;
@@ -2216,9 +2800,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2216 } 2800 }
2217 2801
2218 table = rt->rt6i_table; 2802 table = rt->rt6i_table;
2219 write_lock_bh(&table->tb6_lock); 2803 spin_lock_bh(&table->tb6_lock);
2220 err = fib6_del(rt, info); 2804 err = fib6_del(rt, info);
2221 write_unlock_bh(&table->tb6_lock); 2805 spin_unlock_bh(&table->tb6_lock);
2222 2806
2223out: 2807out:
2224 ip6_rt_put(rt); 2808 ip6_rt_put(rt);
@@ -2244,7 +2828,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2244 if (rt == net->ipv6.ip6_null_entry) 2828 if (rt == net->ipv6.ip6_null_entry)
2245 goto out_put; 2829 goto out_put;
2246 table = rt->rt6i_table; 2830 table = rt->rt6i_table;
2247 write_lock_bh(&table->tb6_lock); 2831 spin_lock_bh(&table->tb6_lock);
2248 2832
2249 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { 2833 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2250 struct rt6_info *sibling, *next_sibling; 2834 struct rt6_info *sibling, *next_sibling;
@@ -2274,7 +2858,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2274 2858
2275 err = fib6_del(rt, info); 2859 err = fib6_del(rt, info);
2276out_unlock: 2860out_unlock:
2277 write_unlock_bh(&table->tb6_lock); 2861 spin_unlock_bh(&table->tb6_lock);
2278out_put: 2862out_put:
2279 ip6_rt_put(rt); 2863 ip6_rt_put(rt);
2280 2864
@@ -2288,9 +2872,9 @@ out_put:
2288static int ip6_route_del(struct fib6_config *cfg, 2872static int ip6_route_del(struct fib6_config *cfg,
2289 struct netlink_ext_ack *extack) 2873 struct netlink_ext_ack *extack)
2290{ 2874{
2875 struct rt6_info *rt, *rt_cache;
2291 struct fib6_table *table; 2876 struct fib6_table *table;
2292 struct fib6_node *fn; 2877 struct fib6_node *fn;
2293 struct rt6_info *rt;
2294 int err = -ESRCH; 2878 int err = -ESRCH;
2295 2879
2296 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 2880 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
@@ -2299,17 +2883,22 @@ static int ip6_route_del(struct fib6_config *cfg,
2299 return err; 2883 return err;
2300 } 2884 }
2301 2885
2302 read_lock_bh(&table->tb6_lock); 2886 rcu_read_lock();
2303 2887
2304 fn = fib6_locate(&table->tb6_root, 2888 fn = fib6_locate(&table->tb6_root,
2305 &cfg->fc_dst, cfg->fc_dst_len, 2889 &cfg->fc_dst, cfg->fc_dst_len,
2306 &cfg->fc_src, cfg->fc_src_len); 2890 &cfg->fc_src, cfg->fc_src_len,
2891 !(cfg->fc_flags & RTF_CACHE));
2307 2892
2308 if (fn) { 2893 if (fn) {
2309 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 2894 for_each_fib6_node_rt_rcu(fn) {
2310 if ((rt->rt6i_flags & RTF_CACHE) && 2895 if (cfg->fc_flags & RTF_CACHE) {
2311 !(cfg->fc_flags & RTF_CACHE)) 2896 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
2312 continue; 2897 &cfg->fc_src);
2898 if (!rt_cache)
2899 continue;
2900 rt = rt_cache;
2901 }
2313 if (cfg->fc_ifindex && 2902 if (cfg->fc_ifindex &&
2314 (!rt->dst.dev || 2903 (!rt->dst.dev ||
2315 rt->dst.dev->ifindex != cfg->fc_ifindex)) 2904 rt->dst.dev->ifindex != cfg->fc_ifindex))
@@ -2321,8 +2910,9 @@ static int ip6_route_del(struct fib6_config *cfg,
2321 continue; 2910 continue;
2322 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) 2911 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2323 continue; 2912 continue;
2324 dst_hold(&rt->dst); 2913 if (!dst_hold_safe(&rt->dst))
2325 read_unlock_bh(&table->tb6_lock); 2914 break;
2915 rcu_read_unlock();
2326 2916
2327 /* if gateway was specified only delete the one hop */ 2917 /* if gateway was specified only delete the one hop */
2328 if (cfg->fc_flags & RTF_GATEWAY) 2918 if (cfg->fc_flags & RTF_GATEWAY)
@@ -2331,7 +2921,7 @@ static int ip6_route_del(struct fib6_config *cfg,
2331 return __ip6_del_rt_siblings(rt, cfg); 2921 return __ip6_del_rt_siblings(rt, cfg);
2332 } 2922 }
2333 } 2923 }
2334 read_unlock_bh(&table->tb6_lock); 2924 rcu_read_unlock();
2335 2925
2336 return err; 2926 return err;
2337} 2927}
@@ -2435,8 +3025,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
2435 nrt->rt6i_protocol = RTPROT_REDIRECT; 3025 nrt->rt6i_protocol = RTPROT_REDIRECT;
2436 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; 3026 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2437 3027
2438 if (ip6_ins_rt(nrt)) 3028 /* No need to remove rt from the exception table if rt is
2439 goto out_release; 3029 * a cached route because rt6_insert_exception() will
3030 * takes care of it
3031 */
3032 if (rt6_insert_exception(nrt, rt)) {
3033 dst_release_immediate(&nrt->dst);
3034 goto out;
3035 }
2440 3036
2441 netevent.old = &rt->dst; 3037 netevent.old = &rt->dst;
2442 netevent.new = &nrt->dst; 3038 netevent.new = &nrt->dst;
@@ -2444,17 +3040,6 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
2444 netevent.neigh = neigh; 3040 netevent.neigh = neigh;
2445 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 3041 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2446 3042
2447 if (rt->rt6i_flags & RTF_CACHE) {
2448 rt = (struct rt6_info *) dst_clone(&rt->dst);
2449 ip6_del_rt(rt);
2450 }
2451
2452out_release:
2453 /* Release the reference taken in
2454 * ip6_rt_cache_alloc()
2455 */
2456 dst_release(&nrt->dst);
2457
2458out: 3043out:
2459 neigh_release(neigh); 3044 neigh_release(neigh);
2460} 3045}
@@ -2511,23 +3096,23 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
2511 if (!table) 3096 if (!table)
2512 return NULL; 3097 return NULL;
2513 3098
2514 read_lock_bh(&table->tb6_lock); 3099 rcu_read_lock();
2515 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0); 3100 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
2516 if (!fn) 3101 if (!fn)
2517 goto out; 3102 goto out;
2518 3103
2519 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 3104 for_each_fib6_node_rt_rcu(fn) {
2520 if (rt->dst.dev->ifindex != ifindex) 3105 if (rt->dst.dev->ifindex != ifindex)
2521 continue; 3106 continue;
2522 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 3107 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2523 continue; 3108 continue;
2524 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 3109 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2525 continue; 3110 continue;
2526 dst_hold(&rt->dst); 3111 ip6_hold_safe(NULL, &rt, false);
2527 break; 3112 break;
2528 } 3113 }
2529out: 3114out:
2530 read_unlock_bh(&table->tb6_lock); 3115 rcu_read_unlock();
2531 return rt; 3116 return rt;
2532} 3117}
2533 3118
@@ -2573,16 +3158,16 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
2573 if (!table) 3158 if (!table)
2574 return NULL; 3159 return NULL;
2575 3160
2576 read_lock_bh(&table->tb6_lock); 3161 rcu_read_lock();
2577 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 3162 for_each_fib6_node_rt_rcu(&table->tb6_root) {
2578 if (dev == rt->dst.dev && 3163 if (dev == rt->dst.dev &&
2579 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 3164 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2580 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 3165 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2581 break; 3166 break;
2582 } 3167 }
2583 if (rt) 3168 if (rt)
2584 dst_hold(&rt->dst); 3169 ip6_hold_safe(NULL, &rt, false);
2585 read_unlock_bh(&table->tb6_lock); 3170 rcu_read_unlock();
2586 return rt; 3171 return rt;
2587} 3172}
2588 3173
@@ -2620,17 +3205,20 @@ static void __rt6_purge_dflt_routers(struct fib6_table *table)
2620 struct rt6_info *rt; 3205 struct rt6_info *rt;
2621 3206
2622restart: 3207restart:
2623 read_lock_bh(&table->tb6_lock); 3208 rcu_read_lock();
2624 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 3209 for_each_fib6_node_rt_rcu(&table->tb6_root) {
2625 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && 3210 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2626 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { 3211 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2627 dst_hold(&rt->dst); 3212 if (dst_hold_safe(&rt->dst)) {
2628 read_unlock_bh(&table->tb6_lock); 3213 rcu_read_unlock();
2629 ip6_del_rt(rt); 3214 ip6_del_rt(rt);
3215 } else {
3216 rcu_read_unlock();
3217 }
2630 goto restart; 3218 goto restart;
2631 } 3219 }
2632 } 3220 }
2633 read_unlock_bh(&table->tb6_lock); 3221 rcu_read_unlock();
2634 3222
2635 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; 3223 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2636} 3224}
@@ -2818,8 +3406,12 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2818 if (((void *)rt->dst.dev == dev || !dev) && 3406 if (((void *)rt->dst.dev == dev || !dev) &&
2819 rt != net->ipv6.ip6_null_entry && 3407 rt != net->ipv6.ip6_null_entry &&
2820 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { 3408 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
3409 spin_lock_bh(&rt6_exception_lock);
2821 /* remove prefsrc entry */ 3410 /* remove prefsrc entry */
2822 rt->rt6i_prefsrc.plen = 0; 3411 rt->rt6i_prefsrc.plen = 0;
3412 /* need to update cache as well */
3413 rt6_exceptions_remove_prefsrc(rt);
3414 spin_unlock_bh(&rt6_exception_lock);
2823 } 3415 }
2824 return 0; 3416 return 0;
2825} 3417}
@@ -2836,18 +3428,23 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2836} 3428}
2837 3429
2838#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) 3430#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2839#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2840 3431
2841/* Remove routers and update dst entries when gateway turn into host. */ 3432/* Remove routers and update dst entries when gateway turn into host. */
2842static int fib6_clean_tohost(struct rt6_info *rt, void *arg) 3433static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2843{ 3434{
2844 struct in6_addr *gateway = (struct in6_addr *)arg; 3435 struct in6_addr *gateway = (struct in6_addr *)arg;
2845 3436
2846 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || 3437 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
2847 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && 3438 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2848 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2849 return -1; 3439 return -1;
2850 } 3440 }
3441
3442 /* Further clean up cached routes in exception table.
3443 * This is needed because cached route may have a different
3444 * gateway than its 'parent' in the case of an ip redirect.
3445 */
3446 rt6_exceptions_clean_tohost(rt, gateway);
3447
2851 return 0; 3448 return 0;
2852} 3449}
2853 3450
@@ -2926,19 +3523,14 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2926 if (rt->dst.dev == arg->dev && 3523 if (rt->dst.dev == arg->dev &&
2927 dst_metric_raw(&rt->dst, RTAX_MTU) && 3524 dst_metric_raw(&rt->dst, RTAX_MTU) &&
2928 !dst_metric_locked(&rt->dst, RTAX_MTU)) { 3525 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2929 if (rt->rt6i_flags & RTF_CACHE) { 3526 spin_lock_bh(&rt6_exception_lock);
2930 /* For RTF_CACHE with rt6i_pmtu == 0 3527 if (dst_mtu(&rt->dst) >= arg->mtu ||
2931 * (i.e. a redirected route), 3528 (dst_mtu(&rt->dst) < arg->mtu &&
2932 * the metrics of its rt->dst.from has already 3529 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2933 * been updated.
2934 */
2935 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2936 rt->rt6i_pmtu = arg->mtu;
2937 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2938 (dst_mtu(&rt->dst) < arg->mtu &&
2939 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2940 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 3530 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2941 } 3531 }
3532 rt6_exceptions_update_pmtu(rt, arg->mtu);
3533 spin_unlock_bh(&rt6_exception_lock);
2942 } 3534 }
2943 return 0; 3535 return 0;
2944} 3536}
@@ -3839,7 +4431,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3839 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 4431 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3840 net->ipv6.rt6_stats->fib_nodes, 4432 net->ipv6.rt6_stats->fib_nodes,
3841 net->ipv6.rt6_stats->fib_route_nodes, 4433 net->ipv6.rt6_stats->fib_route_nodes,
3842 net->ipv6.rt6_stats->fib_rt_alloc, 4434 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
3843 net->ipv6.rt6_stats->fib_rt_entries, 4435 net->ipv6.rt6_stats->fib_rt_entries,
3844 net->ipv6.rt6_stats->fib_rt_cache, 4436 net->ipv6.rt6_stats->fib_rt_cache,
3845 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 4437 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index ac912bb21747..d60ddcb0bfe2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -91,29 +91,35 @@ struct sit_net {
91 * Must be invoked with rcu_read_lock 91 * Must be invoked with rcu_read_lock
92 */ 92 */
93static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, 93static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
94 struct net_device *dev, __be32 remote, __be32 local) 94 struct net_device *dev,
95 __be32 remote, __be32 local,
96 int sifindex)
95{ 97{
96 unsigned int h0 = HASH(remote); 98 unsigned int h0 = HASH(remote);
97 unsigned int h1 = HASH(local); 99 unsigned int h1 = HASH(local);
98 struct ip_tunnel *t; 100 struct ip_tunnel *t;
99 struct sit_net *sitn = net_generic(net, sit_net_id); 101 struct sit_net *sitn = net_generic(net, sit_net_id);
102 int ifindex = dev ? dev->ifindex : 0;
100 103
101 for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) { 104 for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
102 if (local == t->parms.iph.saddr && 105 if (local == t->parms.iph.saddr &&
103 remote == t->parms.iph.daddr && 106 remote == t->parms.iph.daddr &&
104 (!dev || !t->parms.link || dev->ifindex == t->parms.link) && 107 (!dev || !t->parms.link || ifindex == t->parms.link ||
108 sifindex == t->parms.link) &&
105 (t->dev->flags & IFF_UP)) 109 (t->dev->flags & IFF_UP))
106 return t; 110 return t;
107 } 111 }
108 for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) { 112 for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
109 if (remote == t->parms.iph.daddr && 113 if (remote == t->parms.iph.daddr &&
110 (!dev || !t->parms.link || dev->ifindex == t->parms.link) && 114 (!dev || !t->parms.link || ifindex == t->parms.link ||
115 sifindex == t->parms.link) &&
111 (t->dev->flags & IFF_UP)) 116 (t->dev->flags & IFF_UP))
112 return t; 117 return t;
113 } 118 }
114 for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) { 119 for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
115 if (local == t->parms.iph.saddr && 120 if (local == t->parms.iph.saddr &&
116 (!dev || !t->parms.link || dev->ifindex == t->parms.link) && 121 (!dev || !t->parms.link || ifindex == t->parms.link ||
122 sifindex == t->parms.link) &&
117 (t->dev->flags & IFF_UP)) 123 (t->dev->flags & IFF_UP))
118 return t; 124 return t;
119 } 125 }
@@ -486,6 +492,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
486 const int code = icmp_hdr(skb)->code; 492 const int code = icmp_hdr(skb)->code;
487 unsigned int data_len = 0; 493 unsigned int data_len = 0;
488 struct ip_tunnel *t; 494 struct ip_tunnel *t;
495 int sifindex;
489 int err; 496 int err;
490 497
491 switch (type) { 498 switch (type) {
@@ -517,10 +524,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
517 524
518 err = -ENOENT; 525 err = -ENOENT;
519 526
520 t = ipip6_tunnel_lookup(dev_net(skb->dev), 527 sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
521 skb->dev, 528 t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
522 iph->daddr, 529 iph->daddr, iph->saddr, sifindex);
523 iph->saddr);
524 if (!t) 530 if (!t)
525 goto out; 531 goto out;
526 532
@@ -633,10 +639,12 @@ static int ipip6_rcv(struct sk_buff *skb)
633{ 639{
634 const struct iphdr *iph = ip_hdr(skb); 640 const struct iphdr *iph = ip_hdr(skb);
635 struct ip_tunnel *tunnel; 641 struct ip_tunnel *tunnel;
642 int sifindex;
636 int err; 643 int err;
637 644
645 sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
638 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, 646 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
639 iph->saddr, iph->daddr); 647 iph->saddr, iph->daddr, sifindex);
640 if (tunnel) { 648 if (tunnel) {
641 struct pcpu_sw_netstats *tstats; 649 struct pcpu_sw_netstats *tstats;
642 650
@@ -704,10 +712,13 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
704{ 712{
705 const struct iphdr *iph; 713 const struct iphdr *iph;
706 struct ip_tunnel *tunnel; 714 struct ip_tunnel *tunnel;
715 int sifindex;
716
717 sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
707 718
708 iph = ip_hdr(skb); 719 iph = ip_hdr(skb);
709 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, 720 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
710 iph->saddr, iph->daddr); 721 iph->saddr, iph->daddr, sifindex);
711 if (tunnel) { 722 if (tunnel) {
712 const struct tnl_ptk_info *tpi; 723 const struct tnl_ptk_info *tpi;
713 724
@@ -1848,19 +1859,22 @@ err_alloc_dev:
1848 return err; 1859 return err;
1849} 1860}
1850 1861
1851static void __net_exit sit_exit_net(struct net *net) 1862static void __net_exit sit_exit_batch_net(struct list_head *net_list)
1852{ 1863{
1853 LIST_HEAD(list); 1864 LIST_HEAD(list);
1865 struct net *net;
1854 1866
1855 rtnl_lock(); 1867 rtnl_lock();
1856 sit_destroy_tunnels(net, &list); 1868 list_for_each_entry(net, net_list, exit_list)
1869 sit_destroy_tunnels(net, &list);
1870
1857 unregister_netdevice_many(&list); 1871 unregister_netdevice_many(&list);
1858 rtnl_unlock(); 1872 rtnl_unlock();
1859} 1873}
1860 1874
1861static struct pernet_operations sit_net_ops = { 1875static struct pernet_operations sit_net_ops = {
1862 .init = sit_init_net, 1876 .init = sit_init_net,
1863 .exit = sit_exit_net, 1877 .exit_batch = sit_exit_batch_net,
1864 .id = &sit_net_id, 1878 .id = &sit_net_id,
1865 .size = sizeof(struct sit_net), 1879 .size = sizeof(struct sit_net),
1866}; 1880};
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 4e7817abc0b9..e7a3a6b6cf56 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -244,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
244 } 244 }
245 245
246 req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); 246 req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
247 tcp_select_initial_window(tcp_full_space(sk), req->mss, 247 tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
248 &req->rsk_rcv_wnd, &req->rsk_window_clamp, 248 &req->rsk_rcv_wnd, &req->rsk_window_clamp,
249 ireq->wscale_ok, &rcv_wscale, 249 ireq->wscale_ok, &rcv_wscale,
250 dst_metric(dst, RTAX_INITRWND)); 250 dst_metric(dst, RTAX_INITRWND));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index f7051ba5b8af..a789a8ac6a64 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -98,6 +98,34 @@ static struct ctl_table ipv6_table_template[] = {
98 .mode = 0644, 98 .mode = 0644,
99 .proc_handler = proc_dointvec, 99 .proc_handler = proc_dointvec,
100 }, 100 },
101 {
102 .procname = "max_dst_opts_number",
103 .data = &init_net.ipv6.sysctl.max_dst_opts_cnt,
104 .maxlen = sizeof(int),
105 .mode = 0644,
106 .proc_handler = proc_dointvec
107 },
108 {
109 .procname = "max_hbh_opts_number",
110 .data = &init_net.ipv6.sysctl.max_hbh_opts_cnt,
111 .maxlen = sizeof(int),
112 .mode = 0644,
113 .proc_handler = proc_dointvec
114 },
115 {
116 .procname = "max_dst_opts_length",
117 .data = &init_net.ipv6.sysctl.max_dst_opts_len,
118 .maxlen = sizeof(int),
119 .mode = 0644,
120 .proc_handler = proc_dointvec
121 },
122 {
123 .procname = "max_hbh_length",
124 .data = &init_net.ipv6.sysctl.max_hbh_opts_len,
125 .maxlen = sizeof(int),
126 .mode = 0644,
127 .proc_handler = proc_dointvec
128 },
101 { } 129 { }
102}; 130};
103 131
@@ -158,6 +186,10 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
158 ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges; 186 ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
159 ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind; 187 ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
160 ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect; 188 ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect;
189 ipv6_table[10].data = &net->ipv6.sysctl.max_dst_opts_cnt;
190 ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
191 ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
192 ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
161 193
162 ipv6_route_table = ipv6_route_sysctl_init(net); 194 ipv6_route_table = ipv6_route_sysctl_init(net);
163 if (!ipv6_route_table) 195 if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 64d94afa427f..6bb98c93edfe 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -69,6 +69,8 @@
69#include <crypto/hash.h> 69#include <crypto/hash.h>
70#include <linux/scatterlist.h> 70#include <linux/scatterlist.h>
71 71
72#include <trace/events/tcp.h>
73
72static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); 74static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
73static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 75static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
74 struct request_sock *req); 76 struct request_sock *req);
@@ -890,7 +892,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
890 int genhash; 892 int genhash;
891 struct sock *sk1 = NULL; 893 struct sock *sk1 = NULL;
892#endif 894#endif
893 int oif; 895 int oif = 0;
894 896
895 if (th->rst) 897 if (th->rst)
896 return; 898 return;
@@ -939,7 +941,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
939 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 941 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
940 (th->doff << 2); 942 (th->doff << 2);
941 943
942 oif = sk ? sk->sk_bound_dev_if : 0; 944 if (sk) {
945 oif = sk->sk_bound_dev_if;
946 trace_tcp_send_reset(sk, skb);
947 }
948
943 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); 949 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
944 950
945#ifdef CONFIG_TCP_MD5SIG 951#ifdef CONFIG_TCP_MD5SIG
@@ -1577,8 +1583,9 @@ do_time_wait:
1577 refcounted = false; 1583 refcounted = false;
1578 goto process; 1584 goto process;
1579 } 1585 }
1580 /* Fall through to ACK */
1581 } 1586 }
1587 /* to ACK */
1588 /* fall through */
1582 case TCP_TW_ACK: 1589 case TCP_TW_ACK:
1583 tcp_v6_timewait_ack(sk, skb); 1590 tcp_v6_timewait_ack(sk, skb);
1584 break; 1591 break;
@@ -1933,8 +1940,8 @@ struct proto tcpv6_prot = {
1933 .memory_pressure = &tcp_memory_pressure, 1940 .memory_pressure = &tcp_memory_pressure,
1934 .orphan_count = &tcp_orphan_count, 1941 .orphan_count = &tcp_orphan_count,
1935 .sysctl_mem = sysctl_tcp_mem, 1942 .sysctl_mem = sysctl_tcp_mem,
1936 .sysctl_wmem = sysctl_tcp_wmem, 1943 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
1937 .sysctl_rmem = sysctl_tcp_rmem, 1944 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
1938 .max_header = MAX_TCP_HEADER, 1945 .max_header = MAX_TCP_HEADER,
1939 .obj_size = sizeof(struct tcp6_sock), 1946 .obj_size = sizeof(struct tcp6_sock),
1940 .slab_flags = SLAB_TYPESAFE_BY_RCU, 1947 .slab_flags = SLAB_TYPESAFE_BY_RCU,
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 17e95a0386b3..885ade234a49 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -153,6 +153,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
153 switch (nexthdr) { 153 switch (nexthdr) {
154 case NEXTHDR_FRAGMENT: 154 case NEXTHDR_FRAGMENT:
155 onlyproto = 1; 155 onlyproto = 1;
156 /* fall through */
156 case NEXTHDR_ROUTING: 157 case NEXTHDR_ROUTING:
157 case NEXTHDR_HOP: 158 case NEXTHDR_HOP:
158 case NEXTHDR_DEST: 159 case NEXTHDR_DEST:
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 4e438bc7ee87..f85f0d7480ac 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -338,6 +338,14 @@ static int __net_init xfrm6_tunnel_net_init(struct net *net)
338 338
339static void __net_exit xfrm6_tunnel_net_exit(struct net *net) 339static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
340{ 340{
341 struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
342 unsigned int i;
343
344 for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
345 WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
346
347 for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++)
348 WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byspi[i]));
341} 349}
342 350
343static struct pernet_operations xfrm6_tunnel_net_ops = { 351static struct pernet_operations xfrm6_tunnel_net_ops = {
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index ac598ec90589..d21a9d128d3e 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1867,6 +1867,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1867 rc = -EPERM; 1867 rc = -EPERM;
1868 if (!capable(CAP_NET_ADMIN)) 1868 if (!capable(CAP_NET_ADMIN))
1869 break; 1869 break;
1870 /* fall through */
1870 case SIOCGIFADDR: 1871 case SIOCGIFADDR:
1871 rc = ipxitf_ioctl(cmd, argp); 1872 rc = ipxitf_ioctl(cmd, argp);
1872 break; 1873 break;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index af4e76ac88ff..0b750a22c4b9 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1650,7 +1650,7 @@ static int kcm_clone(struct socket *osock, struct kcm_clone *info,
1650 } 1650 }
1651 1651
1652 newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); 1652 newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
1653 if (unlikely(IS_ERR(newfile))) { 1653 if (IS_ERR(newfile)) {
1654 err = PTR_ERR(newfile); 1654 err = PTR_ERR(newfile);
1655 goto out_sock_alloc_fail; 1655 goto out_sock_alloc_fail;
1656 } 1656 }
diff --git a/net/key/af_key.c b/net/key/af_key.c
index a00d607e7224..3dffb892d52c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3845,7 +3845,7 @@ static void __net_exit pfkey_net_exit(struct net *net)
3845 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); 3845 struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
3846 3846
3847 pfkey_exit_proc(net); 3847 pfkey_exit_proc(net);
3848 BUG_ON(!hlist_empty(&net_pfkey->table)); 3848 WARN_ON(!hlist_empty(&net_pfkey->table));
3849} 3849}
3850 3850
3851static struct pernet_operations pfkey_net_ops = { 3851static struct pernet_operations pfkey_net_ops = {
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 02d61101b108..115918ad8eca 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -100,8 +100,6 @@ struct l2tp_skb_cb {
100 100
101#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)]) 101#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
102 102
103static atomic_t l2tp_tunnel_count;
104static atomic_t l2tp_session_count;
105static struct workqueue_struct *l2tp_wq; 103static struct workqueue_struct *l2tp_wq;
106 104
107/* per-net private data for this module */ 105/* per-net private data for this module */
@@ -216,12 +214,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
216} 214}
217EXPORT_SYMBOL_GPL(l2tp_tunnel_get); 215EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
218 216
219/* Lookup a session. A new reference is held on the returned session. 217/* Lookup a session. A new reference is held on the returned session. */
220 * Optionally calls session->ref() too if do_ref is true.
221 */
222struct l2tp_session *l2tp_session_get(const struct net *net, 218struct l2tp_session *l2tp_session_get(const struct net *net,
223 struct l2tp_tunnel *tunnel, 219 struct l2tp_tunnel *tunnel,
224 u32 session_id, bool do_ref) 220 u32 session_id)
225{ 221{
226 struct hlist_head *session_list; 222 struct hlist_head *session_list;
227 struct l2tp_session *session; 223 struct l2tp_session *session;
@@ -235,8 +231,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
235 hlist_for_each_entry_rcu(session, session_list, global_hlist) { 231 hlist_for_each_entry_rcu(session, session_list, global_hlist) {
236 if (session->session_id == session_id) { 232 if (session->session_id == session_id) {
237 l2tp_session_inc_refcount(session); 233 l2tp_session_inc_refcount(session);
238 if (do_ref && session->ref)
239 session->ref(session);
240 rcu_read_unlock_bh(); 234 rcu_read_unlock_bh();
241 235
242 return session; 236 return session;
@@ -252,8 +246,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
252 hlist_for_each_entry(session, session_list, hlist) { 246 hlist_for_each_entry(session, session_list, hlist) {
253 if (session->session_id == session_id) { 247 if (session->session_id == session_id) {
254 l2tp_session_inc_refcount(session); 248 l2tp_session_inc_refcount(session);
255 if (do_ref && session->ref)
256 session->ref(session);
257 read_unlock_bh(&tunnel->hlist_lock); 249 read_unlock_bh(&tunnel->hlist_lock);
258 250
259 return session; 251 return session;
@@ -265,8 +257,7 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
265} 257}
266EXPORT_SYMBOL_GPL(l2tp_session_get); 258EXPORT_SYMBOL_GPL(l2tp_session_get);
267 259
268struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, 260struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth)
269 bool do_ref)
270{ 261{
271 int hash; 262 int hash;
272 struct l2tp_session *session; 263 struct l2tp_session *session;
@@ -277,8 +268,6 @@ struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
277 hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { 268 hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
278 if (++count > nth) { 269 if (++count > nth) {
279 l2tp_session_inc_refcount(session); 270 l2tp_session_inc_refcount(session);
280 if (do_ref && session->ref)
281 session->ref(session);
282 read_unlock_bh(&tunnel->hlist_lock); 271 read_unlock_bh(&tunnel->hlist_lock);
283 return session; 272 return session;
284 } 273 }
@@ -295,8 +284,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
295 * This is very inefficient but is only used by management interfaces. 284 * This is very inefficient but is only used by management interfaces.
296 */ 285 */
297struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, 286struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
298 const char *ifname, 287 const char *ifname)
299 bool do_ref)
300{ 288{
301 struct l2tp_net *pn = l2tp_pernet(net); 289 struct l2tp_net *pn = l2tp_pernet(net);
302 int hash; 290 int hash;
@@ -307,8 +295,6 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
307 hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { 295 hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
308 if (!strcmp(session->ifname, ifname)) { 296 if (!strcmp(session->ifname, ifname)) {
309 l2tp_session_inc_refcount(session); 297 l2tp_session_inc_refcount(session);
310 if (do_ref && session->ref)
311 session->ref(session);
312 rcu_read_unlock_bh(); 298 rcu_read_unlock_bh();
313 299
314 return session; 300 return session;
@@ -322,8 +308,8 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
322} 308}
323EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); 309EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
324 310
325static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, 311int l2tp_session_register(struct l2tp_session *session,
326 struct l2tp_session *session) 312 struct l2tp_tunnel *tunnel)
327{ 313{
328 struct l2tp_session *session_walk; 314 struct l2tp_session *session_walk;
329 struct hlist_head *g_head; 315 struct hlist_head *g_head;
@@ -380,6 +366,7 @@ err_tlock:
380 366
381 return err; 367 return err;
382} 368}
369EXPORT_SYMBOL_GPL(l2tp_session_register);
383 370
384/* Lookup a tunnel by id 371/* Lookup a tunnel by id
385 */ 372 */
@@ -484,9 +471,6 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
484 (*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length); 471 (*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length);
485 else 472 else
486 kfree_skb(skb); 473 kfree_skb(skb);
487
488 if (session->deref)
489 (*session->deref)(session);
490} 474}
491 475
492/* Dequeue skbs from the session's reorder_q, subject to packet order. 476/* Dequeue skbs from the session's reorder_q, subject to packet order.
@@ -515,8 +499,6 @@ start:
515 session->reorder_skip = 1; 499 session->reorder_skip = 1;
516 __skb_unlink(skb, &session->reorder_q); 500 __skb_unlink(skb, &session->reorder_q);
517 kfree_skb(skb); 501 kfree_skb(skb);
518 if (session->deref)
519 (*session->deref)(session);
520 continue; 502 continue;
521 } 503 }
522 504
@@ -689,9 +671,6 @@ discard:
689 * a data (not control) frame before coming here. Fields up to the 671 * a data (not control) frame before coming here. Fields up to the
690 * session-id have already been parsed and ptr points to the data 672 * session-id have already been parsed and ptr points to the data
691 * after the session-id. 673 * after the session-id.
692 *
693 * session->ref() must have been called prior to l2tp_recv_common().
694 * session->deref() will be called automatically after skb is processed.
695 */ 674 */
696void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, 675void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
697 unsigned char *ptr, unsigned char *optr, u16 hdrflags, 676 unsigned char *ptr, unsigned char *optr, u16 hdrflags,
@@ -858,9 +837,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
858discard: 837discard:
859 atomic_long_inc(&session->stats.rx_errors); 838 atomic_long_inc(&session->stats.rx_errors);
860 kfree_skb(skb); 839 kfree_skb(skb);
861
862 if (session->deref)
863 (*session->deref)(session);
864} 840}
865EXPORT_SYMBOL(l2tp_recv_common); 841EXPORT_SYMBOL(l2tp_recv_common);
866 842
@@ -874,8 +850,6 @@ int l2tp_session_queue_purge(struct l2tp_session *session)
874 while ((skb = skb_dequeue(&session->reorder_q))) { 850 while ((skb = skb_dequeue(&session->reorder_q))) {
875 atomic_long_inc(&session->stats.rx_errors); 851 atomic_long_inc(&session->stats.rx_errors);
876 kfree_skb(skb); 852 kfree_skb(skb);
877 if (session->deref)
878 (*session->deref)(session);
879 } 853 }
880 return 0; 854 return 0;
881} 855}
@@ -967,13 +941,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
967 } 941 }
968 942
969 /* Find the session context */ 943 /* Find the session context */
970 session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true); 944 session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id);
971 if (!session || !session->recv_skb) { 945 if (!session || !session->recv_skb) {
972 if (session) { 946 if (session)
973 if (session->deref)
974 session->deref(session);
975 l2tp_session_dec_refcount(session); 947 l2tp_session_dec_refcount(session);
976 }
977 948
978 /* Not found? Pass to userspace to deal with */ 949 /* Not found? Pass to userspace to deal with */
979 l2tp_info(tunnel, L2TP_MSG_DATA, 950 l2tp_info(tunnel, L2TP_MSG_DATA,
@@ -1274,9 +1245,6 @@ static void l2tp_tunnel_destruct(struct sock *sk)
1274 spin_lock_bh(&pn->l2tp_tunnel_list_lock); 1245 spin_lock_bh(&pn->l2tp_tunnel_list_lock);
1275 list_del_rcu(&tunnel->list); 1246 list_del_rcu(&tunnel->list);
1276 spin_unlock_bh(&pn->l2tp_tunnel_list_lock); 1247 spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
1277 atomic_dec(&l2tp_tunnel_count);
1278
1279 l2tp_tunnel_closeall(tunnel);
1280 1248
1281 tunnel->sock = NULL; 1249 tunnel->sock = NULL;
1282 l2tp_tunnel_dec_refcount(tunnel); 1250 l2tp_tunnel_dec_refcount(tunnel);
@@ -1317,9 +1285,6 @@ again:
1317 if (test_and_set_bit(0, &session->dead)) 1285 if (test_and_set_bit(0, &session->dead))
1318 goto again; 1286 goto again;
1319 1287
1320 if (session->ref != NULL)
1321 (*session->ref)(session);
1322
1323 write_unlock_bh(&tunnel->hlist_lock); 1288 write_unlock_bh(&tunnel->hlist_lock);
1324 1289
1325 __l2tp_session_unhash(session); 1290 __l2tp_session_unhash(session);
@@ -1328,9 +1293,6 @@ again:
1328 if (session->session_close != NULL) 1293 if (session->session_close != NULL)
1329 (*session->session_close)(session); 1294 (*session->session_close)(session);
1330 1295
1331 if (session->deref != NULL)
1332 (*session->deref)(session);
1333
1334 l2tp_session_dec_refcount(session); 1296 l2tp_session_dec_refcount(session);
1335 1297
1336 write_lock_bh(&tunnel->hlist_lock); 1298 write_lock_bh(&tunnel->hlist_lock);
@@ -1661,7 +1623,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
1661 1623
1662 /* Add tunnel to our list */ 1624 /* Add tunnel to our list */
1663 INIT_LIST_HEAD(&tunnel->list); 1625 INIT_LIST_HEAD(&tunnel->list);
1664 atomic_inc(&l2tp_tunnel_count);
1665 1626
1666 /* Bump the reference count. The tunnel context is deleted 1627 /* Bump the reference count. The tunnel context is deleted
1667 * only when this drops to zero. Must be done before list insertion 1628 * only when this drops to zero. Must be done before list insertion
@@ -1707,8 +1668,6 @@ void l2tp_session_free(struct l2tp_session *session)
1707 1668
1708 if (tunnel) { 1669 if (tunnel) {
1709 BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); 1670 BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
1710 if (session->session_id != 0)
1711 atomic_dec(&l2tp_session_count);
1712 sock_put(tunnel->sock); 1671 sock_put(tunnel->sock);
1713 session->tunnel = NULL; 1672 session->tunnel = NULL;
1714 l2tp_tunnel_dec_refcount(tunnel); 1673 l2tp_tunnel_dec_refcount(tunnel);
@@ -1754,15 +1713,13 @@ int l2tp_session_delete(struct l2tp_session *session)
1754 if (test_and_set_bit(0, &session->dead)) 1713 if (test_and_set_bit(0, &session->dead))
1755 return 0; 1714 return 0;
1756 1715
1757 if (session->ref)
1758 (*session->ref)(session);
1759 __l2tp_session_unhash(session); 1716 __l2tp_session_unhash(session);
1760 l2tp_session_queue_purge(session); 1717 l2tp_session_queue_purge(session);
1761 if (session->session_close != NULL) 1718 if (session->session_close != NULL)
1762 (*session->session_close)(session); 1719 (*session->session_close)(session);
1763 if (session->deref) 1720
1764 (*session->deref)(session);
1765 l2tp_session_dec_refcount(session); 1721 l2tp_session_dec_refcount(session);
1722
1766 return 0; 1723 return 0;
1767} 1724}
1768EXPORT_SYMBOL_GPL(l2tp_session_delete); 1725EXPORT_SYMBOL_GPL(l2tp_session_delete);
@@ -1788,7 +1745,6 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
1788struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) 1745struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
1789{ 1746{
1790 struct l2tp_session *session; 1747 struct l2tp_session *session;
1791 int err;
1792 1748
1793 session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); 1749 session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
1794 if (session != NULL) { 1750 if (session != NULL) {
@@ -1846,17 +1802,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
1846 1802
1847 refcount_set(&session->ref_count, 1); 1803 refcount_set(&session->ref_count, 1);
1848 1804
1849 err = l2tp_session_add_to_tunnel(tunnel, session);
1850 if (err) {
1851 kfree(session);
1852
1853 return ERR_PTR(err);
1854 }
1855
1856 /* Ignore management session in session count value */
1857 if (session->session_id != 0)
1858 atomic_inc(&l2tp_session_count);
1859
1860 return session; 1805 return session;
1861 } 1806 }
1862 1807
@@ -1888,15 +1833,19 @@ static __net_exit void l2tp_exit_net(struct net *net)
1888{ 1833{
1889 struct l2tp_net *pn = l2tp_pernet(net); 1834 struct l2tp_net *pn = l2tp_pernet(net);
1890 struct l2tp_tunnel *tunnel = NULL; 1835 struct l2tp_tunnel *tunnel = NULL;
1836 int hash;
1891 1837
1892 rcu_read_lock_bh(); 1838 rcu_read_lock_bh();
1893 list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { 1839 list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
1894 (void)l2tp_tunnel_delete(tunnel); 1840 l2tp_tunnel_delete(tunnel);
1895 } 1841 }
1896 rcu_read_unlock_bh(); 1842 rcu_read_unlock_bh();
1897 1843
1898 flush_workqueue(l2tp_wq); 1844 flush_workqueue(l2tp_wq);
1899 rcu_barrier(); 1845 rcu_barrier();
1846
1847 for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
1848 WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash]));
1900} 1849}
1901 1850
1902static struct pernet_operations l2tp_net_ops = { 1851static struct pernet_operations l2tp_net_ops = {
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 67c79d9b5c6c..9534e16965cc 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -129,8 +129,6 @@ struct l2tp_session {
129 int (*build_header)(struct l2tp_session *session, void *buf); 129 int (*build_header)(struct l2tp_session *session, void *buf);
130 void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len); 130 void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
131 void (*session_close)(struct l2tp_session *session); 131 void (*session_close)(struct l2tp_session *session);
132 void (*ref)(struct l2tp_session *session);
133 void (*deref)(struct l2tp_session *session);
134#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) 132#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
135 void (*show)(struct seq_file *m, void *priv); 133 void (*show)(struct seq_file *m, void *priv);
136#endif 134#endif
@@ -245,12 +243,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
245 243
246struct l2tp_session *l2tp_session_get(const struct net *net, 244struct l2tp_session *l2tp_session_get(const struct net *net,
247 struct l2tp_tunnel *tunnel, 245 struct l2tp_tunnel *tunnel,
248 u32 session_id, bool do_ref); 246 u32 session_id);
249struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, 247struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
250 bool do_ref);
251struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, 248struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
252 const char *ifname, 249 const char *ifname);
253 bool do_ref);
254struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id); 250struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id);
255struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth); 251struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth);
256 252
@@ -263,6 +259,9 @@ struct l2tp_session *l2tp_session_create(int priv_size,
263 struct l2tp_tunnel *tunnel, 259 struct l2tp_tunnel *tunnel,
264 u32 session_id, u32 peer_session_id, 260 u32 session_id, u32 peer_session_id,
265 struct l2tp_session_cfg *cfg); 261 struct l2tp_session_cfg *cfg);
262int l2tp_session_register(struct l2tp_session *session,
263 struct l2tp_tunnel *tunnel);
264
266void __l2tp_session_unhash(struct l2tp_session *session); 265void __l2tp_session_unhash(struct l2tp_session *session);
267int l2tp_session_delete(struct l2tp_session *session); 266int l2tp_session_delete(struct l2tp_session *session);
268void l2tp_session_free(struct l2tp_session *session); 267void l2tp_session_free(struct l2tp_session *session);
@@ -295,37 +294,17 @@ static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
295/* Session reference counts. Incremented when code obtains a reference 294/* Session reference counts. Incremented when code obtains a reference
296 * to a session. 295 * to a session.
297 */ 296 */
298static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session) 297static inline void l2tp_session_inc_refcount(struct l2tp_session *session)
299{ 298{
300 refcount_inc(&session->ref_count); 299 refcount_inc(&session->ref_count);
301} 300}
302 301
303static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session) 302static inline void l2tp_session_dec_refcount(struct l2tp_session *session)
304{ 303{
305 if (refcount_dec_and_test(&session->ref_count)) 304 if (refcount_dec_and_test(&session->ref_count))
306 l2tp_session_free(session); 305 l2tp_session_free(session);
307} 306}
308 307
309#ifdef L2TP_REFCNT_DEBUG
310#define l2tp_session_inc_refcount(_s) \
311do { \
312 pr_debug("l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n", \
313 __func__, __LINE__, (_s)->name, \
314 refcount_read(&_s->ref_count)); \
315 l2tp_session_inc_refcount_1(_s); \
316} while (0)
317#define l2tp_session_dec_refcount(_s) \
318do { \
319 pr_debug("l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n", \
320 __func__, __LINE__, (_s)->name, \
321 refcount_read(&_s->ref_count)); \
322 l2tp_session_dec_refcount_1(_s); \
323} while (0)
324#else
325#define l2tp_session_inc_refcount(s) l2tp_session_inc_refcount_1(s)
326#define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s)
327#endif
328
329#define l2tp_printk(ptr, type, func, fmt, ...) \ 308#define l2tp_printk(ptr, type, func, fmt, ...) \
330do { \ 309do { \
331 if (((ptr)->debug) & (type)) \ 310 if (((ptr)->debug) & (type)) \
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 53bae54c4d6e..eb69411bcb47 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
53 53
54static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) 54static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
55{ 55{
56 pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); 56 pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
57 pd->session_idx++; 57 pd->session_idx++;
58 58
59 if (pd->session == NULL) { 59 if (pd->session == NULL) {
@@ -241,8 +241,6 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
241 l2tp_dfs_seq_tunnel_show(m, pd->tunnel); 241 l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
242 } else { 242 } else {
243 l2tp_dfs_seq_session_show(m, pd->session); 243 l2tp_dfs_seq_session_show(m, pd->session);
244 if (pd->session->deref)
245 pd->session->deref(pd->session);
246 l2tp_session_dec_refcount(pd->session); 244 l2tp_session_dec_refcount(pd->session);
247 } 245 }
248 246
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 014a7bc2a872..5c366ecfa1cb 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -41,8 +41,6 @@
41 41
42/* via netdev_priv() */ 42/* via netdev_priv() */
43struct l2tp_eth { 43struct l2tp_eth {
44 struct net_device *dev;
45 struct sock *tunnel_sock;
46 struct l2tp_session *session; 44 struct l2tp_session *session;
47 atomic_long_t tx_bytes; 45 atomic_long_t tx_bytes;
48 atomic_long_t tx_packets; 46 atomic_long_t tx_packets;
@@ -54,15 +52,12 @@ struct l2tp_eth {
54 52
55/* via l2tp_session_priv() */ 53/* via l2tp_session_priv() */
56struct l2tp_eth_sess { 54struct l2tp_eth_sess {
57 struct net_device *dev; 55 struct net_device __rcu *dev;
58}; 56};
59 57
60 58
61static int l2tp_eth_dev_init(struct net_device *dev) 59static int l2tp_eth_dev_init(struct net_device *dev)
62{ 60{
63 struct l2tp_eth *priv = netdev_priv(dev);
64
65 priv->dev = dev;
66 eth_hw_addr_random(dev); 61 eth_hw_addr_random(dev);
67 eth_broadcast_addr(dev->broadcast); 62 eth_broadcast_addr(dev->broadcast);
68 netdev_lockdep_set_classes(dev); 63 netdev_lockdep_set_classes(dev);
@@ -72,7 +67,14 @@ static int l2tp_eth_dev_init(struct net_device *dev)
72 67
73static void l2tp_eth_dev_uninit(struct net_device *dev) 68static void l2tp_eth_dev_uninit(struct net_device *dev)
74{ 69{
75 dev_put(dev); 70 struct l2tp_eth *priv = netdev_priv(dev);
71 struct l2tp_eth_sess *spriv;
72
73 spriv = l2tp_session_priv(priv->session);
74 RCU_INIT_POINTER(spriv->dev, NULL);
75 /* No need for synchronize_net() here. We're called by
76 * unregister_netdev*(), which does the synchronisation for us.
77 */
76} 78}
77 79
78static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev) 80static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -130,8 +132,8 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
130static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len) 132static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
131{ 133{
132 struct l2tp_eth_sess *spriv = l2tp_session_priv(session); 134 struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
133 struct net_device *dev = spriv->dev; 135 struct net_device *dev;
134 struct l2tp_eth *priv = netdev_priv(dev); 136 struct l2tp_eth *priv;
135 137
136 if (session->debug & L2TP_MSG_DATA) { 138 if (session->debug & L2TP_MSG_DATA) {
137 unsigned int length; 139 unsigned int length;
@@ -155,16 +157,25 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
155 skb_dst_drop(skb); 157 skb_dst_drop(skb);
156 nf_reset(skb); 158 nf_reset(skb);
157 159
160 rcu_read_lock();
161 dev = rcu_dereference(spriv->dev);
162 if (!dev)
163 goto error_rcu;
164
165 priv = netdev_priv(dev);
158 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { 166 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
159 atomic_long_inc(&priv->rx_packets); 167 atomic_long_inc(&priv->rx_packets);
160 atomic_long_add(data_len, &priv->rx_bytes); 168 atomic_long_add(data_len, &priv->rx_bytes);
161 } else { 169 } else {
162 atomic_long_inc(&priv->rx_errors); 170 atomic_long_inc(&priv->rx_errors);
163 } 171 }
172 rcu_read_unlock();
173
164 return; 174 return;
165 175
176error_rcu:
177 rcu_read_unlock();
166error: 178error:
167 atomic_long_inc(&priv->rx_errors);
168 kfree_skb(skb); 179 kfree_skb(skb);
169} 180}
170 181
@@ -175,11 +186,15 @@ static void l2tp_eth_delete(struct l2tp_session *session)
175 186
176 if (session) { 187 if (session) {
177 spriv = l2tp_session_priv(session); 188 spriv = l2tp_session_priv(session);
178 dev = spriv->dev; 189
190 rtnl_lock();
191 dev = rtnl_dereference(spriv->dev);
179 if (dev) { 192 if (dev) {
180 unregister_netdev(dev); 193 unregister_netdevice(dev);
181 spriv->dev = NULL; 194 rtnl_unlock();
182 module_put(THIS_MODULE); 195 module_put(THIS_MODULE);
196 } else {
197 rtnl_unlock();
183 } 198 }
184 } 199 }
185} 200}
@@ -189,9 +204,20 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
189{ 204{
190 struct l2tp_session *session = arg; 205 struct l2tp_session *session = arg;
191 struct l2tp_eth_sess *spriv = l2tp_session_priv(session); 206 struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
192 struct net_device *dev = spriv->dev; 207 struct net_device *dev;
208
209 rcu_read_lock();
210 dev = rcu_dereference(spriv->dev);
211 if (!dev) {
212 rcu_read_unlock();
213 return;
214 }
215 dev_hold(dev);
216 rcu_read_unlock();
193 217
194 seq_printf(m, " interface %s\n", dev->name); 218 seq_printf(m, " interface %s\n", dev->name);
219
220 dev_put(dev);
195} 221}
196#endif 222#endif
197 223
@@ -268,14 +294,14 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
268 peer_session_id, cfg); 294 peer_session_id, cfg);
269 if (IS_ERR(session)) { 295 if (IS_ERR(session)) {
270 rc = PTR_ERR(session); 296 rc = PTR_ERR(session);
271 goto out; 297 goto err;
272 } 298 }
273 299
274 dev = alloc_netdev(sizeof(*priv), name, name_assign_type, 300 dev = alloc_netdev(sizeof(*priv), name, name_assign_type,
275 l2tp_eth_dev_setup); 301 l2tp_eth_dev_setup);
276 if (!dev) { 302 if (!dev) {
277 rc = -ENOMEM; 303 rc = -ENOMEM;
278 goto out_del_session; 304 goto err_sess;
279 } 305 }
280 306
281 dev_net_set(dev, net); 307 dev_net_set(dev, net);
@@ -284,10 +310,8 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
284 l2tp_eth_adjust_mtu(tunnel, session, dev); 310 l2tp_eth_adjust_mtu(tunnel, session, dev);
285 311
286 priv = netdev_priv(dev); 312 priv = netdev_priv(dev);
287 priv->dev = dev;
288 priv->session = session; 313 priv->session = session;
289 314
290 priv->tunnel_sock = tunnel->sock;
291 session->recv_skb = l2tp_eth_dev_recv; 315 session->recv_skb = l2tp_eth_dev_recv;
292 session->session_close = l2tp_eth_delete; 316 session->session_close = l2tp_eth_delete;
293#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) 317#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
@@ -295,26 +319,48 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
295#endif 319#endif
296 320
297 spriv = l2tp_session_priv(session); 321 spriv = l2tp_session_priv(session);
298 spriv->dev = dev;
299 322
300 rc = register_netdev(dev); 323 l2tp_session_inc_refcount(session);
301 if (rc < 0) 324
302 goto out_del_dev; 325 rtnl_lock();
326
327 /* Register both device and session while holding the rtnl lock. This
328 * ensures that l2tp_eth_delete() will see that there's a device to
329 * unregister, even if it happened to run before we assign spriv->dev.
330 */
331 rc = l2tp_session_register(session, tunnel);
332 if (rc < 0) {
333 rtnl_unlock();
334 goto err_sess_dev;
335 }
336
337 rc = register_netdevice(dev);
338 if (rc < 0) {
339 rtnl_unlock();
340 l2tp_session_delete(session);
341 l2tp_session_dec_refcount(session);
342 free_netdev(dev);
343
344 return rc;
345 }
303 346
304 __module_get(THIS_MODULE);
305 /* Must be done after register_netdev() */
306 strlcpy(session->ifname, dev->name, IFNAMSIZ); 347 strlcpy(session->ifname, dev->name, IFNAMSIZ);
348 rcu_assign_pointer(spriv->dev, dev);
307 349
308 dev_hold(dev); 350 rtnl_unlock();
351
352 l2tp_session_dec_refcount(session);
353
354 __module_get(THIS_MODULE);
309 355
310 return 0; 356 return 0;
311 357
312out_del_dev: 358err_sess_dev:
359 l2tp_session_dec_refcount(session);
313 free_netdev(dev); 360 free_netdev(dev);
314 spriv->dev = NULL; 361err_sess:
315out_del_session: 362 kfree(session);
316 l2tp_session_delete(session); 363err:
317out:
318 return rc; 364 return rc;
319} 365}
320 366
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index e4280b6568b4..ff61124fdf59 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -144,7 +144,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
144 } 144 }
145 145
146 /* Ok, this is a data packet. Lookup the session. */ 146 /* Ok, this is a data packet. Lookup the session. */
147 session = l2tp_session_get(net, NULL, session_id, true); 147 session = l2tp_session_get(net, NULL, session_id);
148 if (!session) 148 if (!session)
149 goto discard; 149 goto discard;
150 150
@@ -199,8 +199,6 @@ pass_up:
199 return sk_receive_skb(sk, skb, 1); 199 return sk_receive_skb(sk, skb, 1);
200 200
201discard_sess: 201discard_sess:
202 if (session->deref)
203 session->deref(session);
204 l2tp_session_dec_refcount(session); 202 l2tp_session_dec_refcount(session);
205 goto discard; 203 goto discard;
206 204
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 8bcaa975b432..192344688c06 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -157,7 +157,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
157 } 157 }
158 158
159 /* Ok, this is a data packet. Lookup the session. */ 159 /* Ok, this is a data packet. Lookup the session. */
160 session = l2tp_session_get(net, NULL, session_id, true); 160 session = l2tp_session_get(net, NULL, session_id);
161 if (!session) 161 if (!session)
162 goto discard; 162 goto discard;
163 163
@@ -213,8 +213,6 @@ pass_up:
213 return sk_receive_skb(sk, skb, 1); 213 return sk_receive_skb(sk, skb, 1);
214 214
215discard_sess: 215discard_sess:
216 if (session->deref)
217 session->deref(session);
218 l2tp_session_dec_refcount(session); 216 l2tp_session_dec_refcount(session);
219 goto discard; 217 goto discard;
220 218
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 7135f4645d3a..a1f24fb2be98 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -48,8 +48,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq,
48/* Accessed under genl lock */ 48/* Accessed under genl lock */
49static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; 49static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
50 50
51static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info, 51static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info)
52 bool do_ref)
53{ 52{
54 u32 tunnel_id; 53 u32 tunnel_id;
55 u32 session_id; 54 u32 session_id;
@@ -60,15 +59,14 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
60 59
61 if (info->attrs[L2TP_ATTR_IFNAME]) { 60 if (info->attrs[L2TP_ATTR_IFNAME]) {
62 ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]); 61 ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
63 session = l2tp_session_get_by_ifname(net, ifname, do_ref); 62 session = l2tp_session_get_by_ifname(net, ifname);
64 } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) && 63 } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
65 (info->attrs[L2TP_ATTR_CONN_ID])) { 64 (info->attrs[L2TP_ATTR_CONN_ID])) {
66 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); 65 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
67 session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); 66 session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
68 tunnel = l2tp_tunnel_get(net, tunnel_id); 67 tunnel = l2tp_tunnel_get(net, tunnel_id);
69 if (tunnel) { 68 if (tunnel) {
70 session = l2tp_session_get(net, tunnel, session_id, 69 session = l2tp_session_get(net, tunnel, session_id);
71 do_ref);
72 l2tp_tunnel_dec_refcount(tunnel); 70 l2tp_tunnel_dec_refcount(tunnel);
73 } 71 }
74 } 72 }
@@ -282,7 +280,7 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
282 l2tp_tunnel_notify(&l2tp_nl_family, info, 280 l2tp_tunnel_notify(&l2tp_nl_family, info,
283 tunnel, L2TP_CMD_TUNNEL_DELETE); 281 tunnel, L2TP_CMD_TUNNEL_DELETE);
284 282
285 (void) l2tp_tunnel_delete(tunnel); 283 l2tp_tunnel_delete(tunnel);
286 284
287 l2tp_tunnel_dec_refcount(tunnel); 285 l2tp_tunnel_dec_refcount(tunnel);
288 286
@@ -406,7 +404,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
406 if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || 404 if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
407 nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport))) 405 nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
408 goto nla_put_failure; 406 goto nla_put_failure;
409 /* NOBREAK */ 407 /* fall through */
410 case L2TP_ENCAPTYPE_IP: 408 case L2TP_ENCAPTYPE_IP:
411#if IS_ENABLED(CONFIG_IPV6) 409#if IS_ENABLED(CONFIG_IPV6)
412 if (np) { 410 if (np) {
@@ -649,7 +647,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
649 &cfg); 647 &cfg);
650 648
651 if (ret >= 0) { 649 if (ret >= 0) {
652 session = l2tp_session_get(net, tunnel, session_id, false); 650 session = l2tp_session_get(net, tunnel, session_id);
653 if (session) { 651 if (session) {
654 ret = l2tp_session_notify(&l2tp_nl_family, info, session, 652 ret = l2tp_session_notify(&l2tp_nl_family, info, session,
655 L2TP_CMD_SESSION_CREATE); 653 L2TP_CMD_SESSION_CREATE);
@@ -669,7 +667,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
669 struct l2tp_session *session; 667 struct l2tp_session *session;
670 u16 pw_type; 668 u16 pw_type;
671 669
672 session = l2tp_nl_session_get(info, true); 670 session = l2tp_nl_session_get(info);
673 if (session == NULL) { 671 if (session == NULL) {
674 ret = -ENODEV; 672 ret = -ENODEV;
675 goto out; 673 goto out;
@@ -683,8 +681,6 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
683 if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) 681 if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
684 ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session); 682 ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
685 683
686 if (session->deref)
687 session->deref(session);
688 l2tp_session_dec_refcount(session); 684 l2tp_session_dec_refcount(session);
689 685
690out: 686out:
@@ -696,7 +692,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
696 int ret = 0; 692 int ret = 0;
697 struct l2tp_session *session; 693 struct l2tp_session *session;
698 694
699 session = l2tp_nl_session_get(info, false); 695 session = l2tp_nl_session_get(info);
700 if (session == NULL) { 696 if (session == NULL) {
701 ret = -ENODEV; 697 ret = -ENODEV;
702 goto out; 698 goto out;
@@ -828,7 +824,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
828 struct sk_buff *msg; 824 struct sk_buff *msg;
829 int ret; 825 int ret;
830 826
831 session = l2tp_nl_session_get(info, false); 827 session = l2tp_nl_session_get(info);
832 if (session == NULL) { 828 if (session == NULL) {
833 ret = -ENODEV; 829 ret = -ENODEV;
834 goto err; 830 goto err;
@@ -874,7 +870,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
874 goto out; 870 goto out;
875 } 871 }
876 872
877 session = l2tp_session_get_nth(tunnel, si, false); 873 session = l2tp_session_get_nth(tunnel, si);
878 if (session == NULL) { 874 if (session == NULL) {
879 ti++; 875 ti++;
880 tunnel = NULL; 876 tunnel = NULL;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 0c2738349442..b412fc3351dc 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -122,10 +122,11 @@
122struct pppol2tp_session { 122struct pppol2tp_session {
123 int owner; /* pid that opened the socket */ 123 int owner; /* pid that opened the socket */
124 124
125 struct sock *sock; /* Pointer to the session 125 struct mutex sk_lock; /* Protects .sk */
126 struct sock __rcu *sk; /* Pointer to the session
126 * PPPoX socket */ 127 * PPPoX socket */
127 struct sock *tunnel_sock; /* Pointer to the tunnel UDP 128 struct sock *__sk; /* Copy of .sk, for cleanup */
128 * socket */ 129 struct rcu_head rcu; /* For asynchronous release */
129 int flags; /* accessed by PPPIOCGFLAGS. 130 int flags; /* accessed by PPPIOCGFLAGS.
130 * Unused. */ 131 * Unused. */
131}; 132};
@@ -138,6 +139,24 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = {
138 139
139static const struct proto_ops pppol2tp_ops; 140static const struct proto_ops pppol2tp_ops;
140 141
142/* Retrieves the pppol2tp socket associated to a session.
143 * A reference is held on the returned socket, so this function must be paired
144 * with sock_put().
145 */
146static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session)
147{
148 struct pppol2tp_session *ps = l2tp_session_priv(session);
149 struct sock *sk;
150
151 rcu_read_lock();
152 sk = rcu_dereference(ps->sk);
153 if (sk)
154 sock_hold(sk);
155 rcu_read_unlock();
156
157 return sk;
158}
159
141/* Helpers to obtain tunnel/session contexts from sockets. 160/* Helpers to obtain tunnel/session contexts from sockets.
142 */ 161 */
143static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk) 162static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
@@ -224,7 +243,8 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
224 /* If the socket is bound, send it in to PPP's input queue. Otherwise 243 /* If the socket is bound, send it in to PPP's input queue. Otherwise
225 * queue it on the session socket. 244 * queue it on the session socket.
226 */ 245 */
227 sk = ps->sock; 246 rcu_read_lock();
247 sk = rcu_dereference(ps->sk);
228 if (sk == NULL) 248 if (sk == NULL)
229 goto no_sock; 249 goto no_sock;
230 250
@@ -247,30 +267,16 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
247 kfree_skb(skb); 267 kfree_skb(skb);
248 } 268 }
249 } 269 }
270 rcu_read_unlock();
250 271
251 return; 272 return;
252 273
253no_sock: 274no_sock:
275 rcu_read_unlock();
254 l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name); 276 l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name);
255 kfree_skb(skb); 277 kfree_skb(skb);
256} 278}
257 279
258static void pppol2tp_session_sock_hold(struct l2tp_session *session)
259{
260 struct pppol2tp_session *ps = l2tp_session_priv(session);
261
262 if (ps->sock)
263 sock_hold(ps->sock);
264}
265
266static void pppol2tp_session_sock_put(struct l2tp_session *session)
267{
268 struct pppol2tp_session *ps = l2tp_session_priv(session);
269
270 if (ps->sock)
271 sock_put(ps->sock);
272}
273
274/************************************************************************ 280/************************************************************************
275 * Transmit handling 281 * Transmit handling
276 ***********************************************************************/ 282 ***********************************************************************/
@@ -287,7 +293,6 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
287 int error; 293 int error;
288 struct l2tp_session *session; 294 struct l2tp_session *session;
289 struct l2tp_tunnel *tunnel; 295 struct l2tp_tunnel *tunnel;
290 struct pppol2tp_session *ps;
291 int uhlen; 296 int uhlen;
292 297
293 error = -ENOTCONN; 298 error = -ENOTCONN;
@@ -300,10 +305,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
300 if (session == NULL) 305 if (session == NULL)
301 goto error; 306 goto error;
302 307
303 ps = l2tp_session_priv(session); 308 tunnel = session->tunnel;
304 tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
305 if (tunnel == NULL)
306 goto error_put_sess;
307 309
308 uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; 310 uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
309 311
@@ -314,7 +316,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
314 2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */ 316 2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
315 0, GFP_KERNEL); 317 0, GFP_KERNEL);
316 if (!skb) 318 if (!skb)
317 goto error_put_sess_tun; 319 goto error_put_sess;
318 320
319 /* Reserve space for headers. */ 321 /* Reserve space for headers. */
320 skb_reserve(skb, NET_SKB_PAD); 322 skb_reserve(skb, NET_SKB_PAD);
@@ -332,20 +334,17 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
332 error = memcpy_from_msg(skb_put(skb, total_len), m, total_len); 334 error = memcpy_from_msg(skb_put(skb, total_len), m, total_len);
333 if (error < 0) { 335 if (error < 0) {
334 kfree_skb(skb); 336 kfree_skb(skb);
335 goto error_put_sess_tun; 337 goto error_put_sess;
336 } 338 }
337 339
338 local_bh_disable(); 340 local_bh_disable();
339 l2tp_xmit_skb(session, skb, session->hdr_len); 341 l2tp_xmit_skb(session, skb, session->hdr_len);
340 local_bh_enable(); 342 local_bh_enable();
341 343
342 sock_put(ps->tunnel_sock);
343 sock_put(sk); 344 sock_put(sk);
344 345
345 return total_len; 346 return total_len;
346 347
347error_put_sess_tun:
348 sock_put(ps->tunnel_sock);
349error_put_sess: 348error_put_sess:
350 sock_put(sk); 349 sock_put(sk);
351error: 350error:
@@ -369,10 +368,8 @@ error:
369static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) 368static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
370{ 369{
371 struct sock *sk = (struct sock *) chan->private; 370 struct sock *sk = (struct sock *) chan->private;
372 struct sock *sk_tun;
373 struct l2tp_session *session; 371 struct l2tp_session *session;
374 struct l2tp_tunnel *tunnel; 372 struct l2tp_tunnel *tunnel;
375 struct pppol2tp_session *ps;
376 int uhlen, headroom; 373 int uhlen, headroom;
377 374
378 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) 375 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
@@ -383,13 +380,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
383 if (session == NULL) 380 if (session == NULL)
384 goto abort; 381 goto abort;
385 382
386 ps = l2tp_session_priv(session); 383 tunnel = session->tunnel;
387 sk_tun = ps->tunnel_sock;
388 if (sk_tun == NULL)
389 goto abort_put_sess;
390 tunnel = l2tp_sock_to_tunnel(sk_tun);
391 if (tunnel == NULL)
392 goto abort_put_sess;
393 384
394 uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; 385 uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
395 headroom = NET_SKB_PAD + 386 headroom = NET_SKB_PAD +
@@ -398,7 +389,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
398 session->hdr_len + /* L2TP header */ 389 session->hdr_len + /* L2TP header */
399 2; /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */ 390 2; /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
400 if (skb_cow_head(skb, headroom)) 391 if (skb_cow_head(skb, headroom))
401 goto abort_put_sess_tun; 392 goto abort_put_sess;
402 393
403 /* Setup PPP header */ 394 /* Setup PPP header */
404 __skb_push(skb, 2); 395 __skb_push(skb, 2);
@@ -409,12 +400,10 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
409 l2tp_xmit_skb(session, skb, session->hdr_len); 400 l2tp_xmit_skb(session, skb, session->hdr_len);
410 local_bh_enable(); 401 local_bh_enable();
411 402
412 sock_put(sk_tun);
413 sock_put(sk); 403 sock_put(sk);
404
414 return 1; 405 return 1;
415 406
416abort_put_sess_tun:
417 sock_put(sk_tun);
418abort_put_sess: 407abort_put_sess:
419 sock_put(sk); 408 sock_put(sk);
420abort: 409abort:
@@ -431,17 +420,16 @@ abort:
431 */ 420 */
432static void pppol2tp_session_close(struct l2tp_session *session) 421static void pppol2tp_session_close(struct l2tp_session *session)
433{ 422{
434 struct pppol2tp_session *ps = l2tp_session_priv(session); 423 struct sock *sk;
435 struct sock *sk = ps->sock;
436 struct socket *sock = sk->sk_socket;
437 424
438 BUG_ON(session->magic != L2TP_SESSION_MAGIC); 425 BUG_ON(session->magic != L2TP_SESSION_MAGIC);
439 426
440 if (sock) 427 sk = pppol2tp_session_get_sock(session);
441 inet_shutdown(sock, SEND_SHUTDOWN); 428 if (sk) {
442 429 if (sk->sk_socket)
443 /* Don't let the session go away before our socket does */ 430 inet_shutdown(sk->sk_socket, SEND_SHUTDOWN);
444 l2tp_session_inc_refcount(session); 431 sock_put(sk);
432 }
445} 433}
446 434
447/* Really kill the session socket. (Called from sock_put() if 435/* Really kill the session socket. (Called from sock_put() if
@@ -461,6 +449,14 @@ static void pppol2tp_session_destruct(struct sock *sk)
461 } 449 }
462} 450}
463 451
452static void pppol2tp_put_sk(struct rcu_head *head)
453{
454 struct pppol2tp_session *ps;
455
456 ps = container_of(head, typeof(*ps), rcu);
457 sock_put(ps->__sk);
458}
459
464/* Called when the PPPoX socket (session) is closed. 460/* Called when the PPPoX socket (session) is closed.
465 */ 461 */
466static int pppol2tp_release(struct socket *sock) 462static int pppol2tp_release(struct socket *sock)
@@ -486,11 +482,23 @@ static int pppol2tp_release(struct socket *sock)
486 482
487 session = pppol2tp_sock_to_session(sk); 483 session = pppol2tp_sock_to_session(sk);
488 484
489 /* Purge any queued data */
490 if (session != NULL) { 485 if (session != NULL) {
491 __l2tp_session_unhash(session); 486 struct pppol2tp_session *ps;
492 l2tp_session_queue_purge(session); 487
493 sock_put(sk); 488 l2tp_session_delete(session);
489
490 ps = l2tp_session_priv(session);
491 mutex_lock(&ps->sk_lock);
492 ps->__sk = rcu_dereference_protected(ps->sk,
493 lockdep_is_held(&ps->sk_lock));
494 RCU_INIT_POINTER(ps->sk, NULL);
495 mutex_unlock(&ps->sk_lock);
496 call_rcu(&ps->rcu, pppol2tp_put_sk);
497
498 /* Rely on the sock_put() call at the end of the function for
499 * dropping the reference held by pppol2tp_sock_to_session().
500 * The last reference will be dropped by pppol2tp_put_sk().
501 */
494 } 502 }
495 release_sock(sk); 503 release_sock(sk);
496 504
@@ -557,16 +565,46 @@ out:
557static void pppol2tp_show(struct seq_file *m, void *arg) 565static void pppol2tp_show(struct seq_file *m, void *arg)
558{ 566{
559 struct l2tp_session *session = arg; 567 struct l2tp_session *session = arg;
560 struct pppol2tp_session *ps = l2tp_session_priv(session); 568 struct sock *sk;
561 569
562 if (ps) { 570 sk = pppol2tp_session_get_sock(session);
563 struct pppox_sock *po = pppox_sk(ps->sock); 571 if (sk) {
564 if (po) 572 struct pppox_sock *po = pppox_sk(sk);
565 seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); 573
574 seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
575 sock_put(sk);
566 } 576 }
567} 577}
568#endif 578#endif
569 579
580static void pppol2tp_session_init(struct l2tp_session *session)
581{
582 struct pppol2tp_session *ps;
583 struct dst_entry *dst;
584
585 session->recv_skb = pppol2tp_recv;
586 session->session_close = pppol2tp_session_close;
587#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
588 session->show = pppol2tp_show;
589#endif
590
591 ps = l2tp_session_priv(session);
592 mutex_init(&ps->sk_lock);
593 ps->owner = current->pid;
594
595 /* If PMTU discovery was enabled, use the MTU that was discovered */
596 dst = sk_dst_get(session->tunnel->sock);
597 if (dst) {
598 u32 pmtu = dst_mtu(dst);
599
600 if (pmtu) {
601 session->mtu = pmtu - PPPOL2TP_HEADER_OVERHEAD;
602 session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD;
603 }
604 dst_release(dst);
605 }
606}
607
570/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket 608/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
571 */ 609 */
572static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, 610static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
@@ -578,7 +616,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
578 struct l2tp_session *session = NULL; 616 struct l2tp_session *session = NULL;
579 struct l2tp_tunnel *tunnel; 617 struct l2tp_tunnel *tunnel;
580 struct pppol2tp_session *ps; 618 struct pppol2tp_session *ps;
581 struct dst_entry *dst;
582 struct l2tp_session_cfg cfg = { 0, }; 619 struct l2tp_session_cfg cfg = { 0, };
583 int error = 0; 620 int error = 0;
584 u32 tunnel_id, peer_tunnel_id; 621 u32 tunnel_id, peer_tunnel_id;
@@ -688,7 +725,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
688 if (tunnel->peer_tunnel_id == 0) 725 if (tunnel->peer_tunnel_id == 0)
689 tunnel->peer_tunnel_id = peer_tunnel_id; 726 tunnel->peer_tunnel_id = peer_tunnel_id;
690 727
691 session = l2tp_session_get(sock_net(sk), tunnel, session_id, false); 728 session = l2tp_session_get(sock_net(sk), tunnel, session_id);
692 if (session) { 729 if (session) {
693 drop_refcnt = true; 730 drop_refcnt = true;
694 ps = l2tp_session_priv(session); 731 ps = l2tp_session_priv(session);
@@ -696,13 +733,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
696 /* Using a pre-existing session is fine as long as it hasn't 733 /* Using a pre-existing session is fine as long as it hasn't
697 * been connected yet. 734 * been connected yet.
698 */ 735 */
699 if (ps->sock) { 736 mutex_lock(&ps->sk_lock);
700 error = -EEXIST; 737 if (rcu_dereference_protected(ps->sk,
701 goto end; 738 lockdep_is_held(&ps->sk_lock))) {
702 } 739 mutex_unlock(&ps->sk_lock);
703
704 /* consistency checks */
705 if (ps->tunnel_sock != tunnel->sock) {
706 error = -EEXIST; 740 error = -EEXIST;
707 goto end; 741 goto end;
708 } 742 }
@@ -718,35 +752,19 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
718 error = PTR_ERR(session); 752 error = PTR_ERR(session);
719 goto end; 753 goto end;
720 } 754 }
721 }
722
723 /* Associate session with its PPPoL2TP socket */
724 ps = l2tp_session_priv(session);
725 ps->owner = current->pid;
726 ps->sock = sk;
727 ps->tunnel_sock = tunnel->sock;
728 755
729 session->recv_skb = pppol2tp_recv; 756 pppol2tp_session_init(session);
730 session->session_close = pppol2tp_session_close; 757 ps = l2tp_session_priv(session);
731#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) 758 l2tp_session_inc_refcount(session);
732 session->show = pppol2tp_show;
733#endif
734
735 /* We need to know each time a skb is dropped from the reorder
736 * queue.
737 */
738 session->ref = pppol2tp_session_sock_hold;
739 session->deref = pppol2tp_session_sock_put;
740
741 /* If PMTU discovery was enabled, use the MTU that was discovered */
742 dst = sk_dst_get(tunnel->sock);
743 if (dst != NULL) {
744 u32 pmtu = dst_mtu(dst);
745 759
746 if (pmtu != 0) 760 mutex_lock(&ps->sk_lock);
747 session->mtu = session->mru = pmtu - 761 error = l2tp_session_register(session, tunnel);
748 PPPOL2TP_HEADER_OVERHEAD; 762 if (error < 0) {
749 dst_release(dst); 763 mutex_unlock(&ps->sk_lock);
764 kfree(session);
765 goto end;
766 }
767 drop_refcnt = true;
750 } 768 }
751 769
752 /* Special case: if source & dest session_id == 0x0000, this 770 /* Special case: if source & dest session_id == 0x0000, this
@@ -771,12 +789,23 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
771 po->chan.mtu = session->mtu; 789 po->chan.mtu = session->mtu;
772 790
773 error = ppp_register_net_channel(sock_net(sk), &po->chan); 791 error = ppp_register_net_channel(sock_net(sk), &po->chan);
774 if (error) 792 if (error) {
793 mutex_unlock(&ps->sk_lock);
775 goto end; 794 goto end;
795 }
776 796
777out_no_ppp: 797out_no_ppp:
778 /* This is how we get the session context from the socket. */ 798 /* This is how we get the session context from the socket. */
779 sk->sk_user_data = session; 799 sk->sk_user_data = session;
800 rcu_assign_pointer(ps->sk, sk);
801 mutex_unlock(&ps->sk_lock);
802
803 /* Keep the reference we've grabbed on the session: sk doesn't expect
804 * the session to disappear. pppol2tp_session_destruct() is responsible
805 * for dropping it.
806 */
807 drop_refcnt = false;
808
780 sk->sk_state = PPPOX_CONNECTED; 809 sk->sk_state = PPPOX_CONNECTED;
781 l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n", 810 l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
782 session->name); 811 session->name);
@@ -800,12 +829,11 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
800{ 829{
801 int error; 830 int error;
802 struct l2tp_session *session; 831 struct l2tp_session *session;
803 struct pppol2tp_session *ps;
804 832
805 /* Error if tunnel socket is not prepped */ 833 /* Error if tunnel socket is not prepped */
806 if (!tunnel->sock) { 834 if (!tunnel->sock) {
807 error = -ENOENT; 835 error = -ENOENT;
808 goto out; 836 goto err;
809 } 837 }
810 838
811 /* Default MTU values. */ 839 /* Default MTU values. */
@@ -820,18 +848,20 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
820 peer_session_id, cfg); 848 peer_session_id, cfg);
821 if (IS_ERR(session)) { 849 if (IS_ERR(session)) {
822 error = PTR_ERR(session); 850 error = PTR_ERR(session);
823 goto out; 851 goto err;
824 } 852 }
825 853
826 ps = l2tp_session_priv(session); 854 pppol2tp_session_init(session);
827 ps->tunnel_sock = tunnel->sock;
828 855
829 l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n", 856 error = l2tp_session_register(session, tunnel);
830 session->name); 857 if (error < 0)
858 goto err_sess;
831 859
832 error = 0; 860 return 0;
833 861
834out: 862err_sess:
863 kfree(session);
864err:
835 return error; 865 return error;
836} 866}
837 867
@@ -862,9 +892,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
862 goto end; 892 goto end;
863 893
864 pls = l2tp_session_priv(session); 894 pls = l2tp_session_priv(session);
865 tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock); 895 tunnel = session->tunnel;
866 if (tunnel == NULL)
867 goto end_put_sess;
868 896
869 inet = inet_sk(tunnel->sock); 897 inet = inet_sk(tunnel->sock);
870 if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) { 898 if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) {
@@ -944,8 +972,6 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
944 *usockaddr_len = len; 972 *usockaddr_len = len;
945 error = 0; 973 error = 0;
946 974
947 sock_put(pls->tunnel_sock);
948end_put_sess:
949 sock_put(sk); 975 sock_put(sk);
950end: 976end:
951 return error; 977 return error;
@@ -992,12 +1018,10 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
992 "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n", 1018 "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
993 session->name, cmd, arg); 1019 session->name, cmd, arg);
994 1020
995 sk = ps->sock; 1021 sk = pppol2tp_session_get_sock(session);
996 if (!sk) 1022 if (!sk)
997 return -EBADR; 1023 return -EBADR;
998 1024
999 sock_hold(sk);
1000
1001 switch (cmd) { 1025 switch (cmd) {
1002 case SIOCGIFMTU: 1026 case SIOCGIFMTU:
1003 err = -ENXIO; 1027 err = -ENXIO;
@@ -1143,13 +1167,11 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
1143 /* resend to session ioctl handler */ 1167 /* resend to session ioctl handler */
1144 struct l2tp_session *session = 1168 struct l2tp_session *session =
1145 l2tp_session_get(sock_net(sk), tunnel, 1169 l2tp_session_get(sock_net(sk), tunnel,
1146 stats.session_id, true); 1170 stats.session_id);
1147 1171
1148 if (session) { 1172 if (session) {
1149 err = pppol2tp_session_ioctl(session, cmd, 1173 err = pppol2tp_session_ioctl(session, cmd,
1150 arg); 1174 arg);
1151 if (session->deref)
1152 session->deref(session);
1153 l2tp_session_dec_refcount(session); 1175 l2tp_session_dec_refcount(session);
1154 } else { 1176 } else {
1155 err = -EBADR; 1177 err = -EBADR;
@@ -1188,7 +1210,6 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
1188 struct sock *sk = sock->sk; 1210 struct sock *sk = sock->sk;
1189 struct l2tp_session *session; 1211 struct l2tp_session *session;
1190 struct l2tp_tunnel *tunnel; 1212 struct l2tp_tunnel *tunnel;
1191 struct pppol2tp_session *ps;
1192 int err; 1213 int err;
1193 1214
1194 if (!sk) 1215 if (!sk)
@@ -1212,16 +1233,10 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
1212 /* Special case: if session's session_id is zero, treat ioctl as a 1233 /* Special case: if session's session_id is zero, treat ioctl as a
1213 * tunnel ioctl 1234 * tunnel ioctl
1214 */ 1235 */
1215 ps = l2tp_session_priv(session);
1216 if ((session->session_id == 0) && 1236 if ((session->session_id == 0) &&
1217 (session->peer_session_id == 0)) { 1237 (session->peer_session_id == 0)) {
1218 err = -EBADF; 1238 tunnel = session->tunnel;
1219 tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
1220 if (tunnel == NULL)
1221 goto end_put_sess;
1222
1223 err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg); 1239 err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
1224 sock_put(ps->tunnel_sock);
1225 goto end_put_sess; 1240 goto end_put_sess;
1226 } 1241 }
1227 1242
@@ -1273,7 +1288,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
1273 int optname, int val) 1288 int optname, int val)
1274{ 1289{
1275 int err = 0; 1290 int err = 0;
1276 struct pppol2tp_session *ps = l2tp_session_priv(session);
1277 1291
1278 switch (optname) { 1292 switch (optname) {
1279 case PPPOL2TP_SO_RECVSEQ: 1293 case PPPOL2TP_SO_RECVSEQ:
@@ -1294,8 +1308,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
1294 } 1308 }
1295 session->send_seq = !!val; 1309 session->send_seq = !!val;
1296 { 1310 {
1297 struct sock *ssk = ps->sock; 1311 struct pppox_sock *po = pppox_sk(sk);
1298 struct pppox_sock *po = pppox_sk(ssk); 1312
1299 po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : 1313 po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
1300 PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; 1314 PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
1301 } 1315 }
@@ -1348,7 +1362,6 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
1348 struct sock *sk = sock->sk; 1362 struct sock *sk = sock->sk;
1349 struct l2tp_session *session; 1363 struct l2tp_session *session;
1350 struct l2tp_tunnel *tunnel; 1364 struct l2tp_tunnel *tunnel;
1351 struct pppol2tp_session *ps;
1352 int val; 1365 int val;
1353 int err; 1366 int err;
1354 1367
@@ -1373,20 +1386,14 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
1373 1386
1374 /* Special case: if session_id == 0x0000, treat as operation on tunnel 1387 /* Special case: if session_id == 0x0000, treat as operation on tunnel
1375 */ 1388 */
1376 ps = l2tp_session_priv(session);
1377 if ((session->session_id == 0) && 1389 if ((session->session_id == 0) &&
1378 (session->peer_session_id == 0)) { 1390 (session->peer_session_id == 0)) {
1379 err = -EBADF; 1391 tunnel = session->tunnel;
1380 tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
1381 if (tunnel == NULL)
1382 goto end_put_sess;
1383
1384 err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val); 1392 err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
1385 sock_put(ps->tunnel_sock); 1393 } else {
1386 } else
1387 err = pppol2tp_session_setsockopt(sk, session, optname, val); 1394 err = pppol2tp_session_setsockopt(sk, session, optname, val);
1395 }
1388 1396
1389end_put_sess:
1390 sock_put(sk); 1397 sock_put(sk);
1391end: 1398end:
1392 return err; 1399 return err;
@@ -1474,7 +1481,6 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
1474 struct l2tp_tunnel *tunnel; 1481 struct l2tp_tunnel *tunnel;
1475 int val, len; 1482 int val, len;
1476 int err; 1483 int err;
1477 struct pppol2tp_session *ps;
1478 1484
1479 if (level != SOL_PPPOL2TP) 1485 if (level != SOL_PPPOL2TP)
1480 return -EINVAL; 1486 return -EINVAL;
@@ -1498,16 +1504,10 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
1498 goto end; 1504 goto end;
1499 1505
1500 /* Special case: if session_id == 0x0000, treat as operation on tunnel */ 1506 /* Special case: if session_id == 0x0000, treat as operation on tunnel */
1501 ps = l2tp_session_priv(session);
1502 if ((session->session_id == 0) && 1507 if ((session->session_id == 0) &&
1503 (session->peer_session_id == 0)) { 1508 (session->peer_session_id == 0)) {
1504 err = -EBADF; 1509 tunnel = session->tunnel;
1505 tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
1506 if (tunnel == NULL)
1507 goto end_put_sess;
1508
1509 err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val); 1510 err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
1510 sock_put(ps->tunnel_sock);
1511 if (err) 1511 if (err)
1512 goto end_put_sess; 1512 goto end_put_sess;
1513 } else { 1513 } else {
@@ -1566,7 +1566,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
1566 1566
1567static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) 1567static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
1568{ 1568{
1569 pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); 1569 pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
1570 pd->session_idx++; 1570 pd->session_idx++;
1571 1571
1572 if (pd->session == NULL) { 1572 if (pd->session == NULL) {
@@ -1634,8 +1634,9 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
1634{ 1634{
1635 struct l2tp_session *session = v; 1635 struct l2tp_session *session = v;
1636 struct l2tp_tunnel *tunnel = session->tunnel; 1636 struct l2tp_tunnel *tunnel = session->tunnel;
1637 struct pppol2tp_session *ps = l2tp_session_priv(session); 1637 unsigned char state;
1638 struct pppox_sock *po = pppox_sk(ps->sock); 1638 char user_data_ok;
1639 struct sock *sk;
1639 u32 ip = 0; 1640 u32 ip = 0;
1640 u16 port = 0; 1641 u16 port = 0;
1641 1642
@@ -1645,6 +1646,15 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
1645 port = ntohs(inet->inet_sport); 1646 port = ntohs(inet->inet_sport);
1646 } 1647 }
1647 1648
1649 sk = pppol2tp_session_get_sock(session);
1650 if (sk) {
1651 state = sk->sk_state;
1652 user_data_ok = (session == sk->sk_user_data) ? 'Y' : 'N';
1653 } else {
1654 state = 0;
1655 user_data_ok = 'N';
1656 }
1657
1648 seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> " 1658 seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> "
1649 "%04X/%04X %d %c\n", 1659 "%04X/%04X %d %c\n",
1650 session->name, ip, port, 1660 session->name, ip, port,
@@ -1652,9 +1662,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
1652 session->session_id, 1662 session->session_id,
1653 tunnel->peer_tunnel_id, 1663 tunnel->peer_tunnel_id,
1654 session->peer_session_id, 1664 session->peer_session_id,
1655 ps->sock->sk_state, 1665 state, user_data_ok);
1656 (session == ps->sock->sk_user_data) ?
1657 'Y' : 'N');
1658 seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n", 1666 seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n",
1659 session->mtu, session->mru, 1667 session->mtu, session->mru,
1660 session->recv_seq ? 'R' : '-', 1668 session->recv_seq ? 'R' : '-',
@@ -1671,8 +1679,12 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
1671 atomic_long_read(&session->stats.rx_bytes), 1679 atomic_long_read(&session->stats.rx_bytes),
1672 atomic_long_read(&session->stats.rx_errors)); 1680 atomic_long_read(&session->stats.rx_errors));
1673 1681
1674 if (po) 1682 if (sk) {
1683 struct pppox_sock *po = pppox_sk(sk);
1684
1675 seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); 1685 seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
1686 sock_put(sk);
1687 }
1676} 1688}
1677 1689
1678static int pppol2tp_seq_show(struct seq_file *m, void *v) 1690static int pppol2tp_seq_show(struct seq_file *m, void *v)
@@ -1697,8 +1709,6 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
1697 pppol2tp_seq_tunnel_show(m, pd->tunnel); 1709 pppol2tp_seq_tunnel_show(m, pd->tunnel);
1698 } else { 1710 } else {
1699 pppol2tp_seq_session_show(m, pd->session); 1711 pppol2tp_seq_session_show(m, pd->session);
1700 if (pd->session->deref)
1701 pd->session->deref(pd->session);
1702 l2tp_session_dec_refcount(pd->session); 1712 l2tp_session_dec_refcount(pd->session);
1703 } 1713 }
1704 1714
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index e15314e3b464..db6e0afe3a20 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -127,8 +127,8 @@ static struct lapb_cb *lapb_create_cb(void)
127 skb_queue_head_init(&lapb->write_queue); 127 skb_queue_head_init(&lapb->write_queue);
128 skb_queue_head_init(&lapb->ack_queue); 128 skb_queue_head_init(&lapb->ack_queue);
129 129
130 init_timer(&lapb->t1timer); 130 timer_setup(&lapb->t1timer, NULL, 0);
131 init_timer(&lapb->t2timer); 131 timer_setup(&lapb->t2timer, NULL, 0);
132 132
133 lapb->t1 = LAPB_DEFAULT_T1; 133 lapb->t1 = LAPB_DEFAULT_T1;
134 lapb->t2 = LAPB_DEFAULT_T2; 134 lapb->t2 = LAPB_DEFAULT_T2;
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 1a5535bc3b8d..5d4ae01951b5 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -35,15 +35,14 @@
35#include <linux/interrupt.h> 35#include <linux/interrupt.h>
36#include <net/lapb.h> 36#include <net/lapb.h>
37 37
38static void lapb_t1timer_expiry(unsigned long); 38static void lapb_t1timer_expiry(struct timer_list *);
39static void lapb_t2timer_expiry(unsigned long); 39static void lapb_t2timer_expiry(struct timer_list *);
40 40
41void lapb_start_t1timer(struct lapb_cb *lapb) 41void lapb_start_t1timer(struct lapb_cb *lapb)
42{ 42{
43 del_timer(&lapb->t1timer); 43 del_timer(&lapb->t1timer);
44 44
45 lapb->t1timer.data = (unsigned long)lapb; 45 lapb->t1timer.function = lapb_t1timer_expiry;
46 lapb->t1timer.function = &lapb_t1timer_expiry;
47 lapb->t1timer.expires = jiffies + lapb->t1; 46 lapb->t1timer.expires = jiffies + lapb->t1;
48 47
49 add_timer(&lapb->t1timer); 48 add_timer(&lapb->t1timer);
@@ -53,8 +52,7 @@ void lapb_start_t2timer(struct lapb_cb *lapb)
53{ 52{
54 del_timer(&lapb->t2timer); 53 del_timer(&lapb->t2timer);
55 54
56 lapb->t2timer.data = (unsigned long)lapb; 55 lapb->t2timer.function = lapb_t2timer_expiry;
57 lapb->t2timer.function = &lapb_t2timer_expiry;
58 lapb->t2timer.expires = jiffies + lapb->t2; 56 lapb->t2timer.expires = jiffies + lapb->t2;
59 57
60 add_timer(&lapb->t2timer); 58 add_timer(&lapb->t2timer);
@@ -75,9 +73,9 @@ int lapb_t1timer_running(struct lapb_cb *lapb)
75 return timer_pending(&lapb->t1timer); 73 return timer_pending(&lapb->t1timer);
76} 74}
77 75
78static void lapb_t2timer_expiry(unsigned long param) 76static void lapb_t2timer_expiry(struct timer_list *t)
79{ 77{
80 struct lapb_cb *lapb = (struct lapb_cb *)param; 78 struct lapb_cb *lapb = from_timer(lapb, t, t2timer);
81 79
82 if (lapb->condition & LAPB_ACK_PENDING_CONDITION) { 80 if (lapb->condition & LAPB_ACK_PENDING_CONDITION) {
83 lapb->condition &= ~LAPB_ACK_PENDING_CONDITION; 81 lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
@@ -85,9 +83,9 @@ static void lapb_t2timer_expiry(unsigned long param)
85 } 83 }
86} 84}
87 85
88static void lapb_t1timer_expiry(unsigned long param) 86static void lapb_t1timer_expiry(struct timer_list *t)
89{ 87{
90 struct lapb_cb *lapb = (struct lapb_cb *)param; 88 struct lapb_cb *lapb = from_timer(lapb, t, t1timer);
91 89
92 switch (lapb->state) { 90 switch (lapb->state) {
93 91
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index ea225bd2672c..f59648018060 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -1318,9 +1318,8 @@ static int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb)
1318 return 0; 1318 return 0;
1319} 1319}
1320 1320
1321static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type) 1321static void llc_conn_tmr_common_cb(struct sock *sk, u8 type)
1322{ 1322{
1323 struct sock *sk = (struct sock *)timeout_data;
1324 struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); 1323 struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
1325 1324
1326 bh_lock_sock(sk); 1325 bh_lock_sock(sk);
@@ -1334,24 +1333,32 @@ static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type)
1334 bh_unlock_sock(sk); 1333 bh_unlock_sock(sk);
1335} 1334}
1336 1335
1337void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) 1336void llc_conn_pf_cycle_tmr_cb(struct timer_list *t)
1338{ 1337{
1339 llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_P_TMR); 1338 struct llc_sock *llc = from_timer(llc, t, pf_cycle_timer.timer);
1339
1340 llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_P_TMR);
1340} 1341}
1341 1342
1342void llc_conn_busy_tmr_cb(unsigned long timeout_data) 1343void llc_conn_busy_tmr_cb(struct timer_list *t)
1343{ 1344{
1344 llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_BUSY_TMR); 1345 struct llc_sock *llc = from_timer(llc, t, busy_state_timer.timer);
1346
1347 llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_BUSY_TMR);
1345} 1348}
1346 1349
1347void llc_conn_ack_tmr_cb(unsigned long timeout_data) 1350void llc_conn_ack_tmr_cb(struct timer_list *t)
1348{ 1351{
1349 llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_ACK_TMR); 1352 struct llc_sock *llc = from_timer(llc, t, ack_timer.timer);
1353
1354 llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_ACK_TMR);
1350} 1355}
1351 1356
1352void llc_conn_rej_tmr_cb(unsigned long timeout_data) 1357void llc_conn_rej_tmr_cb(struct timer_list *t)
1353{ 1358{
1354 llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_REJ_TMR); 1359 struct llc_sock *llc = from_timer(llc, t, rej_sent_timer.timer);
1360
1361 llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_REJ_TMR);
1355} 1362}
1356 1363
1357int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb) 1364int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb)
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 5e91b47f0d2a..9177dbb16dce 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -902,20 +902,16 @@ static void llc_sk_init(struct sock *sk)
902 llc->inc_cntr = llc->dec_cntr = 2; 902 llc->inc_cntr = llc->dec_cntr = 2;
903 llc->dec_step = llc->connect_step = 1; 903 llc->dec_step = llc->connect_step = 1;
904 904
905 setup_timer(&llc->ack_timer.timer, llc_conn_ack_tmr_cb, 905 timer_setup(&llc->ack_timer.timer, llc_conn_ack_tmr_cb, 0);
906 (unsigned long)sk);
907 llc->ack_timer.expire = sysctl_llc2_ack_timeout; 906 llc->ack_timer.expire = sysctl_llc2_ack_timeout;
908 907
909 setup_timer(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb, 908 timer_setup(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb, 0);
910 (unsigned long)sk);
911 llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout; 909 llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout;
912 910
913 setup_timer(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb, 911 timer_setup(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb, 0);
914 (unsigned long)sk);
915 llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout; 912 llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout;
916 913
917 setup_timer(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb, 914 timer_setup(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb, 0);
918 (unsigned long)sk);
919 llc->busy_state_timer.expire = sysctl_llc2_busy_timeout; 915 llc->busy_state_timer.expire = sysctl_llc2_busy_timeout;
920 916
921 llc->n2 = 2; /* max retransmit */ 917 llc->n2 = 2; /* max retransmit */
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 80bfe29099f6..e3589ade62e0 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -7,6 +7,7 @@ mac80211-y := \
7 driver-ops.o \ 7 driver-ops.o \
8 sta_info.o \ 8 sta_info.o \
9 wep.o \ 9 wep.o \
10 aead_api.o \
10 wpa.o \ 11 wpa.o \
11 scan.o offchannel.o \ 12 scan.o offchannel.o \
12 ht.o agg-tx.o agg-rx.o \ 13 ht.o agg-tx.o agg-rx.o \
@@ -16,8 +17,6 @@ mac80211-y := \
16 rate.o \ 17 rate.o \
17 michael.o \ 18 michael.o \
18 tkip.o \ 19 tkip.o \
19 aes_ccm.o \
20 aes_gcm.o \
21 aes_cmac.o \ 20 aes_cmac.o \
22 aes_gmac.o \ 21 aes_gmac.o \
23 fils_aead.o \ 22 fils_aead.o \
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aead_api.c
index a4e0d59a40dd..160f9df30402 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aead_api.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright 2003-2004, Instant802 Networks, Inc. 2 * Copyright 2003-2004, Instant802 Networks, Inc.
3 * Copyright 2005-2006, Devicescape Software, Inc. 3 * Copyright 2005-2006, Devicescape Software, Inc.
4 * Copyright 2014-2015, Qualcomm Atheros, Inc.
4 * 5 *
5 * Rewrite: Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> 6 * Rewrite: Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
6 * 7 *
@@ -12,30 +13,29 @@
12#include <linux/kernel.h> 13#include <linux/kernel.h>
13#include <linux/types.h> 14#include <linux/types.h>
14#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/scatterlist.h>
15#include <crypto/aead.h> 17#include <crypto/aead.h>
16 18
17#include <net/mac80211.h> 19#include "aead_api.h"
18#include "key.h"
19#include "aes_ccm.h"
20 20
21int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, 21int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
22 u8 *data, size_t data_len, u8 *mic, 22 u8 *data, size_t data_len, u8 *mic)
23 size_t mic_len)
24{ 23{
24 size_t mic_len = crypto_aead_authsize(tfm);
25 struct scatterlist sg[3]; 25 struct scatterlist sg[3];
26 struct aead_request *aead_req; 26 struct aead_request *aead_req;
27 int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); 27 int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
28 u8 *__aad; 28 u8 *__aad;
29 29
30 aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); 30 aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
31 if (!aead_req) 31 if (!aead_req)
32 return -ENOMEM; 32 return -ENOMEM;
33 33
34 __aad = (u8 *)aead_req + reqsize; 34 __aad = (u8 *)aead_req + reqsize;
35 memcpy(__aad, aad, CCM_AAD_LEN); 35 memcpy(__aad, aad, aad_len);
36 36
37 sg_init_table(sg, 3); 37 sg_init_table(sg, 3);
38 sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); 38 sg_set_buf(&sg[0], __aad, aad_len);
39 sg_set_buf(&sg[1], data, data_len); 39 sg_set_buf(&sg[1], data, data_len);
40 sg_set_buf(&sg[2], mic, mic_len); 40 sg_set_buf(&sg[2], mic, mic_len);
41 41
@@ -49,10 +49,10 @@ int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
49 return 0; 49 return 0;
50} 50}
51 51
52int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, 52int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
53 u8 *data, size_t data_len, u8 *mic, 53 u8 *data, size_t data_len, u8 *mic)
54 size_t mic_len)
55{ 54{
55 size_t mic_len = crypto_aead_authsize(tfm);
56 struct scatterlist sg[3]; 56 struct scatterlist sg[3];
57 struct aead_request *aead_req; 57 struct aead_request *aead_req;
58 int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); 58 int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
@@ -62,15 +62,15 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
62 if (data_len == 0) 62 if (data_len == 0)
63 return -EINVAL; 63 return -EINVAL;
64 64
65 aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); 65 aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
66 if (!aead_req) 66 if (!aead_req)
67 return -ENOMEM; 67 return -ENOMEM;
68 68
69 __aad = (u8 *)aead_req + reqsize; 69 __aad = (u8 *)aead_req + reqsize;
70 memcpy(__aad, aad, CCM_AAD_LEN); 70 memcpy(__aad, aad, aad_len);
71 71
72 sg_init_table(sg, 3); 72 sg_init_table(sg, 3);
73 sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); 73 sg_set_buf(&sg[0], __aad, aad_len);
74 sg_set_buf(&sg[1], data, data_len); 74 sg_set_buf(&sg[1], data, data_len);
75 sg_set_buf(&sg[2], mic, mic_len); 75 sg_set_buf(&sg[2], mic, mic_len);
76 76
@@ -84,14 +84,14 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
84 return err; 84 return err;
85} 85}
86 86
87struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], 87struct crypto_aead *
88 size_t key_len, 88aead_key_setup_encrypt(const char *alg, const u8 key[],
89 size_t mic_len) 89 size_t key_len, size_t mic_len)
90{ 90{
91 struct crypto_aead *tfm; 91 struct crypto_aead *tfm;
92 int err; 92 int err;
93 93
94 tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC); 94 tfm = crypto_alloc_aead(alg, 0, CRYPTO_ALG_ASYNC);
95 if (IS_ERR(tfm)) 95 if (IS_ERR(tfm))
96 return tfm; 96 return tfm;
97 97
@@ -109,7 +109,7 @@ free_aead:
109 return ERR_PTR(err); 109 return ERR_PTR(err);
110} 110}
111 111
112void ieee80211_aes_key_free(struct crypto_aead *tfm) 112void aead_key_free(struct crypto_aead *tfm)
113{ 113{
114 crypto_free_aead(tfm); 114 crypto_free_aead(tfm);
115} 115}
diff --git a/net/mac80211/aead_api.h b/net/mac80211/aead_api.h
new file mode 100644
index 000000000000..5e39ea843bbf
--- /dev/null
+++ b/net/mac80211/aead_api.h
@@ -0,0 +1,27 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License version 2 as
4 * published by the Free Software Foundation.
5 */
6
7#ifndef _AEAD_API_H
8#define _AEAD_API_H
9
10#include <crypto/aead.h>
11#include <linux/crypto.h>
12
13struct crypto_aead *
14aead_key_setup_encrypt(const char *alg, const u8 key[],
15 size_t key_len, size_t mic_len);
16
17int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
18 size_t aad_len, u8 *data,
19 size_t data_len, u8 *mic);
20
21int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
22 size_t aad_len, u8 *data,
23 size_t data_len, u8 *mic);
24
25void aead_key_free(struct crypto_aead *tfm);
26
27#endif /* _AEAD_API_H */
diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h
index fcd3254c5cf0..e9b7ca0bde5b 100644
--- a/net/mac80211/aes_ccm.h
+++ b/net/mac80211/aes_ccm.h
@@ -10,19 +10,39 @@
10#ifndef AES_CCM_H 10#ifndef AES_CCM_H
11#define AES_CCM_H 11#define AES_CCM_H
12 12
13#include <linux/crypto.h> 13#include "aead_api.h"
14 14
15#define CCM_AAD_LEN 32 15#define CCM_AAD_LEN 32
16 16
17struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], 17static inline struct crypto_aead *
18 size_t key_len, 18ieee80211_aes_key_setup_encrypt(const u8 key[], size_t key_len, size_t mic_len)
19 size_t mic_len); 19{
20int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, 20 return aead_key_setup_encrypt("ccm(aes)", key, key_len, mic_len);
21 u8 *data, size_t data_len, u8 *mic, 21}
22 size_t mic_len); 22
23int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, 23static inline int
24 u8 *data, size_t data_len, u8 *mic, 24ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm,
25 size_t mic_len); 25 u8 *b_0, u8 *aad, u8 *data,
26void ieee80211_aes_key_free(struct crypto_aead *tfm); 26 size_t data_len, u8 *mic)
27{
28 return aead_encrypt(tfm, b_0, aad + 2,
29 be16_to_cpup((__be16 *)aad),
30 data, data_len, mic);
31}
32
33static inline int
34ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm,
35 u8 *b_0, u8 *aad, u8 *data,
36 size_t data_len, u8 *mic)
37{
38 return aead_decrypt(tfm, b_0, aad + 2,
39 be16_to_cpup((__be16 *)aad),
40 data, data_len, mic);
41}
42
43static inline void ieee80211_aes_key_free(struct crypto_aead *tfm)
44{
45 return aead_key_free(tfm);
46}
27 47
28#endif /* AES_CCM_H */ 48#endif /* AES_CCM_H */
diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c
deleted file mode 100644
index 8a4397cc1b08..000000000000
--- a/net/mac80211/aes_gcm.c
+++ /dev/null
@@ -1,109 +0,0 @@
1/*
2 * Copyright 2014-2015, Qualcomm Atheros, Inc.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/kernel.h>
10#include <linux/types.h>
11#include <linux/err.h>
12#include <crypto/aead.h>
13
14#include <net/mac80211.h>
15#include "key.h"
16#include "aes_gcm.h"
17
18int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
19 u8 *data, size_t data_len, u8 *mic)
20{
21 struct scatterlist sg[3];
22 struct aead_request *aead_req;
23 int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
24 u8 *__aad;
25
26 aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
27 if (!aead_req)
28 return -ENOMEM;
29
30 __aad = (u8 *)aead_req + reqsize;
31 memcpy(__aad, aad, GCM_AAD_LEN);
32
33 sg_init_table(sg, 3);
34 sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
35 sg_set_buf(&sg[1], data, data_len);
36 sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
37
38 aead_request_set_tfm(aead_req, tfm);
39 aead_request_set_crypt(aead_req, sg, sg, data_len, j_0);
40 aead_request_set_ad(aead_req, sg[0].length);
41
42 crypto_aead_encrypt(aead_req);
43 kzfree(aead_req);
44 return 0;
45}
46
47int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
48 u8 *data, size_t data_len, u8 *mic)
49{
50 struct scatterlist sg[3];
51 struct aead_request *aead_req;
52 int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
53 u8 *__aad;
54 int err;
55
56 if (data_len == 0)
57 return -EINVAL;
58
59 aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
60 if (!aead_req)
61 return -ENOMEM;
62
63 __aad = (u8 *)aead_req + reqsize;
64 memcpy(__aad, aad, GCM_AAD_LEN);
65
66 sg_init_table(sg, 3);
67 sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
68 sg_set_buf(&sg[1], data, data_len);
69 sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
70
71 aead_request_set_tfm(aead_req, tfm);
72 aead_request_set_crypt(aead_req, sg, sg,
73 data_len + IEEE80211_GCMP_MIC_LEN, j_0);
74 aead_request_set_ad(aead_req, sg[0].length);
75
76 err = crypto_aead_decrypt(aead_req);
77 kzfree(aead_req);
78
79 return err;
80}
81
82struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
83 size_t key_len)
84{
85 struct crypto_aead *tfm;
86 int err;
87
88 tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
89 if (IS_ERR(tfm))
90 return tfm;
91
92 err = crypto_aead_setkey(tfm, key, key_len);
93 if (err)
94 goto free_aead;
95 err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN);
96 if (err)
97 goto free_aead;
98
99 return tfm;
100
101free_aead:
102 crypto_free_aead(tfm);
103 return ERR_PTR(err);
104}
105
106void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
107{
108 crypto_free_aead(tfm);
109}
diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h
index 55aed5352494..d2b096033009 100644
--- a/net/mac80211/aes_gcm.h
+++ b/net/mac80211/aes_gcm.h
@@ -9,16 +9,38 @@
9#ifndef AES_GCM_H 9#ifndef AES_GCM_H
10#define AES_GCM_H 10#define AES_GCM_H
11 11
12#include <linux/crypto.h> 12#include "aead_api.h"
13 13
14#define GCM_AAD_LEN 32 14#define GCM_AAD_LEN 32
15 15
16int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, 16static inline int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm,
17 u8 *data, size_t data_len, u8 *mic); 17 u8 *j_0, u8 *aad, u8 *data,
18int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, 18 size_t data_len, u8 *mic)
19 u8 *data, size_t data_len, u8 *mic); 19{
20struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], 20 return aead_encrypt(tfm, j_0, aad + 2,
21 size_t key_len); 21 be16_to_cpup((__be16 *)aad),
22void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm); 22 data, data_len, mic);
23}
24
25static inline int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm,
26 u8 *j_0, u8 *aad, u8 *data,
27 size_t data_len, u8 *mic)
28{
29 return aead_decrypt(tfm, j_0, aad + 2,
30 be16_to_cpup((__be16 *)aad),
31 data, data_len, mic);
32}
33
34static inline struct crypto_aead *
35ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], size_t key_len)
36{
37 return aead_key_setup_encrypt("gcm(aes)", key,
38 key_len, IEEE80211_GCMP_MIC_LEN);
39}
40
41static inline void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
42{
43 return aead_key_free(tfm);
44}
23 45
24#endif /* AES_GCM_H */ 46#endif /* AES_GCM_H */
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 2849a1fc41c5..d444752dbf40 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -151,21 +151,17 @@ EXPORT_SYMBOL(ieee80211_stop_rx_ba_session);
151 * After accepting the AddBA Request we activated a timer, 151 * After accepting the AddBA Request we activated a timer,
152 * resetting it after each frame that arrives from the originator. 152 * resetting it after each frame that arrives from the originator.
153 */ 153 */
154static void sta_rx_agg_session_timer_expired(unsigned long data) 154static void sta_rx_agg_session_timer_expired(struct timer_list *t)
155{ 155{
156 /* not an elegant detour, but there is no choice as the timer passes 156 struct tid_ampdu_rx *tid_rx_timer =
157 * only one argument, and various sta_info are needed here, so init 157 from_timer(tid_rx_timer, t, session_timer);
158 * flow in sta_info_create gives the TID as data, while the timer_to_id 158 struct sta_info *sta = tid_rx_timer->sta;
159 * array gives the sta through container_of */ 159 u8 tid = tid_rx_timer->tid;
160 u8 *ptid = (u8 *)data;
161 u8 *timer_to_id = ptid - *ptid;
162 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
163 timer_to_tid[0]);
164 struct tid_ampdu_rx *tid_rx; 160 struct tid_ampdu_rx *tid_rx;
165 unsigned long timeout; 161 unsigned long timeout;
166 162
167 rcu_read_lock(); 163 rcu_read_lock();
168 tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[*ptid]); 164 tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
169 if (!tid_rx) { 165 if (!tid_rx) {
170 rcu_read_unlock(); 166 rcu_read_unlock();
171 return; 167 return;
@@ -180,21 +176,18 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
180 rcu_read_unlock(); 176 rcu_read_unlock();
181 177
182 ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n", 178 ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n",
183 sta->sta.addr, (u16)*ptid); 179 sta->sta.addr, tid);
184 180
185 set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); 181 set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired);
186 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); 182 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work);
187} 183}
188 184
189static void sta_rx_agg_reorder_timer_expired(unsigned long data) 185static void sta_rx_agg_reorder_timer_expired(struct timer_list *t)
190{ 186{
191 u8 *ptid = (u8 *)data; 187 struct tid_ampdu_rx *tid_rx = from_timer(tid_rx, t, reorder_timer);
192 u8 *timer_to_id = ptid - *ptid;
193 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
194 timer_to_tid[0]);
195 188
196 rcu_read_lock(); 189 rcu_read_lock();
197 ieee80211_release_reorder_timeout(sta, *ptid); 190 ieee80211_release_reorder_timeout(tid_rx->sta, tid_rx->tid);
198 rcu_read_unlock(); 191 rcu_read_unlock();
199} 192}
200 193
@@ -356,14 +349,12 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
356 spin_lock_init(&tid_agg_rx->reorder_lock); 349 spin_lock_init(&tid_agg_rx->reorder_lock);
357 350
358 /* rx timer */ 351 /* rx timer */
359 setup_deferrable_timer(&tid_agg_rx->session_timer, 352 timer_setup(&tid_agg_rx->session_timer,
360 sta_rx_agg_session_timer_expired, 353 sta_rx_agg_session_timer_expired, TIMER_DEFERRABLE);
361 (unsigned long)&sta->timer_to_tid[tid]);
362 354
363 /* rx reorder timer */ 355 /* rx reorder timer */
364 setup_timer(&tid_agg_rx->reorder_timer, 356 timer_setup(&tid_agg_rx->reorder_timer,
365 sta_rx_agg_reorder_timer_expired, 357 sta_rx_agg_reorder_timer_expired, 0);
366 (unsigned long)&sta->timer_to_tid[tid]);
367 358
368 /* prepare reordering buffer */ 359 /* prepare reordering buffer */
369 tid_agg_rx->reorder_buf = 360 tid_agg_rx->reorder_buf =
@@ -399,6 +390,8 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
399 tid_agg_rx->auto_seq = auto_seq; 390 tid_agg_rx->auto_seq = auto_seq;
400 tid_agg_rx->started = false; 391 tid_agg_rx->started = false;
401 tid_agg_rx->reorder_buf_filtered = 0; 392 tid_agg_rx->reorder_buf_filtered = 0;
393 tid_agg_rx->tid = tid;
394 tid_agg_rx->sta = sta;
402 status = WLAN_STATUS_SUCCESS; 395 status = WLAN_STATUS_SUCCESS;
403 396
404 /* activate it for RX */ 397 /* activate it for RX */
@@ -459,7 +452,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
459} 452}
460 453
461void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, 454void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
462 const u8 *addr, unsigned int bit) 455 const u8 *addr, unsigned int tid)
463{ 456{
464 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); 457 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
465 struct ieee80211_local *local = sdata->local; 458 struct ieee80211_local *local = sdata->local;
@@ -470,7 +463,7 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
470 if (!sta) 463 if (!sta)
471 goto unlock; 464 goto unlock;
472 465
473 set_bit(bit, sta->ampdu_mlme.tid_rx_manage_offl); 466 set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl);
474 ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); 467 ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
475 unlock: 468 unlock:
476 rcu_read_unlock(); 469 rcu_read_unlock();
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index bef516ec47f9..5f8ab5be369f 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -330,6 +330,11 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
330 330
331 spin_lock_bh(&sta->lock); 331 spin_lock_bh(&sta->lock);
332 332
333 /* free struct pending for start, if present */
334 tid_tx = sta->ampdu_mlme.tid_start_tx[tid];
335 kfree(tid_tx);
336 sta->ampdu_mlme.tid_start_tx[tid] = NULL;
337
333 tid_tx = rcu_dereference_protected_tid_tx(sta, tid); 338 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
334 if (!tid_tx) { 339 if (!tid_tx) {
335 spin_unlock_bh(&sta->lock); 340 spin_unlock_bh(&sta->lock);
@@ -422,15 +427,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
422 * add Block Ack response will arrive from the recipient. 427 * add Block Ack response will arrive from the recipient.
423 * If this timer expires sta_addba_resp_timer_expired will be executed. 428 * If this timer expires sta_addba_resp_timer_expired will be executed.
424 */ 429 */
425static void sta_addba_resp_timer_expired(unsigned long data) 430static void sta_addba_resp_timer_expired(struct timer_list *t)
426{ 431{
427 /* not an elegant detour, but there is no choice as the timer passes 432 struct tid_ampdu_tx *tid_tx_timer =
428 * only one argument, and both sta_info and TID are needed, so init 433 from_timer(tid_tx_timer, t, addba_resp_timer);
429 * flow in sta_info_create gives the TID as data, while the timer_to_id 434 struct sta_info *sta = tid_tx_timer->sta;
430 * array gives the sta through container_of */ 435 u8 tid = tid_tx_timer->tid;
431 u16 tid = *(u8 *)data;
432 struct sta_info *sta = container_of((void *)data,
433 struct sta_info, timer_to_tid[tid]);
434 struct tid_ampdu_tx *tid_tx; 436 struct tid_ampdu_tx *tid_tx;
435 437
436 /* check if the TID waits for addBA response */ 438 /* check if the TID waits for addBA response */
@@ -525,21 +527,17 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
525 * After accepting the AddBA Response we activated a timer, 527 * After accepting the AddBA Response we activated a timer,
526 * resetting it after each frame that we send. 528 * resetting it after each frame that we send.
527 */ 529 */
528static void sta_tx_agg_session_timer_expired(unsigned long data) 530static void sta_tx_agg_session_timer_expired(struct timer_list *t)
529{ 531{
530 /* not an elegant detour, but there is no choice as the timer passes 532 struct tid_ampdu_tx *tid_tx_timer =
531 * only one argument, and various sta_info are needed here, so init 533 from_timer(tid_tx_timer, t, session_timer);
532 * flow in sta_info_create gives the TID as data, while the timer_to_id 534 struct sta_info *sta = tid_tx_timer->sta;
533 * array gives the sta through container_of */ 535 u8 tid = tid_tx_timer->tid;
534 u8 *ptid = (u8 *)data;
535 u8 *timer_to_id = ptid - *ptid;
536 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
537 timer_to_tid[0]);
538 struct tid_ampdu_tx *tid_tx; 536 struct tid_ampdu_tx *tid_tx;
539 unsigned long timeout; 537 unsigned long timeout;
540 538
541 rcu_read_lock(); 539 rcu_read_lock();
542 tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[*ptid]); 540 tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
543 if (!tid_tx || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { 541 if (!tid_tx || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
544 rcu_read_unlock(); 542 rcu_read_unlock();
545 return; 543 return;
@@ -555,9 +553,9 @@ static void sta_tx_agg_session_timer_expired(unsigned long data)
555 rcu_read_unlock(); 553 rcu_read_unlock();
556 554
557 ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n", 555 ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n",
558 sta->sta.addr, (u16)*ptid); 556 sta->sta.addr, tid);
559 557
560 ieee80211_stop_tx_ba_session(&sta->sta, *ptid); 558 ieee80211_stop_tx_ba_session(&sta->sta, tid);
561} 559}
562 560
563int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, 561int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
@@ -670,16 +668,15 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
670 __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); 668 __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
671 669
672 tid_tx->timeout = timeout; 670 tid_tx->timeout = timeout;
671 tid_tx->sta = sta;
672 tid_tx->tid = tid;
673 673
674 /* response timer */ 674 /* response timer */
675 setup_timer(&tid_tx->addba_resp_timer, 675 timer_setup(&tid_tx->addba_resp_timer, sta_addba_resp_timer_expired, 0);
676 sta_addba_resp_timer_expired,
677 (unsigned long)&sta->timer_to_tid[tid]);
678 676
679 /* tx timer */ 677 /* tx timer */
680 setup_deferrable_timer(&tid_tx->session_timer, 678 timer_setup(&tid_tx->session_timer,
681 sta_tx_agg_session_timer_expired, 679 sta_tx_agg_session_timer_expired, TIMER_DEFERRABLE);
682 (unsigned long)&sta->timer_to_tid[tid]);
683 680
684 /* assign a dialog token */ 681 /* assign a dialog token */
685 sta->ampdu_mlme.dialog_token_allocator++; 682 sta->ampdu_mlme.dialog_token_allocator++;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index d6d0b4201e40..41f5e48f8021 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -290,13 +290,15 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
290{ 290{
291 int i; 291 int i;
292 292
293 mutex_lock(&sta->ampdu_mlme.mtx);
293 for (i = 0; i < IEEE80211_NUM_TIDS; i++) { 294 for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
294 __ieee80211_stop_tx_ba_session(sta, i, reason); 295 ___ieee80211_stop_tx_ba_session(sta, i, reason);
295 __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, 296 ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
296 WLAN_REASON_QSTA_LEAVE_QBSS, 297 WLAN_REASON_QSTA_LEAVE_QBSS,
297 reason != AGG_STOP_DESTROY_STA && 298 reason != AGG_STOP_DESTROY_STA &&
298 reason != AGG_STOP_PEER_REQUEST); 299 reason != AGG_STOP_PEER_REQUEST);
299 } 300 }
301 mutex_unlock(&sta->ampdu_mlme.mtx);
300 302
301 /* stopping might queue the work again - so cancel only afterwards */ 303 /* stopping might queue the work again - so cancel only afterwards */
302 cancel_work_sync(&sta->ampdu_mlme.work); 304 cancel_work_sync(&sta->ampdu_mlme.work);
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index e9c6aa3ed05b..db07e0de9a03 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1711,10 +1711,10 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata)
1711 sdata_unlock(sdata); 1711 sdata_unlock(sdata);
1712} 1712}
1713 1713
1714static void ieee80211_ibss_timer(unsigned long data) 1714static void ieee80211_ibss_timer(struct timer_list *t)
1715{ 1715{
1716 struct ieee80211_sub_if_data *sdata = 1716 struct ieee80211_sub_if_data *sdata =
1717 (struct ieee80211_sub_if_data *) data; 1717 from_timer(sdata, t, u.ibss.timer);
1718 1718
1719 ieee80211_queue_work(&sdata->local->hw, &sdata->work); 1719 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
1720} 1720}
@@ -1723,8 +1723,7 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
1723{ 1723{
1724 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 1724 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
1725 1725
1726 setup_timer(&ifibss->timer, ieee80211_ibss_timer, 1726 timer_setup(&ifibss->timer, ieee80211_ibss_timer, 0);
1727 (unsigned long) sdata);
1728 INIT_LIST_HEAD(&ifibss->incomplete_stations); 1727 INIT_LIST_HEAD(&ifibss->incomplete_stations);
1729 spin_lock_init(&ifibss->incomplete_lock); 1728 spin_lock_init(&ifibss->incomplete_lock);
1730 INIT_WORK(&ifibss->csa_connection_drop_work, 1729 INIT_WORK(&ifibss->csa_connection_drop_work,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9675814f64db..885d00b41911 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1057,6 +1057,7 @@ struct tpt_led_trigger {
1057 const struct ieee80211_tpt_blink *blink_table; 1057 const struct ieee80211_tpt_blink *blink_table;
1058 unsigned int blink_table_len; 1058 unsigned int blink_table_len;
1059 struct timer_list timer; 1059 struct timer_list timer;
1060 struct ieee80211_local *local;
1060 unsigned long prev_traffic; 1061 unsigned long prev_traffic;
1061 unsigned long tx_bytes, rx_bytes; 1062 unsigned long tx_bytes, rx_bytes;
1062 unsigned int active, want; 1063 unsigned int active, want;
@@ -1932,7 +1933,7 @@ static inline int ieee80211_ac_from_tid(int tid)
1932 1933
1933void ieee80211_dynamic_ps_enable_work(struct work_struct *work); 1934void ieee80211_dynamic_ps_enable_work(struct work_struct *work);
1934void ieee80211_dynamic_ps_disable_work(struct work_struct *work); 1935void ieee80211_dynamic_ps_disable_work(struct work_struct *work);
1935void ieee80211_dynamic_ps_timer(unsigned long data); 1936void ieee80211_dynamic_ps_timer(struct timer_list *t);
1936void ieee80211_send_nullfunc(struct ieee80211_local *local, 1937void ieee80211_send_nullfunc(struct ieee80211_local *local,
1937 struct ieee80211_sub_if_data *sdata, 1938 struct ieee80211_sub_if_data *sdata,
1938 bool powersave); 1939 bool powersave);
@@ -2009,6 +2010,8 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
2009 struct txq_info *txq, int tid); 2010 struct txq_info *txq, int tid);
2010void ieee80211_txq_purge(struct ieee80211_local *local, 2011void ieee80211_txq_purge(struct ieee80211_local *local,
2011 struct txq_info *txqi); 2012 struct txq_info *txqi);
2013void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
2014 struct ieee80211_sub_if_data *sdata);
2012void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, 2015void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
2013 u16 transaction, u16 auth_alg, u16 status, 2016 u16 transaction, u16 auth_alg, u16 status,
2014 const u8 *extra, size_t extra_len, const u8 *bssid, 2017 const u8 *extra, size_t extra_len, const u8 *bssid,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f75029abf728..13b16f90e1cf 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -793,9 +793,7 @@ static int ieee80211_open(struct net_device *dev)
793static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, 793static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
794 bool going_down) 794 bool going_down)
795{ 795{
796 struct ieee80211_sub_if_data *txq_sdata = sdata;
797 struct ieee80211_local *local = sdata->local; 796 struct ieee80211_local *local = sdata->local;
798 struct fq *fq = &local->fq;
799 unsigned long flags; 797 unsigned long flags;
800 struct sk_buff *skb, *tmp; 798 struct sk_buff *skb, *tmp;
801 u32 hw_reconf_flags = 0; 799 u32 hw_reconf_flags = 0;
@@ -939,9 +937,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
939 937
940 switch (sdata->vif.type) { 938 switch (sdata->vif.type) {
941 case NL80211_IFTYPE_AP_VLAN: 939 case NL80211_IFTYPE_AP_VLAN:
942 txq_sdata = container_of(sdata->bss,
943 struct ieee80211_sub_if_data, u.ap);
944
945 mutex_lock(&local->mtx); 940 mutex_lock(&local->mtx);
946 list_del(&sdata->u.vlan.list); 941 list_del(&sdata->u.vlan.list);
947 mutex_unlock(&local->mtx); 942 mutex_unlock(&local->mtx);
@@ -998,8 +993,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
998 skb_queue_purge(&sdata->skb_queue); 993 skb_queue_purge(&sdata->skb_queue);
999 } 994 }
1000 995
1001 sdata->bss = NULL;
1002
1003 spin_lock_irqsave(&local->queue_stop_reason_lock, flags); 996 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
1004 for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { 997 for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
1005 skb_queue_walk_safe(&local->pending[i], skb, tmp) { 998 skb_queue_walk_safe(&local->pending[i], skb, tmp) {
@@ -1012,22 +1005,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
1012 } 1005 }
1013 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 1006 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
1014 1007
1015 if (txq_sdata->vif.txq) { 1008 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
1016 struct txq_info *txqi = to_txq_info(txq_sdata->vif.txq); 1009 ieee80211_txq_remove_vlan(local, sdata);
1017
1018 /*
1019 * FIXME FIXME
1020 *
1021 * We really shouldn't purge the *entire* txqi since that
1022 * contains frames for the other AP_VLANs (and possibly
1023 * the AP itself) as well, but there's no API in FQ now
1024 * to be able to filter.
1025 */
1026 1010
1027 spin_lock_bh(&fq->lock); 1011 sdata->bss = NULL;
1028 ieee80211_txq_purge(local, txqi);
1029 spin_unlock_bh(&fq->lock);
1030 }
1031 1012
1032 if (local->open_count == 0) 1013 if (local->open_count == 0)
1033 ieee80211_clear_tx_pending(local); 1014 ieee80211_clear_tx_pending(local);
@@ -1772,7 +1753,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
1772 sizeof(void *)); 1753 sizeof(void *));
1773 int txq_size = 0; 1754 int txq_size = 0;
1774 1755
1775 if (local->ops->wake_tx_queue) 1756 if (local->ops->wake_tx_queue &&
1757 type != NL80211_IFTYPE_AP_VLAN &&
1758 type != NL80211_IFTYPE_MONITOR)
1776 txq_size += sizeof(struct txq_info) + 1759 txq_size += sizeof(struct txq_info) +
1777 local->hw.txq_data_size; 1760 local->hw.txq_data_size;
1778 1761
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index 0505845b7ab8..ba0b507ea691 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -248,10 +248,10 @@ static unsigned long tpt_trig_traffic(struct ieee80211_local *local,
248 return DIV_ROUND_UP(delta, 1024 / 8); 248 return DIV_ROUND_UP(delta, 1024 / 8);
249} 249}
250 250
251static void tpt_trig_timer(unsigned long data) 251static void tpt_trig_timer(struct timer_list *t)
252{ 252{
253 struct ieee80211_local *local = (void *)data; 253 struct tpt_led_trigger *tpt_trig = from_timer(tpt_trig, t, timer);
254 struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; 254 struct ieee80211_local *local = tpt_trig->local;
255 struct led_classdev *led_cdev; 255 struct led_classdev *led_cdev;
256 unsigned long on, off, tpt; 256 unsigned long on, off, tpt;
257 int i; 257 int i;
@@ -306,8 +306,9 @@ __ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
306 tpt_trig->blink_table = blink_table; 306 tpt_trig->blink_table = blink_table;
307 tpt_trig->blink_table_len = blink_table_len; 307 tpt_trig->blink_table_len = blink_table_len;
308 tpt_trig->want = flags; 308 tpt_trig->want = flags;
309 tpt_trig->local = local;
309 310
310 setup_timer(&tpt_trig->timer, tpt_trig_timer, (unsigned long)local); 311 timer_setup(&tpt_trig->timer, tpt_trig_timer, 0);
311 312
312 local->tpt_led_trigger = tpt_trig; 313 local->tpt_led_trigger = tpt_trig;
313 314
@@ -326,7 +327,7 @@ static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local)
326 tpt_trig_traffic(local, tpt_trig); 327 tpt_trig_traffic(local, tpt_trig);
327 tpt_trig->running = true; 328 tpt_trig->running = true;
328 329
329 tpt_trig_timer((unsigned long)local); 330 tpt_trig_timer(&tpt_trig->timer);
330 mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ)); 331 mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ));
331} 332}
332 333
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 8aa1f5b6a051..e054a2fd8d38 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -633,8 +633,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
633 ieee80211_dynamic_ps_enable_work); 633 ieee80211_dynamic_ps_enable_work);
634 INIT_WORK(&local->dynamic_ps_disable_work, 634 INIT_WORK(&local->dynamic_ps_disable_work,
635 ieee80211_dynamic_ps_disable_work); 635 ieee80211_dynamic_ps_disable_work);
636 setup_timer(&local->dynamic_ps_timer, 636 timer_setup(&local->dynamic_ps_timer, ieee80211_dynamic_ps_timer, 0);
637 ieee80211_dynamic_ps_timer, (unsigned long) local);
638 637
639 INIT_WORK(&local->sched_scan_stopped_work, 638 INIT_WORK(&local->sched_scan_stopped_work,
640 ieee80211_sched_scan_stopped_work); 639 ieee80211_sched_scan_stopped_work);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index a550c707cd8a..5e27364e10ac 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -37,9 +37,10 @@ void ieee80211s_stop(void)
37 kmem_cache_destroy(rm_cache); 37 kmem_cache_destroy(rm_cache);
38} 38}
39 39
40static void ieee80211_mesh_housekeeping_timer(unsigned long data) 40static void ieee80211_mesh_housekeeping_timer(struct timer_list *t)
41{ 41{
42 struct ieee80211_sub_if_data *sdata = (void *) data; 42 struct ieee80211_sub_if_data *sdata =
43 from_timer(sdata, t, u.mesh.housekeeping_timer);
43 struct ieee80211_local *local = sdata->local; 44 struct ieee80211_local *local = sdata->local;
44 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 45 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
45 46
@@ -528,18 +529,18 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
528 return 0; 529 return 0;
529} 530}
530 531
531static void ieee80211_mesh_path_timer(unsigned long data) 532static void ieee80211_mesh_path_timer(struct timer_list *t)
532{ 533{
533 struct ieee80211_sub_if_data *sdata = 534 struct ieee80211_sub_if_data *sdata =
534 (struct ieee80211_sub_if_data *) data; 535 from_timer(sdata, t, u.mesh.mesh_path_timer);
535 536
536 ieee80211_queue_work(&sdata->local->hw, &sdata->work); 537 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
537} 538}
538 539
539static void ieee80211_mesh_path_root_timer(unsigned long data) 540static void ieee80211_mesh_path_root_timer(struct timer_list *t)
540{ 541{
541 struct ieee80211_sub_if_data *sdata = 542 struct ieee80211_sub_if_data *sdata =
542 (struct ieee80211_sub_if_data *) data; 543 from_timer(sdata, t, u.mesh.mesh_path_root_timer);
543 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 544 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
544 545
545 set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); 546 set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags);
@@ -675,8 +676,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
675 enum nl80211_band band; 676 enum nl80211_band band;
676 u8 *pos; 677 u8 *pos;
677 struct ieee80211_sub_if_data *sdata; 678 struct ieee80211_sub_if_data *sdata;
678 int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + 679 int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon);
679 sizeof(mgmt->u.beacon);
680 680
681 sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); 681 sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
682 rcu_read_lock(); 682 rcu_read_lock();
@@ -1443,9 +1443,8 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
1443 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 1443 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
1444 static u8 zero_addr[ETH_ALEN] = {}; 1444 static u8 zero_addr[ETH_ALEN] = {};
1445 1445
1446 setup_timer(&ifmsh->housekeeping_timer, 1446 timer_setup(&ifmsh->housekeeping_timer,
1447 ieee80211_mesh_housekeeping_timer, 1447 ieee80211_mesh_housekeeping_timer, 0);
1448 (unsigned long) sdata);
1449 1448
1450 ifmsh->accepting_plinks = true; 1449 ifmsh->accepting_plinks = true;
1451 atomic_set(&ifmsh->mpaths, 0); 1450 atomic_set(&ifmsh->mpaths, 0);
@@ -1459,12 +1458,9 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
1459 1458
1460 mesh_pathtbl_init(sdata); 1459 mesh_pathtbl_init(sdata);
1461 1460
1462 setup_timer(&ifmsh->mesh_path_timer, 1461 timer_setup(&ifmsh->mesh_path_timer, ieee80211_mesh_path_timer, 0);
1463 ieee80211_mesh_path_timer, 1462 timer_setup(&ifmsh->mesh_path_root_timer,
1464 (unsigned long) sdata); 1463 ieee80211_mesh_path_root_timer, 0);
1465 setup_timer(&ifmsh->mesh_path_root_timer,
1466 ieee80211_mesh_path_root_timer,
1467 (unsigned long) sdata);
1468 INIT_LIST_HEAD(&ifmsh->preq_queue.list); 1464 INIT_LIST_HEAD(&ifmsh->preq_queue.list);
1469 skb_queue_head_init(&ifmsh->ps.bc_buf); 1465 skb_queue_head_init(&ifmsh->ps.bc_buf);
1470 spin_lock_init(&ifmsh->mesh_preq_queue_lock); 1466 spin_lock_init(&ifmsh->mesh_preq_queue_lock);
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 7e5f271e3c30..ee56f18cad3f 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -275,6 +275,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
275 u8 *hw_addr, struct ieee802_11_elems *ie); 275 u8 *hw_addr, struct ieee802_11_elems *ie);
276bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); 276bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
277u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); 277u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
278void mesh_plink_timer(struct timer_list *t);
278void mesh_plink_broken(struct sta_info *sta); 279void mesh_plink_broken(struct sta_info *sta);
279u32 mesh_plink_deactivate(struct sta_info *sta); 280u32 mesh_plink_deactivate(struct sta_info *sta);
280u32 mesh_plink_open(struct sta_info *sta); 281u32 mesh_plink_open(struct sta_info *sta);
@@ -295,7 +296,7 @@ void mesh_path_tx_pending(struct mesh_path *mpath);
295int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata); 296int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata);
296void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata); 297void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata);
297int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr); 298int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr);
298void mesh_path_timer(unsigned long data); 299void mesh_path_timer(struct timer_list *t);
299void mesh_path_flush_by_nexthop(struct sta_info *sta); 300void mesh_path_flush_by_nexthop(struct sta_info *sta);
300void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, 301void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
301 struct sk_buff *skb); 302 struct sk_buff *skb);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index d8bbd0d2225a..4f7826d7b47c 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -111,8 +111,8 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
111 struct sk_buff *skb; 111 struct sk_buff *skb;
112 struct ieee80211_mgmt *mgmt; 112 struct ieee80211_mgmt *mgmt;
113 u8 *pos, ie_len; 113 u8 *pos, ie_len;
114 int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + 114 int hdr_len = offsetofend(struct ieee80211_mgmt,
115 sizeof(mgmt->u.action.u.mesh_action); 115 u.action.u.mesh_action);
116 116
117 skb = dev_alloc_skb(local->tx_headroom + 117 skb = dev_alloc_skb(local->tx_headroom +
118 hdr_len + 118 hdr_len +
@@ -242,8 +242,8 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
242 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 242 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
243 struct ieee80211_mgmt *mgmt; 243 struct ieee80211_mgmt *mgmt;
244 u8 *pos, ie_len; 244 u8 *pos, ie_len;
245 int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + 245 int hdr_len = offsetofend(struct ieee80211_mgmt,
246 sizeof(mgmt->u.action.u.mesh_action); 246 u.action.u.mesh_action);
247 247
248 if (time_before(jiffies, ifmsh->next_perr)) 248 if (time_before(jiffies, ifmsh->next_perr))
249 return -EAGAIN; 249 return -EAGAIN;
@@ -1194,9 +1194,9 @@ endlookup:
1194 return err; 1194 return err;
1195} 1195}
1196 1196
1197void mesh_path_timer(unsigned long data) 1197void mesh_path_timer(struct timer_list *t)
1198{ 1198{
1199 struct mesh_path *mpath = (void *) data; 1199 struct mesh_path *mpath = from_timer(mpath, t, timer);
1200 struct ieee80211_sub_if_data *sdata = mpath->sdata; 1200 struct ieee80211_sub_if_data *sdata = mpath->sdata;
1201 int ret; 1201 int ret;
1202 1202
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 97269caafecd..86c8dfef56a4 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -399,8 +399,7 @@ struct mesh_path *mesh_path_new(struct ieee80211_sub_if_data *sdata,
399 skb_queue_head_init(&new_mpath->frame_queue); 399 skb_queue_head_init(&new_mpath->frame_queue);
400 new_mpath->exp_time = jiffies; 400 new_mpath->exp_time = jiffies;
401 spin_lock_init(&new_mpath->state_lock); 401 spin_lock_init(&new_mpath->state_lock);
402 setup_timer(&new_mpath->timer, mesh_path_timer, 402 timer_setup(&new_mpath->timer, mesh_path_timer, 0);
403 (unsigned long) new_mpath);
404 403
405 return new_mpath; 404 return new_mpath;
406} 405}
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index f69c6c38ca43..e2d00cce3c17 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -220,8 +220,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
220 bool include_plid = false; 220 bool include_plid = false;
221 u16 peering_proto = 0; 221 u16 peering_proto = 0;
222 u8 *pos, ie_len = 4; 222 u8 *pos, ie_len = 4;
223 int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) + 223 int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot);
224 sizeof(mgmt->u.action.u.self_prot);
225 int err = -ENOMEM; 224 int err = -ENOMEM;
226 225
227 skb = dev_alloc_skb(local->tx_headroom + 226 skb = dev_alloc_skb(local->tx_headroom +
@@ -604,8 +603,9 @@ out:
604 ieee80211_mbss_info_change_notify(sdata, changed); 603 ieee80211_mbss_info_change_notify(sdata, changed);
605} 604}
606 605
607static void mesh_plink_timer(unsigned long data) 606void mesh_plink_timer(struct timer_list *t)
608{ 607{
608 struct mesh_sta *mesh = from_timer(mesh, t, plink_timer);
609 struct sta_info *sta; 609 struct sta_info *sta;
610 u16 reason = 0; 610 u16 reason = 0;
611 struct ieee80211_sub_if_data *sdata; 611 struct ieee80211_sub_if_data *sdata;
@@ -617,7 +617,7 @@ static void mesh_plink_timer(unsigned long data)
617 * del_timer_sync() this timer after having made sure 617 * del_timer_sync() this timer after having made sure
618 * it cannot be readded (by deleting the plink.) 618 * it cannot be readded (by deleting the plink.)
619 */ 619 */
620 sta = (struct sta_info *) data; 620 sta = mesh->plink_sta;
621 621
622 if (sta->sdata->local->quiescing) 622 if (sta->sdata->local->quiescing)
623 return; 623 return;
@@ -697,11 +697,8 @@ static void mesh_plink_timer(unsigned long data)
697 697
698static inline void mesh_plink_timer_set(struct sta_info *sta, u32 timeout) 698static inline void mesh_plink_timer_set(struct sta_info *sta, u32 timeout)
699{ 699{
700 sta->mesh->plink_timer.expires = jiffies + msecs_to_jiffies(timeout);
701 sta->mesh->plink_timer.data = (unsigned long) sta;
702 sta->mesh->plink_timer.function = mesh_plink_timer;
703 sta->mesh->plink_timeout = timeout; 700 sta->mesh->plink_timeout = timeout;
704 add_timer(&sta->mesh->plink_timer); 701 mod_timer(&sta->mesh->plink_timer, jiffies + msecs_to_jiffies(timeout));
705} 702}
706 703
707static bool llid_in_use(struct ieee80211_sub_if_data *sdata, 704static bool llid_in_use(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3b8e2709d8de..04460440d731 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -145,7 +145,6 @@ static u32
145ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, 145ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
146 struct ieee80211_supported_band *sband, 146 struct ieee80211_supported_band *sband,
147 struct ieee80211_channel *channel, 147 struct ieee80211_channel *channel,
148 const struct ieee80211_ht_cap *ht_cap,
149 const struct ieee80211_ht_operation *ht_oper, 148 const struct ieee80211_ht_operation *ht_oper,
150 const struct ieee80211_vht_operation *vht_oper, 149 const struct ieee80211_vht_operation *vht_oper,
151 struct cfg80211_chan_def *chandef, bool tracking) 150 struct cfg80211_chan_def *chandef, bool tracking)
@@ -163,20 +162,13 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
163 chandef->center_freq1 = channel->center_freq; 162 chandef->center_freq1 = channel->center_freq;
164 chandef->center_freq2 = 0; 163 chandef->center_freq2 = 0;
165 164
166 if (!ht_cap || !ht_oper || !sta_ht_cap.ht_supported) { 165 if (!ht_oper || !sta_ht_cap.ht_supported) {
167 ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; 166 ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
168 goto out; 167 goto out;
169 } 168 }
170 169
171 chandef->width = NL80211_CHAN_WIDTH_20; 170 chandef->width = NL80211_CHAN_WIDTH_20;
172 171
173 if (!(ht_cap->cap_info &
174 cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) {
175 ret = IEEE80211_STA_DISABLE_40MHZ;
176 vht_chandef = *chandef;
177 goto out;
178 }
179
180 ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, 172 ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
181 channel->band); 173 channel->band);
182 /* check that channel matches the right operating channel */ 174 /* check that channel matches the right operating channel */
@@ -344,7 +336,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
344 336
345 /* calculate new channel (type) based on HT/VHT operation IEs */ 337 /* calculate new channel (type) based on HT/VHT operation IEs */
346 flags = ieee80211_determine_chantype(sdata, sband, chan, 338 flags = ieee80211_determine_chantype(sdata, sband, chan,
347 ht_cap, ht_oper, vht_oper, 339 ht_oper, vht_oper,
348 &chandef, true); 340 &chandef, true);
349 341
350 /* 342 /*
@@ -780,11 +772,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
780 WLAN_EID_SUPPORTED_REGULATORY_CLASSES, 772 WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
781 WLAN_EID_HT_CAPABILITY, 773 WLAN_EID_HT_CAPABILITY,
782 WLAN_EID_BSS_COEX_2040, 774 WLAN_EID_BSS_COEX_2040,
775 /* luckily this is almost always there */
783 WLAN_EID_EXT_CAPABILITY, 776 WLAN_EID_EXT_CAPABILITY,
784 WLAN_EID_QOS_TRAFFIC_CAPA, 777 WLAN_EID_QOS_TRAFFIC_CAPA,
785 WLAN_EID_TIM_BCAST_REQ, 778 WLAN_EID_TIM_BCAST_REQ,
786 WLAN_EID_INTERWORKING, 779 WLAN_EID_INTERWORKING,
787 /* 60GHz doesn't happen right now */ 780 /* 60 GHz (Multi-band, DMG, MMS) can't happen */
788 WLAN_EID_VHT_CAPABILITY, 781 WLAN_EID_VHT_CAPABILITY,
789 WLAN_EID_OPMODE_NOTIF, 782 WLAN_EID_OPMODE_NOTIF,
790 }; 783 };
@@ -811,22 +804,16 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
811 /* if present, add any custom IEs that go before VHT */ 804 /* if present, add any custom IEs that go before VHT */
812 if (assoc_data->ie_len) { 805 if (assoc_data->ie_len) {
813 static const u8 before_vht[] = { 806 static const u8 before_vht[] = {
814 WLAN_EID_SSID, 807 /*
815 WLAN_EID_SUPP_RATES, 808 * no need to list the ones split off before HT
816 WLAN_EID_EXT_SUPP_RATES, 809 * or generated here
817 WLAN_EID_PWR_CAPABILITY, 810 */
818 WLAN_EID_SUPPORTED_CHANNELS,
819 WLAN_EID_RSN,
820 WLAN_EID_QOS_CAPA,
821 WLAN_EID_RRM_ENABLED_CAPABILITIES,
822 WLAN_EID_MOBILITY_DOMAIN,
823 WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
824 WLAN_EID_HT_CAPABILITY,
825 WLAN_EID_BSS_COEX_2040, 811 WLAN_EID_BSS_COEX_2040,
826 WLAN_EID_EXT_CAPABILITY, 812 WLAN_EID_EXT_CAPABILITY,
827 WLAN_EID_QOS_TRAFFIC_CAPA, 813 WLAN_EID_QOS_TRAFFIC_CAPA,
828 WLAN_EID_TIM_BCAST_REQ, 814 WLAN_EID_TIM_BCAST_REQ,
829 WLAN_EID_INTERWORKING, 815 WLAN_EID_INTERWORKING,
816 /* 60 GHz (Multi-band, DMG, MMS) can't happen */
830 }; 817 };
831 818
832 /* RIC already taken above, so no need to handle here anymore */ 819 /* RIC already taken above, so no need to handle here anymore */
@@ -1079,10 +1066,10 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
1079} 1066}
1080EXPORT_SYMBOL(ieee80211_chswitch_done); 1067EXPORT_SYMBOL(ieee80211_chswitch_done);
1081 1068
1082static void ieee80211_chswitch_timer(unsigned long data) 1069static void ieee80211_chswitch_timer(struct timer_list *t)
1083{ 1070{
1084 struct ieee80211_sub_if_data *sdata = 1071 struct ieee80211_sub_if_data *sdata =
1085 (struct ieee80211_sub_if_data *) data; 1072 from_timer(sdata, t, u.mgd.chswitch_timer);
1086 1073
1087 ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work); 1074 ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work);
1088} 1075}
@@ -1590,9 +1577,9 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
1590 } 1577 }
1591} 1578}
1592 1579
1593void ieee80211_dynamic_ps_timer(unsigned long data) 1580void ieee80211_dynamic_ps_timer(struct timer_list *t)
1594{ 1581{
1595 struct ieee80211_local *local = (void *) data; 1582 struct ieee80211_local *local = from_timer(local, t, dynamic_ps_timer);
1596 1583
1597 ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work); 1584 ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work);
1598} 1585}
@@ -3724,10 +3711,10 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
3724 sdata_unlock(sdata); 3711 sdata_unlock(sdata);
3725} 3712}
3726 3713
3727static void ieee80211_sta_timer(unsigned long data) 3714static void ieee80211_sta_timer(struct timer_list *t)
3728{ 3715{
3729 struct ieee80211_sub_if_data *sdata = 3716 struct ieee80211_sub_if_data *sdata =
3730 (struct ieee80211_sub_if_data *) data; 3717 from_timer(sdata, t, u.mgd.timer);
3731 3718
3732 ieee80211_queue_work(&sdata->local->hw, &sdata->work); 3719 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
3733} 3720}
@@ -4004,10 +3991,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
4004 sdata_unlock(sdata); 3991 sdata_unlock(sdata);
4005} 3992}
4006 3993
4007static void ieee80211_sta_bcn_mon_timer(unsigned long data) 3994static void ieee80211_sta_bcn_mon_timer(struct timer_list *t)
4008{ 3995{
4009 struct ieee80211_sub_if_data *sdata = 3996 struct ieee80211_sub_if_data *sdata =
4010 (struct ieee80211_sub_if_data *) data; 3997 from_timer(sdata, t, u.mgd.bcn_mon_timer);
4011 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 3998 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
4012 3999
4013 if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) 4000 if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn)
@@ -4018,10 +4005,10 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data)
4018 &sdata->u.mgd.beacon_connection_loss_work); 4005 &sdata->u.mgd.beacon_connection_loss_work);
4019} 4006}
4020 4007
4021static void ieee80211_sta_conn_mon_timer(unsigned long data) 4008static void ieee80211_sta_conn_mon_timer(struct timer_list *t)
4022{ 4009{
4023 struct ieee80211_sub_if_data *sdata = 4010 struct ieee80211_sub_if_data *sdata =
4024 (struct ieee80211_sub_if_data *) data; 4011 from_timer(sdata, t, u.mgd.conn_mon_timer);
4025 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 4012 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
4026 struct ieee80211_local *local = sdata->local; 4013 struct ieee80211_local *local = sdata->local;
4027 4014
@@ -4152,14 +4139,10 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
4152 INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_mgd_work); 4139 INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_mgd_work);
4153 INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work, 4140 INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work,
4154 ieee80211_tdls_peer_del_work); 4141 ieee80211_tdls_peer_del_work);
4155 setup_timer(&ifmgd->timer, ieee80211_sta_timer, 4142 timer_setup(&ifmgd->timer, ieee80211_sta_timer, 0);
4156 (unsigned long) sdata); 4143 timer_setup(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 0);
4157 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 4144 timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0);
4158 (unsigned long) sdata); 4145 timer_setup(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, 0);
4159 setup_timer(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer,
4160 (unsigned long) sdata);
4161 setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer,
4162 (unsigned long) sdata);
4163 INIT_DELAYED_WORK(&ifmgd->tx_tspec_wk, 4146 INIT_DELAYED_WORK(&ifmgd->tx_tspec_wk,
4164 ieee80211_sta_handle_tspec_ac_params_wk); 4147 ieee80211_sta_handle_tspec_ac_params_wk);
4165 4148
@@ -4317,7 +4300,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
4317 4300
4318 ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, 4301 ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
4319 cbss->channel, 4302 cbss->channel,
4320 ht_cap, ht_oper, vht_oper, 4303 ht_oper, vht_oper,
4321 &chandef, false); 4304 &chandef, false);
4322 4305
4323 sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss), 4306 sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index 88e6ebbbe24f..d351dc1162be 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -150,9 +150,10 @@ void ieee80211_ocb_work(struct ieee80211_sub_if_data *sdata)
150 sdata_unlock(sdata); 150 sdata_unlock(sdata);
151} 151}
152 152
153static void ieee80211_ocb_housekeeping_timer(unsigned long data) 153static void ieee80211_ocb_housekeeping_timer(struct timer_list *t)
154{ 154{
155 struct ieee80211_sub_if_data *sdata = (void *)data; 155 struct ieee80211_sub_if_data *sdata =
156 from_timer(sdata, t, u.ocb.housekeeping_timer);
156 struct ieee80211_local *local = sdata->local; 157 struct ieee80211_local *local = sdata->local;
157 struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; 158 struct ieee80211_if_ocb *ifocb = &sdata->u.ocb;
158 159
@@ -165,9 +166,8 @@ void ieee80211_ocb_setup_sdata(struct ieee80211_sub_if_data *sdata)
165{ 166{
166 struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; 167 struct ieee80211_if_ocb *ifocb = &sdata->u.ocb;
167 168
168 setup_timer(&ifocb->housekeeping_timer, 169 timer_setup(&ifocb->housekeeping_timer,
169 ieee80211_ocb_housekeeping_timer, 170 ieee80211_ocb_housekeeping_timer, 0);
170 (unsigned long)sdata);
171 INIT_LIST_HEAD(&ifocb->incomplete_stations); 171 INIT_LIST_HEAD(&ifocb->incomplete_stations);
172 spin_lock_init(&ifocb->incomplete_lock); 172 spin_lock_init(&ifocb->incomplete_lock);
173} 173}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 47d2ed570470..ef2becaade50 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -7,7 +7,7 @@
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net> 8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2013-2015 Intel Mobile Communications GmbH 9 * Copyright 2013-2015 Intel Mobile Communications GmbH
10 * Copyright 2016 Intel Deutschland GmbH 10 * Copyright 2016-2017 Intel Deutschland GmbH
11 * 11 *
12 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as 13 * it under the terms of the GNU General Public License version 2 as
@@ -183,6 +183,20 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
183 return bss; 183 return bss;
184} 184}
185 185
186static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
187 u32 scan_flags, const u8 *da)
188{
189 if (!sdata)
190 return false;
191 /* accept broadcast for OCE */
192 if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP &&
193 is_broadcast_ether_addr(da))
194 return true;
195 if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
196 return true;
197 return ether_addr_equal(da, sdata->vif.addr);
198}
199
186void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) 200void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
187{ 201{
188 struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); 202 struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
@@ -208,19 +222,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
208 if (ieee80211_is_probe_resp(mgmt->frame_control)) { 222 if (ieee80211_is_probe_resp(mgmt->frame_control)) {
209 struct cfg80211_scan_request *scan_req; 223 struct cfg80211_scan_request *scan_req;
210 struct cfg80211_sched_scan_request *sched_scan_req; 224 struct cfg80211_sched_scan_request *sched_scan_req;
225 u32 scan_req_flags = 0, sched_scan_req_flags = 0;
211 226
212 scan_req = rcu_dereference(local->scan_req); 227 scan_req = rcu_dereference(local->scan_req);
213 sched_scan_req = rcu_dereference(local->sched_scan_req); 228 sched_scan_req = rcu_dereference(local->sched_scan_req);
214 229
215 /* ignore ProbeResp to foreign address unless scanning 230 if (scan_req)
216 * with randomised address 231 scan_req_flags = scan_req->flags;
232
233 if (sched_scan_req)
234 sched_scan_req_flags = sched_scan_req->flags;
235
236 /* ignore ProbeResp to foreign address or non-bcast (OCE)
237 * unless scanning with randomised address
217 */ 238 */
218 if (!(sdata1 && 239 if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags,
219 (ether_addr_equal(mgmt->da, sdata1->vif.addr) || 240 mgmt->da) &&
220 scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)) && 241 !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
221 !(sdata2 && 242 mgmt->da))
222 (ether_addr_equal(mgmt->da, sdata2->vif.addr) ||
223 sched_scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)))
224 return; 243 return;
225 244
226 elements = mgmt->u.probe_resp.variable; 245 elements = mgmt->u.probe_resp.variable;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 214d2ba02877..0c5627f8a104 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -329,10 +329,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
329 sta->mesh = kzalloc(sizeof(*sta->mesh), gfp); 329 sta->mesh = kzalloc(sizeof(*sta->mesh), gfp);
330 if (!sta->mesh) 330 if (!sta->mesh)
331 goto free; 331 goto free;
332 sta->mesh->plink_sta = sta;
332 spin_lock_init(&sta->mesh->plink_lock); 333 spin_lock_init(&sta->mesh->plink_lock);
333 if (ieee80211_vif_is_mesh(&sdata->vif) && 334 if (ieee80211_vif_is_mesh(&sdata->vif) &&
334 !sdata->u.mesh.user_mpm) 335 !sdata->u.mesh.user_mpm)
335 init_timer(&sta->mesh->plink_timer); 336 timer_setup(&sta->mesh->plink_timer, mesh_plink_timer,
337 0);
336 sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; 338 sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
337 } 339 }
338#endif 340#endif
@@ -377,14 +379,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
377 if (sta_prepare_rate_control(local, sta, gfp)) 379 if (sta_prepare_rate_control(local, sta, gfp))
378 goto free_txq; 380 goto free_txq;
379 381
380 for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
381 /*
382 * timer_to_tid must be initialized with identity mapping
383 * to enable session_timer's data differentiation. See
384 * sta_rx_agg_session_timer_expired for usage.
385 */
386 sta->timer_to_tid[i] = i;
387 }
388 for (i = 0; i < IEEE80211_NUM_ACS; i++) { 382 for (i = 0; i < IEEE80211_NUM_ACS; i++) {
389 skb_queue_head_init(&sta->ps_tx_buf[i]); 383 skb_queue_head_init(&sta->ps_tx_buf[i]);
390 skb_queue_head_init(&sta->tx_filtered[i]); 384 skb_queue_head_init(&sta->tx_filtered[i]);
@@ -515,6 +509,31 @@ static int sta_info_insert_drv_state(struct ieee80211_local *local,
515 return err; 509 return err;
516} 510}
517 511
512static void
513ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
514{
515 struct ieee80211_local *local = sdata->local;
516 bool allow_p2p_go_ps = sdata->vif.p2p;
517 struct sta_info *sta;
518
519 rcu_read_lock();
520 list_for_each_entry_rcu(sta, &local->sta_list, list) {
521 if (sdata != sta->sdata ||
522 !test_sta_flag(sta, WLAN_STA_ASSOC))
523 continue;
524 if (!sta->sta.support_p2p_ps) {
525 allow_p2p_go_ps = false;
526 break;
527 }
528 }
529 rcu_read_unlock();
530
531 if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
532 sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
533 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
534 }
535}
536
518/* 537/*
519 * should be called with sta_mtx locked 538 * should be called with sta_mtx locked
520 * this function replaces the mutex lock 539 * this function replaces the mutex lock
@@ -561,6 +580,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
561 goto out_remove; 580 goto out_remove;
562 581
563 set_sta_flag(sta, WLAN_STA_INSERTED); 582 set_sta_flag(sta, WLAN_STA_INSERTED);
583
584 if (sta->sta_state >= IEEE80211_STA_ASSOC) {
585 ieee80211_recalc_min_chandef(sta->sdata);
586 if (!sta->sta.support_p2p_ps)
587 ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
588 }
589
564 /* accept BA sessions now */ 590 /* accept BA sessions now */
565 clear_sta_flag(sta, WLAN_STA_BLOCK_BA); 591 clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
566 592
@@ -1030,9 +1056,9 @@ int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata,
1030 return ret; 1056 return ret;
1031} 1057}
1032 1058
1033static void sta_info_cleanup(unsigned long data) 1059static void sta_info_cleanup(struct timer_list *t)
1034{ 1060{
1035 struct ieee80211_local *local = (struct ieee80211_local *) data; 1061 struct ieee80211_local *local = from_timer(local, t, sta_cleanup);
1036 struct sta_info *sta; 1062 struct sta_info *sta;
1037 bool timer_needed = false; 1063 bool timer_needed = false;
1038 1064
@@ -1064,8 +1090,7 @@ int sta_info_init(struct ieee80211_local *local)
1064 mutex_init(&local->sta_mtx); 1090 mutex_init(&local->sta_mtx);
1065 INIT_LIST_HEAD(&local->sta_list); 1091 INIT_LIST_HEAD(&local->sta_list);
1066 1092
1067 setup_timer(&local->sta_cleanup, sta_info_cleanup, 1093 timer_setup(&local->sta_cleanup, sta_info_cleanup, 0);
1068 (unsigned long)local);
1069 return 0; 1094 return 0;
1070} 1095}
1071 1096
@@ -1788,31 +1813,6 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
1788} 1813}
1789EXPORT_SYMBOL(ieee80211_sta_set_buffered); 1814EXPORT_SYMBOL(ieee80211_sta_set_buffered);
1790 1815
1791static void
1792ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
1793{
1794 struct ieee80211_local *local = sdata->local;
1795 bool allow_p2p_go_ps = sdata->vif.p2p;
1796 struct sta_info *sta;
1797
1798 rcu_read_lock();
1799 list_for_each_entry_rcu(sta, &local->sta_list, list) {
1800 if (sdata != sta->sdata ||
1801 !test_sta_flag(sta, WLAN_STA_ASSOC))
1802 continue;
1803 if (!sta->sta.support_p2p_ps) {
1804 allow_p2p_go_ps = false;
1805 break;
1806 }
1807 }
1808 rcu_read_unlock();
1809
1810 if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
1811 sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
1812 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
1813 }
1814}
1815
1816int sta_info_move_state(struct sta_info *sta, 1816int sta_info_move_state(struct sta_info *sta,
1817 enum ieee80211_sta_state new_state) 1817 enum ieee80211_sta_state new_state)
1818{ 1818{
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 3acbdfa9f649..cd53619435b6 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -126,6 +126,8 @@ enum ieee80211_agg_stop_reason {
126 AGG_STOP_DESTROY_STA, 126 AGG_STOP_DESTROY_STA,
127}; 127};
128 128
129struct sta_info;
130
129/** 131/**
130 * struct tid_ampdu_tx - TID aggregation information (Tx). 132 * struct tid_ampdu_tx - TID aggregation information (Tx).
131 * 133 *
@@ -133,8 +135,10 @@ enum ieee80211_agg_stop_reason {
133 * @session_timer: check if we keep Tx-ing on the TID (by timeout value) 135 * @session_timer: check if we keep Tx-ing on the TID (by timeout value)
134 * @addba_resp_timer: timer for peer's response to addba request 136 * @addba_resp_timer: timer for peer's response to addba request
135 * @pending: pending frames queue -- use sta's spinlock to protect 137 * @pending: pending frames queue -- use sta's spinlock to protect
138 * @sta: station we are attached to
136 * @dialog_token: dialog token for aggregation session 139 * @dialog_token: dialog token for aggregation session
137 * @timeout: session timeout value to be filled in ADDBA requests 140 * @timeout: session timeout value to be filled in ADDBA requests
141 * @tid: TID number
138 * @state: session state (see above) 142 * @state: session state (see above)
139 * @last_tx: jiffies of last tx activity 143 * @last_tx: jiffies of last tx activity
140 * @stop_initiator: initiator of a session stop 144 * @stop_initiator: initiator of a session stop
@@ -158,6 +162,7 @@ struct tid_ampdu_tx {
158 struct timer_list session_timer; 162 struct timer_list session_timer;
159 struct timer_list addba_resp_timer; 163 struct timer_list addba_resp_timer;
160 struct sk_buff_head pending; 164 struct sk_buff_head pending;
165 struct sta_info *sta;
161 unsigned long state; 166 unsigned long state;
162 unsigned long last_tx; 167 unsigned long last_tx;
163 u16 timeout; 168 u16 timeout;
@@ -169,6 +174,7 @@ struct tid_ampdu_tx {
169 u16 failed_bar_ssn; 174 u16 failed_bar_ssn;
170 bool bar_pending; 175 bool bar_pending;
171 bool amsdu; 176 bool amsdu;
177 u8 tid;
172}; 178};
173 179
174/** 180/**
@@ -181,12 +187,14 @@ struct tid_ampdu_tx {
181 * @reorder_time: jiffies when skb was added 187 * @reorder_time: jiffies when skb was added
182 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value) 188 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
183 * @reorder_timer: releases expired frames from the reorder buffer. 189 * @reorder_timer: releases expired frames from the reorder buffer.
190 * @sta: station we are attached to
184 * @last_rx: jiffies of last rx activity 191 * @last_rx: jiffies of last rx activity
185 * @head_seq_num: head sequence number in reordering buffer. 192 * @head_seq_num: head sequence number in reordering buffer.
186 * @stored_mpdu_num: number of MPDUs in reordering buffer 193 * @stored_mpdu_num: number of MPDUs in reordering buffer
187 * @ssn: Starting Sequence Number expected to be aggregated. 194 * @ssn: Starting Sequence Number expected to be aggregated.
188 * @buf_size: buffer size for incoming A-MPDUs 195 * @buf_size: buffer size for incoming A-MPDUs
189 * @timeout: reset timer value (in TUs). 196 * @timeout: reset timer value (in TUs).
197 * @tid: TID number
190 * @rcu_head: RCU head used for freeing this struct 198 * @rcu_head: RCU head used for freeing this struct
191 * @reorder_lock: serializes access to reorder buffer, see below. 199 * @reorder_lock: serializes access to reorder buffer, see below.
192 * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and 200 * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
@@ -208,6 +216,7 @@ struct tid_ampdu_rx {
208 u64 reorder_buf_filtered; 216 u64 reorder_buf_filtered;
209 struct sk_buff_head *reorder_buf; 217 struct sk_buff_head *reorder_buf;
210 unsigned long *reorder_time; 218 unsigned long *reorder_time;
219 struct sta_info *sta;
211 struct timer_list session_timer; 220 struct timer_list session_timer;
212 struct timer_list reorder_timer; 221 struct timer_list reorder_timer;
213 unsigned long last_rx; 222 unsigned long last_rx;
@@ -216,6 +225,7 @@ struct tid_ampdu_rx {
216 u16 ssn; 225 u16 ssn;
217 u16 buf_size; 226 u16 buf_size;
218 u16 timeout; 227 u16 timeout;
228 u8 tid;
219 u8 auto_seq:1, 229 u8 auto_seq:1,
220 removed:1, 230 removed:1,
221 started:1; 231 started:1;
@@ -344,6 +354,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
344 * @plink_state: peer link state 354 * @plink_state: peer link state
345 * @plink_timeout: timeout of peer link 355 * @plink_timeout: timeout of peer link
346 * @plink_timer: peer link watch timer 356 * @plink_timer: peer link watch timer
357 * @plink_sta: peer link watch timer's sta_info
347 * @t_offset: timing offset relative to this host 358 * @t_offset: timing offset relative to this host
348 * @t_offset_setpoint: reference timing offset of this sta to be used when 359 * @t_offset_setpoint: reference timing offset of this sta to be used when
349 * calculating clockdrift 360 * calculating clockdrift
@@ -356,6 +367,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
356 */ 367 */
357struct mesh_sta { 368struct mesh_sta {
358 struct timer_list plink_timer; 369 struct timer_list plink_timer;
370 struct sta_info *plink_sta;
359 371
360 s64 t_offset; 372 s64 t_offset;
361 s64 t_offset_setpoint; 373 s64 t_offset_setpoint;
@@ -398,7 +410,7 @@ struct ieee80211_sta_rx_stats {
398 u64 msdu[IEEE80211_NUM_TIDS + 1]; 410 u64 msdu[IEEE80211_NUM_TIDS + 1];
399}; 411};
400 412
401/** 413/*
402 * The bandwidth threshold below which the per-station CoDel parameters will be 414 * The bandwidth threshold below which the per-station CoDel parameters will be
403 * scaled to be more lenient (to prevent starvation of slow stations). This 415 * scaled to be more lenient (to prevent starvation of slow stations). This
404 * value will be scaled by the number of active stations when it is being 416 * value will be scaled by the number of active stations when it is being
@@ -445,7 +457,6 @@ struct ieee80211_sta_rx_stats {
445 * plus one for non-QoS frames) 457 * plus one for non-QoS frames)
446 * @tid_seq: per-TID sequence numbers for sending to this STA 458 * @tid_seq: per-TID sequence numbers for sending to this STA
447 * @ampdu_mlme: A-MPDU state machine state 459 * @ampdu_mlme: A-MPDU state machine state
448 * @timer_to_tid: identity mapping to ID timers
449 * @mesh: mesh STA information 460 * @mesh: mesh STA information
450 * @debugfs_dir: debug filesystem directory dentry 461 * @debugfs_dir: debug filesystem directory dentry
451 * @dead: set to true when sta is unlinked 462 * @dead: set to true when sta is unlinked
@@ -552,7 +563,6 @@ struct sta_info {
552 * Aggregation information, locked with lock. 563 * Aggregation information, locked with lock.
553 */ 564 */
554 struct sta_ampdu_mlme ampdu_mlme; 565 struct sta_ampdu_mlme ampdu_mlme;
555 u8 timer_to_tid[IEEE80211_NUM_TIDS];
556 566
557#ifdef CONFIG_MAC80211_DEBUGFS 567#ifdef CONFIG_MAC80211_DEBUGFS
558 struct dentry *debugfs_dir; 568 struct dentry *debugfs_dir;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 94826680cf2b..7b8154474b9e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1396,6 +1396,40 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
1396 fq_flow_get_default_func); 1396 fq_flow_get_default_func);
1397} 1397}
1398 1398
1399static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin,
1400 struct fq_flow *flow, struct sk_buff *skb,
1401 void *data)
1402{
1403 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
1404
1405 return info->control.vif == data;
1406}
1407
1408void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
1409 struct ieee80211_sub_if_data *sdata)
1410{
1411 struct fq *fq = &local->fq;
1412 struct txq_info *txqi;
1413 struct fq_tin *tin;
1414 struct ieee80211_sub_if_data *ap;
1415
1416 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN))
1417 return;
1418
1419 ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap);
1420
1421 if (!ap->vif.txq)
1422 return;
1423
1424 txqi = to_txq_info(ap->vif.txq);
1425 tin = &txqi->tin;
1426
1427 spin_lock_bh(&fq->lock);
1428 fq_tin_filter(fq, tin, fq_vlan_filter_func, &sdata->vif,
1429 fq_skb_free_func);
1430 spin_unlock_bh(&fq->lock);
1431}
1432
1399void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, 1433void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
1400 struct sta_info *sta, 1434 struct sta_info *sta,
1401 struct txq_info *txqi, int tid) 1435 struct txq_info *txqi, int tid)
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 6aef6793d052..d57e5f6bd8b6 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1392,10 +1392,10 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
1392 /* insert custom IEs that go before HT */ 1392 /* insert custom IEs that go before HT */
1393 if (ie && ie_len) { 1393 if (ie && ie_len) {
1394 static const u8 before_ht[] = { 1394 static const u8 before_ht[] = {
1395 WLAN_EID_SSID, 1395 /*
1396 WLAN_EID_SUPP_RATES, 1396 * no need to list the ones split off already
1397 WLAN_EID_REQUEST, 1397 * (or generated here)
1398 WLAN_EID_EXT_SUPP_RATES, 1398 */
1399 WLAN_EID_DS_PARAMS, 1399 WLAN_EID_DS_PARAMS,
1400 WLAN_EID_SUPPORTED_REGULATORY_CLASSES, 1400 WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
1401 }; 1401 };
@@ -1424,20 +1424,17 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
1424 /* insert custom IEs that go before VHT */ 1424 /* insert custom IEs that go before VHT */
1425 if (ie && ie_len) { 1425 if (ie && ie_len) {
1426 static const u8 before_vht[] = { 1426 static const u8 before_vht[] = {
1427 WLAN_EID_SSID, 1427 /*
1428 WLAN_EID_SUPP_RATES, 1428 * no need to list the ones split off already
1429 WLAN_EID_REQUEST, 1429 * (or generated here)
1430 WLAN_EID_EXT_SUPP_RATES, 1430 */
1431 WLAN_EID_DS_PARAMS,
1432 WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
1433 WLAN_EID_HT_CAPABILITY,
1434 WLAN_EID_BSS_COEX_2040, 1431 WLAN_EID_BSS_COEX_2040,
1435 WLAN_EID_EXT_CAPABILITY, 1432 WLAN_EID_EXT_CAPABILITY,
1436 WLAN_EID_SSID_LIST, 1433 WLAN_EID_SSID_LIST,
1437 WLAN_EID_CHANNEL_USAGE, 1434 WLAN_EID_CHANNEL_USAGE,
1438 WLAN_EID_INTERWORKING, 1435 WLAN_EID_INTERWORKING,
1439 WLAN_EID_MESH_ID, 1436 WLAN_EID_MESH_ID,
1440 /* 60 GHz can't happen here right now */ 1437 /* 60 GHz (Multi-band, DMG, MMS) can't happen */
1441 }; 1438 };
1442 noffset = ieee80211_ie_split(ie, ie_len, 1439 noffset = ieee80211_ie_split(ie, ie_len,
1443 before_vht, ARRAY_SIZE(before_vht), 1440 before_vht, ARRAY_SIZE(before_vht),
@@ -2980,8 +2977,8 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
2980 struct ieee80211_mgmt *mgmt; 2977 struct ieee80211_mgmt *mgmt;
2981 struct ieee80211_local *local = sdata->local; 2978 struct ieee80211_local *local = sdata->local;
2982 int freq; 2979 int freq;
2983 int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch) + 2980 int hdr_len = offsetofend(struct ieee80211_mgmt,
2984 sizeof(mgmt->u.action.u.chan_switch); 2981 u.action.u.chan_switch);
2985 u8 *pos; 2982 u8 *pos;
2986 2983
2987 if (sdata->vif.type != NL80211_IFTYPE_ADHOC && 2984 if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 19ec2189d3ac..b9276ac849fa 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -386,6 +386,16 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
386 386
387 bw = ieee80211_sta_cap_rx_bw(sta); 387 bw = ieee80211_sta_cap_rx_bw(sta);
388 bw = min(bw, sta->cur_max_bandwidth); 388 bw = min(bw, sta->cur_max_bandwidth);
389
390 /* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of
391 * IEEE80211-2016 specification makes higher bandwidth operation
392 * possible on the TDLS link if the peers have wider bandwidth
393 * capability.
394 */
395 if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
396 test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW))
397 return bw;
398
389 bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); 399 bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
390 400
391 return bw; 401 return bw;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 0d722ea98a1b..b58722d9de37 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -464,7 +464,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
464 pos += IEEE80211_CCMP_HDR_LEN; 464 pos += IEEE80211_CCMP_HDR_LEN;
465 ccmp_special_blocks(skb, pn, b_0, aad); 465 ccmp_special_blocks(skb, pn, b_0, aad);
466 return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, 466 return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
467 skb_put(skb, mic_len), mic_len); 467 skb_put(skb, mic_len));
468} 468}
469 469
470 470
@@ -543,7 +543,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
543 key->u.ccmp.tfm, b_0, aad, 543 key->u.ccmp.tfm, b_0, aad,
544 skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, 544 skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN,
545 data_len, 545 data_len,
546 skb->data + skb->len - mic_len, mic_len)) 546 skb->data + skb->len - mic_len))
547 return RX_DROP_UNUSABLE; 547 return RX_DROP_UNUSABLE;
548 } 548 }
549 549
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 1e1c9b20bab7..2fb703d70803 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -623,13 +623,18 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
623 u8 iv[16]; 623 u8 iv[16];
624 struct scatterlist src; 624 struct scatterlist src;
625 SKCIPHER_REQUEST_ON_STACK(req, key->tfm0); 625 SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
626 int err; 626 int err, datalen;
627 unsigned char *data;
627 628
628 llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); 629 llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
629 sg_init_one(&src, skb->data, skb->len); 630 /* Compute data payload offset and data length */
631 data = skb_mac_header(skb) + skb->mac_len;
632 datalen = skb_tail_pointer(skb) - data;
633 sg_init_one(&src, data, datalen);
634
630 skcipher_request_set_tfm(req, key->tfm0); 635 skcipher_request_set_tfm(req, key->tfm0);
631 skcipher_request_set_callback(req, 0, NULL, NULL); 636 skcipher_request_set_callback(req, 0, NULL, NULL);
632 skcipher_request_set_crypt(req, &src, &src, skb->len, iv); 637 skcipher_request_set_crypt(req, &src, &src, datalen, iv);
633 err = crypto_skcipher_encrypt(req); 638 err = crypto_skcipher_encrypt(req);
634 skcipher_request_zero(req); 639 skcipher_request_zero(req);
635 return err; 640 return err;
@@ -713,7 +718,8 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
713 if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA) 718 if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA)
714 return -EINVAL; 719 return -EINVAL;
715 720
716 if (!hdr.fc.security_enabled || hdr.sec.level == 0) { 721 if (!hdr.fc.security_enabled ||
722 (hdr.sec.level == IEEE802154_SCF_SECLEVEL_NONE)) {
717 skb_push(skb, hlen); 723 skb_push(skb, hlen);
718 return 0; 724 return 0;
719 } 725 }
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index 5c467ef97311..801ea9098387 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -24,6 +24,7 @@ config NET_MPLS_GSO
24 24
25config MPLS_ROUTING 25config MPLS_ROUTING
26 tristate "MPLS: routing support" 26 tristate "MPLS: routing support"
27 depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n
27 ---help--- 28 ---help---
28 Add support for forwarding of mpls packets. 29 Add support for forwarding of mpls packets.
29 30
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index c5b9ce41d66f..8ca9915befc8 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -16,6 +16,7 @@
16#include <net/arp.h> 16#include <net/arp.h>
17#include <net/ip_fib.h> 17#include <net/ip_fib.h>
18#include <net/netevent.h> 18#include <net/netevent.h>
19#include <net/ip_tunnels.h>
19#include <net/netns/generic.h> 20#include <net/netns/generic.h>
20#if IS_ENABLED(CONFIG_IPV6) 21#if IS_ENABLED(CONFIG_IPV6)
21#include <net/ipv6.h> 22#include <net/ipv6.h>
@@ -39,6 +40,36 @@ static int one = 1;
39static int label_limit = (1 << 20) - 1; 40static int label_limit = (1 << 20) - 1;
40static int ttl_max = 255; 41static int ttl_max = 255;
41 42
43#if IS_ENABLED(CONFIG_NET_IP_TUNNEL)
44static size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e)
45{
46 return sizeof(struct mpls_shim_hdr);
47}
48
49static const struct ip_tunnel_encap_ops mpls_iptun_ops = {
50 .encap_hlen = ipgre_mpls_encap_hlen,
51};
52
53static int ipgre_tunnel_encap_add_mpls_ops(void)
54{
55 return ip_tunnel_encap_add_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
56}
57
58static void ipgre_tunnel_encap_del_mpls_ops(void)
59{
60 ip_tunnel_encap_del_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
61}
62#else
63static int ipgre_tunnel_encap_add_mpls_ops(void)
64{
65 return 0;
66}
67
68static void ipgre_tunnel_encap_del_mpls_ops(void)
69{
70}
71#endif
72
42static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, 73static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
43 struct nlmsghdr *nlh, struct net *net, u32 portid, 74 struct nlmsghdr *nlh, struct net *net, u32 portid,
44 unsigned int nlm_flags); 75 unsigned int nlm_flags);
@@ -2485,6 +2516,10 @@ static int __init mpls_init(void)
2485 0); 2516 0);
2486 rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf, 2517 rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
2487 mpls_netconf_dump_devconf, 0); 2518 mpls_netconf_dump_devconf, 0);
2519 err = ipgre_tunnel_encap_add_mpls_ops();
2520 if (err)
2521 pr_err("Can't add mpls over gre tunnel ops\n");
2522
2488 err = 0; 2523 err = 0;
2489out: 2524out:
2490 return err; 2525 return err;
@@ -2502,6 +2537,7 @@ static void __exit mpls_exit(void)
2502 dev_remove_pack(&mpls_packet_type); 2537 dev_remove_pack(&mpls_packet_type);
2503 unregister_netdevice_notifier(&mpls_dev_notifier); 2538 unregister_netdevice_notifier(&mpls_dev_notifier);
2504 unregister_pernet_subsys(&mpls_net_ops); 2539 unregister_pernet_subsys(&mpls_net_ops);
2540 ipgre_tunnel_encap_del_mpls_ops();
2505} 2541}
2506module_exit(mpls_exit); 2542module_exit(mpls_exit);
2507 2543
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index f135938bf781..67e708e98ccf 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -73,6 +73,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
73 ncm->data[2] = data; 73 ncm->data[2] = data;
74 ncm->data[4] = ntohl(lsc->oem_status); 74 ncm->data[4] = ntohl(lsc->oem_status);
75 75
76 netdev_info(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n",
77 nc->id, data & 0x1 ? "up" : "down");
78
76 chained = !list_empty(&nc->link); 79 chained = !list_empty(&nc->link);
77 state = nc->state; 80 state = nc->state;
78 spin_unlock_irqrestore(&nc->lock, flags); 81 spin_unlock_irqrestore(&nc->lock, flags);
@@ -145,6 +148,8 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
145 ncm = &nc->modes[NCSI_MODE_LINK]; 148 ncm = &nc->modes[NCSI_MODE_LINK];
146 hncdsc = (struct ncsi_aen_hncdsc_pkt *)h; 149 hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
147 ncm->data[3] = ntohl(hncdsc->status); 150 ncm->data[3] = ntohl(hncdsc->status);
151 netdev_info(ndp->ndev.dev, "NCSI: HNCDSC AEN - channel %u state %s\n",
152 nc->id, ncm->data[3] & 0x3 ? "up" : "down");
148 if (!list_empty(&nc->link) || 153 if (!list_empty(&nc->link) ||
149 nc->state != NCSI_CHANNEL_ACTIVE) { 154 nc->state != NCSI_CHANNEL_ACTIVE) {
150 spin_unlock_irqrestore(&nc->lock, flags); 155 spin_unlock_irqrestore(&nc->lock, flags);
@@ -212,10 +217,18 @@ int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
212 } 217 }
213 218
214 ret = ncsi_validate_aen_pkt(h, nah->payload); 219 ret = ncsi_validate_aen_pkt(h, nah->payload);
215 if (ret) 220 if (ret) {
221 netdev_warn(ndp->ndev.dev,
222 "NCSI: 'bad' packet ignored for AEN type 0x%x\n",
223 h->type);
216 goto out; 224 goto out;
225 }
217 226
218 ret = nah->handler(ndp, h); 227 ret = nah->handler(ndp, h);
228 if (ret)
229 netdev_err(ndp->ndev.dev,
230 "NCSI: Handler for AEN type 0x%x returned %d\n",
231 h->type, ret);
219out: 232out:
220 consume_skb(skb); 233 consume_skb(skb);
221 return ret; 234 return ret;
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 28c42b22b748..c989211bbabc 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -38,7 +38,7 @@ static inline int ncsi_filter_size(int table)
38 return sizes[table]; 38 return sizes[table];
39} 39}
40 40
41u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index) 41static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
42{ 42{
43 struct ncsi_channel_filter *ncf; 43 struct ncsi_channel_filter *ncf;
44 int size; 44 int size;
@@ -184,9 +184,9 @@ report:
184 nd->handler(nd); 184 nd->handler(nd);
185} 185}
186 186
187static void ncsi_channel_monitor(unsigned long data) 187static void ncsi_channel_monitor(struct timer_list *t)
188{ 188{
189 struct ncsi_channel *nc = (struct ncsi_channel *)data; 189 struct ncsi_channel *nc = from_timer(nc, t, monitor.timer);
190 struct ncsi_package *np = nc->package; 190 struct ncsi_package *np = nc->package;
191 struct ncsi_dev_priv *ndp = np->ndp; 191 struct ncsi_dev_priv *ndp = np->ndp;
192 struct ncsi_channel_mode *ncm; 192 struct ncsi_channel_mode *ncm;
@@ -229,6 +229,8 @@ static void ncsi_channel_monitor(unsigned long data)
229 case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: 229 case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX:
230 break; 230 break;
231 default: 231 default:
232 netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n",
233 nc->id);
232 if (!(ndp->flags & NCSI_DEV_HWA)) { 234 if (!(ndp->flags & NCSI_DEV_HWA)) {
233 ncsi_report_link(ndp, true); 235 ncsi_report_link(ndp, true);
234 ndp->flags |= NCSI_DEV_RESHUFFLE; 236 ndp->flags |= NCSI_DEV_RESHUFFLE;
@@ -311,8 +313,7 @@ struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id)
311 nc->package = np; 313 nc->package = np;
312 nc->state = NCSI_CHANNEL_INACTIVE; 314 nc->state = NCSI_CHANNEL_INACTIVE;
313 nc->monitor.enabled = false; 315 nc->monitor.enabled = false;
314 setup_timer(&nc->monitor.timer, 316 timer_setup(&nc->monitor.timer, ncsi_channel_monitor, 0);
315 ncsi_channel_monitor, (unsigned long)nc);
316 spin_lock_init(&nc->lock); 317 spin_lock_init(&nc->lock);
317 INIT_LIST_HEAD(&nc->link); 318 INIT_LIST_HEAD(&nc->link);
318 for (index = 0; index < NCSI_CAP_MAX; index++) 319 for (index = 0; index < NCSI_CAP_MAX; index++)
@@ -527,9 +528,9 @@ struct ncsi_dev *ncsi_find_dev(struct net_device *dev)
527 return NULL; 528 return NULL;
528} 529}
529 530
530static void ncsi_request_timeout(unsigned long data) 531static void ncsi_request_timeout(struct timer_list *t)
531{ 532{
532 struct ncsi_request *nr = (struct ncsi_request *)data; 533 struct ncsi_request *nr = from_timer(nr, t, timer);
533 struct ncsi_dev_priv *ndp = nr->ndp; 534 struct ncsi_dev_priv *ndp = nr->ndp;
534 unsigned long flags; 535 unsigned long flags;
535 536
@@ -682,7 +683,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
682 data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index); 683 data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index);
683 if (!data) { 684 if (!data) {
684 netdev_err(ndp->ndev.dev, 685 netdev_err(ndp->ndev.dev,
685 "ncsi: failed to retrieve filter %d\n", index); 686 "NCSI: failed to retrieve filter %d\n", index);
686 /* Set the VLAN id to 0 - this will still disable the entry in 687 /* Set the VLAN id to 0 - this will still disable the entry in
687 * the filter table, but we won't know what it was. 688 * the filter table, but we won't know what it was.
688 */ 689 */
@@ -692,7 +693,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
692 } 693 }
693 694
694 netdev_printk(KERN_DEBUG, ndp->ndev.dev, 695 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
695 "ncsi: removed vlan tag %u at index %d\n", 696 "NCSI: removed vlan tag %u at index %d\n",
696 vid, index + 1); 697 vid, index + 1);
697 ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index); 698 ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index);
698 699
@@ -718,7 +719,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
718 if (index < 0) { 719 if (index < 0) {
719 /* New tag to add */ 720 /* New tag to add */
720 netdev_printk(KERN_DEBUG, ndp->ndev.dev, 721 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
721 "ncsi: new vlan id to set: %u\n", 722 "NCSI: new vlan id to set: %u\n",
722 vlan->vid); 723 vlan->vid);
723 break; 724 break;
724 } 725 }
@@ -745,7 +746,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
745 } 746 }
746 747
747 netdev_printk(KERN_DEBUG, ndp->ndev.dev, 748 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
748 "ncsi: set vid %u in packet, index %u\n", 749 "NCSI: set vid %u in packet, index %u\n",
749 vlan->vid, index + 1); 750 vlan->vid, index + 1);
750 nca->type = NCSI_PKT_CMD_SVF; 751 nca->type = NCSI_PKT_CMD_SVF;
751 nca->words[1] = vlan->vid; 752 nca->words[1] = vlan->vid;
@@ -784,8 +785,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
784 nca.package = np->id; 785 nca.package = np->id;
785 nca.channel = NCSI_RESERVED_CHANNEL; 786 nca.channel = NCSI_RESERVED_CHANNEL;
786 ret = ncsi_xmit_cmd(&nca); 787 ret = ncsi_xmit_cmd(&nca);
787 if (ret) 788 if (ret) {
789 netdev_err(ndp->ndev.dev,
790 "NCSI: Failed to transmit CMD_SP\n");
788 goto error; 791 goto error;
792 }
789 793
790 nd->state = ncsi_dev_state_config_cis; 794 nd->state = ncsi_dev_state_config_cis;
791 break; 795 break;
@@ -797,8 +801,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
797 nca.package = np->id; 801 nca.package = np->id;
798 nca.channel = nc->id; 802 nca.channel = nc->id;
799 ret = ncsi_xmit_cmd(&nca); 803 ret = ncsi_xmit_cmd(&nca);
800 if (ret) 804 if (ret) {
805 netdev_err(ndp->ndev.dev,
806 "NCSI: Failed to transmit CMD_CIS\n");
801 goto error; 807 goto error;
808 }
802 809
803 nd->state = ncsi_dev_state_config_clear_vids; 810 nd->state = ncsi_dev_state_config_clear_vids;
804 break; 811 break;
@@ -895,10 +902,16 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
895 } 902 }
896 903
897 ret = ncsi_xmit_cmd(&nca); 904 ret = ncsi_xmit_cmd(&nca);
898 if (ret) 905 if (ret) {
906 netdev_err(ndp->ndev.dev,
907 "NCSI: Failed to transmit CMD %x\n",
908 nca.type);
899 goto error; 909 goto error;
910 }
900 break; 911 break;
901 case ncsi_dev_state_config_done: 912 case ncsi_dev_state_config_done:
913 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
914 "NCSI: channel %u config done\n", nc->id);
902 spin_lock_irqsave(&nc->lock, flags); 915 spin_lock_irqsave(&nc->lock, flags);
903 if (nc->reconfigure_needed) { 916 if (nc->reconfigure_needed) {
904 /* This channel's configuration has been updated 917 /* This channel's configuration has been updated
@@ -925,6 +938,9 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
925 } else { 938 } else {
926 hot_nc = NULL; 939 hot_nc = NULL;
927 nc->state = NCSI_CHANNEL_INACTIVE; 940 nc->state = NCSI_CHANNEL_INACTIVE;
941 netdev_warn(ndp->ndev.dev,
942 "NCSI: channel %u link down after config\n",
943 nc->id);
928 } 944 }
929 spin_unlock_irqrestore(&nc->lock, flags); 945 spin_unlock_irqrestore(&nc->lock, flags);
930 946
@@ -937,8 +953,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
937 ncsi_process_next_channel(ndp); 953 ncsi_process_next_channel(ndp);
938 break; 954 break;
939 default: 955 default:
940 netdev_warn(dev, "Wrong NCSI state 0x%x in config\n", 956 netdev_alert(dev, "Wrong NCSI state 0x%x in config\n",
941 nd->state); 957 nd->state);
942 } 958 }
943 959
944 return; 960 return;
@@ -990,10 +1006,17 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
990 } 1006 }
991 1007
992 if (!found) { 1008 if (!found) {
1009 netdev_warn(ndp->ndev.dev,
1010 "NCSI: No channel found with link\n");
993 ncsi_report_link(ndp, true); 1011 ncsi_report_link(ndp, true);
994 return -ENODEV; 1012 return -ENODEV;
995 } 1013 }
996 1014
1015 ncm = &found->modes[NCSI_MODE_LINK];
1016 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
1017 "NCSI: Channel %u added to queue (link %s)\n",
1018 found->id, ncm->data[2] & 0x1 ? "up" : "down");
1019
997out: 1020out:
998 spin_lock_irqsave(&ndp->lock, flags); 1021 spin_lock_irqsave(&ndp->lock, flags);
999 list_add_tail_rcu(&found->link, &ndp->channel_queue); 1022 list_add_tail_rcu(&found->link, &ndp->channel_queue);
@@ -1055,6 +1078,8 @@ static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
1055 1078
1056 /* We can have no channels in extremely case */ 1079 /* We can have no channels in extremely case */
1057 if (list_empty(&ndp->channel_queue)) { 1080 if (list_empty(&ndp->channel_queue)) {
1081 netdev_err(ndp->ndev.dev,
1082 "NCSI: No available channels for HWA\n");
1058 ncsi_report_link(ndp, false); 1083 ncsi_report_link(ndp, false);
1059 return -ENOENT; 1084 return -ENOENT;
1060 } 1085 }
@@ -1223,6 +1248,9 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
1223 1248
1224 return; 1249 return;
1225error: 1250error:
1251 netdev_err(ndp->ndev.dev,
1252 "NCSI: Failed to transmit cmd 0x%x during probe\n",
1253 nca.type);
1226 ncsi_report_link(ndp, true); 1254 ncsi_report_link(ndp, true);
1227} 1255}
1228 1256
@@ -1276,10 +1304,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp)
1276 switch (old_state) { 1304 switch (old_state) {
1277 case NCSI_CHANNEL_INACTIVE: 1305 case NCSI_CHANNEL_INACTIVE:
1278 ndp->ndev.state = ncsi_dev_state_config; 1306 ndp->ndev.state = ncsi_dev_state_config;
1307 netdev_info(ndp->ndev.dev, "NCSI: configuring channel %u\n",
1308 nc->id);
1279 ncsi_configure_channel(ndp); 1309 ncsi_configure_channel(ndp);
1280 break; 1310 break;
1281 case NCSI_CHANNEL_ACTIVE: 1311 case NCSI_CHANNEL_ACTIVE:
1282 ndp->ndev.state = ncsi_dev_state_suspend; 1312 ndp->ndev.state = ncsi_dev_state_suspend;
1313 netdev_info(ndp->ndev.dev, "NCSI: suspending channel %u\n",
1314 nc->id);
1283 ncsi_suspend_channel(ndp); 1315 ncsi_suspend_channel(ndp);
1284 break; 1316 break;
1285 default: 1317 default:
@@ -1299,6 +1331,8 @@ out:
1299 return ncsi_choose_active_channel(ndp); 1331 return ncsi_choose_active_channel(ndp);
1300 } 1332 }
1301 1333
1334 netdev_printk(KERN_DEBUG, ndp->ndev.dev,
1335 "NCSI: No more channels to process\n");
1302 ncsi_report_link(ndp, false); 1336 ncsi_report_link(ndp, false);
1303 return -ENODEV; 1337 return -ENODEV;
1304} 1338}
@@ -1390,7 +1424,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
1390 ncsi_dev_state_config || 1424 ncsi_dev_state_config ||
1391 !list_empty(&nc->link)) { 1425 !list_empty(&nc->link)) {
1392 netdev_printk(KERN_DEBUG, nd->dev, 1426 netdev_printk(KERN_DEBUG, nd->dev,
1393 "ncsi: channel %p marked dirty\n", 1427 "NCSI: channel %p marked dirty\n",
1394 nc); 1428 nc);
1395 nc->reconfigure_needed = true; 1429 nc->reconfigure_needed = true;
1396 } 1430 }
@@ -1410,7 +1444,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
1410 spin_unlock_irqrestore(&ndp->lock, flags); 1444 spin_unlock_irqrestore(&ndp->lock, flags);
1411 1445
1412 netdev_printk(KERN_DEBUG, nd->dev, 1446 netdev_printk(KERN_DEBUG, nd->dev,
1413 "ncsi: kicked channel %p\n", nc); 1447 "NCSI: kicked channel %p\n", nc);
1414 n++; 1448 n++;
1415 } 1449 }
1416 } 1450 }
@@ -1431,7 +1465,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
1431 1465
1432 nd = ncsi_find_dev(dev); 1466 nd = ncsi_find_dev(dev);
1433 if (!nd) { 1467 if (!nd) {
1434 netdev_warn(dev, "ncsi: No net_device?\n"); 1468 netdev_warn(dev, "NCSI: No net_device?\n");
1435 return 0; 1469 return 0;
1436 } 1470 }
1437 1471
@@ -1442,7 +1476,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
1442 n_vids++; 1476 n_vids++;
1443 if (vlan->vid == vid) { 1477 if (vlan->vid == vid) {
1444 netdev_printk(KERN_DEBUG, dev, 1478 netdev_printk(KERN_DEBUG, dev,
1445 "vid %u already registered\n", vid); 1479 "NCSI: vid %u already registered\n", vid);
1446 return 0; 1480 return 0;
1447 } 1481 }
1448 } 1482 }
@@ -1461,7 +1495,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
1461 vlan->vid = vid; 1495 vlan->vid = vid;
1462 list_add_rcu(&vlan->list, &ndp->vlan_vids); 1496 list_add_rcu(&vlan->list, &ndp->vlan_vids);
1463 1497
1464 netdev_printk(KERN_DEBUG, dev, "Added new vid %u\n", vid); 1498 netdev_printk(KERN_DEBUG, dev, "NCSI: Added new vid %u\n", vid);
1465 1499
1466 found = ncsi_kick_channels(ndp) != 0; 1500 found = ncsi_kick_channels(ndp) != 0;
1467 1501
@@ -1481,7 +1515,7 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
1481 1515
1482 nd = ncsi_find_dev(dev); 1516 nd = ncsi_find_dev(dev);
1483 if (!nd) { 1517 if (!nd) {
1484 netdev_warn(dev, "ncsi: no net_device?\n"); 1518 netdev_warn(dev, "NCSI: no net_device?\n");
1485 return 0; 1519 return 0;
1486 } 1520 }
1487 1521
@@ -1491,14 +1525,14 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
1491 list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list) 1525 list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list)
1492 if (vlan->vid == vid) { 1526 if (vlan->vid == vid) {
1493 netdev_printk(KERN_DEBUG, dev, 1527 netdev_printk(KERN_DEBUG, dev,
1494 "vid %u found, removing\n", vid); 1528 "NCSI: vid %u found, removing\n", vid);
1495 list_del_rcu(&vlan->list); 1529 list_del_rcu(&vlan->list);
1496 found = true; 1530 found = true;
1497 kfree(vlan); 1531 kfree(vlan);
1498 } 1532 }
1499 1533
1500 if (!found) { 1534 if (!found) {
1501 netdev_err(dev, "ncsi: vid %u wasn't registered!\n", vid); 1535 netdev_err(dev, "NCSI: vid %u wasn't registered!\n", vid);
1502 return -EINVAL; 1536 return -EINVAL;
1503 } 1537 }
1504 1538
@@ -1542,9 +1576,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
1542 for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) { 1576 for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) {
1543 ndp->requests[i].id = i; 1577 ndp->requests[i].id = i;
1544 ndp->requests[i].ndp = ndp; 1578 ndp->requests[i].ndp = ndp;
1545 setup_timer(&ndp->requests[i].timer, 1579 timer_setup(&ndp->requests[i].timer, ncsi_request_timeout, 0);
1546 ncsi_request_timeout,
1547 (unsigned long)&ndp->requests[i]);
1548 } 1580 }
1549 1581
1550 spin_lock_irqsave(&ncsi_dev_lock, flags); 1582 spin_lock_irqsave(&ncsi_dev_lock, flags);
@@ -1581,10 +1613,12 @@ int ncsi_start_dev(struct ncsi_dev *nd)
1581 return 0; 1613 return 0;
1582 } 1614 }
1583 1615
1584 if (ndp->flags & NCSI_DEV_HWA) 1616 if (ndp->flags & NCSI_DEV_HWA) {
1617 netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n");
1585 ret = ncsi_enable_hwa(ndp); 1618 ret = ncsi_enable_hwa(ndp);
1586 else 1619 } else {
1587 ret = ncsi_choose_active_channel(ndp); 1620 ret = ncsi_choose_active_channel(ndp);
1621 }
1588 1622
1589 return ret; 1623 return ret;
1590} 1624}
@@ -1615,6 +1649,7 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
1615 } 1649 }
1616 } 1650 }
1617 1651
1652 netdev_printk(KERN_DEBUG, ndp->ndev.dev, "NCSI: Stopping device\n");
1618 ncsi_report_link(ndp, true); 1653 ncsi_report_link(ndp, true);
1619} 1654}
1620EXPORT_SYMBOL_GPL(ncsi_stop_dev); 1655EXPORT_SYMBOL_GPL(ncsi_stop_dev);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 927dad4759d1..efd933ff5570 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -146,7 +146,7 @@ static int ncsi_rsp_handler_ec(struct ncsi_request *nr)
146 146
147 ncm = &nc->modes[NCSI_MODE_ENABLE]; 147 ncm = &nc->modes[NCSI_MODE_ENABLE];
148 if (ncm->enable) 148 if (ncm->enable)
149 return -EBUSY; 149 return 0;
150 150
151 ncm->enable = 1; 151 ncm->enable = 1;
152 return 0; 152 return 0;
@@ -173,7 +173,7 @@ static int ncsi_rsp_handler_dc(struct ncsi_request *nr)
173 173
174 ncm = &nc->modes[NCSI_MODE_ENABLE]; 174 ncm = &nc->modes[NCSI_MODE_ENABLE];
175 if (!ncm->enable) 175 if (!ncm->enable)
176 return -EBUSY; 176 return 0;
177 177
178 ncm->enable = 0; 178 ncm->enable = 0;
179 return 0; 179 return 0;
@@ -217,7 +217,7 @@ static int ncsi_rsp_handler_ecnt(struct ncsi_request *nr)
217 217
218 ncm = &nc->modes[NCSI_MODE_TX_ENABLE]; 218 ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
219 if (ncm->enable) 219 if (ncm->enable)
220 return -EBUSY; 220 return 0;
221 221
222 ncm->enable = 1; 222 ncm->enable = 1;
223 return 0; 223 return 0;
@@ -239,7 +239,7 @@ static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr)
239 239
240 ncm = &nc->modes[NCSI_MODE_TX_ENABLE]; 240 ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
241 if (!ncm->enable) 241 if (!ncm->enable)
242 return -EBUSY; 242 return 0;
243 243
244 ncm->enable = 1; 244 ncm->enable = 1;
245 return 0; 245 return 0;
@@ -263,7 +263,7 @@ static int ncsi_rsp_handler_ae(struct ncsi_request *nr)
263 /* Check if the AEN has been enabled */ 263 /* Check if the AEN has been enabled */
264 ncm = &nc->modes[NCSI_MODE_AEN]; 264 ncm = &nc->modes[NCSI_MODE_AEN];
265 if (ncm->enable) 265 if (ncm->enable)
266 return -EBUSY; 266 return 0;
267 267
268 /* Update to AEN configuration */ 268 /* Update to AEN configuration */
269 cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd); 269 cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd);
@@ -382,7 +382,7 @@ static int ncsi_rsp_handler_ev(struct ncsi_request *nr)
382 /* Check if VLAN mode has been enabled */ 382 /* Check if VLAN mode has been enabled */
383 ncm = &nc->modes[NCSI_MODE_VLAN]; 383 ncm = &nc->modes[NCSI_MODE_VLAN];
384 if (ncm->enable) 384 if (ncm->enable)
385 return -EBUSY; 385 return 0;
386 386
387 /* Update to VLAN mode */ 387 /* Update to VLAN mode */
388 cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd); 388 cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd);
@@ -409,7 +409,7 @@ static int ncsi_rsp_handler_dv(struct ncsi_request *nr)
409 /* Check if VLAN mode has been enabled */ 409 /* Check if VLAN mode has been enabled */
410 ncm = &nc->modes[NCSI_MODE_VLAN]; 410 ncm = &nc->modes[NCSI_MODE_VLAN];
411 if (!ncm->enable) 411 if (!ncm->enable)
412 return -EBUSY; 412 return 0;
413 413
414 /* Update to VLAN mode */ 414 /* Update to VLAN mode */
415 ncm->enable = 0; 415 ncm->enable = 0;
@@ -455,13 +455,10 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
455 455
456 bitmap = &ncf->bitmap; 456 bitmap = &ncf->bitmap;
457 if (cmd->at_e & 0x1) { 457 if (cmd->at_e & 0x1) {
458 if (test_and_set_bit(cmd->index, bitmap)) 458 set_bit(cmd->index, bitmap);
459 return -EBUSY;
460 memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6); 459 memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6);
461 } else { 460 } else {
462 if (!test_and_clear_bit(cmd->index, bitmap)) 461 clear_bit(cmd->index, bitmap);
463 return -EBUSY;
464
465 memset(ncf->data + 6 * cmd->index, 0, 6); 462 memset(ncf->data + 6 * cmd->index, 0, 6);
466 } 463 }
467 464
@@ -485,7 +482,7 @@ static int ncsi_rsp_handler_ebf(struct ncsi_request *nr)
485 /* Check if broadcast filter has been enabled */ 482 /* Check if broadcast filter has been enabled */
486 ncm = &nc->modes[NCSI_MODE_BC]; 483 ncm = &nc->modes[NCSI_MODE_BC];
487 if (ncm->enable) 484 if (ncm->enable)
488 return -EBUSY; 485 return 0;
489 486
490 /* Update to broadcast filter mode */ 487 /* Update to broadcast filter mode */
491 cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd); 488 cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd);
@@ -511,7 +508,7 @@ static int ncsi_rsp_handler_dbf(struct ncsi_request *nr)
511 /* Check if broadcast filter isn't enabled */ 508 /* Check if broadcast filter isn't enabled */
512 ncm = &nc->modes[NCSI_MODE_BC]; 509 ncm = &nc->modes[NCSI_MODE_BC];
513 if (!ncm->enable) 510 if (!ncm->enable)
514 return -EBUSY; 511 return 0;
515 512
516 /* Update to broadcast filter mode */ 513 /* Update to broadcast filter mode */
517 ncm->enable = 0; 514 ncm->enable = 0;
@@ -538,7 +535,7 @@ static int ncsi_rsp_handler_egmf(struct ncsi_request *nr)
538 /* Check if multicast filter has been enabled */ 535 /* Check if multicast filter has been enabled */
539 ncm = &nc->modes[NCSI_MODE_MC]; 536 ncm = &nc->modes[NCSI_MODE_MC];
540 if (ncm->enable) 537 if (ncm->enable)
541 return -EBUSY; 538 return 0;
542 539
543 /* Update to multicast filter mode */ 540 /* Update to multicast filter mode */
544 cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd); 541 cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd);
@@ -564,7 +561,7 @@ static int ncsi_rsp_handler_dgmf(struct ncsi_request *nr)
564 /* Check if multicast filter has been enabled */ 561 /* Check if multicast filter has been enabled */
565 ncm = &nc->modes[NCSI_MODE_MC]; 562 ncm = &nc->modes[NCSI_MODE_MC];
566 if (!ncm->enable) 563 if (!ncm->enable)
567 return -EBUSY; 564 return 0;
568 565
569 /* Update to multicast filter mode */ 566 /* Update to multicast filter mode */
570 ncm->enable = 0; 567 ncm->enable = 0;
@@ -591,7 +588,7 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
591 /* Check if flow control has been enabled */ 588 /* Check if flow control has been enabled */
592 ncm = &nc->modes[NCSI_MODE_FC]; 589 ncm = &nc->modes[NCSI_MODE_FC];
593 if (ncm->enable) 590 if (ncm->enable)
594 return -EBUSY; 591 return 0;
595 592
596 /* Update to flow control mode */ 593 /* Update to flow control mode */
597 cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd); 594 cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd);
@@ -1032,11 +1029,19 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
1032 if (payload < 0) 1029 if (payload < 0)
1033 payload = ntohs(hdr->length); 1030 payload = ntohs(hdr->length);
1034 ret = ncsi_validate_rsp_pkt(nr, payload); 1031 ret = ncsi_validate_rsp_pkt(nr, payload);
1035 if (ret) 1032 if (ret) {
1033 netdev_warn(ndp->ndev.dev,
1034 "NCSI: 'bad' packet ignored for type 0x%x\n",
1035 hdr->type);
1036 goto out; 1036 goto out;
1037 }
1037 1038
1038 /* Process the packet */ 1039 /* Process the packet */
1039 ret = nrh->handler(nr); 1040 ret = nrh->handler(nr);
1041 if (ret)
1042 netdev_err(ndp->ndev.dev,
1043 "NCSI: Handler for packet type 0x%x returned %d\n",
1044 hdr->type, ret);
1040out: 1045out:
1041 ncsi_free_request(nr); 1046 ncsi_free_request(nr);
1042 return ret; 1047 return ret;
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 8ad2b52a0b32..5ca18f07683b 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -37,11 +37,11 @@
37#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) 37#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
38 38
39static void 39static void
40mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) 40mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
41{ 41{
42 struct mtype *map = set->data; 42 struct mtype *map = set->data;
43 43
44 setup_timer(&map->gc, gc, (unsigned long)set); 44 timer_setup(&map->gc, gc, 0);
45 mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); 45 mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
46} 46}
47 47
@@ -272,10 +272,10 @@ out:
272} 272}
273 273
274static void 274static void
275mtype_gc(unsigned long ul_set) 275mtype_gc(struct timer_list *t)
276{ 276{
277 struct ip_set *set = (struct ip_set *)ul_set; 277 struct mtype *map = from_timer(map, t, gc);
278 struct mtype *map = set->data; 278 struct ip_set *set = map->set;
279 void *x; 279 void *x;
280 u32 id; 280 u32 id;
281 281
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 4783efff0bde..d8975a0b4282 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -48,6 +48,7 @@ struct bitmap_ip {
48 size_t memsize; /* members size */ 48 size_t memsize; /* members size */
49 u8 netmask; /* subnet netmask */ 49 u8 netmask; /* subnet netmask */
50 struct timer_list gc; /* garbage collection */ 50 struct timer_list gc; /* garbage collection */
51 struct ip_set *set; /* attached to this ip_set */
51 unsigned char extensions[0] /* data extensions */ 52 unsigned char extensions[0] /* data extensions */
52 __aligned(__alignof__(u64)); 53 __aligned(__alignof__(u64));
53}; 54};
@@ -232,6 +233,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
232 map->netmask = netmask; 233 map->netmask = netmask;
233 set->timeout = IPSET_NO_TIMEOUT; 234 set->timeout = IPSET_NO_TIMEOUT;
234 235
236 map->set = set;
235 set->data = map; 237 set->data = map;
236 set->family = NFPROTO_IPV4; 238 set->family = NFPROTO_IPV4;
237 239
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 9a065f672d3a..4c279fbd2d5d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -52,6 +52,7 @@ struct bitmap_ipmac {
52 u32 elements; /* number of max elements in the set */ 52 u32 elements; /* number of max elements in the set */
53 size_t memsize; /* members size */ 53 size_t memsize; /* members size */
54 struct timer_list gc; /* garbage collector */ 54 struct timer_list gc; /* garbage collector */
55 struct ip_set *set; /* attached to this ip_set */
55 unsigned char extensions[0] /* MAC + data extensions */ 56 unsigned char extensions[0] /* MAC + data extensions */
56 __aligned(__alignof__(u64)); 57 __aligned(__alignof__(u64));
57}; 58};
@@ -307,6 +308,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
307 map->elements = elements; 308 map->elements = elements;
308 set->timeout = IPSET_NO_TIMEOUT; 309 set->timeout = IPSET_NO_TIMEOUT;
309 310
311 map->set = set;
310 set->data = map; 312 set->data = map;
311 set->family = NFPROTO_IPV4; 313 set->family = NFPROTO_IPV4;
312 314
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 7f0c733358a4..7f9bbd7c98b5 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -40,6 +40,7 @@ struct bitmap_port {
40 u32 elements; /* number of max elements in the set */ 40 u32 elements; /* number of max elements in the set */
41 size_t memsize; /* members size */ 41 size_t memsize; /* members size */
42 struct timer_list gc; /* garbage collection */ 42 struct timer_list gc; /* garbage collection */
43 struct ip_set *set; /* attached to this ip_set */
43 unsigned char extensions[0] /* data extensions */ 44 unsigned char extensions[0] /* data extensions */
44 __aligned(__alignof__(u64)); 45 __aligned(__alignof__(u64));
45}; 46};
@@ -214,6 +215,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
214 map->last_port = last_port; 215 map->last_port = last_port;
215 set->timeout = IPSET_NO_TIMEOUT; 216 set->timeout = IPSET_NO_TIMEOUT;
216 217
218 map->set = set;
217 set->data = map; 219 set->data = map;
218 set->family = NFPROTO_UNSPEC; 220 set->family = NFPROTO_UNSPEC;
219 221
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 51063d9ed0f7..efffc8eabafe 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -280,6 +280,7 @@ htable_bits(u32 hashsize)
280struct htype { 280struct htype {
281 struct htable __rcu *table; /* the hash table */ 281 struct htable __rcu *table; /* the hash table */
282 struct timer_list gc; /* garbage collection when timeout enabled */ 282 struct timer_list gc; /* garbage collection when timeout enabled */
283 struct ip_set *set; /* attached to this ip_set */
283 u32 maxelem; /* max elements in the hash */ 284 u32 maxelem; /* max elements in the hash */
284 u32 initval; /* random jhash init value */ 285 u32 initval; /* random jhash init value */
285#ifdef IP_SET_HASH_WITH_MARKMASK 286#ifdef IP_SET_HASH_WITH_MARKMASK
@@ -429,11 +430,11 @@ mtype_destroy(struct ip_set *set)
429} 430}
430 431
431static void 432static void
432mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) 433mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
433{ 434{
434 struct htype *h = set->data; 435 struct htype *h = set->data;
435 436
436 setup_timer(&h->gc, gc, (unsigned long)set); 437 timer_setup(&h->gc, gc, 0);
437 mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); 438 mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
438 pr_debug("gc initialized, run in every %u\n", 439 pr_debug("gc initialized, run in every %u\n",
439 IPSET_GC_PERIOD(set->timeout)); 440 IPSET_GC_PERIOD(set->timeout));
@@ -526,10 +527,10 @@ mtype_expire(struct ip_set *set, struct htype *h)
526} 527}
527 528
528static void 529static void
529mtype_gc(unsigned long ul_set) 530mtype_gc(struct timer_list *t)
530{ 531{
531 struct ip_set *set = (struct ip_set *)ul_set; 532 struct htype *h = from_timer(h, t, gc);
532 struct htype *h = set->data; 533 struct ip_set *set = h->set;
533 534
534 pr_debug("called\n"); 535 pr_debug("called\n");
535 spin_lock_bh(&set->lock); 536 spin_lock_bh(&set->lock);
@@ -1314,6 +1315,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1314 t->htable_bits = hbits; 1315 t->htable_bits = hbits;
1315 RCU_INIT_POINTER(h->table, t); 1316 RCU_INIT_POINTER(h->table, t);
1316 1317
1318 h->set = set;
1317 set->data = h; 1319 set->data = h;
1318#ifndef IP_SET_PROTO_UNDEF 1320#ifndef IP_SET_PROTO_UNDEF
1319 if (set->family == NFPROTO_IPV4) { 1321 if (set->family == NFPROTO_IPV4) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index a2f19b9906e9..0f164e986bf1 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -434,7 +434,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
434 if (unlikely(tb[IPSET_ATTR_IP_TO])) 434 if (unlikely(tb[IPSET_ATTR_IP_TO]))
435 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; 435 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
436 if (unlikely(tb[IPSET_ATTR_CIDR])) { 436 if (unlikely(tb[IPSET_ATTR_CIDR])) {
437 u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); 437 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
438 438
439 if (cidr != HOST_MASK) 439 if (cidr != HOST_MASK)
440 return -IPSET_ERR_INVALID_CIDR; 440 return -IPSET_ERR_INVALID_CIDR;
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 178d4eba013b..e864681b8dc5 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -44,6 +44,7 @@ struct set_adt_elem {
44struct list_set { 44struct list_set {
45 u32 size; /* size of set list array */ 45 u32 size; /* size of set list array */
46 struct timer_list gc; /* garbage collection */ 46 struct timer_list gc; /* garbage collection */
47 struct ip_set *set; /* attached to this ip_set */
47 struct net *net; /* namespace */ 48 struct net *net; /* namespace */
48 struct list_head members; /* the set members */ 49 struct list_head members; /* the set members */
49}; 50};
@@ -453,7 +454,6 @@ static size_t
453list_set_memsize(const struct list_set *map, size_t dsize) 454list_set_memsize(const struct list_set *map, size_t dsize)
454{ 455{
455 struct set_elem *e; 456 struct set_elem *e;
456 size_t memsize;
457 u32 n = 0; 457 u32 n = 0;
458 458
459 rcu_read_lock(); 459 rcu_read_lock();
@@ -461,9 +461,7 @@ list_set_memsize(const struct list_set *map, size_t dsize)
461 n++; 461 n++;
462 rcu_read_unlock(); 462 rcu_read_unlock();
463 463
464 memsize = sizeof(*map) + n * dsize; 464 return (sizeof(*map) + n * dsize);
465
466 return memsize;
467} 465}
468 466
469static int 467static int
@@ -571,10 +569,10 @@ static const struct ip_set_type_variant set_variant = {
571}; 569};
572 570
573static void 571static void
574list_set_gc(unsigned long ul_set) 572list_set_gc(struct timer_list *t)
575{ 573{
576 struct ip_set *set = (struct ip_set *)ul_set; 574 struct list_set *map = from_timer(map, t, gc);
577 struct list_set *map = set->data; 575 struct ip_set *set = map->set;
578 576
579 spin_lock_bh(&set->lock); 577 spin_lock_bh(&set->lock);
580 set_cleanup_entries(set); 578 set_cleanup_entries(set);
@@ -585,11 +583,11 @@ list_set_gc(unsigned long ul_set)
585} 583}
586 584
587static void 585static void
588list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) 586list_set_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
589{ 587{
590 struct list_set *map = set->data; 588 struct list_set *map = set->data;
591 589
592 setup_timer(&map->gc, gc, (unsigned long)set); 590 timer_setup(&map->gc, gc, 0);
593 mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); 591 mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
594} 592}
595 593
@@ -606,6 +604,7 @@ init_list_set(struct net *net, struct ip_set *set, u32 size)
606 604
607 map->size = size; 605 map->size = size;
608 map->net = net; 606 map->net = net;
607 map->set = set;
609 INIT_LIST_HEAD(&map->members); 608 INIT_LIST_HEAD(&map->members);
610 set->data = map; 609 set->data = map;
611 610
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index 1c8a42c1056c..d5be9c25fad6 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -3,6 +3,141 @@
3 3
4/* Prefixlen maps for fast conversions, by Jan Engelhardt. */ 4/* Prefixlen maps for fast conversions, by Jan Engelhardt. */
5 5
6#ifdef E
7#undef E
8#endif
9
10#define PREFIXES_MAP \
11 E(0x00000000, 0x00000000, 0x00000000, 0x00000000), \
12 E(0x80000000, 0x00000000, 0x00000000, 0x00000000), \
13 E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), \
14 E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), \
15 E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), \
16 E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), \
17 E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), \
18 E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), \
19 E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), \
20 E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), \
21 E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), \
22 E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), \
23 E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), \
24 E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), \
25 E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), \
26 E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), \
27 E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), \
28 E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), \
29 E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), \
30 E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), \
31 E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), \
32 E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), \
33 E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), \
34 E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), \
35 E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), \
36 E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), \
37 E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), \
38 E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), \
39 E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), \
40 E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), \
41 E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), \
42 E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), \
43 E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), \
44 E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), \
45 E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), \
46 E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), \
47 E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), \
48 E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), \
49 E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), \
50 E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), \
51 E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), \
52 E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), \
53 E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), \
54 E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), \
55 E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), \
56 E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), \
57 E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), \
58 E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), \
59 E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), \
60 E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), \
61 E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), \
62 E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), \
63 E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), \
64 E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), \
65 E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), \
66 E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), \
67 E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), \
68 E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), \
69 E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), \
70 E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), \
71 E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), \
72 E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), \
73 E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), \
74 E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), \
75 E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), \
76 E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), \
77 E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), \
78 E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), \
79 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), \
80 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), \
81 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), \
82 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), \
83 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), \
84 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), \
85 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), \
86 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), \
87 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), \
88 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), \
89 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), \
90 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), \
91 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), \
92 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), \
93 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), \
94 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), \
95 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), \
96 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), \
97 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), \
98 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), \
99 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), \
100 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), \
101 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), \
102 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), \
103 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), \
104 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), \
105 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), \
106 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), \
107 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), \
108 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), \
109 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), \
110 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), \
111 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), \
112 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), \
113 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), \
114 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), \
115 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), \
116 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), \
117 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), \
118 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), \
119 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), \
120 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), \
121 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), \
122 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), \
123 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), \
124 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), \
125 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), \
126 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), \
127 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), \
128 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), \
129 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), \
130 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), \
131 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), \
132 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), \
133 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), \
134 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), \
135 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), \
136 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), \
137 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), \
138 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), \
139 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
140
6#define E(a, b, c, d) \ 141#define E(a, b, c, d) \
7 {.ip6 = { \ 142 {.ip6 = { \
8 htonl(a), htonl(b), \ 143 htonl(a), htonl(b), \
@@ -13,135 +148,7 @@
13 * just use prefixlen_netmask_map[prefixlength].ip. 148 * just use prefixlen_netmask_map[prefixlength].ip.
14 */ 149 */
15const union nf_inet_addr ip_set_netmask_map[] = { 150const union nf_inet_addr ip_set_netmask_map[] = {
16 E(0x00000000, 0x00000000, 0x00000000, 0x00000000), 151 PREFIXES_MAP
17 E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
18 E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
19 E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
20 E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
21 E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
22 E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
23 E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
24 E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
25 E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
26 E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
27 E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
28 E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
29 E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
30 E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
31 E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
32 E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
33 E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
34 E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
35 E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
36 E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
37 E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
38 E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
39 E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
40 E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
41 E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
42 E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
43 E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
44 E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
45 E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
46 E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
47 E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
48 E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
49 E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
50 E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
51 E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
52 E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
53 E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
54 E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
55 E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
56 E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
57 E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
58 E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
59 E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
60 E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
61 E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
62 E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
63 E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
64 E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
65 E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
66 E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
67 E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
68 E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
69 E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
70 E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
71 E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
72 E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
73 E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
74 E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
75 E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
76 E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
77 E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
78 E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
79 E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
80 E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
81 E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
82 E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
83 E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
84 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
85 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
86 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
87 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
88 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
89 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
90 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
91 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
92 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
93 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
94 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
95 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
96 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
97 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
98 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
99 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
100 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
101 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
102 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
103 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
104 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
105 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
106 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
107 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
108 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
109 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
110 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
111 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
112 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
113 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
114 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
115 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
116 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
117 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
118 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
119 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
120 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
121 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
122 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
123 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
124 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
125 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
126 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
127 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
128 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
129 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
130 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
131 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
132 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
133 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
134 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
135 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
136 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
137 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
138 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
139 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
140 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
141 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
142 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
143 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
144 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
145}; 152};
146EXPORT_SYMBOL_GPL(ip_set_netmask_map); 153EXPORT_SYMBOL_GPL(ip_set_netmask_map);
147 154
@@ -155,135 +162,7 @@ EXPORT_SYMBOL_GPL(ip_set_netmask_map);
155 * just use prefixlen_hostmask_map[prefixlength].ip. 162 * just use prefixlen_hostmask_map[prefixlength].ip.
156 */ 163 */
157const union nf_inet_addr ip_set_hostmask_map[] = { 164const union nf_inet_addr ip_set_hostmask_map[] = {
158 E(0x00000000, 0x00000000, 0x00000000, 0x00000000), 165 PREFIXES_MAP
159 E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
160 E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
161 E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
162 E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
163 E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
164 E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
165 E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
166 E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
167 E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
168 E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
169 E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
170 E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
171 E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
172 E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
173 E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
174 E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
175 E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
176 E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
177 E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
178 E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
179 E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
180 E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
181 E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
182 E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
183 E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
184 E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
185 E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
186 E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
187 E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
188 E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
189 E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
190 E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
191 E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
192 E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
193 E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
194 E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
195 E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
196 E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
197 E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
198 E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
199 E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
200 E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
201 E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
202 E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
203 E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
204 E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
205 E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
206 E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
207 E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
208 E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
209 E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
210 E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
211 E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
212 E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
213 E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
214 E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
215 E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
216 E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
217 E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
218 E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
219 E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
220 E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
221 E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
222 E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
223 E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
224 E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
225 E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
226 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
227 E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
228 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
229 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
230 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
231 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
232 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
233 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
234 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
235 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
236 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
237 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
238 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
239 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
240 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
241 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
242 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
243 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
244 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
245 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
246 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
247 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
248 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
249 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
250 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
251 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
252 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
253 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
254 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
255 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
256 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
257 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
258 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
259 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
260 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
261 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
262 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
263 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
264 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
265 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
266 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
267 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
268 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
269 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
270 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
271 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
272 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
273 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
274 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
275 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
276 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
277 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
278 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
279 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
280 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
281 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
282 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
283 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
284 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
285 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
286 E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
287}; 166};
288EXPORT_SYMBOL_GPL(ip_set_hostmask_map); 167EXPORT_SYMBOL_GPL(ip_set_hostmask_map);
289 168
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 3a43b3470331..3e053cb30070 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -185,7 +185,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
185 hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); 185 hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
186 ret = 1; 186 ret = 1;
187 } else { 187 } else {
188 pr_err("%s(): request for already hashed, called from %pF\n", 188 pr_err("%s(): request for already hashed, called from %pS\n",
189 __func__, __builtin_return_address(0)); 189 __func__, __builtin_return_address(0));
190 ret = 0; 190 ret = 0;
191 } 191 }
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index b47e266c6eca..fff213eacf2a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -300,7 +300,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
300 unsigned int hash; 300 unsigned int hash;
301 301
302 if (svc->flags & IP_VS_SVC_F_HASHED) { 302 if (svc->flags & IP_VS_SVC_F_HASHED) {
303 pr_err("%s(): request for already hashed, called from %pF\n", 303 pr_err("%s(): request for already hashed, called from %pS\n",
304 __func__, __builtin_return_address(0)); 304 __func__, __builtin_return_address(0));
305 return 0; 305 return 0;
306 } 306 }
@@ -334,7 +334,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
334static int ip_vs_svc_unhash(struct ip_vs_service *svc) 334static int ip_vs_svc_unhash(struct ip_vs_service *svc)
335{ 335{
336 if (!(svc->flags & IP_VS_SVC_F_HASHED)) { 336 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
337 pr_err("%s(): request for unhash flagged, called from %pF\n", 337 pr_err("%s(): request for unhash flagged, called from %pS\n",
338 __func__, __builtin_return_address(0)); 338 __func__, __builtin_return_address(0));
339 return 0; 339 return 0;
340 } 340 }
@@ -2034,12 +2034,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2034 seq_puts(seq, 2034 seq_puts(seq,
2035 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); 2035 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2036 } else { 2036 } else {
2037 struct net *net = seq_file_net(seq);
2038 struct netns_ipvs *ipvs = net_ipvs(net);
2037 const struct ip_vs_service *svc = v; 2039 const struct ip_vs_service *svc = v;
2038 const struct ip_vs_iter *iter = seq->private; 2040 const struct ip_vs_iter *iter = seq->private;
2039 const struct ip_vs_dest *dest; 2041 const struct ip_vs_dest *dest;
2040 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); 2042 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
2041 char *sched_name = sched ? sched->name : "none"; 2043 char *sched_name = sched ? sched->name : "none";
2042 2044
2045 if (svc->ipvs != ipvs)
2046 return 0;
2043 if (iter->table == ip_vs_svc_table) { 2047 if (iter->table == ip_vs_svc_table) {
2044#ifdef CONFIG_IP_VS_IPV6 2048#ifdef CONFIG_IP_VS_IPV6
2045 if (svc->af == AF_INET6) 2049 if (svc->af == AF_INET6)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 01130392b7c0..85f643c1e227 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1083,7 +1083,7 @@ static void gc_worker(struct work_struct *work)
1083 next_run = gc_work->next_gc_run; 1083 next_run = gc_work->next_gc_run;
1084 gc_work->last_bucket = i; 1084 gc_work->last_bucket = i;
1085 gc_work->early_drop = false; 1085 gc_work->early_drop = false;
1086 queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); 1086 queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
1087} 1087}
1088 1088
1089static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) 1089static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
@@ -1419,7 +1419,7 @@ repeat:
1419 /* Decide what timeout policy we want to apply to this flow. */ 1419 /* Decide what timeout policy we want to apply to this flow. */
1420 timeouts = nf_ct_timeout_lookup(net, ct, l4proto); 1420 timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
1421 1421
1422 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, timeouts); 1422 ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
1423 if (ret <= 0) { 1423 if (ret <= 0) {
1424 /* Invalid: inverse of the return code tells 1424 /* Invalid: inverse of the return code tells
1425 * the netfilter core what to do */ 1425 * the netfilter core what to do */
@@ -1563,9 +1563,14 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
1563} 1563}
1564EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); 1564EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
1565 1565
1566int nf_ct_port_nlattr_tuple_size(void) 1566unsigned int nf_ct_port_nlattr_tuple_size(void)
1567{ 1567{
1568 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); 1568 static unsigned int size __read_mostly;
1569
1570 if (!size)
1571 size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1572
1573 return size;
1569} 1574}
1570EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); 1575EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
1571#endif 1576#endif
@@ -1940,7 +1945,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
1940 return 0; 1945 return 0;
1941} 1946}
1942 1947
1943int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) 1948int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
1944{ 1949{
1945 unsigned int hashsize; 1950 unsigned int hashsize;
1946 int rc; 1951 int rc;
@@ -2084,7 +2089,7 @@ int nf_conntrack_init_start(void)
2084 goto err_proto; 2089 goto err_proto;
2085 2090
2086 conntrack_gc_work_init(&conntrack_gc_work); 2091 conntrack_gc_work_init(&conntrack_gc_work);
2087 queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); 2092 queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
2088 2093
2089 return 0; 2094 return 0;
2090 2095
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 64778f9a8548..d6748a8a79c5 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -67,9 +67,9 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
67} 67}
68EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report); 68EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
69 69
70static void nf_ct_expectation_timed_out(unsigned long ul_expect) 70static void nf_ct_expectation_timed_out(struct timer_list *t)
71{ 71{
72 struct nf_conntrack_expect *exp = (void *)ul_expect; 72 struct nf_conntrack_expect *exp = from_timer(exp, t, timeout);
73 73
74 spin_lock_bh(&nf_conntrack_expect_lock); 74 spin_lock_bh(&nf_conntrack_expect_lock);
75 nf_ct_unlink_expect(exp); 75 nf_ct_unlink_expect(exp);
@@ -368,8 +368,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
368 /* two references : one for hash insert, one for the timer */ 368 /* two references : one for hash insert, one for the timer */
369 refcount_add(2, &exp->use); 369 refcount_add(2, &exp->use);
370 370
371 setup_timer(&exp->timeout, nf_ct_expectation_timed_out, 371 timer_setup(&exp->timeout, nf_ct_expectation_timed_out, 0);
372 (unsigned long)exp);
373 helper = rcu_dereference_protected(master_help->helper, 372 helper = rcu_dereference_protected(master_help->helper,
374 lockdep_is_held(&nf_conntrack_expect_lock)); 373 lockdep_is_held(&nf_conntrack_expect_lock));
375 if (helper) { 374 if (helper) {
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 89b2e46925c4..cf1bf2605c10 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -91,41 +91,41 @@ typedef struct field_t {
91} field_t; 91} field_t;
92 92
93/* Bit Stream */ 93/* Bit Stream */
94typedef struct { 94struct bitstr {
95 unsigned char *buf; 95 unsigned char *buf;
96 unsigned char *beg; 96 unsigned char *beg;
97 unsigned char *end; 97 unsigned char *end;
98 unsigned char *cur; 98 unsigned char *cur;
99 unsigned int bit; 99 unsigned int bit;
100} bitstr_t; 100};
101 101
102/* Tool Functions */ 102/* Tool Functions */
103#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;} 103#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
104#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;} 104#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
105#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;} 105#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
106#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND) 106#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
107static unsigned int get_len(bitstr_t *bs); 107static unsigned int get_len(struct bitstr *bs);
108static unsigned int get_bit(bitstr_t *bs); 108static unsigned int get_bit(struct bitstr *bs);
109static unsigned int get_bits(bitstr_t *bs, unsigned int b); 109static unsigned int get_bits(struct bitstr *bs, unsigned int b);
110static unsigned int get_bitmap(bitstr_t *bs, unsigned int b); 110static unsigned int get_bitmap(struct bitstr *bs, unsigned int b);
111static unsigned int get_uint(bitstr_t *bs, int b); 111static unsigned int get_uint(struct bitstr *bs, int b);
112 112
113/* Decoder Functions */ 113/* Decoder Functions */
114static int decode_nul(bitstr_t *bs, const struct field_t *f, char *base, int level); 114static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level);
115static int decode_bool(bitstr_t *bs, const struct field_t *f, char *base, int level); 115static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level);
116static int decode_oid(bitstr_t *bs, const struct field_t *f, char *base, int level); 116static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level);
117static int decode_int(bitstr_t *bs, const struct field_t *f, char *base, int level); 117static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level);
118static int decode_enum(bitstr_t *bs, const struct field_t *f, char *base, int level); 118static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level);
119static int decode_bitstr(bitstr_t *bs, const struct field_t *f, char *base, int level); 119static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
120static int decode_numstr(bitstr_t *bs, const struct field_t *f, char *base, int level); 120static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
121static int decode_octstr(bitstr_t *bs, const struct field_t *f, char *base, int level); 121static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
122static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, char *base, int level); 122static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
123static int decode_seq(bitstr_t *bs, const struct field_t *f, char *base, int level); 123static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level);
124static int decode_seqof(bitstr_t *bs, const struct field_t *f, char *base, int level); 124static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level);
125static int decode_choice(bitstr_t *bs, const struct field_t *f, char *base, int level); 125static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level);
126 126
127/* Decoder Functions Vector */ 127/* Decoder Functions Vector */
128typedef int (*decoder_t)(bitstr_t *, const struct field_t *, char *, int); 128typedef int (*decoder_t)(struct bitstr *, const struct field_t *, char *, int);
129static const decoder_t Decoders[] = { 129static const decoder_t Decoders[] = {
130 decode_nul, 130 decode_nul,
131 decode_bool, 131 decode_bool,
@@ -150,7 +150,7 @@ static const decoder_t Decoders[] = {
150 * Functions 150 * Functions
151 ****************************************************************************/ 151 ****************************************************************************/
152/* Assume bs is aligned && v < 16384 */ 152/* Assume bs is aligned && v < 16384 */
153static unsigned int get_len(bitstr_t *bs) 153static unsigned int get_len(struct bitstr *bs)
154{ 154{
155 unsigned int v; 155 unsigned int v;
156 156
@@ -166,7 +166,7 @@ static unsigned int get_len(bitstr_t *bs)
166} 166}
167 167
168/****************************************************************************/ 168/****************************************************************************/
169static unsigned int get_bit(bitstr_t *bs) 169static unsigned int get_bit(struct bitstr *bs)
170{ 170{
171 unsigned int b = (*bs->cur) & (0x80 >> bs->bit); 171 unsigned int b = (*bs->cur) & (0x80 >> bs->bit);
172 172
@@ -177,7 +177,7 @@ static unsigned int get_bit(bitstr_t *bs)
177 177
178/****************************************************************************/ 178/****************************************************************************/
179/* Assume b <= 8 */ 179/* Assume b <= 8 */
180static unsigned int get_bits(bitstr_t *bs, unsigned int b) 180static unsigned int get_bits(struct bitstr *bs, unsigned int b)
181{ 181{
182 unsigned int v, l; 182 unsigned int v, l;
183 183
@@ -203,7 +203,7 @@ static unsigned int get_bits(bitstr_t *bs, unsigned int b)
203 203
204/****************************************************************************/ 204/****************************************************************************/
205/* Assume b <= 32 */ 205/* Assume b <= 32 */
206static unsigned int get_bitmap(bitstr_t *bs, unsigned int b) 206static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
207{ 207{
208 unsigned int v, l, shift, bytes; 208 unsigned int v, l, shift, bytes;
209 209
@@ -242,7 +242,7 @@ static unsigned int get_bitmap(bitstr_t *bs, unsigned int b)
242/**************************************************************************** 242/****************************************************************************
243 * Assume bs is aligned and sizeof(unsigned int) == 4 243 * Assume bs is aligned and sizeof(unsigned int) == 4
244 ****************************************************************************/ 244 ****************************************************************************/
245static unsigned int get_uint(bitstr_t *bs, int b) 245static unsigned int get_uint(struct bitstr *bs, int b)
246{ 246{
247 unsigned int v = 0; 247 unsigned int v = 0;
248 248
@@ -264,7 +264,7 @@ static unsigned int get_uint(bitstr_t *bs, int b)
264} 264}
265 265
266/****************************************************************************/ 266/****************************************************************************/
267static int decode_nul(bitstr_t *bs, const struct field_t *f, 267static int decode_nul(struct bitstr *bs, const struct field_t *f,
268 char *base, int level) 268 char *base, int level)
269{ 269{
270 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 270 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -273,7 +273,7 @@ static int decode_nul(bitstr_t *bs, const struct field_t *f,
273} 273}
274 274
275/****************************************************************************/ 275/****************************************************************************/
276static int decode_bool(bitstr_t *bs, const struct field_t *f, 276static int decode_bool(struct bitstr *bs, const struct field_t *f,
277 char *base, int level) 277 char *base, int level)
278{ 278{
279 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 279 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -285,7 +285,7 @@ static int decode_bool(bitstr_t *bs, const struct field_t *f,
285} 285}
286 286
287/****************************************************************************/ 287/****************************************************************************/
288static int decode_oid(bitstr_t *bs, const struct field_t *f, 288static int decode_oid(struct bitstr *bs, const struct field_t *f,
289 char *base, int level) 289 char *base, int level)
290{ 290{
291 int len; 291 int len;
@@ -302,7 +302,7 @@ static int decode_oid(bitstr_t *bs, const struct field_t *f,
302} 302}
303 303
304/****************************************************************************/ 304/****************************************************************************/
305static int decode_int(bitstr_t *bs, const struct field_t *f, 305static int decode_int(struct bitstr *bs, const struct field_t *f,
306 char *base, int level) 306 char *base, int level)
307{ 307{
308 unsigned int len; 308 unsigned int len;
@@ -346,7 +346,7 @@ static int decode_int(bitstr_t *bs, const struct field_t *f,
346} 346}
347 347
348/****************************************************************************/ 348/****************************************************************************/
349static int decode_enum(bitstr_t *bs, const struct field_t *f, 349static int decode_enum(struct bitstr *bs, const struct field_t *f,
350 char *base, int level) 350 char *base, int level)
351{ 351{
352 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); 352 PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -362,7 +362,7 @@ static int decode_enum(bitstr_t *bs, const struct field_t *f,
362} 362}
363 363
364/****************************************************************************/ 364/****************************************************************************/
365static int decode_bitstr(bitstr_t *bs, const struct field_t *f, 365static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
366 char *base, int level) 366 char *base, int level)
367{ 367{
368 unsigned int len; 368 unsigned int len;
@@ -396,7 +396,7 @@ static int decode_bitstr(bitstr_t *bs, const struct field_t *f,
396} 396}
397 397
398/****************************************************************************/ 398/****************************************************************************/
399static int decode_numstr(bitstr_t *bs, const struct field_t *f, 399static int decode_numstr(struct bitstr *bs, const struct field_t *f,
400 char *base, int level) 400 char *base, int level)
401{ 401{
402 unsigned int len; 402 unsigned int len;
@@ -414,7 +414,7 @@ static int decode_numstr(bitstr_t *bs, const struct field_t *f,
414} 414}
415 415
416/****************************************************************************/ 416/****************************************************************************/
417static int decode_octstr(bitstr_t *bs, const struct field_t *f, 417static int decode_octstr(struct bitstr *bs, const struct field_t *f,
418 char *base, int level) 418 char *base, int level)
419{ 419{
420 unsigned int len; 420 unsigned int len;
@@ -463,7 +463,7 @@ static int decode_octstr(bitstr_t *bs, const struct field_t *f,
463} 463}
464 464
465/****************************************************************************/ 465/****************************************************************************/
466static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, 466static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
467 char *base, int level) 467 char *base, int level)
468{ 468{
469 unsigned int len; 469 unsigned int len;
@@ -489,7 +489,7 @@ static int decode_bmpstr(bitstr_t *bs, const struct field_t *f,
489} 489}
490 490
491/****************************************************************************/ 491/****************************************************************************/
492static int decode_seq(bitstr_t *bs, const struct field_t *f, 492static int decode_seq(struct bitstr *bs, const struct field_t *f,
493 char *base, int level) 493 char *base, int level)
494{ 494{
495 unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len; 495 unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len;
@@ -606,7 +606,7 @@ static int decode_seq(bitstr_t *bs, const struct field_t *f,
606} 606}
607 607
608/****************************************************************************/ 608/****************************************************************************/
609static int decode_seqof(bitstr_t *bs, const struct field_t *f, 609static int decode_seqof(struct bitstr *bs, const struct field_t *f,
610 char *base, int level) 610 char *base, int level)
611{ 611{
612 unsigned int count, effective_count = 0, i, len = 0; 612 unsigned int count, effective_count = 0, i, len = 0;
@@ -696,7 +696,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f,
696 696
697 697
698/****************************************************************************/ 698/****************************************************************************/
699static int decode_choice(bitstr_t *bs, const struct field_t *f, 699static int decode_choice(struct bitstr *bs, const struct field_t *f,
700 char *base, int level) 700 char *base, int level)
701{ 701{
702 unsigned int type, ext, len = 0; 702 unsigned int type, ext, len = 0;
@@ -772,7 +772,7 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
772 FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT, 772 FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT,
773 0, _RasMessage 773 0, _RasMessage
774 }; 774 };
775 bitstr_t bs; 775 struct bitstr bs;
776 776
777 bs.buf = bs.beg = bs.cur = buf; 777 bs.buf = bs.beg = bs.cur = buf;
778 bs.end = buf + sz; 778 bs.end = buf + sz;
@@ -789,7 +789,7 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
789 FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT, 789 FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT,
790 0, _H323_UserInformation 790 0, _H323_UserInformation
791 }; 791 };
792 bitstr_t bs; 792 struct bitstr bs;
793 793
794 bs.buf = buf; 794 bs.buf = buf;
795 bs.beg = bs.cur = beg; 795 bs.beg = bs.cur = beg;
@@ -808,7 +808,7 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
808 FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4, 808 FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4,
809 DECODE | EXT, 0, _MultimediaSystemControlMessage 809 DECODE | EXT, 0, _MultimediaSystemControlMessage
810 }; 810 };
811 bitstr_t bs; 811 struct bitstr bs;
812 812
813 bs.buf = bs.beg = bs.cur = buf; 813 bs.buf = bs.beg = bs.cur = buf;
814 bs.end = buf + sz; 814 bs.end = buf + sz;
@@ -877,6 +877,7 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
877 if (sz < 1) 877 if (sz < 1)
878 break; 878 break;
879 len = *p++; 879 len = *p++;
880 sz--;
880 if (sz < len) 881 if (sz < len)
881 break; 882 break;
882 p += len; 883 p += len;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index de4053d84364..59c08997bfdf 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -533,11 +533,12 @@ nla_put_failure:
533 return -1; 533 return -1;
534} 534}
535 535
536static inline size_t ctnetlink_proto_size(const struct nf_conn *ct) 536#if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
537static size_t ctnetlink_proto_size(const struct nf_conn *ct)
537{ 538{
538 const struct nf_conntrack_l3proto *l3proto; 539 const struct nf_conntrack_l3proto *l3proto;
539 const struct nf_conntrack_l4proto *l4proto; 540 const struct nf_conntrack_l4proto *l4proto;
540 size_t len; 541 size_t len, len4 = 0;
541 542
542 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); 543 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
543 len = l3proto->nla_size; 544 len = l3proto->nla_size;
@@ -545,9 +546,14 @@ static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
545 546
546 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 547 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
547 len += l4proto->nla_size; 548 len += l4proto->nla_size;
549 if (l4proto->nlattr_tuple_size) {
550 len4 = l4proto->nlattr_tuple_size();
551 len4 *= 3u; /* ORIG, REPLY, MASTER */
552 }
548 553
549 return len; 554 return len + len4;
550} 555}
556#endif
551 557
552static inline size_t ctnetlink_acct_size(const struct nf_conn *ct) 558static inline size_t ctnetlink_acct_size(const struct nf_conn *ct)
553{ 559{
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index b3e489c859ec..c8e9c9503a08 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -27,6 +27,7 @@
27#include <net/netfilter/nf_conntrack_l3proto.h> 27#include <net/netfilter/nf_conntrack_l3proto.h>
28#include <net/netfilter/nf_conntrack_l4proto.h> 28#include <net/netfilter/nf_conntrack_l4proto.h>
29#include <net/netfilter/nf_conntrack_core.h> 29#include <net/netfilter/nf_conntrack_core.h>
30#include <net/netfilter/nf_log.h>
30 31
31static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly; 32static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
32struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly; 33struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
@@ -63,6 +64,52 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
63 *header = NULL; 64 *header = NULL;
64 *table = NULL; 65 *table = NULL;
65} 66}
67
68__printf(5, 6)
69void nf_l4proto_log_invalid(const struct sk_buff *skb,
70 struct net *net,
71 u16 pf, u8 protonum,
72 const char *fmt, ...)
73{
74 struct va_format vaf;
75 va_list args;
76
77 if (net->ct.sysctl_log_invalid != protonum ||
78 net->ct.sysctl_log_invalid != IPPROTO_RAW)
79 return;
80
81 va_start(args, fmt);
82 vaf.fmt = fmt;
83 vaf.va = &args;
84
85 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
86 "nf_ct_proto_%d: %pV ", protonum, &vaf);
87 va_end(args);
88}
89EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
90
91__printf(3, 4)
92void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
93 const struct nf_conn *ct,
94 const char *fmt, ...)
95{
96 struct va_format vaf;
97 struct net *net;
98 va_list args;
99
100 net = nf_ct_net(ct);
101 if (likely(net->ct.sysctl_log_invalid == 0))
102 return;
103
104 va_start(args, fmt);
105 vaf.fmt = fmt;
106 vaf.va = &args;
107
108 nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct),
109 nf_ct_protonum(ct), "%pV", &vaf);
110 va_end(args);
111}
112EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
66#endif 113#endif
67 114
68const struct nf_conntrack_l4proto * 115const struct nf_conntrack_l4proto *
@@ -125,7 +172,7 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
125} 172}
126EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); 173EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
127 174
128int nf_ct_netns_get(struct net *net, u8 nfproto) 175static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
129{ 176{
130 const struct nf_conntrack_l3proto *l3proto; 177 const struct nf_conntrack_l3proto *l3proto;
131 int ret; 178 int ret;
@@ -150,9 +197,33 @@ int nf_ct_netns_get(struct net *net, u8 nfproto)
150 197
151 return ret; 198 return ret;
152} 199}
200
201int nf_ct_netns_get(struct net *net, u8 nfproto)
202{
203 int err;
204
205 if (nfproto == NFPROTO_INET) {
206 err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
207 if (err < 0)
208 goto err1;
209 err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
210 if (err < 0)
211 goto err2;
212 } else {
213 err = nf_ct_netns_do_get(net, nfproto);
214 if (err < 0)
215 goto err1;
216 }
217 return 0;
218
219err2:
220 nf_ct_netns_put(net, NFPROTO_IPV4);
221err1:
222 return err;
223}
153EXPORT_SYMBOL_GPL(nf_ct_netns_get); 224EXPORT_SYMBOL_GPL(nf_ct_netns_get);
154 225
155void nf_ct_netns_put(struct net *net, u8 nfproto) 226static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
156{ 227{
157 const struct nf_conntrack_l3proto *l3proto; 228 const struct nf_conntrack_l3proto *l3proto;
158 229
@@ -171,6 +242,15 @@ void nf_ct_netns_put(struct net *net, u8 nfproto)
171 242
172 nf_ct_l3proto_module_put(nfproto); 243 nf_ct_l3proto_module_put(nfproto);
173} 244}
245
246void nf_ct_netns_put(struct net *net, uint8_t nfproto)
247{
248 if (nfproto == NFPROTO_INET) {
249 nf_ct_netns_do_put(net, NFPROTO_IPV4);
250 nf_ct_netns_do_put(net, NFPROTO_IPV6);
251 } else
252 nf_ct_netns_do_put(net, nfproto);
253}
174EXPORT_SYMBOL_GPL(nf_ct_netns_put); 254EXPORT_SYMBOL_GPL(nf_ct_netns_put);
175 255
176const struct nf_conntrack_l4proto * 256const struct nf_conntrack_l4proto *
@@ -351,8 +431,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
351 l4proto->nla_size = 0; 431 l4proto->nla_size = 0;
352 if (l4proto->nlattr_size) 432 if (l4proto->nlattr_size)
353 l4proto->nla_size += l4proto->nlattr_size(); 433 l4proto->nla_size += l4proto->nlattr_size();
354 if (l4proto->nlattr_tuple_size)
355 l4proto->nla_size += 3 * l4proto->nlattr_tuple_size();
356 434
357 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], 435 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
358 l4proto); 436 l4proto);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 0f5a4d79f6b8..2a446f4a554c 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -428,13 +428,13 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
428 default: 428 default:
429 dn = dccp_pernet(net); 429 dn = dccp_pernet(net);
430 if (dn->dccp_loose == 0) { 430 if (dn->dccp_loose == 0) {
431 msg = "nf_ct_dccp: not picking up existing connection "; 431 msg = "not picking up existing connection ";
432 goto out_invalid; 432 goto out_invalid;
433 } 433 }
434 case CT_DCCP_REQUEST: 434 case CT_DCCP_REQUEST:
435 break; 435 break;
436 case CT_DCCP_INVALID: 436 case CT_DCCP_INVALID:
437 msg = "nf_ct_dccp: invalid state transition "; 437 msg = "invalid state transition ";
438 goto out_invalid; 438 goto out_invalid;
439 } 439 }
440 440
@@ -447,9 +447,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
447 return true; 447 return true;
448 448
449out_invalid: 449out_invalid:
450 if (LOG_INVALID(net, IPPROTO_DCCP)) 450 nf_ct_l4proto_log_invalid(skb, ct, "%s", msg);
451 nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL,
452 NULL, "%s", msg);
453 return false; 451 return false;
454} 452}
455 453
@@ -469,10 +467,8 @@ static unsigned int *dccp_get_timeouts(struct net *net)
469 467
470static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, 468static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
471 unsigned int dataoff, enum ip_conntrack_info ctinfo, 469 unsigned int dataoff, enum ip_conntrack_info ctinfo,
472 u_int8_t pf,
473 unsigned int *timeouts) 470 unsigned int *timeouts)
474{ 471{
475 struct net *net = nf_ct_net(ct);
476 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 472 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
477 struct dccp_hdr _dh, *dh; 473 struct dccp_hdr _dh, *dh;
478 u_int8_t type, old_state, new_state; 474 u_int8_t type, old_state, new_state;
@@ -534,15 +530,11 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
534 ct->proto.dccp.last_pkt = type; 530 ct->proto.dccp.last_pkt = type;
535 531
536 spin_unlock_bh(&ct->lock); 532 spin_unlock_bh(&ct->lock);
537 if (LOG_INVALID(net, IPPROTO_DCCP)) 533 nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet");
538 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
539 "nf_ct_dccp: invalid packet ignored ");
540 return NF_ACCEPT; 534 return NF_ACCEPT;
541 case CT_DCCP_INVALID: 535 case CT_DCCP_INVALID:
542 spin_unlock_bh(&ct->lock); 536 spin_unlock_bh(&ct->lock);
543 if (LOG_INVALID(net, IPPROTO_DCCP)) 537 nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition");
544 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
545 "nf_ct_dccp: invalid state transition ");
546 return -NF_ACCEPT; 538 return -NF_ACCEPT;
547 } 539 }
548 540
@@ -604,8 +596,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
604 return NF_ACCEPT; 596 return NF_ACCEPT;
605 597
606out_invalid: 598out_invalid:
607 if (LOG_INVALID(net, IPPROTO_DCCP)) 599 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg);
608 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", msg);
609 return -NF_ACCEPT; 600 return -NF_ACCEPT;
610} 601}
611 602
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 9cd40700842e..1f86ddf6649a 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -60,7 +60,6 @@ static int generic_packet(struct nf_conn *ct,
60 const struct sk_buff *skb, 60 const struct sk_buff *skb,
61 unsigned int dataoff, 61 unsigned int dataoff,
62 enum ip_conntrack_info ctinfo, 62 enum ip_conntrack_info ctinfo,
63 u_int8_t pf,
64 unsigned int *timeout) 63 unsigned int *timeout)
65{ 64{
66 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); 65 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 09a90484c27d..a2503005d80b 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -244,7 +244,6 @@ static int gre_packet(struct nf_conn *ct,
244 const struct sk_buff *skb, 244 const struct sk_buff *skb,
245 unsigned int dataoff, 245 unsigned int dataoff,
246 enum ip_conntrack_info ctinfo, 246 enum ip_conntrack_info ctinfo,
247 u_int8_t pf,
248 unsigned int *timeouts) 247 unsigned int *timeouts)
249{ 248{
250 /* If we've seen traffic both ways, this is a GRE connection. 249 /* If we've seen traffic both ways, this is a GRE connection.
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6303a88af12b..80faf04ddf15 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -306,7 +306,6 @@ static int sctp_packet(struct nf_conn *ct,
306 const struct sk_buff *skb, 306 const struct sk_buff *skb,
307 unsigned int dataoff, 307 unsigned int dataoff,
308 enum ip_conntrack_info ctinfo, 308 enum ip_conntrack_info ctinfo,
309 u_int8_t pf,
310 unsigned int *timeouts) 309 unsigned int *timeouts)
311{ 310{
312 enum sctp_conntrack new_state, old_state; 311 enum sctp_conntrack new_state, old_state;
@@ -522,8 +521,7 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
522 } 521 }
523 return NF_ACCEPT; 522 return NF_ACCEPT;
524out_invalid: 523out_invalid:
525 if (LOG_INVALID(net, IPPROTO_SCTP)) 524 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg);
526 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", logmsg);
527 return -NF_ACCEPT; 525 return -NF_ACCEPT;
528} 526}
529 527
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index cba1c6ffe51a..b12fc07111d0 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -493,8 +493,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
493 unsigned int index, 493 unsigned int index,
494 const struct sk_buff *skb, 494 const struct sk_buff *skb,
495 unsigned int dataoff, 495 unsigned int dataoff,
496 const struct tcphdr *tcph, 496 const struct tcphdr *tcph)
497 u_int8_t pf)
498{ 497{
499 struct net *net = nf_ct_net(ct); 498 struct net *net = nf_ct_net(ct);
500 struct nf_tcp_net *tn = tcp_pernet(net); 499 struct nf_tcp_net *tn = tcp_pernet(net);
@@ -702,9 +701,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
702 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || 701 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
703 tn->tcp_be_liberal) 702 tn->tcp_be_liberal)
704 res = true; 703 res = true;
705 if (!res && LOG_INVALID(net, IPPROTO_TCP)) 704 if (!res) {
706 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, 705 nf_ct_l4proto_log_invalid(skb, ct,
707 "nf_ct_tcp: %s ", 706 "%s",
708 before(seq, sender->td_maxend + 1) ? 707 before(seq, sender->td_maxend + 1) ?
709 in_recv_win ? 708 in_recv_win ?
710 before(sack, receiver->td_end + 1) ? 709 before(sack, receiver->td_end + 1) ?
@@ -713,6 +712,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
713 : "ACK is over the upper bound (ACKed data not seen yet)" 712 : "ACK is over the upper bound (ACKed data not seen yet)"
714 : "SEQ is under the lower bound (already ACKed data retransmitted)" 713 : "SEQ is under the lower bound (already ACKed data retransmitted)"
715 : "SEQ is over the upper bound (over the window of the receiver)"); 714 : "SEQ is over the upper bound (over the window of the receiver)");
715 }
716 } 716 }
717 717
718 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " 718 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
@@ -738,6 +738,12 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
738 [TCPHDR_ACK|TCPHDR_URG] = 1, 738 [TCPHDR_ACK|TCPHDR_URG] = 1,
739}; 739};
740 740
741static void tcp_error_log(const struct sk_buff *skb, struct net *net,
742 u8 pf, const char *msg)
743{
744 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
745}
746
741/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ 747/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
742static int tcp_error(struct net *net, struct nf_conn *tmpl, 748static int tcp_error(struct net *net, struct nf_conn *tmpl,
743 struct sk_buff *skb, 749 struct sk_buff *skb,
@@ -753,17 +759,13 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
753 /* Smaller that minimal TCP header? */ 759 /* Smaller that minimal TCP header? */
754 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 760 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
755 if (th == NULL) { 761 if (th == NULL) {
756 if (LOG_INVALID(net, IPPROTO_TCP)) 762 tcp_error_log(skb, net, pf, "short packet");
757 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
758 "nf_ct_tcp: short packet ");
759 return -NF_ACCEPT; 763 return -NF_ACCEPT;
760 } 764 }
761 765
762 /* Not whole TCP header or malformed packet */ 766 /* Not whole TCP header or malformed packet */
763 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { 767 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
764 if (LOG_INVALID(net, IPPROTO_TCP)) 768 tcp_error_log(skb, net, pf, "truncated packet");
765 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
766 "nf_ct_tcp: truncated/malformed packet ");
767 return -NF_ACCEPT; 769 return -NF_ACCEPT;
768 } 770 }
769 771
@@ -774,18 +776,14 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
774 /* FIXME: Source route IP option packets --RR */ 776 /* FIXME: Source route IP option packets --RR */
775 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 777 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
776 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { 778 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
777 if (LOG_INVALID(net, IPPROTO_TCP)) 779 tcp_error_log(skb, net, pf, "bad checksum");
778 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
779 "nf_ct_tcp: bad TCP checksum ");
780 return -NF_ACCEPT; 780 return -NF_ACCEPT;
781 } 781 }
782 782
783 /* Check TCP flags. */ 783 /* Check TCP flags. */
784 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); 784 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
785 if (!tcp_valid_flags[tcpflags]) { 785 if (!tcp_valid_flags[tcpflags]) {
786 if (LOG_INVALID(net, IPPROTO_TCP)) 786 tcp_error_log(skb, net, pf, "invalid tcp flag combination");
787 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
788 "nf_ct_tcp: invalid TCP flag combination ");
789 return -NF_ACCEPT; 787 return -NF_ACCEPT;
790 } 788 }
791 789
@@ -802,7 +800,6 @@ static int tcp_packet(struct nf_conn *ct,
802 const struct sk_buff *skb, 800 const struct sk_buff *skb,
803 unsigned int dataoff, 801 unsigned int dataoff,
804 enum ip_conntrack_info ctinfo, 802 enum ip_conntrack_info ctinfo,
805 u_int8_t pf,
806 unsigned int *timeouts) 803 unsigned int *timeouts)
807{ 804{
808 struct net *net = nf_ct_net(ct); 805 struct net *net = nf_ct_net(ct);
@@ -939,10 +936,8 @@ static int tcp_packet(struct nf_conn *ct,
939 IP_CT_EXP_CHALLENGE_ACK; 936 IP_CT_EXP_CHALLENGE_ACK;
940 } 937 }
941 spin_unlock_bh(&ct->lock); 938 spin_unlock_bh(&ct->lock);
942 if (LOG_INVALID(net, IPPROTO_TCP)) 939 nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
943 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, 940 "state %s ", tcp_conntrack_names[old_state]);
944 "nf_ct_tcp: invalid packet ignored in "
945 "state %s ", tcp_conntrack_names[old_state]);
946 return NF_ACCEPT; 941 return NF_ACCEPT;
947 case TCP_CONNTRACK_MAX: 942 case TCP_CONNTRACK_MAX:
948 /* Special case for SYN proxy: when the SYN to the server or 943 /* Special case for SYN proxy: when the SYN to the server or
@@ -964,9 +959,7 @@ static int tcp_packet(struct nf_conn *ct,
964 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", 959 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
965 dir, get_conntrack_index(th), old_state); 960 dir, get_conntrack_index(th), old_state);
966 spin_unlock_bh(&ct->lock); 961 spin_unlock_bh(&ct->lock);
967 if (LOG_INVALID(net, IPPROTO_TCP)) 962 nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
968 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
969 "nf_ct_tcp: invalid state ");
970 return -NF_ACCEPT; 963 return -NF_ACCEPT;
971 case TCP_CONNTRACK_TIME_WAIT: 964 case TCP_CONNTRACK_TIME_WAIT:
972 /* RFC5961 compliance cause stack to send "challenge-ACK" 965 /* RFC5961 compliance cause stack to send "challenge-ACK"
@@ -981,9 +974,7 @@ static int tcp_packet(struct nf_conn *ct,
981 /* Detected RFC5961 challenge ACK */ 974 /* Detected RFC5961 challenge ACK */
982 ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; 975 ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
983 spin_unlock_bh(&ct->lock); 976 spin_unlock_bh(&ct->lock);
984 if (LOG_INVALID(net, IPPROTO_TCP)) 977 nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
985 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
986 "nf_ct_tcp: challenge-ACK ignored ");
987 return NF_ACCEPT; /* Don't change state */ 978 return NF_ACCEPT; /* Don't change state */
988 } 979 }
989 break; 980 break;
@@ -993,9 +984,7 @@ static int tcp_packet(struct nf_conn *ct,
993 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { 984 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
994 /* Invalid RST */ 985 /* Invalid RST */
995 spin_unlock_bh(&ct->lock); 986 spin_unlock_bh(&ct->lock);
996 if (LOG_INVALID(net, IPPROTO_TCP)) 987 nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
997 nf_log_packet(net, pf, 0, skb, NULL, NULL,
998 NULL, "nf_ct_tcp: invalid RST ");
999 return -NF_ACCEPT; 988 return -NF_ACCEPT;
1000 } 989 }
1001 if (index == TCP_RST_SET 990 if (index == TCP_RST_SET
@@ -1022,7 +1011,7 @@ static int tcp_packet(struct nf_conn *ct,
1022 } 1011 }
1023 1012
1024 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, 1013 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1025 skb, dataoff, th, pf)) { 1014 skb, dataoff, th)) {
1026 spin_unlock_bh(&ct->lock); 1015 spin_unlock_bh(&ct->lock);
1027 return -NF_ACCEPT; 1016 return -NF_ACCEPT;
1028 } 1017 }
@@ -1288,9 +1277,14 @@ static int tcp_nlattr_size(void)
1288 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1); 1277 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1289} 1278}
1290 1279
1291static int tcp_nlattr_tuple_size(void) 1280static unsigned int tcp_nlattr_tuple_size(void)
1292{ 1281{
1293 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); 1282 static unsigned int size __read_mostly;
1283
1284 if (!size)
1285 size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1286
1287 return size;
1294} 1288}
1295#endif 1289#endif
1296 1290
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 8af734cd1a94..3a5f727103af 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -73,7 +73,6 @@ static int udp_packet(struct nf_conn *ct,
73 const struct sk_buff *skb, 73 const struct sk_buff *skb,
74 unsigned int dataoff, 74 unsigned int dataoff,
75 enum ip_conntrack_info ctinfo, 75 enum ip_conntrack_info ctinfo,
76 u_int8_t pf,
77 unsigned int *timeouts) 76 unsigned int *timeouts)
78{ 77{
79 /* If we've seen traffic both ways, this is some kind of UDP 78 /* If we've seen traffic both ways, this is some kind of UDP
@@ -99,6 +98,12 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
99} 98}
100 99
101#ifdef CONFIG_NF_CT_PROTO_UDPLITE 100#ifdef CONFIG_NF_CT_PROTO_UDPLITE
101static void udplite_error_log(const struct sk_buff *skb, struct net *net,
102 u8 pf, const char *msg)
103{
104 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg);
105}
106
102static int udplite_error(struct net *net, struct nf_conn *tmpl, 107static int udplite_error(struct net *net, struct nf_conn *tmpl,
103 struct sk_buff *skb, 108 struct sk_buff *skb,
104 unsigned int dataoff, 109 unsigned int dataoff,
@@ -112,9 +117,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
112 /* Header is too small? */ 117 /* Header is too small? */
113 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 118 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
114 if (!hdr) { 119 if (!hdr) {
115 if (LOG_INVALID(net, IPPROTO_UDPLITE)) 120 udplite_error_log(skb, net, pf, "short packet");
116 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
117 "nf_ct_udplite: short packet ");
118 return -NF_ACCEPT; 121 return -NF_ACCEPT;
119 } 122 }
120 123
@@ -122,17 +125,13 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
122 if (cscov == 0) { 125 if (cscov == 0) {
123 cscov = udplen; 126 cscov = udplen;
124 } else if (cscov < sizeof(*hdr) || cscov > udplen) { 127 } else if (cscov < sizeof(*hdr) || cscov > udplen) {
125 if (LOG_INVALID(net, IPPROTO_UDPLITE)) 128 udplite_error_log(skb, net, pf, "invalid checksum coverage");
126 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
127 "nf_ct_udplite: invalid checksum coverage ");
128 return -NF_ACCEPT; 129 return -NF_ACCEPT;
129 } 130 }
130 131
131 /* UDPLITE mandates checksums */ 132 /* UDPLITE mandates checksums */
132 if (!hdr->check) { 133 if (!hdr->check) {
133 if (LOG_INVALID(net, IPPROTO_UDPLITE)) 134 udplite_error_log(skb, net, pf, "checksum missing");
134 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
135 "nf_ct_udplite: checksum missing ");
136 return -NF_ACCEPT; 135 return -NF_ACCEPT;
137 } 136 }
138 137
@@ -140,9 +139,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
140 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 139 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
141 nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, 140 nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
142 pf)) { 141 pf)) {
143 if (LOG_INVALID(net, IPPROTO_UDPLITE)) 142 udplite_error_log(skb, net, pf, "bad checksum");
144 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
145 "nf_ct_udplite: bad UDPLite checksum ");
146 return -NF_ACCEPT; 143 return -NF_ACCEPT;
147 } 144 }
148 145
@@ -150,6 +147,12 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
150} 147}
151#endif 148#endif
152 149
150static void udp_error_log(const struct sk_buff *skb, struct net *net,
151 u8 pf, const char *msg)
152{
153 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg);
154}
155
153static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, 156static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
154 unsigned int dataoff, 157 unsigned int dataoff,
155 u_int8_t pf, 158 u_int8_t pf,
@@ -162,17 +165,13 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
162 /* Header is too small? */ 165 /* Header is too small? */
163 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 166 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
164 if (hdr == NULL) { 167 if (hdr == NULL) {
165 if (LOG_INVALID(net, IPPROTO_UDP)) 168 udp_error_log(skb, net, pf, "short packet");
166 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
167 "nf_ct_udp: short packet ");
168 return -NF_ACCEPT; 169 return -NF_ACCEPT;
169 } 170 }
170 171
171 /* Truncated/malformed packets */ 172 /* Truncated/malformed packets */
172 if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { 173 if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
173 if (LOG_INVALID(net, IPPROTO_UDP)) 174 udp_error_log(skb, net, pf, "truncated/malformed packet");
174 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
175 "nf_ct_udp: truncated/malformed packet ");
176 return -NF_ACCEPT; 175 return -NF_ACCEPT;
177 } 176 }
178 177
@@ -186,9 +185,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
186 * FIXME: Source route IP option packets --RR */ 185 * FIXME: Source route IP option packets --RR */
187 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 186 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
188 nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { 187 nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
189 if (LOG_INVALID(net, IPPROTO_UDP)) 188 udp_error_log(skb, net, pf, "bad checksum");
190 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
191 "nf_ct_udp: bad UDP checksum ");
192 return -NF_ACCEPT; 189 return -NF_ACCEPT;
193 } 190 }
194 191
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index af8345fc4fbd..6c38421e31f9 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -542,17 +542,14 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
542 if (nf_nat_proto_remove(ct, data)) 542 if (nf_nat_proto_remove(ct, data))
543 return 1; 543 return 1;
544 544
545 if ((ct->status & IPS_SRC_NAT_DONE) == 0) 545 /* This module is being removed and conntrack has nat null binding.
546 return 0;
547
548 /* This netns is being destroyed, and conntrack has nat null binding.
549 * Remove it from bysource hash, as the table will be freed soon. 546 * Remove it from bysource hash, as the table will be freed soon.
550 * 547 *
551 * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() 548 * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
552 * will delete entry from already-freed table. 549 * will delete entry from already-freed table.
553 */ 550 */
554 clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); 551 if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status))
555 __nf_nat_cleanup_conntrack(ct); 552 __nf_nat_cleanup_conntrack(ct);
556 553
557 /* don't delete conntrack. Although that would make things a lot 554 /* don't delete conntrack. Although that would make things a lot
558 * simpler, we'd end up flushing all conntracks on nat rmmod. 555 * simpler, we'd end up flushing all conntracks on nat rmmod.
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index e84a578dbe35..d76afafdc699 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -134,7 +134,7 @@ static int __init nf_nat_ftp_init(void)
134} 134}
135 135
136/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ 136/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
137static int warn_set(const char *val, struct kernel_param *kp) 137static int warn_set(const char *val, const struct kernel_param *kp)
138{ 138{
139 printk(KERN_INFO KBUILD_MODNAME 139 printk(KERN_INFO KBUILD_MODNAME
140 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); 140 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index 0648cb096bd8..dcb5f6375d9d 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -106,7 +106,7 @@ static int __init nf_nat_irc_init(void)
106} 106}
107 107
108/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ 108/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
109static int warn_set(const char *val, struct kernel_param *kp) 109static int warn_set(const char *val, const struct kernel_param *kp)
110{ 110{
111 printk(KERN_INFO KBUILD_MODNAME 111 printk(KERN_INFO KBUILD_MODNAME
112 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); 112 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 64e1ee091225..d8327b43e4dc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2549,14 +2549,9 @@ nft_select_set_ops(const struct nft_ctx *ctx,
2549 case NFT_SET_POL_PERFORMANCE: 2549 case NFT_SET_POL_PERFORMANCE:
2550 if (est.lookup < best.lookup) 2550 if (est.lookup < best.lookup)
2551 break; 2551 break;
2552 if (est.lookup == best.lookup) { 2552 if (est.lookup == best.lookup &&
2553 if (!desc->size) { 2553 est.space < best.space)
2554 if (est.space < best.space) 2554 break;
2555 break;
2556 } else if (est.size < best.size) {
2557 break;
2558 }
2559 }
2560 continue; 2555 continue;
2561 case NFT_SET_POL_MEMORY: 2556 case NFT_SET_POL_MEMORY:
2562 if (!desc->size) { 2557 if (!desc->size) {
@@ -3593,45 +3588,6 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb)
3593 return 0; 3588 return 0;
3594} 3589}
3595 3590
3596static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
3597 struct sk_buff *skb, const struct nlmsghdr *nlh,
3598 const struct nlattr * const nla[],
3599 struct netlink_ext_ack *extack)
3600{
3601 u8 genmask = nft_genmask_cur(net);
3602 const struct nft_set *set;
3603 struct nft_ctx ctx;
3604 int err;
3605
3606 err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
3607 if (err < 0)
3608 return err;
3609
3610 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
3611 genmask);
3612 if (IS_ERR(set))
3613 return PTR_ERR(set);
3614
3615 if (nlh->nlmsg_flags & NLM_F_DUMP) {
3616 struct netlink_dump_control c = {
3617 .dump = nf_tables_dump_set,
3618 .done = nf_tables_dump_set_done,
3619 };
3620 struct nft_set_dump_ctx *dump_ctx;
3621
3622 dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
3623 if (!dump_ctx)
3624 return -ENOMEM;
3625
3626 dump_ctx->set = set;
3627 dump_ctx->ctx = ctx;
3628
3629 c.data = dump_ctx;
3630 return netlink_dump_start(nlsk, skb, nlh, &c);
3631 }
3632 return -EOPNOTSUPP;
3633}
3634
3635static int nf_tables_fill_setelem_info(struct sk_buff *skb, 3591static int nf_tables_fill_setelem_info(struct sk_buff *skb,
3636 const struct nft_ctx *ctx, u32 seq, 3592 const struct nft_ctx *ctx, u32 seq,
3637 u32 portid, int event, u16 flags, 3593 u32 portid, int event, u16 flags,
@@ -3677,6 +3633,135 @@ nla_put_failure:
3677 return -1; 3633 return -1;
3678} 3634}
3679 3635
3636static int nft_setelem_parse_flags(const struct nft_set *set,
3637 const struct nlattr *attr, u32 *flags)
3638{
3639 if (attr == NULL)
3640 return 0;
3641
3642 *flags = ntohl(nla_get_be32(attr));
3643 if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
3644 return -EINVAL;
3645 if (!(set->flags & NFT_SET_INTERVAL) &&
3646 *flags & NFT_SET_ELEM_INTERVAL_END)
3647 return -EINVAL;
3648
3649 return 0;
3650}
3651
3652static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3653 const struct nlattr *attr)
3654{
3655 struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
3656 const struct nft_set_ext *ext;
3657 struct nft_data_desc desc;
3658 struct nft_set_elem elem;
3659 struct sk_buff *skb;
3660 uint32_t flags = 0;
3661 void *priv;
3662 int err;
3663
3664 err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
3665 nft_set_elem_policy, NULL);
3666 if (err < 0)
3667 return err;
3668
3669 if (!nla[NFTA_SET_ELEM_KEY])
3670 return -EINVAL;
3671
3672 err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
3673 if (err < 0)
3674 return err;
3675
3676 err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
3677 nla[NFTA_SET_ELEM_KEY]);
3678 if (err < 0)
3679 return err;
3680
3681 err = -EINVAL;
3682 if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
3683 return err;
3684
3685 priv = set->ops->get(ctx->net, set, &elem, flags);
3686 if (IS_ERR(priv))
3687 return PTR_ERR(priv);
3688
3689 elem.priv = priv;
3690 ext = nft_set_elem_ext(set, &elem);
3691
3692 err = -ENOMEM;
3693 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
3694 if (skb == NULL)
3695 goto err1;
3696
3697 err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid,
3698 NFT_MSG_NEWSETELEM, 0, set, &elem);
3699 if (err < 0)
3700 goto err2;
3701
3702 err = nfnetlink_unicast(skb, ctx->net, ctx->portid, MSG_DONTWAIT);
3703 /* This avoids a loop in nfnetlink. */
3704 if (err < 0)
3705 goto err1;
3706
3707 return 0;
3708err2:
3709 kfree_skb(skb);
3710err1:
3711 /* this avoids a loop in nfnetlink. */
3712 return err == -EAGAIN ? -ENOBUFS : err;
3713}
3714
3715static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
3716 struct sk_buff *skb, const struct nlmsghdr *nlh,
3717 const struct nlattr * const nla[],
3718 struct netlink_ext_ack *extack)
3719{
3720 u8 genmask = nft_genmask_cur(net);
3721 struct nft_set *set;
3722 struct nlattr *attr;
3723 struct nft_ctx ctx;
3724 int rem, err = 0;
3725
3726 err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
3727 if (err < 0)
3728 return err;
3729
3730 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
3731 genmask);
3732 if (IS_ERR(set))
3733 return PTR_ERR(set);
3734
3735 if (nlh->nlmsg_flags & NLM_F_DUMP) {
3736 struct netlink_dump_control c = {
3737 .dump = nf_tables_dump_set,
3738 .done = nf_tables_dump_set_done,
3739 };
3740 struct nft_set_dump_ctx *dump_ctx;
3741
3742 dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
3743 if (!dump_ctx)
3744 return -ENOMEM;
3745
3746 dump_ctx->set = set;
3747 dump_ctx->ctx = ctx;
3748
3749 c.data = dump_ctx;
3750 return netlink_dump_start(nlsk, skb, nlh, &c);
3751 }
3752
3753 if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
3754 return -EINVAL;
3755
3756 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
3757 err = nft_get_set_elem(&ctx, set, attr);
3758 if (err < 0)
3759 break;
3760 }
3761
3762 return err;
3763}
3764
3680static void nf_tables_setelem_notify(const struct nft_ctx *ctx, 3765static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
3681 const struct nft_set *set, 3766 const struct nft_set *set,
3682 const struct nft_set_elem *elem, 3767 const struct nft_set_elem *elem,
@@ -3777,22 +3862,6 @@ static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
3777 kfree(elem); 3862 kfree(elem);
3778} 3863}
3779 3864
3780static int nft_setelem_parse_flags(const struct nft_set *set,
3781 const struct nlattr *attr, u32 *flags)
3782{
3783 if (attr == NULL)
3784 return 0;
3785
3786 *flags = ntohl(nla_get_be32(attr));
3787 if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
3788 return -EINVAL;
3789 if (!(set->flags & NFT_SET_INTERVAL) &&
3790 *flags & NFT_SET_ELEM_INTERVAL_END)
3791 return -EINVAL;
3792
3793 return 0;
3794}
3795
3796static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, 3865static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3797 const struct nlattr *attr, u32 nlmsg_flags) 3866 const struct nlattr *attr, u32 nlmsg_flags)
3798{ 3867{
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index cad6498f10b0..e5afab86381c 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -151,7 +151,7 @@ instance_put(struct nfulnl_instance *inst)
151 call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu); 151 call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
152} 152}
153 153
154static void nfulnl_timer(unsigned long data); 154static void nfulnl_timer(struct timer_list *t);
155 155
156static struct nfulnl_instance * 156static struct nfulnl_instance *
157instance_create(struct net *net, u_int16_t group_num, 157instance_create(struct net *net, u_int16_t group_num,
@@ -184,7 +184,7 @@ instance_create(struct net *net, u_int16_t group_num,
184 /* needs to be two, since we _put() after creation */ 184 /* needs to be two, since we _put() after creation */
185 refcount_set(&inst->use, 2); 185 refcount_set(&inst->use, 2);
186 186
187 setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); 187 timer_setup(&inst->timer, nfulnl_timer, 0);
188 188
189 inst->net = get_net(net); 189 inst->net = get_net(net);
190 inst->peer_user_ns = user_ns; 190 inst->peer_user_ns = user_ns;
@@ -377,9 +377,9 @@ __nfulnl_flush(struct nfulnl_instance *inst)
377} 377}
378 378
379static void 379static void
380nfulnl_timer(unsigned long data) 380nfulnl_timer(struct timer_list *t)
381{ 381{
382 struct nfulnl_instance *inst = (struct nfulnl_instance *)data; 382 struct nfulnl_instance *inst = from_timer(inst, t, timer);
383 383
384 spin_lock_bh(&inst->lock); 384 spin_lock_bh(&inst->lock);
385 if (inst->skb) 385 if (inst->skb)
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bd0975d7dd6f..2647b895f4b0 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -312,39 +312,6 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
312 [NFTA_CT_SREG] = { .type = NLA_U32 }, 312 [NFTA_CT_SREG] = { .type = NLA_U32 },
313}; 313};
314 314
315static int nft_ct_netns_get(struct net *net, uint8_t family)
316{
317 int err;
318
319 if (family == NFPROTO_INET) {
320 err = nf_ct_netns_get(net, NFPROTO_IPV4);
321 if (err < 0)
322 goto err1;
323 err = nf_ct_netns_get(net, NFPROTO_IPV6);
324 if (err < 0)
325 goto err2;
326 } else {
327 err = nf_ct_netns_get(net, family);
328 if (err < 0)
329 goto err1;
330 }
331 return 0;
332
333err2:
334 nf_ct_netns_put(net, NFPROTO_IPV4);
335err1:
336 return err;
337}
338
339static void nft_ct_netns_put(struct net *net, uint8_t family)
340{
341 if (family == NFPROTO_INET) {
342 nf_ct_netns_put(net, NFPROTO_IPV4);
343 nf_ct_netns_put(net, NFPROTO_IPV6);
344 } else
345 nf_ct_netns_put(net, family);
346}
347
348#ifdef CONFIG_NF_CONNTRACK_ZONES 315#ifdef CONFIG_NF_CONNTRACK_ZONES
349static void nft_ct_tmpl_put_pcpu(void) 316static void nft_ct_tmpl_put_pcpu(void)
350{ 317{
@@ -489,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
489 if (err < 0) 456 if (err < 0)
490 return err; 457 return err;
491 458
492 err = nft_ct_netns_get(ctx->net, ctx->afi->family); 459 err = nf_ct_netns_get(ctx->net, ctx->afi->family);
493 if (err < 0) 460 if (err < 0)
494 return err; 461 return err;
495 462
@@ -583,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
583 if (err < 0) 550 if (err < 0)
584 goto err1; 551 goto err1;
585 552
586 err = nft_ct_netns_get(ctx->net, ctx->afi->family); 553 err = nf_ct_netns_get(ctx->net, ctx->afi->family);
587 if (err < 0) 554 if (err < 0)
588 goto err1; 555 goto err1;
589 556
@@ -606,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
606 struct nft_ct *priv = nft_expr_priv(expr); 573 struct nft_ct *priv = nft_expr_priv(expr);
607 574
608 __nft_ct_set_destroy(ctx, priv); 575 __nft_ct_set_destroy(ctx, priv);
609 nft_ct_netns_put(ctx->net, ctx->afi->family); 576 nf_ct_netns_put(ctx->net, ctx->afi->family);
610} 577}
611 578
612static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) 579static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 734989c40579..45fb2752fb63 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -106,6 +106,23 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
106 return NULL; 106 return NULL;
107} 107}
108 108
109static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
110 const struct nft_set_elem *elem, unsigned int flags)
111{
112 const struct nft_bitmap *priv = nft_set_priv(set);
113 u8 genmask = nft_genmask_cur(net);
114 struct nft_bitmap_elem *be;
115
116 list_for_each_entry_rcu(be, &priv->list, head) {
117 if (memcmp(nft_set_ext_key(&be->ext), elem->key.val.data, set->klen) ||
118 !nft_set_elem_active(&be->ext, genmask))
119 continue;
120
121 return be;
122 }
123 return ERR_PTR(-ENOENT);
124}
125
109static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, 126static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
110 const struct nft_set_elem *elem, 127 const struct nft_set_elem *elem,
111 struct nft_set_ext **ext) 128 struct nft_set_ext **ext)
@@ -294,6 +311,7 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = {
294 .activate = nft_bitmap_activate, 311 .activate = nft_bitmap_activate,
295 .lookup = nft_bitmap_lookup, 312 .lookup = nft_bitmap_lookup,
296 .walk = nft_bitmap_walk, 313 .walk = nft_bitmap_walk,
314 .get = nft_bitmap_get,
297}; 315};
298 316
299static struct nft_set_type nft_bitmap_type __read_mostly = { 317static struct nft_set_type nft_bitmap_type __read_mostly = {
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 9c0d5a7ce5f9..f8166c1d5430 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -95,6 +95,24 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
95 return !!he; 95 return !!he;
96} 96}
97 97
98static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
99 const struct nft_set_elem *elem, unsigned int flags)
100{
101 struct nft_rhash *priv = nft_set_priv(set);
102 struct nft_rhash_elem *he;
103 struct nft_rhash_cmp_arg arg = {
104 .genmask = nft_genmask_cur(net),
105 .set = set,
106 .key = elem->key.val.data,
107 };
108
109 he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
110 if (he != NULL)
111 return he;
112
113 return ERR_PTR(-ENOENT);
114}
115
98static bool nft_rhash_update(struct nft_set *set, const u32 *key, 116static bool nft_rhash_update(struct nft_set *set, const u32 *key,
99 void *(*new)(struct nft_set *, 117 void *(*new)(struct nft_set *,
100 const struct nft_expr *, 118 const struct nft_expr *,
@@ -409,6 +427,24 @@ static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
409 return false; 427 return false;
410} 428}
411 429
430static void *nft_hash_get(const struct net *net, const struct nft_set *set,
431 const struct nft_set_elem *elem, unsigned int flags)
432{
433 struct nft_hash *priv = nft_set_priv(set);
434 u8 genmask = nft_genmask_cur(net);
435 struct nft_hash_elem *he;
436 u32 hash;
437
438 hash = jhash(elem->key.val.data, set->klen, priv->seed);
439 hash = reciprocal_scale(hash, priv->buckets);
440 hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
441 if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
442 nft_set_elem_active(&he->ext, genmask))
443 return he;
444 }
445 return ERR_PTR(-ENOENT);
446}
447
412/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */ 448/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */
413static inline u32 nft_hash_key(const u32 *key, u32 klen) 449static inline u32 nft_hash_key(const u32 *key, u32 klen)
414{ 450{
@@ -494,7 +530,7 @@ static void *nft_hash_deactivate(const struct net *net,
494 hash = reciprocal_scale(hash, priv->buckets); 530 hash = reciprocal_scale(hash, priv->buckets);
495 hlist_for_each_entry(he, &priv->table[hash], node) { 531 hlist_for_each_entry(he, &priv->table[hash], node) {
496 if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val, 532 if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val,
497 set->klen) || 533 set->klen) &&
498 nft_set_elem_active(&he->ext, genmask)) { 534 nft_set_elem_active(&he->ext, genmask)) {
499 nft_set_elem_change_active(net, set, &he->ext); 535 nft_set_elem_change_active(net, set, &he->ext);
500 return he; 536 return he;
@@ -600,6 +636,7 @@ static struct nft_set_ops nft_rhash_ops __read_mostly = {
600 .lookup = nft_rhash_lookup, 636 .lookup = nft_rhash_lookup,
601 .update = nft_rhash_update, 637 .update = nft_rhash_update,
602 .walk = nft_rhash_walk, 638 .walk = nft_rhash_walk,
639 .get = nft_rhash_get,
603 .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, 640 .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
604}; 641};
605 642
@@ -617,6 +654,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
617 .remove = nft_hash_remove, 654 .remove = nft_hash_remove,
618 .lookup = nft_hash_lookup, 655 .lookup = nft_hash_lookup,
619 .walk = nft_hash_walk, 656 .walk = nft_hash_walk,
657 .get = nft_hash_get,
620 .features = NFT_SET_MAP | NFT_SET_OBJECT, 658 .features = NFT_SET_MAP | NFT_SET_OBJECT,
621}; 659};
622 660
@@ -634,6 +672,7 @@ static struct nft_set_ops nft_hash_fast_ops __read_mostly = {
634 .remove = nft_hash_remove, 672 .remove = nft_hash_remove,
635 .lookup = nft_hash_lookup_fast, 673 .lookup = nft_hash_lookup_fast,
636 .walk = nft_hash_walk, 674 .walk = nft_hash_walk,
675 .get = nft_hash_get,
637 .features = NFT_SET_MAP | NFT_SET_OBJECT, 676 .features = NFT_SET_MAP | NFT_SET_OBJECT,
638}; 677};
639 678
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index d83a4ec5900d..e6f08bc5f359 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -113,6 +113,78 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
113 return ret; 113 return ret;
114} 114}
115 115
116static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
117 const u32 *key, struct nft_rbtree_elem **elem,
118 unsigned int seq, unsigned int flags, u8 genmask)
119{
120 struct nft_rbtree_elem *rbe, *interval = NULL;
121 struct nft_rbtree *priv = nft_set_priv(set);
122 const struct rb_node *parent;
123 const void *this;
124 int d;
125
126 parent = rcu_dereference_raw(priv->root.rb_node);
127 while (parent != NULL) {
128 if (read_seqcount_retry(&priv->count, seq))
129 return false;
130
131 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
132
133 this = nft_set_ext_key(&rbe->ext);
134 d = memcmp(this, key, set->klen);
135 if (d < 0) {
136 parent = rcu_dereference_raw(parent->rb_left);
137 interval = rbe;
138 } else if (d > 0) {
139 parent = rcu_dereference_raw(parent->rb_right);
140 } else {
141 if (!nft_set_elem_active(&rbe->ext, genmask))
142 parent = rcu_dereference_raw(parent->rb_left);
143
144 if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
145 (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
146 (flags & NFT_SET_ELEM_INTERVAL_END)) {
147 *elem = rbe;
148 return true;
149 }
150 return false;
151 }
152 }
153
154 if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
155 nft_set_elem_active(&interval->ext, genmask) &&
156 !nft_rbtree_interval_end(interval)) {
157 *elem = interval;
158 return true;
159 }
160
161 return false;
162}
163
164static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
165 const struct nft_set_elem *elem, unsigned int flags)
166{
167 struct nft_rbtree *priv = nft_set_priv(set);
168 unsigned int seq = read_seqcount_begin(&priv->count);
169 struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT);
170 const u32 *key = (const u32 *)&elem->key.val;
171 u8 genmask = nft_genmask_cur(net);
172 bool ret;
173
174 ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
175 if (ret || !read_seqcount_retry(&priv->count, seq))
176 return rbe;
177
178 read_lock_bh(&priv->lock);
179 seq = read_seqcount_begin(&priv->count);
180 ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
181 if (!ret)
182 rbe = ERR_PTR(-ENOENT);
183 read_unlock_bh(&priv->lock);
184
185 return rbe;
186}
187
116static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, 188static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
117 struct nft_rbtree_elem *new, 189 struct nft_rbtree_elem *new,
118 struct nft_set_ext **ext) 190 struct nft_set_ext **ext)
@@ -336,6 +408,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
336 .activate = nft_rbtree_activate, 408 .activate = nft_rbtree_activate,
337 .lookup = nft_rbtree_lookup, 409 .lookup = nft_rbtree_lookup,
338 .walk = nft_rbtree_walk, 410 .walk = nft_rbtree_walk,
411 .get = nft_rbtree_get,
339 .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, 412 .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
340}; 413};
341 414
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d8571f414208..a77dd514297c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1153,6 +1153,7 @@ xt_replace_table(struct xt_table *table,
1153 int *error) 1153 int *error)
1154{ 1154{
1155 struct xt_table_info *private; 1155 struct xt_table_info *private;
1156 unsigned int cpu;
1156 int ret; 1157 int ret;
1157 1158
1158 ret = xt_jumpstack_alloc(newinfo); 1159 ret = xt_jumpstack_alloc(newinfo);
@@ -1182,14 +1183,28 @@ xt_replace_table(struct xt_table *table,
1182 smp_wmb(); 1183 smp_wmb();
1183 table->private = newinfo; 1184 table->private = newinfo;
1184 1185
1186 /* make sure all cpus see new ->private value */
1187 smp_wmb();
1188
1185 /* 1189 /*
1186 * Even though table entries have now been swapped, other CPU's 1190 * Even though table entries have now been swapped, other CPU's
1187 * may still be using the old entries. This is okay, because 1191 * may still be using the old entries...
1188 * resynchronization happens because of the locking done
1189 * during the get_counters() routine.
1190 */ 1192 */
1191 local_bh_enable(); 1193 local_bh_enable();
1192 1194
1195 /* ... so wait for even xt_recseq on all cpus */
1196 for_each_possible_cpu(cpu) {
1197 seqcount_t *s = &per_cpu(xt_recseq, cpu);
1198 u32 seq = raw_read_seqcount(s);
1199
1200 if (seq & 1) {
1201 do {
1202 cond_resched();
1203 cpu_relax();
1204 } while (seq == raw_read_seqcount(s));
1205 }
1206 }
1207
1193#ifdef CONFIG_AUDIT 1208#ifdef CONFIG_AUDIT
1194 if (audit_enabled) { 1209 if (audit_enabled) {
1195 audit_log(current->audit_context, GFP_KERNEL, 1210 audit_log(current->audit_context, GFP_KERNEL,
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index daf45da448fa..ee3421ad108d 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -107,9 +107,9 @@ static void idletimer_tg_work(struct work_struct *work)
107 sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name); 107 sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
108} 108}
109 109
110static void idletimer_tg_expired(unsigned long data) 110static void idletimer_tg_expired(struct timer_list *t)
111{ 111{
112 struct idletimer_tg *timer = (struct idletimer_tg *) data; 112 struct idletimer_tg *timer = from_timer(timer, t, timer);
113 113
114 pr_debug("timer %s expired\n", timer->attr.attr.name); 114 pr_debug("timer %s expired\n", timer->attr.attr.name);
115 115
@@ -143,8 +143,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
143 143
144 list_add(&info->timer->entry, &idletimer_tg_list); 144 list_add(&info->timer->entry, &idletimer_tg_list);
145 145
146 setup_timer(&info->timer->timer, idletimer_tg_expired, 146 timer_setup(&info->timer->timer, idletimer_tg_expired, 0);
147 (unsigned long) info->timer);
148 info->timer->refcnt = 1; 147 info->timer->refcnt = 1;
149 148
150 mod_timer(&info->timer->timer, 149 mod_timer(&info->timer->timer,
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 3ba31c194cce..0971634e5444 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -85,9 +85,10 @@ led_tg(struct sk_buff *skb, const struct xt_action_param *par)
85 return XT_CONTINUE; 85 return XT_CONTINUE;
86} 86}
87 87
88static void led_timeout_callback(unsigned long data) 88static void led_timeout_callback(struct timer_list *t)
89{ 89{
90 struct xt_led_info_internal *ledinternal = (struct xt_led_info_internal *)data; 90 struct xt_led_info_internal *ledinternal = from_timer(ledinternal, t,
91 timer);
91 92
92 led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); 93 led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
93} 94}
@@ -143,8 +144,7 @@ static int led_tg_check(const struct xt_tgchk_param *par)
143 144
144 /* See if we need to set up a timer */ 145 /* See if we need to set up a timer */
145 if (ledinfo->delay > 0) 146 if (ledinfo->delay > 0)
146 setup_timer(&ledinternal->timer, led_timeout_callback, 147 timer_setup(&ledinternal->timer, led_timeout_callback, 0);
147 (unsigned long)ledinternal);
148 148
149 list_add_tail(&ledinternal->list, &xt_led_triggers); 149 list_add_tail(&ledinternal->list, &xt_led_triggers);
150 150
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 29123934887b..041da0d9c06f 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -56,7 +56,7 @@ static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
56 int retval, fd; 56 int retval, fd;
57 57
58 set_fs(KERNEL_DS); 58 set_fs(KERNEL_DS);
59 fd = bpf_obj_get_user(path); 59 fd = bpf_obj_get_user(path, 0);
60 set_fs(oldfs); 60 set_fs(oldfs);
61 if (fd < 0) 61 if (fd < 0)
62 return fd; 62 return fd;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index ffa8eec980e9..a6214f235333 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -46,7 +46,6 @@
46struct xt_connlimit_conn { 46struct xt_connlimit_conn {
47 struct hlist_node node; 47 struct hlist_node node;
48 struct nf_conntrack_tuple tuple; 48 struct nf_conntrack_tuple tuple;
49 union nf_inet_addr addr;
50}; 49};
51 50
52struct xt_connlimit_rb { 51struct xt_connlimit_rb {
@@ -72,16 +71,9 @@ static inline unsigned int connlimit_iphash(__be32 addr)
72} 71}
73 72
74static inline unsigned int 73static inline unsigned int
75connlimit_iphash6(const union nf_inet_addr *addr, 74connlimit_iphash6(const union nf_inet_addr *addr)
76 const union nf_inet_addr *mask)
77{ 75{
78 union nf_inet_addr res; 76 return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6),
79 unsigned int i;
80
81 for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
82 res.ip6[i] = addr->ip6[i] & mask->ip6[i];
83
84 return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6),
85 connlimit_rnd) % CONNLIMIT_SLOTS; 77 connlimit_rnd) % CONNLIMIT_SLOTS;
86} 78}
87 79
@@ -95,24 +87,13 @@ static inline bool already_closed(const struct nf_conn *conn)
95} 87}
96 88
97static int 89static int
98same_source_net(const union nf_inet_addr *addr, 90same_source(const union nf_inet_addr *addr,
99 const union nf_inet_addr *mask, 91 const union nf_inet_addr *u3, u_int8_t family)
100 const union nf_inet_addr *u3, u_int8_t family)
101{ 92{
102 if (family == NFPROTO_IPV4) { 93 if (family == NFPROTO_IPV4)
103 return ntohl(addr->ip & mask->ip) - 94 return ntohl(addr->ip) - ntohl(u3->ip);
104 ntohl(u3->ip & mask->ip);
105 } else {
106 union nf_inet_addr lh, rh;
107 unsigned int i;
108
109 for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) {
110 lh.ip6[i] = addr->ip6[i] & mask->ip6[i];
111 rh.ip6[i] = u3->ip6[i] & mask->ip6[i];
112 }
113 95
114 return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)); 96 return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6));
115 }
116} 97}
117 98
118static bool add_hlist(struct hlist_head *head, 99static bool add_hlist(struct hlist_head *head,
@@ -125,7 +106,6 @@ static bool add_hlist(struct hlist_head *head,
125 if (conn == NULL) 106 if (conn == NULL)
126 return false; 107 return false;
127 conn->tuple = *tuple; 108 conn->tuple = *tuple;
128 conn->addr = *addr;
129 hlist_add_head(&conn->node, head); 109 hlist_add_head(&conn->node, head);
130 return true; 110 return true;
131} 111}
@@ -196,7 +176,7 @@ static void tree_nodes_free(struct rb_root *root,
196static unsigned int 176static unsigned int
197count_tree(struct net *net, struct rb_root *root, 177count_tree(struct net *net, struct rb_root *root,
198 const struct nf_conntrack_tuple *tuple, 178 const struct nf_conntrack_tuple *tuple,
199 const union nf_inet_addr *addr, const union nf_inet_addr *mask, 179 const union nf_inet_addr *addr,
200 u8 family, const struct nf_conntrack_zone *zone) 180 u8 family, const struct nf_conntrack_zone *zone)
201{ 181{
202 struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; 182 struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
@@ -217,7 +197,7 @@ count_tree(struct net *net, struct rb_root *root,
217 rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node); 197 rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node);
218 198
219 parent = *rbnode; 199 parent = *rbnode;
220 diff = same_source_net(addr, mask, &rbconn->addr, family); 200 diff = same_source(addr, &rbconn->addr, family);
221 if (diff < 0) { 201 if (diff < 0) {
222 rbnode = &((*rbnode)->rb_left); 202 rbnode = &((*rbnode)->rb_left);
223 } else if (diff > 0) { 203 } else if (diff > 0) {
@@ -270,7 +250,6 @@ count_tree(struct net *net, struct rb_root *root,
270 } 250 }
271 251
272 conn->tuple = *tuple; 252 conn->tuple = *tuple;
273 conn->addr = *addr;
274 rbconn->addr = *addr; 253 rbconn->addr = *addr;
275 254
276 INIT_HLIST_HEAD(&rbconn->hhead); 255 INIT_HLIST_HEAD(&rbconn->hhead);
@@ -285,7 +264,6 @@ static int count_them(struct net *net,
285 struct xt_connlimit_data *data, 264 struct xt_connlimit_data *data,
286 const struct nf_conntrack_tuple *tuple, 265 const struct nf_conntrack_tuple *tuple,
287 const union nf_inet_addr *addr, 266 const union nf_inet_addr *addr,
288 const union nf_inet_addr *mask,
289 u_int8_t family, 267 u_int8_t family,
290 const struct nf_conntrack_zone *zone) 268 const struct nf_conntrack_zone *zone)
291{ 269{
@@ -294,14 +272,14 @@ static int count_them(struct net *net,
294 u32 hash; 272 u32 hash;
295 273
296 if (family == NFPROTO_IPV6) 274 if (family == NFPROTO_IPV6)
297 hash = connlimit_iphash6(addr, mask); 275 hash = connlimit_iphash6(addr);
298 else 276 else
299 hash = connlimit_iphash(addr->ip & mask->ip); 277 hash = connlimit_iphash(addr->ip);
300 root = &data->climit_root[hash]; 278 root = &data->climit_root[hash];
301 279
302 spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); 280 spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
303 281
304 count = count_tree(net, root, tuple, addr, mask, family, zone); 282 count = count_tree(net, root, tuple, addr, family, zone);
305 283
306 spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); 284 spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
307 285
@@ -332,16 +310,23 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
332 310
333 if (xt_family(par) == NFPROTO_IPV6) { 311 if (xt_family(par) == NFPROTO_IPV6) {
334 const struct ipv6hdr *iph = ipv6_hdr(skb); 312 const struct ipv6hdr *iph = ipv6_hdr(skb);
313 unsigned int i;
314
335 memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? 315 memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
336 &iph->daddr : &iph->saddr, sizeof(addr.ip6)); 316 &iph->daddr : &iph->saddr, sizeof(addr.ip6));
317
318 for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i)
319 addr.ip6[i] &= info->mask.ip6[i];
337 } else { 320 } else {
338 const struct iphdr *iph = ip_hdr(skb); 321 const struct iphdr *iph = ip_hdr(skb);
339 addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ? 322 addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
340 iph->daddr : iph->saddr; 323 iph->daddr : iph->saddr;
324
325 addr.ip &= info->mask.ip;
341 } 326 }
342 327
343 connections = count_them(net, info->data, tuple_ptr, &addr, 328 connections = count_them(net, info->data, tuple_ptr, &addr,
344 &info->mask, xt_family(par), zone); 329 xt_family(par), zone);
345 if (connections == 0) 330 if (connections == 0)
346 /* kmalloc failed, drop it entirely */ 331 /* kmalloc failed, drop it entirely */
347 goto hotdrop; 332 goto hotdrop;
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index d0f38bc9af6d..ac709f0f197b 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -87,7 +87,7 @@ static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s,
87 struct list_head *i = s; 87 struct list_head *i = s;
88 struct netlbl_af4list *n = __af4list_entry(s); 88 struct netlbl_af4list *n = __af4list_entry(s);
89 while (i != h && !n->valid) { 89 while (i != h && !n->valid) {
90 i = rcu_dereference(i->next); 90 i = rcu_dereference(list_next_rcu(i));
91 n = __af4list_entry(i); 91 n = __af4list_entry(i);
92 } 92 }
93 return n; 93 return n;
@@ -154,7 +154,7 @@ static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s,
154 struct list_head *i = s; 154 struct list_head *i = s;
155 struct netlbl_af6list *n = __af6list_entry(s); 155 struct netlbl_af6list *n = __af6list_entry(s);
156 while (i != h && !n->valid) { 156 while (i != h && !n->valid) {
157 i = rcu_dereference(i->next); 157 i = rcu_dereference(list_next_rcu(i));
158 n = __af6list_entry(i); 158 n = __af6list_entry(i);
159 } 159 }
160 return n; 160 return n;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b93148e8e9fb..b9e0ee4e22f5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -128,7 +128,6 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
128}; 128};
129 129
130static int netlink_dump(struct sock *sk); 130static int netlink_dump(struct sock *sk);
131static void netlink_skb_destructor(struct sk_buff *skb);
132 131
133/* nl_table locking explained: 132/* nl_table locking explained:
134 * Lookup and traversal are protected with an RCU read-side lock. Insertion 133 * Lookup and traversal are protected with an RCU read-side lock. Insertion
@@ -2136,7 +2135,7 @@ static int netlink_dump(struct sock *sk)
2136 struct sk_buff *skb = NULL; 2135 struct sk_buff *skb = NULL;
2137 struct nlmsghdr *nlh; 2136 struct nlmsghdr *nlh;
2138 struct module *module; 2137 struct module *module;
2139 int len, err = -ENOBUFS; 2138 int err = -ENOBUFS;
2140 int alloc_min_size; 2139 int alloc_min_size;
2141 int alloc_size; 2140 int alloc_size;
2142 2141
@@ -2183,9 +2182,11 @@ static int netlink_dump(struct sock *sk)
2183 skb_reserve(skb, skb_tailroom(skb) - alloc_size); 2182 skb_reserve(skb, skb_tailroom(skb) - alloc_size);
2184 netlink_skb_set_owner_r(skb, sk); 2183 netlink_skb_set_owner_r(skb, sk);
2185 2184
2186 len = cb->dump(skb, cb); 2185 if (nlk->dump_done_errno > 0)
2186 nlk->dump_done_errno = cb->dump(skb, cb);
2187 2187
2188 if (len > 0) { 2188 if (nlk->dump_done_errno > 0 ||
2189 skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
2189 mutex_unlock(nlk->cb_mutex); 2190 mutex_unlock(nlk->cb_mutex);
2190 2191
2191 if (sk_filter(sk, skb)) 2192 if (sk_filter(sk, skb))
@@ -2195,13 +2196,15 @@ static int netlink_dump(struct sock *sk)
2195 return 0; 2196 return 0;
2196 } 2197 }
2197 2198
2198 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); 2199 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
2199 if (!nlh) 2200 sizeof(nlk->dump_done_errno), NLM_F_MULTI);
2201 if (WARN_ON(!nlh))
2200 goto errout_skb; 2202 goto errout_skb;
2201 2203
2202 nl_dump_check_consistent(cb, nlh); 2204 nl_dump_check_consistent(cb, nlh);
2203 2205
2204 memcpy(nlmsg_data(nlh), &len, sizeof(len)); 2206 memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
2207 sizeof(nlk->dump_done_errno));
2205 2208
2206 if (sk_filter(sk, skb)) 2209 if (sk_filter(sk, skb))
2207 kfree_skb(skb); 2210 kfree_skb(skb);
@@ -2273,6 +2276,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2273 } 2276 }
2274 2277
2275 nlk->cb_running = true; 2278 nlk->cb_running = true;
2279 nlk->dump_done_errno = INT_MAX;
2276 2280
2277 mutex_unlock(nlk->cb_mutex); 2281 mutex_unlock(nlk->cb_mutex);
2278 2282
@@ -2313,17 +2317,16 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
2313 * requests to cap the error message, and get extra error data if 2317 * requests to cap the error message, and get extra error data if
2314 * requested. 2318 * requested.
2315 */ 2319 */
2320 if (nlk_has_extack && extack && extack->_msg)
2321 tlvlen += nla_total_size(strlen(extack->_msg) + 1);
2322
2316 if (err) { 2323 if (err) {
2317 if (!(nlk->flags & NETLINK_F_CAP_ACK)) 2324 if (!(nlk->flags & NETLINK_F_CAP_ACK))
2318 payload += nlmsg_len(nlh); 2325 payload += nlmsg_len(nlh);
2319 else 2326 else
2320 flags |= NLM_F_CAPPED; 2327 flags |= NLM_F_CAPPED;
2321 if (nlk_has_extack && extack) { 2328 if (nlk_has_extack && extack && extack->bad_attr)
2322 if (extack->_msg) 2329 tlvlen += nla_total_size(sizeof(u32));
2323 tlvlen += nla_total_size(strlen(extack->_msg) + 1);
2324 if (extack->bad_attr)
2325 tlvlen += nla_total_size(sizeof(u32));
2326 }
2327 } else { 2330 } else {
2328 flags |= NLM_F_CAPPED; 2331 flags |= NLM_F_CAPPED;
2329 2332
@@ -2336,16 +2339,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
2336 2339
2337 skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); 2340 skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
2338 if (!skb) { 2341 if (!skb) {
2339 struct sock *sk; 2342 NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
2340 2343 NETLINK_CB(in_skb).sk->sk_error_report(NETLINK_CB(in_skb).sk);
2341 sk = netlink_lookup(sock_net(in_skb->sk),
2342 in_skb->sk->sk_protocol,
2343 NETLINK_CB(in_skb).portid);
2344 if (sk) {
2345 sk->sk_err = ENOBUFS;
2346 sk->sk_error_report(sk);
2347 sock_put(sk);
2348 }
2349 return; 2344 return;
2350 } 2345 }
2351 2346
@@ -2356,10 +2351,11 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
2356 memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh)); 2351 memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
2357 2352
2358 if (nlk_has_extack && extack) { 2353 if (nlk_has_extack && extack) {
2354 if (extack->_msg) {
2355 WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
2356 extack->_msg));
2357 }
2359 if (err) { 2358 if (err) {
2360 if (extack->_msg)
2361 WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
2362 extack->_msg));
2363 if (extack->bad_attr && 2359 if (extack->bad_attr &&
2364 !WARN_ON((u8 *)extack->bad_attr < in_skb->data || 2360 !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
2365 (u8 *)extack->bad_attr >= in_skb->data + 2361 (u8 *)extack->bad_attr >= in_skb->data +
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 028188597eaa..962de7b3c023 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -34,6 +34,7 @@ struct netlink_sock {
34 wait_queue_head_t wait; 34 wait_queue_head_t wait;
35 bool bound; 35 bool bound;
36 bool cb_running; 36 bool cb_running;
37 int dump_done_errno;
37 struct netlink_callback cb; 38 struct netlink_callback cb;
38 struct mutex *cb_mutex; 39 struct mutex *cb_mutex;
39 struct mutex cb_def_mutex; 40 struct mutex cb_def_mutex;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ebf16f7f9089..7ed9d4422a73 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -241,9 +241,9 @@ void nr_destroy_socket(struct sock *);
241/* 241/*
242 * Handler for deferred kills. 242 * Handler for deferred kills.
243 */ 243 */
244static void nr_destroy_timer(unsigned long data) 244static void nr_destroy_timer(struct timer_list *t)
245{ 245{
246 struct sock *sk=(struct sock *)data; 246 struct sock *sk = from_timer(sk, t, sk_timer);
247 bh_lock_sock(sk); 247 bh_lock_sock(sk);
248 sock_hold(sk); 248 sock_hold(sk);
249 nr_destroy_socket(sk); 249 nr_destroy_socket(sk);
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 80dbd0beb516..fbfdae452ff9 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -125,7 +125,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
125 125
126 case NR_DISCREQ: 126 case NR_DISCREQ:
127 nr_write_internal(sk, NR_DISCACK); 127 nr_write_internal(sk, NR_DISCACK);
128 128 /* fall through */
129 case NR_DISCACK: 129 case NR_DISCACK:
130 nr_disconnect(sk, 0); 130 nr_disconnect(sk, 0);
131 break; 131 break;
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index 989ae647825e..215ad22a9647 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -15,7 +15,7 @@
15#include <net/netrom.h> 15#include <net/netrom.h>
16#include <linux/init.h> 16#include <linux/init.h>
17 17
18static void nr_loopback_timer(unsigned long); 18static void nr_loopback_timer(struct timer_list *);
19 19
20static struct sk_buff_head loopback_queue; 20static struct sk_buff_head loopback_queue;
21static DEFINE_TIMER(loopback_timer, nr_loopback_timer); 21static DEFINE_TIMER(loopback_timer, nr_loopback_timer);
@@ -48,7 +48,7 @@ int nr_loopback_queue(struct sk_buff *skb)
48 return 1; 48 return 1;
49} 49}
50 50
51static void nr_loopback_timer(unsigned long param) 51static void nr_loopback_timer(struct timer_list *unused)
52{ 52{
53 struct sk_buff *skb; 53 struct sk_buff *skb;
54 ax25_address *nr_dest; 54 ax25_address *nr_dest;
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 0c59354e280e..75e6ba970fde 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -80,6 +80,19 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign,
80 80
81static void nr_remove_neigh(struct nr_neigh *); 81static void nr_remove_neigh(struct nr_neigh *);
82 82
83/* re-sort the routes in quality order. */
84static void re_sort_routes(struct nr_node *nr_node, int x, int y)
85{
86 if (nr_node->routes[y].quality > nr_node->routes[x].quality) {
87 if (nr_node->which == x)
88 nr_node->which = y;
89 else if (nr_node->which == y)
90 nr_node->which = x;
91
92 swap(nr_node->routes[x], nr_node->routes[y]);
93 }
94}
95
83/* 96/*
84 * Add a new route to a node, and in the process add the node and the 97 * Add a new route to a node, and in the process add the node and the
85 * neighbour if it is new. 98 * neighbour if it is new.
@@ -90,7 +103,6 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
90{ 103{
91 struct nr_node *nr_node; 104 struct nr_node *nr_node;
92 struct nr_neigh *nr_neigh; 105 struct nr_neigh *nr_neigh;
93 struct nr_route nr_route;
94 int i, found; 106 int i, found;
95 struct net_device *odev; 107 struct net_device *odev;
96 108
@@ -251,49 +263,11 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
251 /* Now re-sort the routes in quality order */ 263 /* Now re-sort the routes in quality order */
252 switch (nr_node->count) { 264 switch (nr_node->count) {
253 case 3: 265 case 3:
254 if (nr_node->routes[1].quality > nr_node->routes[0].quality) { 266 re_sort_routes(nr_node, 0, 1);
255 switch (nr_node->which) { 267 re_sort_routes(nr_node, 1, 2);
256 case 0: 268 /* fall through */
257 nr_node->which = 1;
258 break;
259 case 1:
260 nr_node->which = 0;
261 break;
262 }
263 nr_route = nr_node->routes[0];
264 nr_node->routes[0] = nr_node->routes[1];
265 nr_node->routes[1] = nr_route;
266 }
267 if (nr_node->routes[2].quality > nr_node->routes[1].quality) {
268 switch (nr_node->which) {
269 case 1: nr_node->which = 2;
270 break;
271
272 case 2: nr_node->which = 1;
273 break;
274
275 default:
276 break;
277 }
278 nr_route = nr_node->routes[1];
279 nr_node->routes[1] = nr_node->routes[2];
280 nr_node->routes[2] = nr_route;
281 }
282 case 2: 269 case 2:
283 if (nr_node->routes[1].quality > nr_node->routes[0].quality) { 270 re_sort_routes(nr_node, 0, 1);
284 switch (nr_node->which) {
285 case 0: nr_node->which = 1;
286 break;
287
288 case 1: nr_node->which = 0;
289 break;
290
291 default: break;
292 }
293 nr_route = nr_node->routes[0];
294 nr_node->routes[0] = nr_node->routes[1];
295 nr_node->routes[1] = nr_route;
296 }
297 case 1: 271 case 1:
298 break; 272 break;
299 } 273 }
@@ -384,6 +358,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
384 switch (i) { 358 switch (i) {
385 case 0: 359 case 0:
386 nr_node->routes[0] = nr_node->routes[1]; 360 nr_node->routes[0] = nr_node->routes[1];
361 /* fall through */
387 case 1: 362 case 1:
388 nr_node->routes[1] = nr_node->routes[2]; 363 nr_node->routes[1] = nr_node->routes[2];
389 case 2: 364 case 2:
@@ -553,6 +528,7 @@ void nr_rt_device_down(struct net_device *dev)
553 switch (i) { 528 switch (i) {
554 case 0: 529 case 0:
555 t->routes[0] = t->routes[1]; 530 t->routes[0] = t->routes[1];
531 /* fall through */
556 case 1: 532 case 1:
557 t->routes[1] = t->routes[2]; 533 t->routes[1] = t->routes[2];
558 case 2: 534 case 2:
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 94d05806a9a2..cbd51ed5a2d7 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -29,24 +29,23 @@
29#include <linux/interrupt.h> 29#include <linux/interrupt.h>
30#include <net/netrom.h> 30#include <net/netrom.h>
31 31
32static void nr_heartbeat_expiry(unsigned long); 32static void nr_heartbeat_expiry(struct timer_list *);
33static void nr_t1timer_expiry(unsigned long); 33static void nr_t1timer_expiry(struct timer_list *);
34static void nr_t2timer_expiry(unsigned long); 34static void nr_t2timer_expiry(struct timer_list *);
35static void nr_t4timer_expiry(unsigned long); 35static void nr_t4timer_expiry(struct timer_list *);
36static void nr_idletimer_expiry(unsigned long); 36static void nr_idletimer_expiry(struct timer_list *);
37 37
38void nr_init_timers(struct sock *sk) 38void nr_init_timers(struct sock *sk)
39{ 39{
40 struct nr_sock *nr = nr_sk(sk); 40 struct nr_sock *nr = nr_sk(sk);
41 41
42 setup_timer(&nr->t1timer, nr_t1timer_expiry, (unsigned long)sk); 42 timer_setup(&nr->t1timer, nr_t1timer_expiry, 0);
43 setup_timer(&nr->t2timer, nr_t2timer_expiry, (unsigned long)sk); 43 timer_setup(&nr->t2timer, nr_t2timer_expiry, 0);
44 setup_timer(&nr->t4timer, nr_t4timer_expiry, (unsigned long)sk); 44 timer_setup(&nr->t4timer, nr_t4timer_expiry, 0);
45 setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk); 45 timer_setup(&nr->idletimer, nr_idletimer_expiry, 0);
46 46
47 /* initialized by sock_init_data */ 47 /* initialized by sock_init_data */
48 sk->sk_timer.data = (unsigned long)sk; 48 sk->sk_timer.function = nr_heartbeat_expiry;
49 sk->sk_timer.function = &nr_heartbeat_expiry;
50} 49}
51 50
52void nr_start_t1timer(struct sock *sk) 51void nr_start_t1timer(struct sock *sk)
@@ -113,9 +112,9 @@ int nr_t1timer_running(struct sock *sk)
113 return timer_pending(&nr_sk(sk)->t1timer); 112 return timer_pending(&nr_sk(sk)->t1timer);
114} 113}
115 114
116static void nr_heartbeat_expiry(unsigned long param) 115static void nr_heartbeat_expiry(struct timer_list *t)
117{ 116{
118 struct sock *sk = (struct sock *)param; 117 struct sock *sk = from_timer(sk, t, sk_timer);
119 struct nr_sock *nr = nr_sk(sk); 118 struct nr_sock *nr = nr_sk(sk);
120 119
121 bh_lock_sock(sk); 120 bh_lock_sock(sk);
@@ -152,10 +151,10 @@ static void nr_heartbeat_expiry(unsigned long param)
152 bh_unlock_sock(sk); 151 bh_unlock_sock(sk);
153} 152}
154 153
155static void nr_t2timer_expiry(unsigned long param) 154static void nr_t2timer_expiry(struct timer_list *t)
156{ 155{
157 struct sock *sk = (struct sock *)param; 156 struct nr_sock *nr = from_timer(nr, t, t2timer);
158 struct nr_sock *nr = nr_sk(sk); 157 struct sock *sk = &nr->sock;
159 158
160 bh_lock_sock(sk); 159 bh_lock_sock(sk);
161 if (nr->condition & NR_COND_ACK_PENDING) { 160 if (nr->condition & NR_COND_ACK_PENDING) {
@@ -165,19 +164,20 @@ static void nr_t2timer_expiry(unsigned long param)
165 bh_unlock_sock(sk); 164 bh_unlock_sock(sk);
166} 165}
167 166
168static void nr_t4timer_expiry(unsigned long param) 167static void nr_t4timer_expiry(struct timer_list *t)
169{ 168{
170 struct sock *sk = (struct sock *)param; 169 struct nr_sock *nr = from_timer(nr, t, t4timer);
170 struct sock *sk = &nr->sock;
171 171
172 bh_lock_sock(sk); 172 bh_lock_sock(sk);
173 nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; 173 nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY;
174 bh_unlock_sock(sk); 174 bh_unlock_sock(sk);
175} 175}
176 176
177static void nr_idletimer_expiry(unsigned long param) 177static void nr_idletimer_expiry(struct timer_list *t)
178{ 178{
179 struct sock *sk = (struct sock *)param; 179 struct nr_sock *nr = from_timer(nr, t, idletimer);
180 struct nr_sock *nr = nr_sk(sk); 180 struct sock *sk = &nr->sock;
181 181
182 bh_lock_sock(sk); 182 bh_lock_sock(sk);
183 183
@@ -202,10 +202,10 @@ static void nr_idletimer_expiry(unsigned long param)
202 bh_unlock_sock(sk); 202 bh_unlock_sock(sk);
203} 203}
204 204
205static void nr_t1timer_expiry(unsigned long param) 205static void nr_t1timer_expiry(struct timer_list *t)
206{ 206{
207 struct sock *sk = (struct sock *)param; 207 struct nr_sock *nr = from_timer(nr, t, t1timer);
208 struct nr_sock *nr = nr_sk(sk); 208 struct sock *sk = &nr->sock;
209 209
210 bh_lock_sock(sk); 210 bh_lock_sock(sk);
211 switch (nr->state) { 211 switch (nr->state) {
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 5cf33df888c3..947a470f929d 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1015,9 +1015,9 @@ exit:
1015 device_unlock(&dev->dev); 1015 device_unlock(&dev->dev);
1016} 1016}
1017 1017
1018static void nfc_check_pres_timeout(unsigned long data) 1018static void nfc_check_pres_timeout(struct timer_list *t)
1019{ 1019{
1020 struct nfc_dev *dev = (struct nfc_dev *)data; 1020 struct nfc_dev *dev = from_timer(dev, t, check_pres_timer);
1021 1021
1022 schedule_work(&dev->check_pres_work); 1022 schedule_work(&dev->check_pres_work);
1023} 1023}
@@ -1094,10 +1094,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
1094 dev->targets_generation = 1; 1094 dev->targets_generation = 1;
1095 1095
1096 if (ops->check_presence) { 1096 if (ops->check_presence) {
1097 init_timer(&dev->check_pres_timer); 1097 timer_setup(&dev->check_pres_timer, nfc_check_pres_timeout, 0);
1098 dev->check_pres_timer.data = (unsigned long)dev;
1099 dev->check_pres_timer.function = nfc_check_pres_timeout;
1100
1101 INIT_WORK(&dev->check_pres_work, nfc_check_pres_work); 1098 INIT_WORK(&dev->check_pres_work, nfc_check_pres_work);
1102 } 1099 }
1103 1100
@@ -1106,7 +1103,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
1106err_free_dev: 1103err_free_dev:
1107 kfree(dev); 1104 kfree(dev);
1108 1105
1109 return ERR_PTR(rc); 1106 return NULL;
1110} 1107}
1111EXPORT_SYMBOL(nfc_allocate_device); 1108EXPORT_SYMBOL(nfc_allocate_device);
1112 1109
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index de6dd37d04c7..ec0a8998e52d 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -650,6 +650,7 @@ static void digital_deactivate_target(struct nfc_dev *nfc_dev,
650 return; 650 return;
651 } 651 }
652 652
653 digital_abort_cmd(ddev);
653 ddev->curr_protocol = 0; 654 ddev->curr_protocol = 0;
654} 655}
655 656
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index b740fef0acc5..ac8030c4bcf8 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -428,9 +428,9 @@ exit_noskb:
428 nfc_hci_driver_failure(hdev, r); 428 nfc_hci_driver_failure(hdev, r);
429} 429}
430 430
431static void nfc_hci_cmd_timeout(unsigned long data) 431static void nfc_hci_cmd_timeout(struct timer_list *t)
432{ 432{
433 struct nfc_hci_dev *hdev = (struct nfc_hci_dev *)data; 433 struct nfc_hci_dev *hdev = from_timer(hdev, t, cmd_timer);
434 434
435 schedule_work(&hdev->msg_tx_work); 435 schedule_work(&hdev->msg_tx_work);
436} 436}
@@ -1004,9 +1004,7 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev)
1004 1004
1005 INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work); 1005 INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work);
1006 1006
1007 init_timer(&hdev->cmd_timer); 1007 timer_setup(&hdev->cmd_timer, nfc_hci_cmd_timeout, 0);
1008 hdev->cmd_timer.data = (unsigned long)hdev;
1009 hdev->cmd_timer.function = nfc_hci_cmd_timeout;
1010 1008
1011 skb_queue_head_init(&hdev->rx_hcp_frags); 1009 skb_queue_head_init(&hdev->rx_hcp_frags);
1012 1010
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 17e59a009ce6..fe988936ad92 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -580,27 +580,27 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc)
580 } 580 }
581} 581}
582 582
583static void llc_shdlc_connect_timeout(unsigned long data) 583static void llc_shdlc_connect_timeout(struct timer_list *t)
584{ 584{
585 struct llc_shdlc *shdlc = (struct llc_shdlc *)data; 585 struct llc_shdlc *shdlc = from_timer(shdlc, t, connect_timer);
586 586
587 pr_debug("\n"); 587 pr_debug("\n");
588 588
589 schedule_work(&shdlc->sm_work); 589 schedule_work(&shdlc->sm_work);
590} 590}
591 591
592static void llc_shdlc_t1_timeout(unsigned long data) 592static void llc_shdlc_t1_timeout(struct timer_list *t)
593{ 593{
594 struct llc_shdlc *shdlc = (struct llc_shdlc *)data; 594 struct llc_shdlc *shdlc = from_timer(shdlc, t, t1_timer);
595 595
596 pr_debug("SoftIRQ: need to send ack\n"); 596 pr_debug("SoftIRQ: need to send ack\n");
597 597
598 schedule_work(&shdlc->sm_work); 598 schedule_work(&shdlc->sm_work);
599} 599}
600 600
601static void llc_shdlc_t2_timeout(unsigned long data) 601static void llc_shdlc_t2_timeout(struct timer_list *t)
602{ 602{
603 struct llc_shdlc *shdlc = (struct llc_shdlc *)data; 603 struct llc_shdlc *shdlc = from_timer(shdlc, t, t2_timer);
604 604
605 pr_debug("SoftIRQ: need to retransmit\n"); 605 pr_debug("SoftIRQ: need to retransmit\n");
606 606
@@ -763,17 +763,9 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
763 mutex_init(&shdlc->state_mutex); 763 mutex_init(&shdlc->state_mutex);
764 shdlc->state = SHDLC_DISCONNECTED; 764 shdlc->state = SHDLC_DISCONNECTED;
765 765
766 init_timer(&shdlc->connect_timer); 766 timer_setup(&shdlc->connect_timer, llc_shdlc_connect_timeout, 0);
767 shdlc->connect_timer.data = (unsigned long)shdlc; 767 timer_setup(&shdlc->t1_timer, llc_shdlc_t1_timeout, 0);
768 shdlc->connect_timer.function = llc_shdlc_connect_timeout; 768 timer_setup(&shdlc->t2_timer, llc_shdlc_t2_timeout, 0);
769
770 init_timer(&shdlc->t1_timer);
771 shdlc->t1_timer.data = (unsigned long)shdlc;
772 shdlc->t1_timer.function = llc_shdlc_t1_timeout;
773
774 init_timer(&shdlc->t2_timer);
775 shdlc->t2_timer.data = (unsigned long)shdlc;
776 shdlc->t2_timer.function = llc_shdlc_t2_timeout;
777 769
778 shdlc->w = SHDLC_MAX_WINDOW; 770 shdlc->w = SHDLC_MAX_WINDOW;
779 shdlc->srej_support = SHDLC_SREJ_SUPPORT; 771 shdlc->srej_support = SHDLC_SREJ_SUPPORT;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 02eef5cf3cce..ef4026a23e80 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -242,9 +242,9 @@ static void nfc_llcp_timeout_work(struct work_struct *work)
242 nfc_dep_link_down(local->dev); 242 nfc_dep_link_down(local->dev);
243} 243}
244 244
245static void nfc_llcp_symm_timer(unsigned long data) 245static void nfc_llcp_symm_timer(struct timer_list *t)
246{ 246{
247 struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; 247 struct nfc_llcp_local *local = from_timer(local, t, link_timer);
248 248
249 pr_err("SYMM timeout\n"); 249 pr_err("SYMM timeout\n");
250 250
@@ -285,9 +285,9 @@ static void nfc_llcp_sdreq_timeout_work(struct work_struct *work)
285 nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); 285 nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list);
286} 286}
287 287
288static void nfc_llcp_sdreq_timer(unsigned long data) 288static void nfc_llcp_sdreq_timer(struct timer_list *t)
289{ 289{
290 struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; 290 struct nfc_llcp_local *local = from_timer(local, t, sdreq_timer);
291 291
292 schedule_work(&local->sdreq_timeout_work); 292 schedule_work(&local->sdreq_timeout_work);
293} 293}
@@ -1573,9 +1573,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
1573 INIT_LIST_HEAD(&local->list); 1573 INIT_LIST_HEAD(&local->list);
1574 kref_init(&local->ref); 1574 kref_init(&local->ref);
1575 mutex_init(&local->sdp_lock); 1575 mutex_init(&local->sdp_lock);
1576 init_timer(&local->link_timer); 1576 timer_setup(&local->link_timer, nfc_llcp_symm_timer, 0);
1577 local->link_timer.data = (unsigned long) local;
1578 local->link_timer.function = nfc_llcp_symm_timer;
1579 1577
1580 skb_queue_head_init(&local->tx_queue); 1578 skb_queue_head_init(&local->tx_queue);
1581 INIT_WORK(&local->tx_work, nfc_llcp_tx_work); 1579 INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
@@ -1601,9 +1599,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
1601 1599
1602 mutex_init(&local->sdreq_lock); 1600 mutex_init(&local->sdreq_lock);
1603 INIT_HLIST_HEAD(&local->pending_sdreqs); 1601 INIT_HLIST_HEAD(&local->pending_sdreqs);
1604 init_timer(&local->sdreq_timer); 1602 timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0);
1605 local->sdreq_timer.data = (unsigned long) local;
1606 local->sdreq_timer.function = nfc_llcp_sdreq_timer;
1607 INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work); 1603 INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
1608 1604
1609 list_add(&local->list, &llcp_devices); 1605 list_add(&local->list, &llcp_devices);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index c25e9b4179c3..074960154993 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -591,18 +591,18 @@ static int nci_close_device(struct nci_dev *ndev)
591} 591}
592 592
593/* NCI command timer function */ 593/* NCI command timer function */
594static void nci_cmd_timer(unsigned long arg) 594static void nci_cmd_timer(struct timer_list *t)
595{ 595{
596 struct nci_dev *ndev = (void *) arg; 596 struct nci_dev *ndev = from_timer(ndev, t, cmd_timer);
597 597
598 atomic_set(&ndev->cmd_cnt, 1); 598 atomic_set(&ndev->cmd_cnt, 1);
599 queue_work(ndev->cmd_wq, &ndev->cmd_work); 599 queue_work(ndev->cmd_wq, &ndev->cmd_work);
600} 600}
601 601
602/* NCI data exchange timer function */ 602/* NCI data exchange timer function */
603static void nci_data_timer(unsigned long arg) 603static void nci_data_timer(struct timer_list *t)
604{ 604{
605 struct nci_dev *ndev = (void *) arg; 605 struct nci_dev *ndev = from_timer(ndev, t, data_timer);
606 606
607 set_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); 607 set_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags);
608 queue_work(ndev->rx_wq, &ndev->rx_work); 608 queue_work(ndev->rx_wq, &ndev->rx_work);
@@ -1232,10 +1232,8 @@ int nci_register_device(struct nci_dev *ndev)
1232 skb_queue_head_init(&ndev->rx_q); 1232 skb_queue_head_init(&ndev->rx_q);
1233 skb_queue_head_init(&ndev->tx_q); 1233 skb_queue_head_init(&ndev->tx_q);
1234 1234
1235 setup_timer(&ndev->cmd_timer, nci_cmd_timer, 1235 timer_setup(&ndev->cmd_timer, nci_cmd_timer, 0);
1236 (unsigned long) ndev); 1236 timer_setup(&ndev->data_timer, nci_data_timer, 0);
1237 setup_timer(&ndev->data_timer, nci_data_timer,
1238 (unsigned long) ndev);
1239 1237
1240 mutex_init(&ndev->req_lock); 1238 mutex_init(&ndev->req_lock);
1241 INIT_LIST_HEAD(&ndev->conn_info_list); 1239 INIT_LIST_HEAD(&ndev->conn_info_list);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index b251fb936a27..c0b83dc9d993 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -75,7 +75,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
75 if (!hdr) 75 if (!hdr)
76 return -EMSGSIZE; 76 return -EMSGSIZE;
77 77
78 genl_dump_check_consistent(cb, hdr, &nfc_genl_family); 78 genl_dump_check_consistent(cb, hdr);
79 79
80 if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) || 80 if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) ||
81 nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) || 81 nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) ||
@@ -603,7 +603,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
603 return -EMSGSIZE; 603 return -EMSGSIZE;
604 604
605 if (cb) 605 if (cb)
606 genl_dump_check_consistent(cb, hdr, &nfc_genl_family); 606 genl_dump_check_consistent(cb, hdr);
607 607
608 if (nfc_genl_setup_device_added(dev, msg)) 608 if (nfc_genl_setup_device_added(dev, msg))
609 goto nla_put_failure; 609 goto nla_put_failure;
@@ -928,6 +928,30 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
928 return rc; 928 return rc;
929} 929}
930 930
931static int nfc_genl_deactivate_target(struct sk_buff *skb,
932 struct genl_info *info)
933{
934 struct nfc_dev *dev;
935 u32 device_idx, target_idx;
936 int rc;
937
938 if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
939 return -EINVAL;
940
941 device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
942
943 dev = nfc_get_device(device_idx);
944 if (!dev)
945 return -ENODEV;
946
947 target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]);
948
949 rc = nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP);
950
951 nfc_put_device(dev);
952 return rc;
953}
954
931static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info) 955static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info)
932{ 956{
933 struct nfc_dev *dev; 957 struct nfc_dev *dev;
@@ -1332,7 +1356,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev,
1332 goto nla_put_failure; 1356 goto nla_put_failure;
1333 1357
1334 if (cb) 1358 if (cb)
1335 genl_dump_check_consistent(cb, hdr, &nfc_genl_family); 1359 genl_dump_check_consistent(cb, hdr);
1336 1360
1337 if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || 1361 if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
1338 nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) || 1362 nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) ||
@@ -1751,6 +1775,11 @@ static const struct genl_ops nfc_genl_ops[] = {
1751 .doit = nfc_genl_vendor_cmd, 1775 .doit = nfc_genl_vendor_cmd,
1752 .policy = nfc_genl_policy, 1776 .policy = nfc_genl_policy,
1753 }, 1777 },
1778 {
1779 .cmd = NFC_CMD_DEACTIVATE_TARGET,
1780 .doit = nfc_genl_deactivate_target,
1781 .policy = nfc_genl_policy,
1782 },
1754}; 1783};
1755 1784
1756static struct genl_family nfc_genl_family __ro_after_init = { 1785static struct genl_family nfc_genl_family __ro_after_init = {
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 58fb827439a8..d7da99a0b0b8 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -14,6 +14,66 @@
14#include <net/nsh.h> 14#include <net/nsh.h>
15#include <net/tun_proto.h> 15#include <net/tun_proto.h>
16 16
17int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh)
18{
19 struct nshhdr *nh;
20 size_t length = nsh_hdr_len(pushed_nh);
21 u8 next_proto;
22
23 if (skb->mac_len) {
24 next_proto = TUN_P_ETHERNET;
25 } else {
26 next_proto = tun_p_from_eth_p(skb->protocol);
27 if (!next_proto)
28 return -EAFNOSUPPORT;
29 }
30
31 /* Add the NSH header */
32 if (skb_cow_head(skb, length) < 0)
33 return -ENOMEM;
34
35 skb_push(skb, length);
36 nh = (struct nshhdr *)(skb->data);
37 memcpy(nh, pushed_nh, length);
38 nh->np = next_proto;
39 skb_postpush_rcsum(skb, nh, length);
40
41 skb->protocol = htons(ETH_P_NSH);
42 skb_reset_mac_header(skb);
43 skb_reset_network_header(skb);
44 skb_reset_mac_len(skb);
45
46 return 0;
47}
48EXPORT_SYMBOL_GPL(nsh_push);
49
50int nsh_pop(struct sk_buff *skb)
51{
52 struct nshhdr *nh;
53 size_t length;
54 __be16 inner_proto;
55
56 if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN))
57 return -ENOMEM;
58 nh = (struct nshhdr *)(skb->data);
59 length = nsh_hdr_len(nh);
60 inner_proto = tun_p_to_eth_p(nh->np);
61 if (!pskb_may_pull(skb, length))
62 return -ENOMEM;
63
64 if (!inner_proto)
65 return -EAFNOSUPPORT;
66
67 skb_pull_rcsum(skb, length);
68 skb_reset_mac_header(skb);
69 skb_reset_network_header(skb);
70 skb_reset_mac_len(skb);
71 skb->protocol = inner_proto;
72
73 return 0;
74}
75EXPORT_SYMBOL_GPL(nsh_pop);
76
17static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, 77static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
18 netdev_features_t features) 78 netdev_features_t features)
19{ 79{
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index ce947292ae77..2650205cdaf9 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -14,6 +14,7 @@ config OPENVSWITCH
14 select MPLS 14 select MPLS
15 select NET_MPLS_GSO 15 select NET_MPLS_GSO
16 select DST_CACHE 16 select DST_CACHE
17 select NET_NSH
17 ---help--- 18 ---help---
18 Open vSwitch is a multilayer Ethernet switch targeted at virtualized 19 Open vSwitch is a multilayer Ethernet switch targeted at virtualized
19 environments. In addition to supporting a variety of features 20 environments. In addition to supporting a variety of features
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 299f4476cf44..41109c326f3a 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -12,6 +12,7 @@ openvswitch-y := \
12 flow.o \ 12 flow.o \
13 flow_netlink.o \ 13 flow_netlink.o \
14 flow_table.o \ 14 flow_table.o \
15 meter.o \
15 vport.o \ 16 vport.o \
16 vport-internal_dev.o \ 17 vport-internal_dev.o \
17 vport-netdev.o 18 vport-netdev.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index a54a556fcdb5..30a5df27116e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -43,6 +43,7 @@
43#include "flow.h" 43#include "flow.h"
44#include "conntrack.h" 44#include "conntrack.h"
45#include "vport.h" 45#include "vport.h"
46#include "flow_netlink.h"
46 47
47struct deferred_action { 48struct deferred_action {
48 struct sk_buff *skb; 49 struct sk_buff *skb;
@@ -380,6 +381,38 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
380 return 0; 381 return 0;
381} 382}
382 383
384static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
385 const struct nshhdr *nh)
386{
387 int err;
388
389 err = nsh_push(skb, nh);
390 if (err)
391 return err;
392
393 /* safe right before invalidate_flow_key */
394 key->mac_proto = MAC_PROTO_NONE;
395 invalidate_flow_key(key);
396 return 0;
397}
398
399static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
400{
401 int err;
402
403 err = nsh_pop(skb);
404 if (err)
405 return err;
406
407 /* safe right before invalidate_flow_key */
408 if (skb->protocol == htons(ETH_P_TEB))
409 key->mac_proto = MAC_PROTO_ETHERNET;
410 else
411 key->mac_proto = MAC_PROTO_NONE;
412 invalidate_flow_key(key);
413 return 0;
414}
415
383static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, 416static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
384 __be32 addr, __be32 new_addr) 417 __be32 addr, __be32 new_addr)
385{ 418{
@@ -602,6 +635,69 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
602 return 0; 635 return 0;
603} 636}
604 637
638static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
639 const struct nlattr *a)
640{
641 struct nshhdr *nh;
642 size_t length;
643 int err;
644 u8 flags;
645 u8 ttl;
646 int i;
647
648 struct ovs_key_nsh key;
649 struct ovs_key_nsh mask;
650
651 err = nsh_key_from_nlattr(a, &key, &mask);
652 if (err)
653 return err;
654
655 /* Make sure the NSH base header is there */
656 if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN))
657 return -ENOMEM;
658
659 nh = nsh_hdr(skb);
660 length = nsh_hdr_len(nh);
661
662 /* Make sure the whole NSH header is there */
663 err = skb_ensure_writable(skb, skb_network_offset(skb) +
664 length);
665 if (unlikely(err))
666 return err;
667
668 nh = nsh_hdr(skb);
669 skb_postpull_rcsum(skb, nh, length);
670 flags = nsh_get_flags(nh);
671 flags = OVS_MASKED(flags, key.base.flags, mask.base.flags);
672 flow_key->nsh.base.flags = flags;
673 ttl = nsh_get_ttl(nh);
674 ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl);
675 flow_key->nsh.base.ttl = ttl;
676 nsh_set_flags_and_ttl(nh, flags, ttl);
677 nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr,
678 mask.base.path_hdr);
679 flow_key->nsh.base.path_hdr = nh->path_hdr;
680 switch (nh->mdtype) {
681 case NSH_M_TYPE1:
682 for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
683 nh->md1.context[i] =
684 OVS_MASKED(nh->md1.context[i], key.context[i],
685 mask.context[i]);
686 }
687 memcpy(flow_key->nsh.context, nh->md1.context,
688 sizeof(nh->md1.context));
689 break;
690 case NSH_M_TYPE2:
691 memset(flow_key->nsh.context, 0,
692 sizeof(flow_key->nsh.context));
693 break;
694 default:
695 return -EINVAL;
696 }
697 skb_postpush_rcsum(skb, nh, length);
698 return 0;
699}
700
605/* Must follow skb_ensure_writable() since that can move the skb data. */ 701/* Must follow skb_ensure_writable() since that can move the skb data. */
606static void set_tp_port(struct sk_buff *skb, __be16 *port, 702static void set_tp_port(struct sk_buff *skb, __be16 *port,
607 __be16 new_port, __sum16 *check) 703 __be16 new_port, __sum16 *check)
@@ -1024,6 +1120,10 @@ static int execute_masked_set_action(struct sk_buff *skb,
1024 get_mask(a, struct ovs_key_ethernet *)); 1120 get_mask(a, struct ovs_key_ethernet *));
1025 break; 1121 break;
1026 1122
1123 case OVS_KEY_ATTR_NSH:
1124 err = set_nsh(skb, flow_key, a);
1125 break;
1126
1027 case OVS_KEY_ATTR_IPV4: 1127 case OVS_KEY_ATTR_IPV4:
1028 err = set_ipv4(skb, flow_key, nla_data(a), 1128 err = set_ipv4(skb, flow_key, nla_data(a),
1029 get_mask(a, struct ovs_key_ipv4 *)); 1129 get_mask(a, struct ovs_key_ipv4 *));
@@ -1203,6 +1303,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
1203 return err == -EINPROGRESS ? 0 : err; 1303 return err == -EINPROGRESS ? 0 : err;
1204 break; 1304 break;
1205 1305
1306 case OVS_ACTION_ATTR_CT_CLEAR:
1307 err = ovs_ct_clear(skb, key);
1308 break;
1309
1206 case OVS_ACTION_ATTR_PUSH_ETH: 1310 case OVS_ACTION_ATTR_PUSH_ETH:
1207 err = push_eth(skb, key, nla_data(a)); 1311 err = push_eth(skb, key, nla_data(a));
1208 break; 1312 break;
@@ -1210,6 +1314,28 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
1210 case OVS_ACTION_ATTR_POP_ETH: 1314 case OVS_ACTION_ATTR_POP_ETH:
1211 err = pop_eth(skb, key); 1315 err = pop_eth(skb, key);
1212 break; 1316 break;
1317
1318 case OVS_ACTION_ATTR_PUSH_NSH: {
1319 u8 buffer[NSH_HDR_MAX_LEN];
1320 struct nshhdr *nh = (struct nshhdr *)buffer;
1321
1322 err = nsh_hdr_from_nlattr(nla_data(a), nh,
1323 NSH_HDR_MAX_LEN);
1324 if (unlikely(err))
1325 break;
1326 err = push_nsh(skb, key, nh);
1327 break;
1328 }
1329
1330 case OVS_ACTION_ATTR_POP_NSH:
1331 err = pop_nsh(skb, key);
1332 break;
1333
1334 case OVS_ACTION_ATTR_METER:
1335 if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
1336 consume_skb(skb);
1337 return 0;
1338 }
1213 } 1339 }
1214 1340
1215 if (unlikely(err)) { 1341 if (unlikely(err)) {
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index d558e882ca0c..b27c5c6d9cab 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -752,6 +752,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
752 } 752 }
753 } 753 }
754 /* Non-ICMP, fall thru to initialize if needed. */ 754 /* Non-ICMP, fall thru to initialize if needed. */
755 /* fall through */
755 case IP_CT_NEW: 756 case IP_CT_NEW:
756 /* Seen it before? This can happen for loopback, retrans, 757 /* Seen it before? This can happen for loopback, retrans,
757 * or local packets. 758 * or local packets.
@@ -1129,6 +1130,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
1129 return err; 1130 return err;
1130} 1131}
1131 1132
1133int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
1134{
1135 if (skb_nfct(skb)) {
1136 nf_conntrack_put(skb_nfct(skb));
1137 nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
1138 ovs_ct_fill_key(skb, key);
1139 }
1140
1141 return 0;
1142}
1143
1132static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, 1144static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
1133 const struct sw_flow_key *key, bool log) 1145 const struct sw_flow_key *key, bool log)
1134{ 1146{
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index bc7efd1867ab..399dfdd2c4f9 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -30,6 +30,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *);
30 30
31int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, 31int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
32 const struct ovs_conntrack_info *); 32 const struct ovs_conntrack_info *);
33int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key);
33 34
34void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); 35void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
35int ovs_ct_put_key(const struct sw_flow_key *swkey, 36int ovs_ct_put_key(const struct sw_flow_key *swkey,
@@ -73,6 +74,12 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
73 return -ENOTSUPP; 74 return -ENOTSUPP;
74} 75}
75 76
77static inline int ovs_ct_clear(struct sk_buff *skb,
78 struct sw_flow_key *key)
79{
80 return -ENOTSUPP;
81}
82
76static inline void ovs_ct_fill_key(const struct sk_buff *skb, 83static inline void ovs_ct_fill_key(const struct sk_buff *skb,
77 struct sw_flow_key *key) 84 struct sw_flow_key *key)
78{ 85{
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index c3aec6227c91..0dab33fb9844 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -55,6 +55,7 @@
55#include "flow.h" 55#include "flow.h"
56#include "flow_table.h" 56#include "flow_table.h"
57#include "flow_netlink.h" 57#include "flow_netlink.h"
58#include "meter.h"
58#include "vport-internal_dev.h" 59#include "vport-internal_dev.h"
59#include "vport-netdev.h" 60#include "vport-netdev.h"
60 61
@@ -142,35 +143,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
142 const struct dp_upcall_info *, 143 const struct dp_upcall_info *,
143 uint32_t cutlen); 144 uint32_t cutlen);
144 145
145/* Must be called with rcu_read_lock. */
146static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
147{
148 struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
149
150 if (dev) {
151 struct vport *vport = ovs_internal_dev_get_vport(dev);
152 if (vport)
153 return vport->dp;
154 }
155
156 return NULL;
157}
158
159/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
160 * returned dp pointer valid.
161 */
162static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
163{
164 struct datapath *dp;
165
166 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
167 rcu_read_lock();
168 dp = get_dp_rcu(net, dp_ifindex);
169 rcu_read_unlock();
170
171 return dp;
172}
173
174/* Must be called with rcu_read_lock or ovs_mutex. */ 146/* Must be called with rcu_read_lock or ovs_mutex. */
175const char *ovs_dp_name(const struct datapath *dp) 147const char *ovs_dp_name(const struct datapath *dp)
176{ 148{
@@ -203,6 +175,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
203 ovs_flow_tbl_destroy(&dp->table); 175 ovs_flow_tbl_destroy(&dp->table);
204 free_percpu(dp->stats_percpu); 176 free_percpu(dp->stats_percpu);
205 kfree(dp->ports); 177 kfree(dp->ports);
178 ovs_meters_exit(dp);
206 kfree(dp); 179 kfree(dp);
207} 180}
208 181
@@ -1601,6 +1574,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1601 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) 1574 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1602 INIT_HLIST_HEAD(&dp->ports[i]); 1575 INIT_HLIST_HEAD(&dp->ports[i]);
1603 1576
1577 err = ovs_meters_init(dp);
1578 if (err)
1579 goto err_destroy_ports_array;
1580
1604 /* Set up our datapath device. */ 1581 /* Set up our datapath device. */
1605 parms.name = nla_data(a[OVS_DP_ATTR_NAME]); 1582 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1606 parms.type = OVS_VPORT_TYPE_INTERNAL; 1583 parms.type = OVS_VPORT_TYPE_INTERNAL;
@@ -1629,7 +1606,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1629 ovs_dp_reset_user_features(skb, info); 1606 ovs_dp_reset_user_features(skb, info);
1630 } 1607 }
1631 1608
1632 goto err_destroy_ports_array; 1609 goto err_destroy_meters;
1633 } 1610 }
1634 1611
1635 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1612 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
@@ -1644,8 +1621,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1644 ovs_notify(&dp_datapath_genl_family, reply, info); 1621 ovs_notify(&dp_datapath_genl_family, reply, info);
1645 return 0; 1622 return 0;
1646 1623
1647err_destroy_ports_array: 1624err_destroy_meters:
1648 ovs_unlock(); 1625 ovs_unlock();
1626 ovs_meters_exit(dp);
1627err_destroy_ports_array:
1649 kfree(dp->ports); 1628 kfree(dp->ports);
1650err_destroy_percpu: 1629err_destroy_percpu:
1651 free_percpu(dp->stats_percpu); 1630 free_percpu(dp->stats_percpu);
@@ -1848,7 +1827,8 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = {
1848 1827
1849/* Called with ovs_mutex or RCU read lock. */ 1828/* Called with ovs_mutex or RCU read lock. */
1850static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, 1829static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1851 u32 portid, u32 seq, u32 flags, u8 cmd) 1830 struct net *net, u32 portid, u32 seq,
1831 u32 flags, u8 cmd)
1852{ 1832{
1853 struct ovs_header *ovs_header; 1833 struct ovs_header *ovs_header;
1854 struct ovs_vport_stats vport_stats; 1834 struct ovs_vport_stats vport_stats;
@@ -1864,9 +1844,17 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1864 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || 1844 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1865 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || 1845 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1866 nla_put_string(skb, OVS_VPORT_ATTR_NAME, 1846 nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1867 ovs_vport_name(vport))) 1847 ovs_vport_name(vport)) ||
1848 nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
1868 goto nla_put_failure; 1849 goto nla_put_failure;
1869 1850
1851 if (!net_eq(net, dev_net(vport->dev))) {
1852 int id = peernet2id_alloc(net, dev_net(vport->dev));
1853
1854 if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
1855 goto nla_put_failure;
1856 }
1857
1870 ovs_vport_get_stats(vport, &vport_stats); 1858 ovs_vport_get_stats(vport, &vport_stats);
1871 if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, 1859 if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
1872 sizeof(struct ovs_vport_stats), &vport_stats, 1860 sizeof(struct ovs_vport_stats), &vport_stats,
@@ -1896,8 +1884,8 @@ static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1896} 1884}
1897 1885
1898/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ 1886/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1899struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, 1887struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
1900 u32 seq, u8 cmd) 1888 u32 portid, u32 seq, u8 cmd)
1901{ 1889{
1902 struct sk_buff *skb; 1890 struct sk_buff *skb;
1903 int retval; 1891 int retval;
@@ -1906,7 +1894,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1906 if (!skb) 1894 if (!skb)
1907 return ERR_PTR(-ENOMEM); 1895 return ERR_PTR(-ENOMEM);
1908 1896
1909 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); 1897 retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
1910 BUG_ON(retval < 0); 1898 BUG_ON(retval < 0);
1911 1899
1912 return skb; 1900 return skb;
@@ -1920,6 +1908,8 @@ static struct vport *lookup_vport(struct net *net,
1920 struct datapath *dp; 1908 struct datapath *dp;
1921 struct vport *vport; 1909 struct vport *vport;
1922 1910
1911 if (a[OVS_VPORT_ATTR_IFINDEX])
1912 return ERR_PTR(-EOPNOTSUPP);
1923 if (a[OVS_VPORT_ATTR_NAME]) { 1913 if (a[OVS_VPORT_ATTR_NAME]) {
1924 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); 1914 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1925 if (!vport) 1915 if (!vport)
@@ -1944,6 +1934,7 @@ static struct vport *lookup_vport(struct net *net,
1944 return vport; 1934 return vport;
1945 } else 1935 } else
1946 return ERR_PTR(-EINVAL); 1936 return ERR_PTR(-EINVAL);
1937
1947} 1938}
1948 1939
1949/* Called with ovs_mutex */ 1940/* Called with ovs_mutex */
@@ -1983,6 +1974,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1983 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || 1974 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1984 !a[OVS_VPORT_ATTR_UPCALL_PID]) 1975 !a[OVS_VPORT_ATTR_UPCALL_PID])
1985 return -EINVAL; 1976 return -EINVAL;
1977 if (a[OVS_VPORT_ATTR_IFINDEX])
1978 return -EOPNOTSUPP;
1986 1979
1987 port_no = a[OVS_VPORT_ATTR_PORT_NO] 1980 port_no = a[OVS_VPORT_ATTR_PORT_NO]
1988 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; 1981 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
@@ -2032,8 +2025,9 @@ restart:
2032 goto exit_unlock_free; 2025 goto exit_unlock_free;
2033 } 2026 }
2034 2027
2035 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 2028 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2036 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 2029 info->snd_portid, info->snd_seq, 0,
2030 OVS_VPORT_CMD_NEW);
2037 2031
2038 if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom) 2032 if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
2039 update_headroom(dp); 2033 update_headroom(dp);
@@ -2090,8 +2084,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2090 goto exit_unlock_free; 2084 goto exit_unlock_free;
2091 } 2085 }
2092 2086
2093 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 2087 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2094 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 2088 info->snd_portid, info->snd_seq, 0,
2089 OVS_VPORT_CMD_NEW);
2095 BUG_ON(err < 0); 2090 BUG_ON(err < 0);
2096 2091
2097 ovs_unlock(); 2092 ovs_unlock();
@@ -2128,8 +2123,9 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2128 goto exit_unlock_free; 2123 goto exit_unlock_free;
2129 } 2124 }
2130 2125
2131 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 2126 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2132 info->snd_seq, 0, OVS_VPORT_CMD_DEL); 2127 info->snd_portid, info->snd_seq, 0,
2128 OVS_VPORT_CMD_DEL);
2133 BUG_ON(err < 0); 2129 BUG_ON(err < 0);
2134 2130
2135 /* the vport deletion may trigger dp headroom update */ 2131 /* the vport deletion may trigger dp headroom update */
@@ -2169,8 +2165,9 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2169 err = PTR_ERR(vport); 2165 err = PTR_ERR(vport);
2170 if (IS_ERR(vport)) 2166 if (IS_ERR(vport))
2171 goto exit_unlock_free; 2167 goto exit_unlock_free;
2172 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 2168 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2173 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 2169 info->snd_portid, info->snd_seq, 0,
2170 OVS_VPORT_CMD_NEW);
2174 BUG_ON(err < 0); 2171 BUG_ON(err < 0);
2175 rcu_read_unlock(); 2172 rcu_read_unlock();
2176 2173
@@ -2202,6 +2199,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2202 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { 2199 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2203 if (j >= skip && 2200 if (j >= skip &&
2204 ovs_vport_cmd_fill_info(vport, skb, 2201 ovs_vport_cmd_fill_info(vport, skb,
2202 sock_net(skb->sk),
2205 NETLINK_CB(cb->skb).portid, 2203 NETLINK_CB(cb->skb).portid,
2206 cb->nlh->nlmsg_seq, 2204 cb->nlh->nlmsg_seq,
2207 NLM_F_MULTI, 2205 NLM_F_MULTI,
@@ -2228,6 +2226,8 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2228 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, 2226 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2229 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, 2227 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2230 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, 2228 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2229 [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
2230 [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
2231}; 2231};
2232 2232
2233static const struct genl_ops dp_vport_genl_ops[] = { 2233static const struct genl_ops dp_vport_genl_ops[] = {
@@ -2273,6 +2273,7 @@ static struct genl_family * const dp_genl_families[] = {
2273 &dp_vport_genl_family, 2273 &dp_vport_genl_family,
2274 &dp_flow_genl_family, 2274 &dp_flow_genl_family,
2275 &dp_packet_genl_family, 2275 &dp_packet_genl_family,
2276 &dp_meter_genl_family,
2276}; 2277};
2277 2278
2278static void dp_unregister_genl(int n_families) 2279static void dp_unregister_genl(int n_families)
@@ -2453,3 +2454,4 @@ MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2453MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY); 2454MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2454MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY); 2455MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2455MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY); 2456MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2457MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 480600649d0b..523d65526766 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -30,6 +30,8 @@
30#include "conntrack.h" 30#include "conntrack.h"
31#include "flow.h" 31#include "flow.h"
32#include "flow_table.h" 32#include "flow_table.h"
33#include "meter.h"
34#include "vport-internal_dev.h"
33 35
34#define DP_MAX_PORTS USHRT_MAX 36#define DP_MAX_PORTS USHRT_MAX
35#define DP_VPORT_HASH_BUCKETS 1024 37#define DP_VPORT_HASH_BUCKETS 1024
@@ -91,6 +93,9 @@ struct datapath {
91 u32 user_features; 93 u32 user_features;
92 94
93 u32 max_headroom; 95 u32 max_headroom;
96
97 /* Switch meters. */
98 struct hlist_head *meters;
94}; 99};
95 100
96/** 101/**
@@ -190,6 +195,36 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n
190 return ovs_lookup_vport(dp, port_no); 195 return ovs_lookup_vport(dp, port_no);
191} 196}
192 197
198/* Must be called with rcu_read_lock. */
199static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
200{
201 struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
202
203 if (dev) {
204 struct vport *vport = ovs_internal_dev_get_vport(dev);
205
206 if (vport)
207 return vport->dp;
208 }
209
210 return NULL;
211}
212
213/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
214 * returned dp pointer valid.
215 */
216static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
217{
218 struct datapath *dp;
219
220 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
221 rcu_read_lock();
222 dp = get_dp_rcu(net, dp_ifindex);
223 rcu_read_unlock();
224
225 return dp;
226}
227
193extern struct notifier_block ovs_dp_device_notifier; 228extern struct notifier_block ovs_dp_device_notifier;
194extern struct genl_family dp_vport_genl_family; 229extern struct genl_family dp_vport_genl_family;
195 230
@@ -200,8 +235,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *,
200 uint32_t cutlen); 235 uint32_t cutlen);
201 236
202const char *ovs_dp_name(const struct datapath *dp); 237const char *ovs_dp_name(const struct datapath *dp);
203struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, 238struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
204 u8 cmd); 239 u32 portid, u32 seq, u8 cmd);
205 240
206int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, 241int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
207 const struct sw_flow_actions *, struct sw_flow_key *); 242 const struct sw_flow_actions *, struct sw_flow_key *);
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 653d073bae45..f3ee2f2825c0 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -30,8 +30,8 @@ static void dp_detach_port_notify(struct vport *vport)
30 struct datapath *dp; 30 struct datapath *dp;
31 31
32 dp = vport->dp; 32 dp = vport->dp;
33 notify = ovs_vport_cmd_build_info(vport, 0, 0, 33 notify = ovs_vport_cmd_build_info(vport, ovs_dp_get_net(dp),
34 OVS_VPORT_CMD_DEL); 34 0, 0, OVS_VPORT_CMD_DEL);
35 ovs_dp_detach_port(vport); 35 ovs_dp_detach_port(vport);
36 if (IS_ERR(notify)) { 36 if (IS_ERR(notify)) {
37 genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0, 37 genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 8c94cef25a72..864ddb1e3642 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -46,6 +46,7 @@
46#include <net/ipv6.h> 46#include <net/ipv6.h>
47#include <net/mpls.h> 47#include <net/mpls.h>
48#include <net/ndisc.h> 48#include <net/ndisc.h>
49#include <net/nsh.h>
49 50
50#include "conntrack.h" 51#include "conntrack.h"
51#include "datapath.h" 52#include "datapath.h"
@@ -490,6 +491,52 @@ invalid:
490 return 0; 491 return 0;
491} 492}
492 493
494static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
495{
496 struct nshhdr *nh;
497 unsigned int nh_ofs = skb_network_offset(skb);
498 u8 version, length;
499 int err;
500
501 err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
502 if (unlikely(err))
503 return err;
504
505 nh = nsh_hdr(skb);
506 version = nsh_get_ver(nh);
507 length = nsh_hdr_len(nh);
508
509 if (version != 0)
510 return -EINVAL;
511
512 err = check_header(skb, nh_ofs + length);
513 if (unlikely(err))
514 return err;
515
516 nh = nsh_hdr(skb);
517 key->nsh.base.flags = nsh_get_flags(nh);
518 key->nsh.base.ttl = nsh_get_ttl(nh);
519 key->nsh.base.mdtype = nh->mdtype;
520 key->nsh.base.np = nh->np;
521 key->nsh.base.path_hdr = nh->path_hdr;
522 switch (key->nsh.base.mdtype) {
523 case NSH_M_TYPE1:
524 if (length != NSH_M_TYPE1_LEN)
525 return -EINVAL;
526 memcpy(key->nsh.context, nh->md1.context,
527 sizeof(nh->md1));
528 break;
529 case NSH_M_TYPE2:
530 memset(key->nsh.context, 0,
531 sizeof(nh->md1));
532 break;
533 default:
534 return -EINVAL;
535 }
536
537 return 0;
538}
539
493/** 540/**
494 * key_extract - extracts a flow key from an Ethernet frame. 541 * key_extract - extracts a flow key from an Ethernet frame.
495 * @skb: sk_buff that contains the frame, with skb->data pointing to the 542 * @skb: sk_buff that contains the frame, with skb->data pointing to the
@@ -735,6 +782,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
735 memset(&key->tp, 0, sizeof(key->tp)); 782 memset(&key->tp, 0, sizeof(key->tp));
736 } 783 }
737 } 784 }
785 } else if (key->eth.type == htons(ETH_P_NSH)) {
786 error = parse_nsh(skb, key);
787 if (error)
788 return error;
738 } 789 }
739 return 0; 790 return 0;
740} 791}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 1875bba4f865..c670dd24b8b7 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -35,6 +35,7 @@
35#include <net/inet_ecn.h> 35#include <net/inet_ecn.h>
36#include <net/ip_tunnels.h> 36#include <net/ip_tunnels.h>
37#include <net/dst_metadata.h> 37#include <net/dst_metadata.h>
38#include <net/nsh.h>
38 39
39struct sk_buff; 40struct sk_buff;
40 41
@@ -66,6 +67,11 @@ struct vlan_head {
66 (offsetof(struct sw_flow_key, recirc_id) + \ 67 (offsetof(struct sw_flow_key, recirc_id) + \
67 FIELD_SIZEOF(struct sw_flow_key, recirc_id)) 68 FIELD_SIZEOF(struct sw_flow_key, recirc_id))
68 69
70struct ovs_key_nsh {
71 struct ovs_nsh_key_base base;
72 __be32 context[NSH_MD1_CONTEXT_SIZE];
73};
74
69struct sw_flow_key { 75struct sw_flow_key {
70 u8 tun_opts[IP_TUNNEL_OPTS_MAX]; 76 u8 tun_opts[IP_TUNNEL_OPTS_MAX];
71 u8 tun_opts_len; 77 u8 tun_opts_len;
@@ -143,6 +149,7 @@ struct sw_flow_key {
143 } nd; 149 } nd;
144 }; 150 };
145 } ipv6; 151 } ipv6;
152 struct ovs_key_nsh nsh; /* network service header */
146 }; 153 };
147 struct { 154 struct {
148 /* Connection tracking fields not packed above. */ 155 /* Connection tracking fields not packed above. */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index e8eb427ce6d1..dc424798ba6f 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,8 @@
48#include <net/ndisc.h> 48#include <net/ndisc.h>
49#include <net/mpls.h> 49#include <net/mpls.h>
50#include <net/vxlan.h> 50#include <net/vxlan.h>
51#include <net/tun_proto.h>
52#include <net/erspan.h>
51 53
52#include "flow_netlink.h" 54#include "flow_netlink.h"
53 55
@@ -75,16 +77,20 @@ static bool actions_may_change_flow(const struct nlattr *actions)
75 break; 77 break;
76 78
77 case OVS_ACTION_ATTR_CT: 79 case OVS_ACTION_ATTR_CT:
80 case OVS_ACTION_ATTR_CT_CLEAR:
78 case OVS_ACTION_ATTR_HASH: 81 case OVS_ACTION_ATTR_HASH:
79 case OVS_ACTION_ATTR_POP_ETH: 82 case OVS_ACTION_ATTR_POP_ETH:
80 case OVS_ACTION_ATTR_POP_MPLS: 83 case OVS_ACTION_ATTR_POP_MPLS:
84 case OVS_ACTION_ATTR_POP_NSH:
81 case OVS_ACTION_ATTR_POP_VLAN: 85 case OVS_ACTION_ATTR_POP_VLAN:
82 case OVS_ACTION_ATTR_PUSH_ETH: 86 case OVS_ACTION_ATTR_PUSH_ETH:
83 case OVS_ACTION_ATTR_PUSH_MPLS: 87 case OVS_ACTION_ATTR_PUSH_MPLS:
88 case OVS_ACTION_ATTR_PUSH_NSH:
84 case OVS_ACTION_ATTR_PUSH_VLAN: 89 case OVS_ACTION_ATTR_PUSH_VLAN:
85 case OVS_ACTION_ATTR_SAMPLE: 90 case OVS_ACTION_ATTR_SAMPLE:
86 case OVS_ACTION_ATTR_SET: 91 case OVS_ACTION_ATTR_SET:
87 case OVS_ACTION_ATTR_SET_MASKED: 92 case OVS_ACTION_ATTR_SET_MASKED:
93 case OVS_ACTION_ATTR_METER:
88 default: 94 default:
89 return true; 95 return true;
90 } 96 }
@@ -173,7 +179,8 @@ static bool match_validate(const struct sw_flow_match *match,
173 | (1 << OVS_KEY_ATTR_ICMPV6) 179 | (1 << OVS_KEY_ATTR_ICMPV6)
174 | (1 << OVS_KEY_ATTR_ARP) 180 | (1 << OVS_KEY_ATTR_ARP)
175 | (1 << OVS_KEY_ATTR_ND) 181 | (1 << OVS_KEY_ATTR_ND)
176 | (1 << OVS_KEY_ATTR_MPLS)); 182 | (1 << OVS_KEY_ATTR_MPLS)
183 | (1 << OVS_KEY_ATTR_NSH));
177 184
178 /* Always allowed mask fields. */ 185 /* Always allowed mask fields. */
179 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 186 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
@@ -282,6 +289,14 @@ static bool match_validate(const struct sw_flow_match *match,
282 } 289 }
283 } 290 }
284 291
292 if (match->key->eth.type == htons(ETH_P_NSH)) {
293 key_expected |= 1 << OVS_KEY_ATTR_NSH;
294 if (match->mask &&
295 match->mask->key.eth.type == htons(0xffff)) {
296 mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
297 }
298 }
299
285 if ((key_attrs & key_expected) != key_expected) { 300 if ((key_attrs & key_expected) != key_expected) {
286 /* Key attributes check failed. */ 301 /* Key attributes check failed. */
287 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", 302 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
@@ -319,7 +334,21 @@ size_t ovs_tun_key_attr_size(void)
319 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 334 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
320 */ 335 */
321 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ 336 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
322 + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ 337 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */
338 + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
339}
340
341static size_t ovs_nsh_key_attr_size(void)
342{
343 /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
344 * updating this function.
345 */
346 return nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
347 /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
348 * mutually exclusive, so the bigger one can cover
349 * the small one.
350 */
351 + nla_total_size(NSH_CTX_HDRS_MAX_LEN);
323} 352}
324 353
325size_t ovs_key_attr_size(void) 354size_t ovs_key_attr_size(void)
@@ -327,7 +356,7 @@ size_t ovs_key_attr_size(void)
327 /* Whenever adding new OVS_KEY_ FIELDS, we should consider 356 /* Whenever adding new OVS_KEY_ FIELDS, we should consider
328 * updating this function. 357 * updating this function.
329 */ 358 */
330 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28); 359 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
331 360
332 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ 361 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
333 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ 362 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -341,6 +370,8 @@ size_t ovs_key_attr_size(void)
341 + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ 370 + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
342 + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ 371 + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
343 + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ 372 + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
373 + nla_total_size(0) /* OVS_KEY_ATTR_NSH */
374 + ovs_nsh_key_attr_size()
344 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 375 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
345 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 376 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
346 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ 377 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
@@ -371,6 +402,14 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
371 .next = ovs_vxlan_ext_key_lens }, 402 .next = ovs_vxlan_ext_key_lens },
372 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, 403 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
373 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, 404 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
405 [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) },
406};
407
408static const struct ovs_len_tbl
409ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
410 [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
411 [OVS_NSH_KEY_ATTR_MD1] = { .len = sizeof(struct ovs_nsh_key_md1) },
412 [OVS_NSH_KEY_ATTR_MD2] = { .len = OVS_ATTR_VARIABLE },
374}; 413};
375 414
376/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 415/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
@@ -405,6 +444,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
405 .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, 444 .len = sizeof(struct ovs_key_ct_tuple_ipv4) },
406 [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { 445 [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
407 .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, 446 .len = sizeof(struct ovs_key_ct_tuple_ipv6) },
447 [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED,
448 .next = ovs_nsh_key_attr_lens, },
408}; 449};
409 450
410static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) 451static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -593,6 +634,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
593 return 0; 634 return 0;
594} 635}
595 636
637static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
638 struct sw_flow_match *match, bool is_mask,
639 bool log)
640{
641 unsigned long opt_key_offset;
642 struct erspan_metadata opts;
643
644 BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
645
646 memset(&opts, 0, sizeof(opts));
647 opts.index = nla_get_be32(attr);
648
649 /* Index has only 20-bit */
650 if (ntohl(opts.index) & ~INDEX_MASK) {
651 OVS_NLERR(log, "ERSPAN index number %x too large.",
652 ntohl(opts.index));
653 return -EINVAL;
654 }
655
656 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
657 opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
658 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
659 is_mask);
660
661 return 0;
662}
663
596static int ip_tun_from_nlattr(const struct nlattr *attr, 664static int ip_tun_from_nlattr(const struct nlattr *attr,
597 struct sw_flow_match *match, bool is_mask, 665 struct sw_flow_match *match, bool is_mask,
598 bool log) 666 bool log)
@@ -700,6 +768,19 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
700 break; 768 break;
701 case OVS_TUNNEL_KEY_ATTR_PAD: 769 case OVS_TUNNEL_KEY_ATTR_PAD:
702 break; 770 break;
771 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
772 if (opts_type) {
773 OVS_NLERR(log, "Multiple metadata blocks provided");
774 return -EINVAL;
775 }
776
777 err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
778 if (err)
779 return err;
780
781 tun_flags |= TUNNEL_ERSPAN_OPT;
782 opts_type = type;
783 break;
703 default: 784 default:
704 OVS_NLERR(log, "Unknown IP tunnel attribute %d", 785 OVS_NLERR(log, "Unknown IP tunnel attribute %d",
705 type); 786 type);
@@ -824,6 +905,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
824 else if (output->tun_flags & TUNNEL_VXLAN_OPT && 905 else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
825 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) 906 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
826 return -EMSGSIZE; 907 return -EMSGSIZE;
908 else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
909 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
910 ((struct erspan_metadata *)tun_opts)->index))
911 return -EMSGSIZE;
827 } 912 }
828 913
829 return 0; 914 return 0;
@@ -1179,6 +1264,221 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
1179 return 0; 1264 return 0;
1180} 1265}
1181 1266
1267int nsh_hdr_from_nlattr(const struct nlattr *attr,
1268 struct nshhdr *nh, size_t size)
1269{
1270 struct nlattr *a;
1271 int rem;
1272 u8 flags = 0;
1273 u8 ttl = 0;
1274 int mdlen = 0;
1275
1276 /* validate_nsh has check this, so we needn't do duplicate check here
1277 */
1278 if (size < NSH_BASE_HDR_LEN)
1279 return -ENOBUFS;
1280
1281 nla_for_each_nested(a, attr, rem) {
1282 int type = nla_type(a);
1283
1284 switch (type) {
1285 case OVS_NSH_KEY_ATTR_BASE: {
1286 const struct ovs_nsh_key_base *base = nla_data(a);
1287
1288 flags = base->flags;
1289 ttl = base->ttl;
1290 nh->np = base->np;
1291 nh->mdtype = base->mdtype;
1292 nh->path_hdr = base->path_hdr;
1293 break;
1294 }
1295 case OVS_NSH_KEY_ATTR_MD1:
1296 mdlen = nla_len(a);
1297 if (mdlen > size - NSH_BASE_HDR_LEN)
1298 return -ENOBUFS;
1299 memcpy(&nh->md1, nla_data(a), mdlen);
1300 break;
1301
1302 case OVS_NSH_KEY_ATTR_MD2:
1303 mdlen = nla_len(a);
1304 if (mdlen > size - NSH_BASE_HDR_LEN)
1305 return -ENOBUFS;
1306 memcpy(&nh->md2, nla_data(a), mdlen);
1307 break;
1308
1309 default:
1310 return -EINVAL;
1311 }
1312 }
1313
1314 /* nsh header length = NSH_BASE_HDR_LEN + mdlen */
1315 nh->ver_flags_ttl_len = 0;
1316 nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
1317
1318 return 0;
1319}
1320
1321int nsh_key_from_nlattr(const struct nlattr *attr,
1322 struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
1323{
1324 struct nlattr *a;
1325 int rem;
1326
1327 /* validate_nsh has check this, so we needn't do duplicate check here
1328 */
1329 nla_for_each_nested(a, attr, rem) {
1330 int type = nla_type(a);
1331
1332 switch (type) {
1333 case OVS_NSH_KEY_ATTR_BASE: {
1334 const struct ovs_nsh_key_base *base = nla_data(a);
1335 const struct ovs_nsh_key_base *base_mask = base + 1;
1336
1337 nsh->base = *base;
1338 nsh_mask->base = *base_mask;
1339 break;
1340 }
1341 case OVS_NSH_KEY_ATTR_MD1: {
1342 const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1343 const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
1344
1345 memcpy(nsh->context, md1->context, sizeof(*md1));
1346 memcpy(nsh_mask->context, md1_mask->context,
1347 sizeof(*md1_mask));
1348 break;
1349 }
1350 case OVS_NSH_KEY_ATTR_MD2:
1351 /* Not supported yet */
1352 return -ENOTSUPP;
1353 default:
1354 return -EINVAL;
1355 }
1356 }
1357
1358 return 0;
1359}
1360
1361static int nsh_key_put_from_nlattr(const struct nlattr *attr,
1362 struct sw_flow_match *match, bool is_mask,
1363 bool is_push_nsh, bool log)
1364{
1365 struct nlattr *a;
1366 int rem;
1367 bool has_base = false;
1368 bool has_md1 = false;
1369 bool has_md2 = false;
1370 u8 mdtype = 0;
1371 int mdlen = 0;
1372
1373 if (WARN_ON(is_push_nsh && is_mask))
1374 return -EINVAL;
1375
1376 nla_for_each_nested(a, attr, rem) {
1377 int type = nla_type(a);
1378 int i;
1379
1380 if (type > OVS_NSH_KEY_ATTR_MAX) {
1381 OVS_NLERR(log, "nsh attr %d is out of range max %d",
1382 type, OVS_NSH_KEY_ATTR_MAX);
1383 return -EINVAL;
1384 }
1385
1386 if (!check_attr_len(nla_len(a),
1387 ovs_nsh_key_attr_lens[type].len)) {
1388 OVS_NLERR(
1389 log,
1390 "nsh attr %d has unexpected len %d expected %d",
1391 type,
1392 nla_len(a),
1393 ovs_nsh_key_attr_lens[type].len
1394 );
1395 return -EINVAL;
1396 }
1397
1398 switch (type) {
1399 case OVS_NSH_KEY_ATTR_BASE: {
1400 const struct ovs_nsh_key_base *base = nla_data(a);
1401
1402 has_base = true;
1403 mdtype = base->mdtype;
1404 SW_FLOW_KEY_PUT(match, nsh.base.flags,
1405 base->flags, is_mask);
1406 SW_FLOW_KEY_PUT(match, nsh.base.ttl,
1407 base->ttl, is_mask);
1408 SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
1409 base->mdtype, is_mask);
1410 SW_FLOW_KEY_PUT(match, nsh.base.np,
1411 base->np, is_mask);
1412 SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
1413 base->path_hdr, is_mask);
1414 break;
1415 }
1416 case OVS_NSH_KEY_ATTR_MD1: {
1417 const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1418
1419 has_md1 = true;
1420 for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
1421 SW_FLOW_KEY_PUT(match, nsh.context[i],
1422 md1->context[i], is_mask);
1423 break;
1424 }
1425 case OVS_NSH_KEY_ATTR_MD2:
1426 if (!is_push_nsh) /* Not supported MD type 2 yet */
1427 return -ENOTSUPP;
1428
1429 has_md2 = true;
1430 mdlen = nla_len(a);
1431 if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
1432 OVS_NLERR(
1433 log,
1434 "Invalid MD length %d for MD type %d",
1435 mdlen,
1436 mdtype
1437 );
1438 return -EINVAL;
1439 }
1440 break;
1441 default:
1442 OVS_NLERR(log, "Unknown nsh attribute %d",
1443 type);
1444 return -EINVAL;
1445 }
1446 }
1447
1448 if (rem > 0) {
1449 OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
1450 return -EINVAL;
1451 }
1452
1453 if (has_md1 && has_md2) {
1454 OVS_NLERR(
1455 1,
1456 "invalid nsh attribute: md1 and md2 are exclusive."
1457 );
1458 return -EINVAL;
1459 }
1460
1461 if (!is_mask) {
1462 if ((has_md1 && mdtype != NSH_M_TYPE1) ||
1463 (has_md2 && mdtype != NSH_M_TYPE2)) {
1464 OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
1465 mdtype);
1466 return -EINVAL;
1467 }
1468
1469 if (is_push_nsh &&
1470 (!has_base || (!has_md1 && !has_md2))) {
1471 OVS_NLERR(
1472 1,
1473 "push_nsh: missing base or metadata attributes"
1474 );
1475 return -EINVAL;
1476 }
1477 }
1478
1479 return 0;
1480}
1481
1182static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, 1482static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
1183 u64 attrs, const struct nlattr **a, 1483 u64 attrs, const struct nlattr **a,
1184 bool is_mask, bool log) 1484 bool is_mask, bool log)
@@ -1306,6 +1606,13 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
1306 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 1606 attrs &= ~(1 << OVS_KEY_ATTR_ARP);
1307 } 1607 }
1308 1608
1609 if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
1610 if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
1611 is_mask, false, log) < 0)
1612 return -EINVAL;
1613 attrs &= ~(1 << OVS_KEY_ATTR_NSH);
1614 }
1615
1309 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { 1616 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
1310 const struct ovs_key_mpls *mpls_key; 1617 const struct ovs_key_mpls *mpls_key;
1311 1618
@@ -1622,6 +1929,34 @@ static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
1622 return 0; 1929 return 0;
1623} 1930}
1624 1931
1932static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
1933 struct sk_buff *skb)
1934{
1935 struct nlattr *start;
1936
1937 start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
1938 if (!start)
1939 return -EMSGSIZE;
1940
1941 if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
1942 goto nla_put_failure;
1943
1944 if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
1945 if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
1946 sizeof(nsh->context), nsh->context))
1947 goto nla_put_failure;
1948 }
1949
1950 /* Don't support MD type 2 yet */
1951
1952 nla_nest_end(skb, start);
1953
1954 return 0;
1955
1956nla_put_failure:
1957 return -EMSGSIZE;
1958}
1959
1625static int __ovs_nla_put_key(const struct sw_flow_key *swkey, 1960static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1626 const struct sw_flow_key *output, bool is_mask, 1961 const struct sw_flow_key *output, bool is_mask,
1627 struct sk_buff *skb) 1962 struct sk_buff *skb)
@@ -1750,6 +2085,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1750 ipv6_key->ipv6_tclass = output->ip.tos; 2085 ipv6_key->ipv6_tclass = output->ip.tos;
1751 ipv6_key->ipv6_hlimit = output->ip.ttl; 2086 ipv6_key->ipv6_hlimit = output->ip.ttl;
1752 ipv6_key->ipv6_frag = output->ip.frag; 2087 ipv6_key->ipv6_frag = output->ip.frag;
2088 } else if (swkey->eth.type == htons(ETH_P_NSH)) {
2089 if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
2090 goto nla_put_failure;
1753 } else if (swkey->eth.type == htons(ETH_P_ARP) || 2091 } else if (swkey->eth.type == htons(ETH_P_ARP) ||
1754 swkey->eth.type == htons(ETH_P_RARP)) { 2092 swkey->eth.type == htons(ETH_P_RARP)) {
1755 struct ovs_key_arp *arp_key; 2093 struct ovs_key_arp *arp_key;
@@ -2195,6 +2533,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
2195 break; 2533 break;
2196 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 2534 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
2197 break; 2535 break;
2536 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
2537 break;
2198 } 2538 }
2199 }; 2539 };
2200 2540
@@ -2242,6 +2582,19 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
2242 return err; 2582 return err;
2243} 2583}
2244 2584
2585static bool validate_nsh(const struct nlattr *attr, bool is_mask,
2586 bool is_push_nsh, bool log)
2587{
2588 struct sw_flow_match match;
2589 struct sw_flow_key key;
2590 int ret = 0;
2591
2592 ovs_match_init(&match, &key, true, NULL);
2593 ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
2594 is_push_nsh, log);
2595 return !ret;
2596}
2597
2245/* Return false if there are any non-masked bits set. 2598/* Return false if there are any non-masked bits set.
2246 * Mask follows data immediately, before any netlink padding. 2599 * Mask follows data immediately, before any netlink padding.
2247 */ 2600 */
@@ -2384,6 +2737,13 @@ static int validate_set(const struct nlattr *a,
2384 2737
2385 break; 2738 break;
2386 2739
2740 case OVS_KEY_ATTR_NSH:
2741 if (eth_type != htons(ETH_P_NSH))
2742 return -EINVAL;
2743 if (!validate_nsh(nla_data(a), masked, false, log))
2744 return -EINVAL;
2745 break;
2746
2387 default: 2747 default:
2388 return -EINVAL; 2748 return -EINVAL;
2389 } 2749 }
@@ -2479,9 +2839,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2479 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, 2839 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
2480 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), 2840 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
2481 [OVS_ACTION_ATTR_CT] = (u32)-1, 2841 [OVS_ACTION_ATTR_CT] = (u32)-1,
2842 [OVS_ACTION_ATTR_CT_CLEAR] = 0,
2482 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), 2843 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
2483 [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), 2844 [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
2484 [OVS_ACTION_ATTR_POP_ETH] = 0, 2845 [OVS_ACTION_ATTR_POP_ETH] = 0,
2846 [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
2847 [OVS_ACTION_ATTR_POP_NSH] = 0,
2848 [OVS_ACTION_ATTR_METER] = sizeof(u32),
2485 }; 2849 };
2486 const struct ovs_action_push_vlan *vlan; 2850 const struct ovs_action_push_vlan *vlan;
2487 int type = nla_type(a); 2851 int type = nla_type(a);
@@ -2620,6 +2984,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2620 skip_copy = true; 2984 skip_copy = true;
2621 break; 2985 break;
2622 2986
2987 case OVS_ACTION_ATTR_CT_CLEAR:
2988 break;
2989
2623 case OVS_ACTION_ATTR_PUSH_ETH: 2990 case OVS_ACTION_ATTR_PUSH_ETH:
2624 /* Disallow pushing an Ethernet header if one 2991 /* Disallow pushing an Ethernet header if one
2625 * is already present */ 2992 * is already present */
@@ -2636,6 +3003,38 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2636 mac_proto = MAC_PROTO_ETHERNET; 3003 mac_proto = MAC_PROTO_ETHERNET;
2637 break; 3004 break;
2638 3005
3006 case OVS_ACTION_ATTR_PUSH_NSH:
3007 if (mac_proto != MAC_PROTO_ETHERNET) {
3008 u8 next_proto;
3009
3010 next_proto = tun_p_from_eth_p(eth_type);
3011 if (!next_proto)
3012 return -EINVAL;
3013 }
3014 mac_proto = MAC_PROTO_NONE;
3015 if (!validate_nsh(nla_data(a), false, true, true))
3016 return -EINVAL;
3017 break;
3018
3019 case OVS_ACTION_ATTR_POP_NSH: {
3020 __be16 inner_proto;
3021
3022 if (eth_type != htons(ETH_P_NSH))
3023 return -EINVAL;
3024 inner_proto = tun_p_to_eth_p(key->nsh.base.np);
3025 if (!inner_proto)
3026 return -EINVAL;
3027 if (key->nsh.base.np == TUN_P_ETHERNET)
3028 mac_proto = MAC_PROTO_ETHERNET;
3029 else
3030 mac_proto = MAC_PROTO_NONE;
3031 break;
3032 }
3033
3034 case OVS_ACTION_ATTR_METER:
3035 /* Non-existent meters are simply ignored. */
3036 break;
3037
2639 default: 3038 default:
2640 OVS_NLERR(log, "Unknown Action type %d", type); 3039 OVS_NLERR(log, "Unknown Action type %d", type);
2641 return -EINVAL; 3040 return -EINVAL;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 929c665ac3aa..6657606b2b47 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -79,4 +79,9 @@ int ovs_nla_put_actions(const struct nlattr *attr,
79void ovs_nla_free_flow_actions(struct sw_flow_actions *); 79void ovs_nla_free_flow_actions(struct sw_flow_actions *);
80void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *); 80void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *);
81 81
82int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh,
83 struct ovs_key_nsh *nsh_mask);
84int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh,
85 size_t size);
86
82#endif /* flow_netlink.h */ 87#endif /* flow_netlink.h */
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
new file mode 100644
index 000000000000..3fbfc78991ac
--- /dev/null
+++ b/net/openvswitch/meter.c
@@ -0,0 +1,597 @@
1/*
2 * Copyright (c) 2017 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 */
8
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11#include <linux/if.h>
12#include <linux/skbuff.h>
13#include <linux/ip.h>
14#include <linux/kernel.h>
15#include <linux/openvswitch.h>
16#include <linux/netlink.h>
17#include <linux/rculist.h>
18
19#include <net/netlink.h>
20#include <net/genetlink.h>
21
22#include "datapath.h"
23#include "meter.h"
24
25#define METER_HASH_BUCKETS 1024
26
27static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
28 [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
29 [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
30 [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
31 [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
32 [OVS_METER_ATTR_USED] = { .type = NLA_U64 },
33 [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
34 [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
35 [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
36};
37
38static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
39 [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
40 [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
41 [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
42 [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
43};
44
45static void ovs_meter_free(struct dp_meter *meter)
46{
47 if (!meter)
48 return;
49
50 kfree_rcu(meter, rcu);
51}
52
53static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
54 u32 meter_id)
55{
56 return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
57}
58
59/* Call with ovs_mutex or RCU read lock. */
60static struct dp_meter *lookup_meter(const struct datapath *dp,
61 u32 meter_id)
62{
63 struct dp_meter *meter;
64 struct hlist_head *head;
65
66 head = meter_hash_bucket(dp, meter_id);
67 hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
68 if (meter->id == meter_id)
69 return meter;
70 }
71 return NULL;
72}
73
74static void attach_meter(struct datapath *dp, struct dp_meter *meter)
75{
76 struct hlist_head *head = meter_hash_bucket(dp, meter->id);
77
78 hlist_add_head_rcu(&meter->dp_hash_node, head);
79}
80
81static void detach_meter(struct dp_meter *meter)
82{
83 ASSERT_OVSL();
84 if (meter)
85 hlist_del_rcu(&meter->dp_hash_node);
86}
87
88static struct sk_buff *
89ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
90 struct ovs_header **ovs_reply_header)
91{
92 struct sk_buff *skb;
93 struct ovs_header *ovs_header = info->userhdr;
94
95 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
96 if (!skb)
97 return ERR_PTR(-ENOMEM);
98
99 *ovs_reply_header = genlmsg_put(skb, info->snd_portid,
100 info->snd_seq,
101 &dp_meter_genl_family, 0, cmd);
102 if (!*ovs_reply_header) {
103 nlmsg_free(skb);
104 return ERR_PTR(-EMSGSIZE);
105 }
106 (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
107
108 return skb;
109}
110
111static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
112 struct dp_meter *meter)
113{
114 struct nlattr *nla;
115 struct dp_meter_band *band;
116 u16 i;
117
118 if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
119 goto error;
120
121 if (!meter)
122 return 0;
123
124 if (nla_put(reply, OVS_METER_ATTR_STATS,
125 sizeof(struct ovs_flow_stats), &meter->stats) ||
126 nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
127 OVS_METER_ATTR_PAD))
128 goto error;
129
130 nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
131 if (!nla)
132 goto error;
133
134 band = meter->bands;
135
136 for (i = 0; i < meter->n_bands; ++i, ++band) {
137 struct nlattr *band_nla;
138
139 band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
140 if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
141 sizeof(struct ovs_flow_stats),
142 &band->stats))
143 goto error;
144 nla_nest_end(reply, band_nla);
145 }
146 nla_nest_end(reply, nla);
147
148 return 0;
149error:
150 return -EMSGSIZE;
151}
152
153static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
154{
155 struct sk_buff *reply;
156 struct ovs_header *ovs_reply_header;
157 struct nlattr *nla, *band_nla;
158 int err;
159
160 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
161 &ovs_reply_header);
162 if (IS_ERR(reply))
163 return PTR_ERR(reply);
164
165 if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
166 nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
167 goto nla_put_failure;
168
169 nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
170 if (!nla)
171 goto nla_put_failure;
172
173 band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
174 if (!band_nla)
175 goto nla_put_failure;
176 /* Currently only DROP band type is supported. */
177 if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
178 goto nla_put_failure;
179 nla_nest_end(reply, band_nla);
180 nla_nest_end(reply, nla);
181
182 genlmsg_end(reply, ovs_reply_header);
183 return genlmsg_reply(reply, info);
184
185nla_put_failure:
186 nlmsg_free(reply);
187 err = -EMSGSIZE;
188 return err;
189}
190
191static struct dp_meter *dp_meter_create(struct nlattr **a)
192{
193 struct nlattr *nla;
194 int rem;
195 u16 n_bands = 0;
196 struct dp_meter *meter;
197 struct dp_meter_band *band;
198 int err;
199
200 /* Validate attributes, count the bands. */
201 if (!a[OVS_METER_ATTR_BANDS])
202 return ERR_PTR(-EINVAL);
203
204 nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
205 if (++n_bands > DP_MAX_BANDS)
206 return ERR_PTR(-EINVAL);
207
208 /* Allocate and set up the meter before locking anything. */
209 meter = kzalloc(n_bands * sizeof(struct dp_meter_band) +
210 sizeof(*meter), GFP_KERNEL);
211 if (!meter)
212 return ERR_PTR(-ENOMEM);
213
214 meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
215 meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
216 meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
217 spin_lock_init(&meter->lock);
218 if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
219 meter->stats = *(struct ovs_flow_stats *)
220 nla_data(a[OVS_METER_ATTR_STATS]);
221 }
222 meter->n_bands = n_bands;
223
224 /* Set up meter bands. */
225 band = meter->bands;
226 nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
227 struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
228 u32 band_max_delta_t;
229
230 err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
231 nla_data(nla), nla_len(nla), band_policy,
232 NULL);
233 if (err)
234 goto exit_free_meter;
235
236 if (!attr[OVS_BAND_ATTR_TYPE] ||
237 !attr[OVS_BAND_ATTR_RATE] ||
238 !attr[OVS_BAND_ATTR_BURST]) {
239 err = -EINVAL;
240 goto exit_free_meter;
241 }
242
243 band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
244 band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
245 band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
246 /* Figure out max delta_t that is enough to fill any bucket.
247 * Keep max_delta_t size to the bucket units:
248 * pkts => 1/1000 packets, kilobits => bits.
249 */
250 band_max_delta_t = (band->burst_size + band->rate) * 1000;
251 /* Start with a full bucket. */
252 band->bucket = band_max_delta_t;
253 if (band_max_delta_t > meter->max_delta_t)
254 meter->max_delta_t = band_max_delta_t;
255 band++;
256 }
257
258 return meter;
259
260exit_free_meter:
261 kfree(meter);
262 return ERR_PTR(err);
263}
264
265static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
266{
267 struct nlattr **a = info->attrs;
268 struct dp_meter *meter, *old_meter;
269 struct sk_buff *reply;
270 struct ovs_header *ovs_reply_header;
271 struct ovs_header *ovs_header = info->userhdr;
272 struct datapath *dp;
273 int err;
274 u32 meter_id;
275 bool failed;
276
277 meter = dp_meter_create(a);
278 if (IS_ERR_OR_NULL(meter))
279 return PTR_ERR(meter);
280
281 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
282 &ovs_reply_header);
283 if (IS_ERR(reply)) {
284 err = PTR_ERR(reply);
285 goto exit_free_meter;
286 }
287
288 ovs_lock();
289 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
290 if (!dp) {
291 err = -ENODEV;
292 goto exit_unlock;
293 }
294
295 if (!a[OVS_METER_ATTR_ID]) {
296 err = -ENODEV;
297 goto exit_unlock;
298 }
299
300 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
301
302 /* Cannot fail after this. */
303 old_meter = lookup_meter(dp, meter_id);
304 detach_meter(old_meter);
305 attach_meter(dp, meter);
306 ovs_unlock();
307
308 /* Build response with the meter_id and stats from
309 * the old meter, if any.
310 */
311 failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
312 WARN_ON(failed);
313 if (old_meter) {
314 spin_lock_bh(&old_meter->lock);
315 if (old_meter->keep_stats) {
316 err = ovs_meter_cmd_reply_stats(reply, meter_id,
317 old_meter);
318 WARN_ON(err);
319 }
320 spin_unlock_bh(&old_meter->lock);
321 ovs_meter_free(old_meter);
322 }
323
324 genlmsg_end(reply, ovs_reply_header);
325 return genlmsg_reply(reply, info);
326
327exit_unlock:
328 ovs_unlock();
329 nlmsg_free(reply);
330exit_free_meter:
331 kfree(meter);
332 return err;
333}
334
335static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
336{
337 struct nlattr **a = info->attrs;
338 u32 meter_id;
339 struct ovs_header *ovs_header = info->userhdr;
340 struct ovs_header *ovs_reply_header;
341 struct datapath *dp;
342 int err;
343 struct sk_buff *reply;
344 struct dp_meter *meter;
345
346 if (!a[OVS_METER_ATTR_ID])
347 return -EINVAL;
348
349 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
350
351 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
352 &ovs_reply_header);
353 if (IS_ERR(reply))
354 return PTR_ERR(reply);
355
356 ovs_lock();
357
358 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
359 if (!dp) {
360 err = -ENODEV;
361 goto exit_unlock;
362 }
363
364 /* Locate meter, copy stats. */
365 meter = lookup_meter(dp, meter_id);
366 if (!meter) {
367 err = -ENOENT;
368 goto exit_unlock;
369 }
370
371 spin_lock_bh(&meter->lock);
372 err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
373 spin_unlock_bh(&meter->lock);
374 if (err)
375 goto exit_unlock;
376
377 ovs_unlock();
378
379 genlmsg_end(reply, ovs_reply_header);
380 return genlmsg_reply(reply, info);
381
382exit_unlock:
383 ovs_unlock();
384 nlmsg_free(reply);
385 return err;
386}
387
388static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
389{
390 struct nlattr **a = info->attrs;
391 u32 meter_id;
392 struct ovs_header *ovs_header = info->userhdr;
393 struct ovs_header *ovs_reply_header;
394 struct datapath *dp;
395 int err;
396 struct sk_buff *reply;
397 struct dp_meter *old_meter;
398
399 if (!a[OVS_METER_ATTR_ID])
400 return -EINVAL;
401 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
402
403 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
404 &ovs_reply_header);
405 if (IS_ERR(reply))
406 return PTR_ERR(reply);
407
408 ovs_lock();
409
410 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
411 if (!dp) {
412 err = -ENODEV;
413 goto exit_unlock;
414 }
415
416 old_meter = lookup_meter(dp, meter_id);
417 if (old_meter) {
418 spin_lock_bh(&old_meter->lock);
419 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
420 WARN_ON(err);
421 spin_unlock_bh(&old_meter->lock);
422 detach_meter(old_meter);
423 }
424 ovs_unlock();
425 ovs_meter_free(old_meter);
426 genlmsg_end(reply, ovs_reply_header);
427 return genlmsg_reply(reply, info);
428
429exit_unlock:
430 ovs_unlock();
431 nlmsg_free(reply);
432 return err;
433}
434
435/* Meter action execution.
436 *
437 * Return true 'meter_id' drop band is triggered. The 'skb' should be
438 * dropped by the caller'.
439 */
440bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
441 struct sw_flow_key *key, u32 meter_id)
442{
443 struct dp_meter *meter;
444 struct dp_meter_band *band;
445 long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
446 long long int long_delta_ms;
447 u32 delta_ms;
448 u32 cost;
449 int i, band_exceeded_max = -1;
450 u32 band_exceeded_rate = 0;
451
452 meter = lookup_meter(dp, meter_id);
453 /* Do not drop the packet when there is no meter. */
454 if (!meter)
455 return false;
456
457 /* Lock the meter while using it. */
458 spin_lock(&meter->lock);
459
460 long_delta_ms = (now_ms - meter->used); /* ms */
461
462 /* Make sure delta_ms will not be too large, so that bucket will not
463 * wrap around below.
464 */
465 delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
466 ? meter->max_delta_t : (u32)long_delta_ms;
467
468 /* Update meter statistics.
469 */
470 meter->used = now_ms;
471 meter->stats.n_packets += 1;
472 meter->stats.n_bytes += skb->len;
473
474 /* Bucket rate is either in kilobits per second, or in packets per
475 * second. We maintain the bucket in the units of either bits or
476 * 1/1000th of a packet, correspondingly.
477 * Then, when rate is multiplied with milliseconds, we get the
478 * bucket units:
479 * msec * kbps = bits, and
480 * msec * packets/sec = 1/1000 packets.
481 *
482 * 'cost' is the number of bucket units in this packet.
483 */
484 cost = (meter->kbps) ? skb->len * 8 : 1000;
485
486 /* Update all bands and find the one hit with the highest rate. */
487 for (i = 0; i < meter->n_bands; ++i) {
488 long long int max_bucket_size;
489
490 band = &meter->bands[i];
491 max_bucket_size = (band->burst_size + band->rate) * 1000;
492
493 band->bucket += delta_ms * band->rate;
494 if (band->bucket > max_bucket_size)
495 band->bucket = max_bucket_size;
496
497 if (band->bucket >= cost) {
498 band->bucket -= cost;
499 } else if (band->rate > band_exceeded_rate) {
500 band_exceeded_rate = band->rate;
501 band_exceeded_max = i;
502 }
503 }
504
505 if (band_exceeded_max >= 0) {
506 /* Update band statistics. */
507 band = &meter->bands[band_exceeded_max];
508 band->stats.n_packets += 1;
509 band->stats.n_bytes += skb->len;
510
511 /* Drop band triggered, let the caller drop the 'skb'. */
512 if (band->type == OVS_METER_BAND_TYPE_DROP) {
513 spin_unlock(&meter->lock);
514 return true;
515 }
516 }
517
518 spin_unlock(&meter->lock);
519 return false;
520}
521
522static struct genl_ops dp_meter_genl_ops[] = {
523 { .cmd = OVS_METER_CMD_FEATURES,
524 .flags = 0, /* OK for unprivileged users. */
525 .policy = meter_policy,
526 .doit = ovs_meter_cmd_features
527 },
528 { .cmd = OVS_METER_CMD_SET,
529 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
530 * privilege.
531 */
532 .policy = meter_policy,
533 .doit = ovs_meter_cmd_set,
534 },
535 { .cmd = OVS_METER_CMD_GET,
536 .flags = 0, /* OK for unprivileged users. */
537 .policy = meter_policy,
538 .doit = ovs_meter_cmd_get,
539 },
540 { .cmd = OVS_METER_CMD_DEL,
541 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
542 * privilege.
543 */
544 .policy = meter_policy,
545 .doit = ovs_meter_cmd_del
546 },
547};
548
549static const struct genl_multicast_group ovs_meter_multicast_group = {
550 .name = OVS_METER_MCGROUP,
551};
552
553struct genl_family dp_meter_genl_family __ro_after_init = {
554 .hdrsize = sizeof(struct ovs_header),
555 .name = OVS_METER_FAMILY,
556 .version = OVS_METER_VERSION,
557 .maxattr = OVS_METER_ATTR_MAX,
558 .netnsok = true,
559 .parallel_ops = true,
560 .ops = dp_meter_genl_ops,
561 .n_ops = ARRAY_SIZE(dp_meter_genl_ops),
562 .mcgrps = &ovs_meter_multicast_group,
563 .n_mcgrps = 1,
564 .module = THIS_MODULE,
565};
566
567int ovs_meters_init(struct datapath *dp)
568{
569 int i;
570
571 dp->meters = kmalloc_array(METER_HASH_BUCKETS,
572 sizeof(struct hlist_head), GFP_KERNEL);
573
574 if (!dp->meters)
575 return -ENOMEM;
576
577 for (i = 0; i < METER_HASH_BUCKETS; i++)
578 INIT_HLIST_HEAD(&dp->meters[i]);
579
580 return 0;
581}
582
583void ovs_meters_exit(struct datapath *dp)
584{
585 int i;
586
587 for (i = 0; i < METER_HASH_BUCKETS; i++) {
588 struct hlist_head *head = &dp->meters[i];
589 struct dp_meter *meter;
590 struct hlist_node *n;
591
592 hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
593 kfree(meter);
594 }
595
596 kfree(dp->meters);
597}
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
new file mode 100644
index 000000000000..964ace2650f8
--- /dev/null
+++ b/net/openvswitch/meter.h
@@ -0,0 +1,54 @@
/*
 * Copyright (c) 2017 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */

#ifndef METER_H
#define METER_H 1

#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
#include <linux/genetlink.h>
#include <linux/skbuff.h>

#include "flow.h"
struct datapath;

/* Maximum number of bands a single meter may carry. */
#define DP_MAX_BANDS	1

/* One rate-limiting band of a meter. */
struct dp_meter_band {
	u32 type;		/* OVS_METER_BAND_TYPE_* */
	u32 rate;		/* Configured rate for this band. */
	u32 burst_size;		/* Configured burst allowance. */
	u32 bucket;		/* 1/1000 packets, or in bits */
	struct ovs_flow_stats stats;	/* Per-band hit statistics. */
};

struct dp_meter {
	spinlock_t lock;	/* Per meter lock */
	struct rcu_head rcu;	/* Deferred free of the meter. */
	struct hlist_node dp_hash_node; /* Element in datapath->meters
					 * hash table.
					 */
	u32 id;			/* Userspace-assigned meter id. */
	u16 kbps:1, keep_stats:1; /* kbps: rate in kbit/s (else packets);
				   * keep_stats: preserve stats on update.
				   */
	u16 n_bands;		/* Number of valid entries in bands[]. */
	u32 max_delta_t;	/* Largest time delta credited to a bucket;
				 * units not visible here — see meter.c.
				 */
	u64 used;		/* Time of last use; units not visible
				 * here — see meter.c.
				 */
	struct ovs_flow_stats stats;	/* Aggregate meter statistics. */
	struct dp_meter_band bands[];	/* Flexible array of n_bands bands. */
};

extern struct genl_family dp_meter_genl_family;
/* Allocate/free the datapath's meter hash table. */
int ovs_meters_init(struct datapath *dp);
void ovs_meters_exit(struct datapath *dp);
/* Apply meter @meter_id to @skb; returns true if the packet should be
 * dropped.
 */
bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
		       struct sw_flow_key *key, u32 meter_id);

#endif /* METER_H */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 0389398fa4ab..2e5e7a41d8ef 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -108,7 +108,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
108 108
109 rtnl_lock(); 109 rtnl_lock();
110 err = netdev_master_upper_dev_link(vport->dev, 110 err = netdev_master_upper_dev_link(vport->dev,
111 get_dpdev(vport->dp), NULL, NULL); 111 get_dpdev(vport->dp),
112 NULL, NULL, NULL);
112 if (err) 113 if (err)
113 goto error_unlock; 114 goto error_unlock;
114 115
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2986941164b1..737092ca9b4e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -201,11 +201,8 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *,
201static int prb_queue_frozen(struct tpacket_kbdq_core *); 201static int prb_queue_frozen(struct tpacket_kbdq_core *);
202static void prb_open_block(struct tpacket_kbdq_core *, 202static void prb_open_block(struct tpacket_kbdq_core *,
203 struct tpacket_block_desc *); 203 struct tpacket_block_desc *);
204static void prb_retire_rx_blk_timer_expired(unsigned long); 204static void prb_retire_rx_blk_timer_expired(struct timer_list *);
205static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *); 205static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
206static void prb_init_blk_timer(struct packet_sock *,
207 struct tpacket_kbdq_core *,
208 void (*func) (unsigned long));
209static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); 206static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
210static void prb_clear_rxhash(struct tpacket_kbdq_core *, 207static void prb_clear_rxhash(struct tpacket_kbdq_core *,
211 struct tpacket3_hdr *); 208 struct tpacket3_hdr *);
@@ -540,22 +537,14 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
540 prb_del_retire_blk_timer(pkc); 537 prb_del_retire_blk_timer(pkc);
541} 538}
542 539
543static void prb_init_blk_timer(struct packet_sock *po,
544 struct tpacket_kbdq_core *pkc,
545 void (*func) (unsigned long))
546{
547 init_timer(&pkc->retire_blk_timer);
548 pkc->retire_blk_timer.data = (long)po;
549 pkc->retire_blk_timer.function = func;
550 pkc->retire_blk_timer.expires = jiffies;
551}
552
553static void prb_setup_retire_blk_timer(struct packet_sock *po) 540static void prb_setup_retire_blk_timer(struct packet_sock *po)
554{ 541{
555 struct tpacket_kbdq_core *pkc; 542 struct tpacket_kbdq_core *pkc;
556 543
557 pkc = GET_PBDQC_FROM_RB(&po->rx_ring); 544 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
558 prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); 545 timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
546 0);
547 pkc->retire_blk_timer.expires = jiffies;
559} 548}
560 549
561static int prb_calc_retire_blk_tmo(struct packet_sock *po, 550static int prb_calc_retire_blk_tmo(struct packet_sock *po,
@@ -673,9 +662,10 @@ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
673 * prb_calc_retire_blk_tmo() calculates the tmo. 662 * prb_calc_retire_blk_tmo() calculates the tmo.
674 * 663 *
675 */ 664 */
676static void prb_retire_rx_blk_timer_expired(unsigned long data) 665static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
677{ 666{
678 struct packet_sock *po = (struct packet_sock *)data; 667 struct packet_sock *po =
668 from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
679 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); 669 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
680 unsigned int frozen; 670 unsigned int frozen;
681 struct tpacket_block_desc *pbd; 671 struct tpacket_block_desc *pbd;
@@ -4570,6 +4560,7 @@ static int __net_init packet_net_init(struct net *net)
4570static void __net_exit packet_net_exit(struct net *net) 4560static void __net_exit packet_net_exit(struct net *net)
4571{ 4561{
4572 remove_proc_entry("packet", net->proc_net); 4562 remove_proc_entry("packet", net->proc_net);
4563 WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
4573} 4564}
4574 4565
4575static struct pernet_operations packet_net_ops = { 4566static struct pernet_operations packet_net_ops = {
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index f925753668a7..3b0ef691f5b1 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -35,11 +35,11 @@
35#include <net/phonet/pn_dev.h> 35#include <net/phonet/pn_dev.h>
36 36
37/* Transport protocol registration */ 37/* Transport protocol registration */
38static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; 38static const struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
39 39
40static struct phonet_protocol *phonet_proto_get(unsigned int protocol) 40static const struct phonet_protocol *phonet_proto_get(unsigned int protocol)
41{ 41{
42 struct phonet_protocol *pp; 42 const struct phonet_protocol *pp;
43 43
44 if (protocol >= PHONET_NPROTO) 44 if (protocol >= PHONET_NPROTO)
45 return NULL; 45 return NULL;
@@ -53,7 +53,7 @@ static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
53 return pp; 53 return pp;
54} 54}
55 55
56static inline void phonet_proto_put(struct phonet_protocol *pp) 56static inline void phonet_proto_put(const struct phonet_protocol *pp)
57{ 57{
58 module_put(pp->prot->owner); 58 module_put(pp->prot->owner);
59} 59}
@@ -65,7 +65,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
65{ 65{
66 struct sock *sk; 66 struct sock *sk;
67 struct pn_sock *pn; 67 struct pn_sock *pn;
68 struct phonet_protocol *pnp; 68 const struct phonet_protocol *pnp;
69 int err; 69 int err;
70 70
71 if (!capable(CAP_SYS_ADMIN)) 71 if (!capable(CAP_SYS_ADMIN))
@@ -149,7 +149,7 @@ static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr)
149 return 1; 149 return 1;
150} 150}
151 151
152struct header_ops phonet_header_ops = { 152const struct header_ops phonet_header_ops = {
153 .create = pn_header_create, 153 .create = pn_header_create,
154 .parse = pn_header_parse, 154 .parse = pn_header_parse,
155}; 155};
@@ -470,7 +470,7 @@ static struct packet_type phonet_packet_type __read_mostly = {
470static DEFINE_MUTEX(proto_tab_lock); 470static DEFINE_MUTEX(proto_tab_lock);
471 471
472int __init_or_module phonet_proto_register(unsigned int protocol, 472int __init_or_module phonet_proto_register(unsigned int protocol,
473 struct phonet_protocol *pp) 473 const struct phonet_protocol *pp)
474{ 474{
475 int err = 0; 475 int err = 0;
476 476
@@ -492,7 +492,8 @@ int __init_or_module phonet_proto_register(unsigned int protocol,
492} 492}
493EXPORT_SYMBOL(phonet_proto_register); 493EXPORT_SYMBOL(phonet_proto_register);
494 494
495void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp) 495void phonet_proto_unregister(unsigned int protocol,
496 const struct phonet_protocol *pp)
496{ 497{
497 mutex_lock(&proto_tab_lock); 498 mutex_lock(&proto_tab_lock);
498 BUG_ON(proto_tab[protocol] != pp); 499 BUG_ON(proto_tab[protocol] != pp);
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 5e710435ffa9..b44fb9018fb8 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -195,7 +195,7 @@ static struct proto pn_proto = {
195 .name = "PHONET", 195 .name = "PHONET",
196}; 196};
197 197
198static struct phonet_protocol pn_dgram_proto = { 198static const struct phonet_protocol pn_dgram_proto = {
199 .ops = &phonet_dgram_ops, 199 .ops = &phonet_dgram_ops,
200 .prot = &pn_proto, 200 .prot = &pn_proto,
201 .sock_type = SOCK_DGRAM, 201 .sock_type = SOCK_DGRAM,
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e81537991ddf..9fc76b19cd3c 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1351,7 +1351,7 @@ static struct proto pep_proto = {
1351 .name = "PNPIPE", 1351 .name = "PNPIPE",
1352}; 1352};
1353 1353
1354static struct phonet_protocol pep_pn_proto = { 1354static const struct phonet_protocol pep_pn_proto = {
1355 .ops = &phonet_stream_ops, 1355 .ops = &phonet_stream_ops,
1356 .prot = &pep_proto, 1356 .prot = &pep_proto,
1357 .sock_type = SOCK_SEQPACKET, 1357 .sock_type = SOCK_SEQPACKET,
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 2cb4c5dfad6f..77787512fc32 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -331,7 +331,10 @@ static int __net_init phonet_init_net(struct net *net)
331 331
332static void __net_exit phonet_exit_net(struct net *net) 332static void __net_exit phonet_exit_net(struct net *net)
333{ 333{
334 struct phonet_net *pnn = phonet_pernet(net);
335
334 remove_proc_entry("phonet", net->proc_net); 336 remove_proc_entry("phonet", net->proc_net);
337 WARN_ON_ONCE(!list_empty(&pnn->pndevs.list));
335} 338}
336 339
337static struct pernet_operations phonet_net_ops = { 340static struct pernet_operations phonet_net_ops = {
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 78418f38464a..77ab05e23001 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -20,26 +20,15 @@
20 20
21#include "qrtr.h" 21#include "qrtr.h"
22 22
23#define QRTR_PROTO_VER 1 23#define QRTR_PROTO_VER_1 1
24#define QRTR_PROTO_VER_2 3
24 25
25/* auto-bind range */ 26/* auto-bind range */
26#define QRTR_MIN_EPH_SOCKET 0x4000 27#define QRTR_MIN_EPH_SOCKET 0x4000
27#define QRTR_MAX_EPH_SOCKET 0x7fff 28#define QRTR_MAX_EPH_SOCKET 0x7fff
28 29
29enum qrtr_pkt_type {
30 QRTR_TYPE_DATA = 1,
31 QRTR_TYPE_HELLO = 2,
32 QRTR_TYPE_BYE = 3,
33 QRTR_TYPE_NEW_SERVER = 4,
34 QRTR_TYPE_DEL_SERVER = 5,
35 QRTR_TYPE_DEL_CLIENT = 6,
36 QRTR_TYPE_RESUME_TX = 7,
37 QRTR_TYPE_EXIT = 8,
38 QRTR_TYPE_PING = 9,
39};
40
41/** 30/**
42 * struct qrtr_hdr - (I|R)PCrouter packet header 31 * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
43 * @version: protocol version 32 * @version: protocol version
44 * @type: packet type; one of QRTR_TYPE_* 33 * @type: packet type; one of QRTR_TYPE_*
45 * @src_node_id: source node 34 * @src_node_id: source node
@@ -49,7 +38,7 @@ enum qrtr_pkt_type {
49 * @dst_node_id: destination node 38 * @dst_node_id: destination node
50 * @dst_port_id: destination port 39 * @dst_port_id: destination port
51 */ 40 */
52struct qrtr_hdr { 41struct qrtr_hdr_v1 {
53 __le32 version; 42 __le32 version;
54 __le32 type; 43 __le32 type;
55 __le32 src_node_id; 44 __le32 src_node_id;
@@ -60,9 +49,44 @@ struct qrtr_hdr {
60 __le32 dst_port_id; 49 __le32 dst_port_id;
61} __packed; 50} __packed;
62 51
63#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr) 52/**
64#define QRTR_NODE_BCAST ((unsigned int)-1) 53 * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions
65#define QRTR_PORT_CTRL ((unsigned int)-2) 54 * @version: protocol version
55 * @type: packet type; one of QRTR_TYPE_*
56 * @flags: bitmask of QRTR_FLAGS_*
57 * @optlen: length of optional header data
58 * @size: length of packet, excluding this header and optlen
59 * @src_node_id: source node
60 * @src_port_id: source port
61 * @dst_node_id: destination node
62 * @dst_port_id: destination port
63 */
64struct qrtr_hdr_v2 {
65 u8 version;
66 u8 type;
67 u8 flags;
68 u8 optlen;
69 __le32 size;
70 __le16 src_node_id;
71 __le16 src_port_id;
72 __le16 dst_node_id;
73 __le16 dst_port_id;
74};
75
76#define QRTR_FLAGS_CONFIRM_RX BIT(0)
77
78struct qrtr_cb {
79 u32 src_node;
80 u32 src_port;
81 u32 dst_node;
82 u32 dst_port;
83
84 u8 type;
85 u8 confirm_rx;
86};
87
88#define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \
89 sizeof(struct qrtr_hdr_v2))
66 90
67struct qrtr_sock { 91struct qrtr_sock {
68 /* WARNING: sk must be the first member */ 92 /* WARNING: sk must be the first member */
@@ -111,8 +135,12 @@ struct qrtr_node {
111 struct list_head item; 135 struct list_head item;
112}; 136};
113 137
114static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb); 138static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
115static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb); 139 int type, struct sockaddr_qrtr *from,
140 struct sockaddr_qrtr *to);
141static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
142 int type, struct sockaddr_qrtr *from,
143 struct sockaddr_qrtr *to);
116 144
117/* Release node resources and free the node. 145/* Release node resources and free the node.
118 * 146 *
@@ -150,10 +178,27 @@ static void qrtr_node_release(struct qrtr_node *node)
150} 178}
151 179
152/* Pass an outgoing packet socket buffer to the endpoint driver. */ 180/* Pass an outgoing packet socket buffer to the endpoint driver. */
153static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb) 181static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
182 int type, struct sockaddr_qrtr *from,
183 struct sockaddr_qrtr *to)
154{ 184{
185 struct qrtr_hdr_v1 *hdr;
186 size_t len = skb->len;
155 int rc = -ENODEV; 187 int rc = -ENODEV;
156 188
189 hdr = skb_push(skb, sizeof(*hdr));
190 hdr->version = cpu_to_le32(QRTR_PROTO_VER_1);
191 hdr->type = cpu_to_le32(type);
192 hdr->src_node_id = cpu_to_le32(from->sq_node);
193 hdr->src_port_id = cpu_to_le32(from->sq_port);
194 hdr->dst_node_id = cpu_to_le32(to->sq_node);
195 hdr->dst_port_id = cpu_to_le32(to->sq_port);
196
197 hdr->size = cpu_to_le32(len);
198 hdr->confirm_rx = 0;
199
200 skb_put_padto(skb, ALIGN(len, 4));
201
157 mutex_lock(&node->ep_lock); 202 mutex_lock(&node->ep_lock);
158 if (node->ep) 203 if (node->ep)
159 rc = node->ep->xmit(node->ep, skb); 204 rc = node->ep->xmit(node->ep, skb);
@@ -207,125 +252,103 @@ static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
207int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) 252int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
208{ 253{
209 struct qrtr_node *node = ep->node; 254 struct qrtr_node *node = ep->node;
210 const struct qrtr_hdr *phdr = data; 255 const struct qrtr_hdr_v1 *v1;
256 const struct qrtr_hdr_v2 *v2;
211 struct sk_buff *skb; 257 struct sk_buff *skb;
212 unsigned int psize; 258 struct qrtr_cb *cb;
213 unsigned int size; 259 unsigned int size;
214 unsigned int type;
215 unsigned int ver; 260 unsigned int ver;
216 unsigned int dst; 261 size_t hdrlen;
217
218 if (len < QRTR_HDR_SIZE || len & 3)
219 return -EINVAL;
220
221 ver = le32_to_cpu(phdr->version);
222 size = le32_to_cpu(phdr->size);
223 type = le32_to_cpu(phdr->type);
224 dst = le32_to_cpu(phdr->dst_port_id);
225
226 psize = (size + 3) & ~3;
227 262
228 if (ver != QRTR_PROTO_VER) 263 if (len & 3)
229 return -EINVAL;
230
231 if (len != psize + QRTR_HDR_SIZE)
232 return -EINVAL;
233
234 if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA)
235 return -EINVAL; 264 return -EINVAL;
236 265
237 skb = netdev_alloc_skb(NULL, len); 266 skb = netdev_alloc_skb(NULL, len);
238 if (!skb) 267 if (!skb)
239 return -ENOMEM; 268 return -ENOMEM;
240 269
241 skb_reset_transport_header(skb); 270 cb = (struct qrtr_cb *)skb->cb;
242 skb_put_data(skb, data, len);
243
244 skb_queue_tail(&node->rx_queue, skb);
245 schedule_work(&node->work);
246
247 return 0;
248}
249EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
250 271
251static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len, 272 /* Version field in v1 is little endian, so this works for both cases */
252 u32 src_node, u32 dst_node) 273 ver = *(u8*)data;
253{
254 struct qrtr_hdr *hdr;
255 struct sk_buff *skb;
256
257 skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
258 if (!skb)
259 return NULL;
260 skb_reset_transport_header(skb);
261 274
262 hdr = skb_put(skb, QRTR_HDR_SIZE); 275 switch (ver) {
263 hdr->version = cpu_to_le32(QRTR_PROTO_VER); 276 case QRTR_PROTO_VER_1:
264 hdr->type = cpu_to_le32(type); 277 v1 = data;
265 hdr->src_node_id = cpu_to_le32(src_node); 278 hdrlen = sizeof(*v1);
266 hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL);
267 hdr->confirm_rx = cpu_to_le32(0);
268 hdr->size = cpu_to_le32(pkt_len);
269 hdr->dst_node_id = cpu_to_le32(dst_node);
270 hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
271 279
272 return skb; 280 cb->type = le32_to_cpu(v1->type);
273} 281 cb->src_node = le32_to_cpu(v1->src_node_id);
282 cb->src_port = le32_to_cpu(v1->src_port_id);
283 cb->confirm_rx = !!v1->confirm_rx;
284 cb->dst_node = le32_to_cpu(v1->dst_node_id);
285 cb->dst_port = le32_to_cpu(v1->dst_port_id);
274 286
275/* Allocate and construct a resume-tx packet. */ 287 size = le32_to_cpu(v1->size);
276static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, 288 break;
277 u32 dst_node, u32 port) 289 case QRTR_PROTO_VER_2:
278{ 290 v2 = data;
279 const int pkt_len = 20; 291 hdrlen = sizeof(*v2) + v2->optlen;
280 struct sk_buff *skb; 292
281 __le32 *buf; 293 cb->type = v2->type;
294 cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX);
295 cb->src_node = le16_to_cpu(v2->src_node_id);
296 cb->src_port = le16_to_cpu(v2->src_port_id);
297 cb->dst_node = le16_to_cpu(v2->dst_node_id);
298 cb->dst_port = le16_to_cpu(v2->dst_port_id);
299
300 if (cb->src_port == (u16)QRTR_PORT_CTRL)
301 cb->src_port = QRTR_PORT_CTRL;
302 if (cb->dst_port == (u16)QRTR_PORT_CTRL)
303 cb->dst_port = QRTR_PORT_CTRL;
304
305 size = le32_to_cpu(v2->size);
306 break;
307 default:
308 pr_err("qrtr: Invalid version %d\n", ver);
309 goto err;
310 }
282 311
283 skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_RESUME_TX, pkt_len, 312 if (len != ALIGN(size, 4) + hdrlen)
284 src_node, dst_node); 313 goto err;
285 if (!skb)
286 return NULL;
287 314
288 buf = skb_put_zero(skb, pkt_len); 315 if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA)
289 buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); 316 goto err;
290 buf[1] = cpu_to_le32(src_node);
291 buf[2] = cpu_to_le32(port);
292 317
293 return skb; 318 skb_put_data(skb, data + hdrlen, size);
294}
295 319
296/* Allocate and construct a BYE message to signal remote termination */ 320 skb_queue_tail(&node->rx_queue, skb);
297static struct sk_buff *qrtr_alloc_local_bye(u32 src_node) 321 schedule_work(&node->work);
298{
299 const int pkt_len = 20;
300 struct sk_buff *skb;
301 __le32 *buf;
302 322
303 skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_BYE, pkt_len, 323 return 0;
304 src_node, qrtr_local_nid);
305 if (!skb)
306 return NULL;
307 324
308 buf = skb_put_zero(skb, pkt_len); 325err:
309 buf[0] = cpu_to_le32(QRTR_TYPE_BYE); 326 kfree_skb(skb);
327 return -EINVAL;
310 328
311 return skb;
312} 329}
330EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
313 331
314static struct sk_buff *qrtr_alloc_del_client(struct sockaddr_qrtr *sq) 332/**
333 * qrtr_alloc_ctrl_packet() - allocate control packet skb
334 * @pkt: reference to qrtr_ctrl_pkt pointer
335 *
336 * Returns newly allocated sk_buff, or NULL on failure
337 *
338 * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and
339 * on success returns a reference to the control packet in @pkt.
340 */
341static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt)
315{ 342{
316 const int pkt_len = 20; 343 const int pkt_len = sizeof(struct qrtr_ctrl_pkt);
317 struct sk_buff *skb; 344 struct sk_buff *skb;
318 __le32 *buf;
319 345
320 skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_DEL_CLIENT, pkt_len, 346 skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, GFP_KERNEL);
321 sq->sq_node, QRTR_NODE_BCAST);
322 if (!skb) 347 if (!skb)
323 return NULL; 348 return NULL;
324 349
325 buf = skb_put_zero(skb, pkt_len); 350 skb_reserve(skb, QRTR_HDR_MAX_SIZE);
326 buf[0] = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); 351 *pkt = skb_put_zero(skb, pkt_len);
327 buf[1] = cpu_to_le32(sq->sq_node);
328 buf[2] = cpu_to_le32(sq->sq_port);
329 352
330 return skb; 353 return skb;
331} 354}
@@ -340,24 +363,26 @@ static void qrtr_port_put(struct qrtr_sock *ipc);
340static void qrtr_node_rx_work(struct work_struct *work) 363static void qrtr_node_rx_work(struct work_struct *work)
341{ 364{
342 struct qrtr_node *node = container_of(work, struct qrtr_node, work); 365 struct qrtr_node *node = container_of(work, struct qrtr_node, work);
366 struct qrtr_ctrl_pkt *pkt;
367 struct sockaddr_qrtr dst;
368 struct sockaddr_qrtr src;
343 struct sk_buff *skb; 369 struct sk_buff *skb;
344 370
345 while ((skb = skb_dequeue(&node->rx_queue)) != NULL) { 371 while ((skb = skb_dequeue(&node->rx_queue)) != NULL) {
346 const struct qrtr_hdr *phdr;
347 u32 dst_node, dst_port;
348 struct qrtr_sock *ipc; 372 struct qrtr_sock *ipc;
349 u32 src_node; 373 struct qrtr_cb *cb;
350 int confirm; 374 int confirm;
351 375
352 phdr = (const struct qrtr_hdr *)skb_transport_header(skb); 376 cb = (struct qrtr_cb *)skb->cb;
353 src_node = le32_to_cpu(phdr->src_node_id); 377 src.sq_node = cb->src_node;
354 dst_node = le32_to_cpu(phdr->dst_node_id); 378 src.sq_port = cb->src_port;
355 dst_port = le32_to_cpu(phdr->dst_port_id); 379 dst.sq_node = cb->dst_node;
356 confirm = !!phdr->confirm_rx; 380 dst.sq_port = cb->dst_port;
381 confirm = !!cb->confirm_rx;
357 382
358 qrtr_node_assign(node, src_node); 383 qrtr_node_assign(node, cb->src_node);
359 384
360 ipc = qrtr_port_lookup(dst_port); 385 ipc = qrtr_port_lookup(cb->dst_port);
361 if (!ipc) { 386 if (!ipc) {
362 kfree_skb(skb); 387 kfree_skb(skb);
363 } else { 388 } else {
@@ -368,10 +393,16 @@ static void qrtr_node_rx_work(struct work_struct *work)
368 } 393 }
369 394
370 if (confirm) { 395 if (confirm) {
371 skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port); 396 skb = qrtr_alloc_ctrl_packet(&pkt);
372 if (!skb) 397 if (!skb)
373 break; 398 break;
374 if (qrtr_node_enqueue(node, skb)) 399
400 pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX);
401 pkt->client.node = cpu_to_le32(dst.sq_node);
402 pkt->client.port = cpu_to_le32(dst.sq_port);
403
404 if (qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX,
405 &dst, &src))
375 break; 406 break;
376 } 407 }
377 } 408 }
@@ -421,6 +452,9 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
421void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) 452void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
422{ 453{
423 struct qrtr_node *node = ep->node; 454 struct qrtr_node *node = ep->node;
455 struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL};
456 struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL};
457 struct qrtr_ctrl_pkt *pkt;
424 struct sk_buff *skb; 458 struct sk_buff *skb;
425 459
426 mutex_lock(&node->ep_lock); 460 mutex_lock(&node->ep_lock);
@@ -428,9 +462,11 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
428 mutex_unlock(&node->ep_lock); 462 mutex_unlock(&node->ep_lock);
429 463
430 /* Notify the local controller about the event */ 464 /* Notify the local controller about the event */
431 skb = qrtr_alloc_local_bye(node->nid); 465 skb = qrtr_alloc_ctrl_packet(&pkt);
432 if (skb) 466 if (skb) {
433 qrtr_local_enqueue(NULL, skb); 467 pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE);
468 qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst);
469 }
434 470
435 qrtr_node_release(node); 471 qrtr_node_release(node);
436 ep->node = NULL; 472 ep->node = NULL;
@@ -466,13 +502,24 @@ static void qrtr_port_put(struct qrtr_sock *ipc)
466/* Remove port assignment. */ 502/* Remove port assignment. */
467static void qrtr_port_remove(struct qrtr_sock *ipc) 503static void qrtr_port_remove(struct qrtr_sock *ipc)
468{ 504{
505 struct qrtr_ctrl_pkt *pkt;
469 struct sk_buff *skb; 506 struct sk_buff *skb;
470 int port = ipc->us.sq_port; 507 int port = ipc->us.sq_port;
508 struct sockaddr_qrtr to;
471 509
472 skb = qrtr_alloc_del_client(&ipc->us); 510 to.sq_family = AF_QIPCRTR;
511 to.sq_node = QRTR_NODE_BCAST;
512 to.sq_port = QRTR_PORT_CTRL;
513
514 skb = qrtr_alloc_ctrl_packet(&pkt);
473 if (skb) { 515 if (skb) {
516 pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
517 pkt->client.node = cpu_to_le32(ipc->us.sq_node);
518 pkt->client.port = cpu_to_le32(ipc->us.sq_port);
519
474 skb_set_owner_w(skb, &ipc->sk); 520 skb_set_owner_w(skb, &ipc->sk);
475 qrtr_bcast_enqueue(NULL, skb); 521 qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us,
522 &to);
476 } 523 }
477 524
478 if (port == QRTR_PORT_CTRL) 525 if (port == QRTR_PORT_CTRL)
@@ -541,7 +588,7 @@ static void qrtr_reset_ports(void)
541 588
542 sock_hold(&ipc->sk); 589 sock_hold(&ipc->sk);
543 ipc->sk.sk_err = ENETRESET; 590 ipc->sk.sk_err = ENETRESET;
544 wake_up_interruptible(sk_sleep(&ipc->sk)); 591 ipc->sk.sk_error_report(&ipc->sk);
545 sock_put(&ipc->sk); 592 sock_put(&ipc->sk);
546 } 593 }
547 mutex_unlock(&qrtr_port_lock); 594 mutex_unlock(&qrtr_port_lock);
@@ -620,19 +667,23 @@ static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len)
620} 667}
621 668
622/* Queue packet to local peer socket. */ 669/* Queue packet to local peer socket. */
623static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb) 670static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
671 int type, struct sockaddr_qrtr *from,
672 struct sockaddr_qrtr *to)
624{ 673{
625 const struct qrtr_hdr *phdr;
626 struct qrtr_sock *ipc; 674 struct qrtr_sock *ipc;
675 struct qrtr_cb *cb;
627 676
628 phdr = (const struct qrtr_hdr *)skb_transport_header(skb); 677 ipc = qrtr_port_lookup(to->sq_port);
629
630 ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id));
631 if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ 678 if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
632 kfree_skb(skb); 679 kfree_skb(skb);
633 return -ENODEV; 680 return -ENODEV;
634 } 681 }
635 682
683 cb = (struct qrtr_cb *)skb->cb;
684 cb->src_node = from->sq_node;
685 cb->src_port = from->sq_port;
686
636 if (sock_queue_rcv_skb(&ipc->sk, skb)) { 687 if (sock_queue_rcv_skb(&ipc->sk, skb)) {
637 qrtr_port_put(ipc); 688 qrtr_port_put(ipc);
638 kfree_skb(skb); 689 kfree_skb(skb);
@@ -645,7 +696,9 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
645} 696}
646 697
647/* Queue packet for broadcast. */ 698/* Queue packet for broadcast. */
648static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) 699static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
700 int type, struct sockaddr_qrtr *from,
701 struct sockaddr_qrtr *to)
649{ 702{
650 struct sk_buff *skbn; 703 struct sk_buff *skbn;
651 704
@@ -655,11 +708,11 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
655 if (!skbn) 708 if (!skbn)
656 break; 709 break;
657 skb_set_owner_w(skbn, skb->sk); 710 skb_set_owner_w(skbn, skb->sk);
658 qrtr_node_enqueue(node, skbn); 711 qrtr_node_enqueue(node, skbn, type, from, to);
659 } 712 }
660 mutex_unlock(&qrtr_node_lock); 713 mutex_unlock(&qrtr_node_lock);
661 714
662 qrtr_local_enqueue(node, skb); 715 qrtr_local_enqueue(node, skb, type, from, to);
663 716
664 return 0; 717 return 0;
665} 718}
@@ -667,13 +720,14 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
667static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 720static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
668{ 721{
669 DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); 722 DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
670 int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *); 723 int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int,
724 struct sockaddr_qrtr *, struct sockaddr_qrtr *);
671 struct qrtr_sock *ipc = qrtr_sk(sock->sk); 725 struct qrtr_sock *ipc = qrtr_sk(sock->sk);
672 struct sock *sk = sock->sk; 726 struct sock *sk = sock->sk;
673 struct qrtr_node *node; 727 struct qrtr_node *node;
674 struct qrtr_hdr *hdr;
675 struct sk_buff *skb; 728 struct sk_buff *skb;
676 size_t plen; 729 size_t plen;
730 u32 type = QRTR_TYPE_DATA;
677 int rc; 731 int rc;
678 732
679 if (msg->msg_flags & ~(MSG_DONTWAIT)) 733 if (msg->msg_flags & ~(MSG_DONTWAIT))
@@ -722,37 +776,19 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
722 } 776 }
723 777
724 plen = (len + 3) & ~3; 778 plen = (len + 3) & ~3;
725 skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE, 779 skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
726 msg->msg_flags & MSG_DONTWAIT, &rc); 780 msg->msg_flags & MSG_DONTWAIT, &rc);
727 if (!skb) 781 if (!skb)
728 goto out_node; 782 goto out_node;
729 783
730 skb_reset_transport_header(skb); 784 skb_reserve(skb, QRTR_HDR_MAX_SIZE);
731 skb_put(skb, len + QRTR_HDR_SIZE);
732
733 hdr = (struct qrtr_hdr *)skb_transport_header(skb);
734 hdr->version = cpu_to_le32(QRTR_PROTO_VER);
735 hdr->src_node_id = cpu_to_le32(ipc->us.sq_node);
736 hdr->src_port_id = cpu_to_le32(ipc->us.sq_port);
737 hdr->confirm_rx = cpu_to_le32(0);
738 hdr->size = cpu_to_le32(len);
739 hdr->dst_node_id = cpu_to_le32(addr->sq_node);
740 hdr->dst_port_id = cpu_to_le32(addr->sq_port);
741 785
742 rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE, 786 rc = memcpy_from_msg(skb_put(skb, len), msg, len);
743 &msg->msg_iter, len);
744 if (rc) { 787 if (rc) {
745 kfree_skb(skb); 788 kfree_skb(skb);
746 goto out_node; 789 goto out_node;
747 } 790 }
748 791
749 if (plen != len) {
750 rc = skb_pad(skb, plen - len);
751 if (rc)
752 goto out_node;
753 skb_put(skb, plen - len);
754 }
755
756 if (ipc->us.sq_port == QRTR_PORT_CTRL) { 792 if (ipc->us.sq_port == QRTR_PORT_CTRL) {
757 if (len < 4) { 793 if (len < 4) {
758 rc = -EINVAL; 794 rc = -EINVAL;
@@ -761,12 +797,11 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
761 } 797 }
762 798
763 /* control messages already require the type as 'command' */ 799 /* control messages already require the type as 'command' */
764 skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4); 800 skb_copy_bits(skb, 0, &type, 4);
765 } else { 801 type = le32_to_cpu(type);
766 hdr->type = cpu_to_le32(QRTR_TYPE_DATA);
767 } 802 }
768 803
769 rc = enqueue_fn(node, skb); 804 rc = enqueue_fn(node, skb, type, &ipc->us, addr);
770 if (rc >= 0) 805 if (rc >= 0)
771 rc = len; 806 rc = len;
772 807
@@ -781,9 +816,9 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
781 size_t size, int flags) 816 size_t size, int flags)
782{ 817{
783 DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); 818 DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
784 const struct qrtr_hdr *phdr;
785 struct sock *sk = sock->sk; 819 struct sock *sk = sock->sk;
786 struct sk_buff *skb; 820 struct sk_buff *skb;
821 struct qrtr_cb *cb;
787 int copied, rc; 822 int copied, rc;
788 823
789 lock_sock(sk); 824 lock_sock(sk);
@@ -800,22 +835,22 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
800 return rc; 835 return rc;
801 } 836 }
802 837
803 phdr = (const struct qrtr_hdr *)skb_transport_header(skb); 838 copied = skb->len;
804 copied = le32_to_cpu(phdr->size);
805 if (copied > size) { 839 if (copied > size) {
806 copied = size; 840 copied = size;
807 msg->msg_flags |= MSG_TRUNC; 841 msg->msg_flags |= MSG_TRUNC;
808 } 842 }
809 843
810 rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied); 844 rc = skb_copy_datagram_msg(skb, 0, msg, copied);
811 if (rc < 0) 845 if (rc < 0)
812 goto out; 846 goto out;
813 rc = copied; 847 rc = copied;
814 848
815 if (addr) { 849 if (addr) {
850 cb = (struct qrtr_cb *)skb->cb;
816 addr->sq_family = AF_QIPCRTR; 851 addr->sq_family = AF_QIPCRTR;
817 addr->sq_node = le32_to_cpu(phdr->src_node_id); 852 addr->sq_node = cb->src_node;
818 addr->sq_port = le32_to_cpu(phdr->src_port_id); 853 addr->sq_port = cb->src_port;
819 msg->msg_namelen = sizeof(*addr); 854 msg->msg_namelen = sizeof(*addr);
820 } 855 }
821 856
@@ -908,7 +943,7 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
908 case TIOCINQ: 943 case TIOCINQ:
909 skb = skb_peek(&sk->sk_receive_queue); 944 skb = skb_peek(&sk->sk_receive_queue);
910 if (skb) 945 if (skb)
911 len = skb->len - QRTR_HDR_SIZE; 946 len = skb->len;
912 rc = put_user(len, (int __user *)argp); 947 rc = put_user(len, (int __user *)argp);
913 break; 948 break;
914 case SIOCGIFADDR: 949 case SIOCGIFADDR:
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a0954ace3774..36dd2099048a 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -126,6 +126,7 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
126static void rds_ib_add_one(struct ib_device *device) 126static void rds_ib_add_one(struct ib_device *device)
127{ 127{
128 struct rds_ib_device *rds_ibdev; 128 struct rds_ib_device *rds_ibdev;
129 bool has_fr, has_fmr;
129 130
130 /* Only handle IB (no iWARP) devices */ 131 /* Only handle IB (no iWARP) devices */
131 if (device->node_type != RDMA_NODE_IB_CA) 132 if (device->node_type != RDMA_NODE_IB_CA)
@@ -143,11 +144,11 @@ static void rds_ib_add_one(struct ib_device *device)
143 rds_ibdev->max_wrs = device->attrs.max_qp_wr; 144 rds_ibdev->max_wrs = device->attrs.max_qp_wr;
144 rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE); 145 rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
145 146
146 rds_ibdev->has_fr = (device->attrs.device_cap_flags & 147 has_fr = (device->attrs.device_cap_flags &
147 IB_DEVICE_MEM_MGT_EXTENSIONS); 148 IB_DEVICE_MEM_MGT_EXTENSIONS);
148 rds_ibdev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && 149 has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
149 device->map_phys_fmr && device->unmap_fmr); 150 device->map_phys_fmr && device->unmap_fmr);
150 rds_ibdev->use_fastreg = (rds_ibdev->has_fr && !rds_ibdev->has_fmr); 151 rds_ibdev->use_fastreg = (has_fr && !has_fmr);
151 152
152 rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32; 153 rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
153 rds_ibdev->max_1m_mrs = device->attrs.max_mr ? 154 rds_ibdev->max_1m_mrs = device->attrs.max_mr ?
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 86a8578d95b8..a6f4d7d68e95 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -216,8 +216,6 @@ struct rds_ib_device {
216 struct list_head conn_list; 216 struct list_head conn_list;
217 struct ib_device *dev; 217 struct ib_device *dev;
218 struct ib_pd *pd; 218 struct ib_pd *pd;
219 bool has_fmr;
220 bool has_fr;
221 bool use_fastreg; 219 bool use_fastreg;
222 220
223 unsigned int max_mrs; 221 unsigned int max_mrs;
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index 86ef907067bb..e0f70c4051b6 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -139,8 +139,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
139 return -EINVAL; 139 return -EINVAL;
140 } 140 }
141 141
142 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC, 142 dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC,
143 rdsibdev_to_node(rds_ibdev)); 143 rdsibdev_to_node(rds_ibdev));
144 if (!dma_pages) { 144 if (!dma_pages) {
145 ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); 145 ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
146 return -ENOMEM; 146 return -ENOMEM;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 9a3c54e659e9..e678699268a2 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -601,11 +601,11 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
601 if (pool_type == RDS_IB_MR_1M_POOL) { 601 if (pool_type == RDS_IB_MR_1M_POOL) {
602 /* +1 allows for unaligned MRs */ 602 /* +1 allows for unaligned MRs */
603 pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1; 603 pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1;
604 pool->max_items = RDS_MR_1M_POOL_SIZE; 604 pool->max_items = rds_ibdev->max_1m_mrs;
605 } else { 605 } else {
606 /* pool_type == RDS_IB_MR_8K_POOL */ 606 /* pool_type == RDS_IB_MR_8K_POOL */
607 pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1; 607 pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1;
608 pool->max_items = RDS_MR_8K_POOL_SIZE; 608 pool->max_items = rds_ibdev->max_8k_mrs;
609 } 609 }
610 610
611 pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4; 611 pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 4a9729257023..6a5c4992cf61 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -318,9 +318,11 @@ void rose_destroy_socket(struct sock *);
318/* 318/*
319 * Handler for deferred kills. 319 * Handler for deferred kills.
320 */ 320 */
321static void rose_destroy_timer(unsigned long data) 321static void rose_destroy_timer(struct timer_list *t)
322{ 322{
323 rose_destroy_socket((struct sock *)data); 323 struct sock *sk = from_timer(sk, t, sk_timer);
324
325 rose_destroy_socket(sk);
324} 326}
325 327
326/* 328/*
@@ -353,8 +355,7 @@ void rose_destroy_socket(struct sock *sk)
353 355
354 if (sk_has_allocations(sk)) { 356 if (sk_has_allocations(sk)) {
355 /* Defer: outstanding buffers */ 357 /* Defer: outstanding buffers */
356 setup_timer(&sk->sk_timer, rose_destroy_timer, 358 timer_setup(&sk->sk_timer, rose_destroy_timer, 0);
357 (unsigned long)sk);
358 sk->sk_timer.expires = jiffies + 10 * HZ; 359 sk->sk_timer.expires = jiffies + 10 * HZ;
359 add_timer(&sk->sk_timer); 360 add_timer(&sk->sk_timer);
360 } else 361 } else
@@ -538,8 +539,8 @@ static int rose_create(struct net *net, struct socket *sock, int protocol,
538 sock->ops = &rose_proto_ops; 539 sock->ops = &rose_proto_ops;
539 sk->sk_protocol = protocol; 540 sk->sk_protocol = protocol;
540 541
541 init_timer(&rose->timer); 542 timer_setup(&rose->timer, NULL, 0);
542 init_timer(&rose->idletimer); 543 timer_setup(&rose->idletimer, NULL, 0);
543 544
544 rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout); 545 rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout);
545 rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout); 546 rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout);
@@ -582,8 +583,8 @@ static struct sock *rose_make_new(struct sock *osk)
582 sk->sk_state = TCP_ESTABLISHED; 583 sk->sk_state = TCP_ESTABLISHED;
583 sock_copy_flags(sk, osk); 584 sock_copy_flags(sk, osk);
584 585
585 init_timer(&rose->timer); 586 timer_setup(&rose->timer, NULL, 0);
586 init_timer(&rose->idletimer); 587 timer_setup(&rose->idletimer, NULL, 0);
587 588
588 orose = rose_sk(osk); 589 orose = rose_sk(osk);
589 rose->t1 = orose->t1; 590 rose->t1 = orose->t1;
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 0a6394754e81..9bbbfe325c5a 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -219,6 +219,7 @@ static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int framety
219 switch (frametype) { 219 switch (frametype) {
220 case ROSE_RESET_REQUEST: 220 case ROSE_RESET_REQUEST:
221 rose_write_internal(sk, ROSE_RESET_CONFIRMATION); 221 rose_write_internal(sk, ROSE_RESET_CONFIRMATION);
222 /* fall through */
222 case ROSE_RESET_CONFIRMATION: 223 case ROSE_RESET_CONFIRMATION:
223 rose_stop_timer(sk); 224 rose_stop_timer(sk);
224 rose_start_idletimer(sk); 225 rose_start_idletimer(sk);
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index c76638cc2cd5..62055d3069d2 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -27,8 +27,8 @@
27#include <linux/interrupt.h> 27#include <linux/interrupt.h>
28#include <net/rose.h> 28#include <net/rose.h>
29 29
30static void rose_ftimer_expiry(unsigned long); 30static void rose_ftimer_expiry(struct timer_list *);
31static void rose_t0timer_expiry(unsigned long); 31static void rose_t0timer_expiry(struct timer_list *);
32 32
33static void rose_transmit_restart_confirmation(struct rose_neigh *neigh); 33static void rose_transmit_restart_confirmation(struct rose_neigh *neigh);
34static void rose_transmit_restart_request(struct rose_neigh *neigh); 34static void rose_transmit_restart_request(struct rose_neigh *neigh);
@@ -37,8 +37,7 @@ void rose_start_ftimer(struct rose_neigh *neigh)
37{ 37{
38 del_timer(&neigh->ftimer); 38 del_timer(&neigh->ftimer);
39 39
40 neigh->ftimer.data = (unsigned long)neigh; 40 neigh->ftimer.function = rose_ftimer_expiry;
41 neigh->ftimer.function = &rose_ftimer_expiry;
42 neigh->ftimer.expires = 41 neigh->ftimer.expires =
43 jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout); 42 jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout);
44 43
@@ -49,8 +48,7 @@ static void rose_start_t0timer(struct rose_neigh *neigh)
49{ 48{
50 del_timer(&neigh->t0timer); 49 del_timer(&neigh->t0timer);
51 50
52 neigh->t0timer.data = (unsigned long)neigh; 51 neigh->t0timer.function = rose_t0timer_expiry;
53 neigh->t0timer.function = &rose_t0timer_expiry;
54 neigh->t0timer.expires = 52 neigh->t0timer.expires =
55 jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout); 53 jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout);
56 54
@@ -77,13 +75,13 @@ static int rose_t0timer_running(struct rose_neigh *neigh)
77 return timer_pending(&neigh->t0timer); 75 return timer_pending(&neigh->t0timer);
78} 76}
79 77
80static void rose_ftimer_expiry(unsigned long param) 78static void rose_ftimer_expiry(struct timer_list *t)
81{ 79{
82} 80}
83 81
84static void rose_t0timer_expiry(unsigned long param) 82static void rose_t0timer_expiry(struct timer_list *t)
85{ 83{
86 struct rose_neigh *neigh = (struct rose_neigh *)param; 84 struct rose_neigh *neigh = from_timer(neigh, t, t0timer);
87 85
88 rose_transmit_restart_request(neigh); 86 rose_transmit_restart_request(neigh);
89 87
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 344456206b70..7af4f99c4a93 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -19,12 +19,13 @@ static struct sk_buff_head loopback_queue;
19static struct timer_list loopback_timer; 19static struct timer_list loopback_timer;
20 20
21static void rose_set_loopback_timer(void); 21static void rose_set_loopback_timer(void);
22static void rose_loopback_timer(struct timer_list *unused);
22 23
23void rose_loopback_init(void) 24void rose_loopback_init(void)
24{ 25{
25 skb_queue_head_init(&loopback_queue); 26 skb_queue_head_init(&loopback_queue);
26 27
27 init_timer(&loopback_timer); 28 timer_setup(&loopback_timer, rose_loopback_timer, 0);
28} 29}
29 30
30static int rose_loopback_running(void) 31static int rose_loopback_running(void)
@@ -50,20 +51,16 @@ int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
50 return 1; 51 return 1;
51} 52}
52 53
53static void rose_loopback_timer(unsigned long);
54 54
55static void rose_set_loopback_timer(void) 55static void rose_set_loopback_timer(void)
56{ 56{
57 del_timer(&loopback_timer); 57 del_timer(&loopback_timer);
58 58
59 loopback_timer.data = 0;
60 loopback_timer.function = &rose_loopback_timer;
61 loopback_timer.expires = jiffies + 10; 59 loopback_timer.expires = jiffies + 10;
62
63 add_timer(&loopback_timer); 60 add_timer(&loopback_timer);
64} 61}
65 62
66static void rose_loopback_timer(unsigned long param) 63static void rose_loopback_timer(struct timer_list *unused)
67{ 64{
68 struct sk_buff *skb; 65 struct sk_buff *skb;
69 struct net_device *dev; 66 struct net_device *dev;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 452bbb38d943..8ca3124df83f 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -104,8 +104,8 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route,
104 104
105 skb_queue_head_init(&rose_neigh->queue); 105 skb_queue_head_init(&rose_neigh->queue);
106 106
107 init_timer(&rose_neigh->ftimer); 107 timer_setup(&rose_neigh->ftimer, NULL, 0);
108 init_timer(&rose_neigh->t0timer); 108 timer_setup(&rose_neigh->t0timer, NULL, 0);
109 109
110 if (rose_route->ndigis != 0) { 110 if (rose_route->ndigis != 0) {
111 rose_neigh->digipeat = 111 rose_neigh->digipeat =
@@ -346,6 +346,7 @@ static int rose_del_node(struct rose_route_struct *rose_route,
346 case 0: 346 case 0:
347 rose_node->neighbour[0] = 347 rose_node->neighbour[0] =
348 rose_node->neighbour[1]; 348 rose_node->neighbour[1];
349 /* fall through */
349 case 1: 350 case 1:
350 rose_node->neighbour[1] = 351 rose_node->neighbour[1] =
351 rose_node->neighbour[2]; 352 rose_node->neighbour[2];
@@ -390,8 +391,8 @@ void rose_add_loopback_neigh(void)
390 391
391 skb_queue_head_init(&sn->queue); 392 skb_queue_head_init(&sn->queue);
392 393
393 init_timer(&sn->ftimer); 394 timer_setup(&sn->ftimer, NULL, 0);
394 init_timer(&sn->t0timer); 395 timer_setup(&sn->t0timer, NULL, 0);
395 396
396 spin_lock_bh(&rose_neigh_list_lock); 397 spin_lock_bh(&rose_neigh_list_lock);
397 sn->next = rose_neigh_list; 398 sn->next = rose_neigh_list;
@@ -507,6 +508,7 @@ void rose_rt_device_down(struct net_device *dev)
507 switch (i) { 508 switch (i) {
508 case 0: 509 case 0:
509 t->neighbour[0] = t->neighbour[1]; 510 t->neighbour[0] = t->neighbour[1];
511 /* fall through */
510 case 1: 512 case 1:
511 t->neighbour[1] = t->neighbour[2]; 513 t->neighbour[1] = t->neighbour[2];
512 case 2: 514 case 2:
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index bc5469d6d9cb..74555fb95615 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -28,16 +28,15 @@
28#include <linux/interrupt.h> 28#include <linux/interrupt.h>
29#include <net/rose.h> 29#include <net/rose.h>
30 30
31static void rose_heartbeat_expiry(unsigned long); 31static void rose_heartbeat_expiry(struct timer_list *t);
32static void rose_timer_expiry(unsigned long); 32static void rose_timer_expiry(struct timer_list *);
33static void rose_idletimer_expiry(unsigned long); 33static void rose_idletimer_expiry(struct timer_list *);
34 34
35void rose_start_heartbeat(struct sock *sk) 35void rose_start_heartbeat(struct sock *sk)
36{ 36{
37 del_timer(&sk->sk_timer); 37 del_timer(&sk->sk_timer);
38 38
39 sk->sk_timer.data = (unsigned long)sk; 39 sk->sk_timer.function = rose_heartbeat_expiry;
40 sk->sk_timer.function = &rose_heartbeat_expiry;
41 sk->sk_timer.expires = jiffies + 5 * HZ; 40 sk->sk_timer.expires = jiffies + 5 * HZ;
42 41
43 add_timer(&sk->sk_timer); 42 add_timer(&sk->sk_timer);
@@ -49,8 +48,7 @@ void rose_start_t1timer(struct sock *sk)
49 48
50 del_timer(&rose->timer); 49 del_timer(&rose->timer);
51 50
52 rose->timer.data = (unsigned long)sk; 51 rose->timer.function = rose_timer_expiry;
53 rose->timer.function = &rose_timer_expiry;
54 rose->timer.expires = jiffies + rose->t1; 52 rose->timer.expires = jiffies + rose->t1;
55 53
56 add_timer(&rose->timer); 54 add_timer(&rose->timer);
@@ -62,8 +60,7 @@ void rose_start_t2timer(struct sock *sk)
62 60
63 del_timer(&rose->timer); 61 del_timer(&rose->timer);
64 62
65 rose->timer.data = (unsigned long)sk; 63 rose->timer.function = rose_timer_expiry;
66 rose->timer.function = &rose_timer_expiry;
67 rose->timer.expires = jiffies + rose->t2; 64 rose->timer.expires = jiffies + rose->t2;
68 65
69 add_timer(&rose->timer); 66 add_timer(&rose->timer);
@@ -75,8 +72,7 @@ void rose_start_t3timer(struct sock *sk)
75 72
76 del_timer(&rose->timer); 73 del_timer(&rose->timer);
77 74
78 rose->timer.data = (unsigned long)sk; 75 rose->timer.function = rose_timer_expiry;
79 rose->timer.function = &rose_timer_expiry;
80 rose->timer.expires = jiffies + rose->t3; 76 rose->timer.expires = jiffies + rose->t3;
81 77
82 add_timer(&rose->timer); 78 add_timer(&rose->timer);
@@ -88,8 +84,7 @@ void rose_start_hbtimer(struct sock *sk)
88 84
89 del_timer(&rose->timer); 85 del_timer(&rose->timer);
90 86
91 rose->timer.data = (unsigned long)sk; 87 rose->timer.function = rose_timer_expiry;
92 rose->timer.function = &rose_timer_expiry;
93 rose->timer.expires = jiffies + rose->hb; 88 rose->timer.expires = jiffies + rose->hb;
94 89
95 add_timer(&rose->timer); 90 add_timer(&rose->timer);
@@ -102,8 +97,7 @@ void rose_start_idletimer(struct sock *sk)
102 del_timer(&rose->idletimer); 97 del_timer(&rose->idletimer);
103 98
104 if (rose->idle > 0) { 99 if (rose->idle > 0) {
105 rose->idletimer.data = (unsigned long)sk; 100 rose->idletimer.function = rose_idletimer_expiry;
106 rose->idletimer.function = &rose_idletimer_expiry;
107 rose->idletimer.expires = jiffies + rose->idle; 101 rose->idletimer.expires = jiffies + rose->idle;
108 102
109 add_timer(&rose->idletimer); 103 add_timer(&rose->idletimer);
@@ -125,9 +119,9 @@ void rose_stop_idletimer(struct sock *sk)
125 del_timer(&rose_sk(sk)->idletimer); 119 del_timer(&rose_sk(sk)->idletimer);
126} 120}
127 121
128static void rose_heartbeat_expiry(unsigned long param) 122static void rose_heartbeat_expiry(struct timer_list *t)
129{ 123{
130 struct sock *sk = (struct sock *)param; 124 struct sock *sk = from_timer(sk, t, sk_timer);
131 struct rose_sock *rose = rose_sk(sk); 125 struct rose_sock *rose = rose_sk(sk);
132 126
133 bh_lock_sock(sk); 127 bh_lock_sock(sk);
@@ -163,10 +157,10 @@ static void rose_heartbeat_expiry(unsigned long param)
163 bh_unlock_sock(sk); 157 bh_unlock_sock(sk);
164} 158}
165 159
166static void rose_timer_expiry(unsigned long param) 160static void rose_timer_expiry(struct timer_list *t)
167{ 161{
168 struct sock *sk = (struct sock *)param; 162 struct rose_sock *rose = from_timer(rose, t, timer);
169 struct rose_sock *rose = rose_sk(sk); 163 struct sock *sk = &rose->sock;
170 164
171 bh_lock_sock(sk); 165 bh_lock_sock(sk);
172 switch (rose->state) { 166 switch (rose->state) {
@@ -192,9 +186,10 @@ static void rose_timer_expiry(unsigned long param)
192 bh_unlock_sock(sk); 186 bh_unlock_sock(sk);
193} 187}
194 188
195static void rose_idletimer_expiry(unsigned long param) 189static void rose_idletimer_expiry(struct timer_list *t)
196{ 190{
197 struct sock *sk = (struct sock *)param; 191 struct rose_sock *rose = from_timer(rose, t, idletimer);
192 struct sock *sk = &rose->sock;
198 193
199 bh_lock_sock(sk); 194 bh_lock_sock(sk);
200 rose_clear_queues(sk); 195 rose_clear_queues(sk);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 4b0a8288c98a..9b5c46b052fd 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -246,6 +246,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
246 ret = 0; 246 ret = 0;
247 break; 247 break;
248 } 248 }
249 /* Fall through */
249 default: 250 default:
250 ret = -EBUSY; 251 ret = -EBUSY;
251 break; 252 break;
@@ -265,6 +266,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
265 * @tx_total_len: Total length of data to transmit during the call (or -1) 266 * @tx_total_len: Total length of data to transmit during the call (or -1)
266 * @gfp: The allocation constraints 267 * @gfp: The allocation constraints
267 * @notify_rx: Where to send notifications instead of socket queue 268 * @notify_rx: Where to send notifications instead of socket queue
269 * @upgrade: Request service upgrade for call
268 * 270 *
269 * Allow a kernel service to begin a call on the nominated socket. This just 271 * Allow a kernel service to begin a call on the nominated socket. This just
270 * sets up all the internal tracking structures and allocates connection and 272 * sets up all the internal tracking structures and allocates connection and
@@ -279,7 +281,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
279 unsigned long user_call_ID, 281 unsigned long user_call_ID,
280 s64 tx_total_len, 282 s64 tx_total_len,
281 gfp_t gfp, 283 gfp_t gfp,
282 rxrpc_notify_rx_t notify_rx) 284 rxrpc_notify_rx_t notify_rx,
285 bool upgrade)
283{ 286{
284 struct rxrpc_conn_parameters cp; 287 struct rxrpc_conn_parameters cp;
285 struct rxrpc_call *call; 288 struct rxrpc_call *call;
@@ -304,6 +307,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
304 cp.key = key; 307 cp.key = key;
305 cp.security_level = 0; 308 cp.security_level = 0;
306 cp.exclusive = false; 309 cp.exclusive = false;
310 cp.upgrade = upgrade;
307 cp.service_id = srx->srx_service; 311 cp.service_id = srx->srx_service;
308 call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, 312 call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len,
309 gfp); 313 gfp);
@@ -318,6 +322,14 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
318} 322}
319EXPORT_SYMBOL(rxrpc_kernel_begin_call); 323EXPORT_SYMBOL(rxrpc_kernel_begin_call);
320 324
325/*
326 * Dummy function used to stop the notifier talking to recvmsg().
327 */
328static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
329 unsigned long call_user_ID)
330{
331}
332
321/** 333/**
322 * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using 334 * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
323 * @sock: The socket the call is on 335 * @sock: The socket the call is on
@@ -332,12 +344,39 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
332 344
333 mutex_lock(&call->user_mutex); 345 mutex_lock(&call->user_mutex);
334 rxrpc_release_call(rxrpc_sk(sock->sk), call); 346 rxrpc_release_call(rxrpc_sk(sock->sk), call);
347
348 /* Make sure we're not going to call back into a kernel service */
349 if (call->notify_rx) {
350 spin_lock_bh(&call->notify_lock);
351 call->notify_rx = rxrpc_dummy_notify_rx;
352 spin_unlock_bh(&call->notify_lock);
353 }
354
335 mutex_unlock(&call->user_mutex); 355 mutex_unlock(&call->user_mutex);
336 rxrpc_put_call(call, rxrpc_call_put_kernel); 356 rxrpc_put_call(call, rxrpc_call_put_kernel);
337} 357}
338EXPORT_SYMBOL(rxrpc_kernel_end_call); 358EXPORT_SYMBOL(rxrpc_kernel_end_call);
339 359
340/** 360/**
361 * rxrpc_kernel_check_life - Check to see whether a call is still alive
362 * @sock: The socket the call is on
363 * @call: The call to check
364 *
365 * Allow a kernel service to find out whether a call is still alive - ie. we're
366 * getting ACKs from the server. Returns a number representing the life state
367 * which can be compared to that returned by a previous call.
368 *
369 * If this is a client call, ping ACKs will be sent to the server to find out
370 * whether it's still responsive and whether the call is still alive on the
371 * server.
372 */
373u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
374{
375 return call->acks_latest;
376}
377EXPORT_SYMBOL(rxrpc_kernel_check_life);
378
379/**
341 * rxrpc_kernel_check_call - Check a call's state 380 * rxrpc_kernel_check_call - Check a call's state
342 * @sock: The socket the call is on 381 * @sock: The socket the call is on
343 * @call: The call to check 382 * @call: The call to check
@@ -538,6 +577,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
538 m->msg_name = &rx->connect_srx; 577 m->msg_name = &rx->connect_srx;
539 m->msg_namelen = sizeof(rx->connect_srx); 578 m->msg_namelen = sizeof(rx->connect_srx);
540 } 579 }
580 /* Fall through */
541 case RXRPC_SERVER_BOUND: 581 case RXRPC_SERVER_BOUND:
542 case RXRPC_SERVER_LISTENING: 582 case RXRPC_SERVER_LISTENING:
543 ret = rxrpc_do_sendmsg(rx, m, len); 583 ret = rxrpc_do_sendmsg(rx, m, len);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index ea5600b747cc..b2151993d384 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -525,6 +525,7 @@ struct rxrpc_call {
525 unsigned long flags; 525 unsigned long flags;
526 unsigned long events; 526 unsigned long events;
527 spinlock_t lock; 527 spinlock_t lock;
528 spinlock_t notify_lock; /* Kernel notification lock */
528 rwlock_t state_lock; /* lock for state transition */ 529 rwlock_t state_lock; /* lock for state transition */
529 u32 abort_code; /* Local/remote abort code */ 530 u32 abort_code; /* Local/remote abort code */
530 int error; /* Local error incurred */ 531 int error; /* Local error incurred */
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 7a77844aab16..3574508baf9a 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -386,7 +386,7 @@ recheck_state:
386 386
387 now = ktime_get_real(); 387 now = ktime_get_real();
388 if (ktime_before(call->expire_at, now)) { 388 if (ktime_before(call->expire_at, now)) {
389 rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); 389 rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
390 set_bit(RXRPC_CALL_EV_ABORT, &call->events); 390 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
391 goto recheck_state; 391 goto recheck_state;
392 } 392 }
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index fcdd6555a820..994dc2df57e4 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -45,9 +45,9 @@ const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = {
45 45
46struct kmem_cache *rxrpc_call_jar; 46struct kmem_cache *rxrpc_call_jar;
47 47
48static void rxrpc_call_timer_expired(unsigned long _call) 48static void rxrpc_call_timer_expired(struct timer_list *t)
49{ 49{
50 struct rxrpc_call *call = (struct rxrpc_call *)_call; 50 struct rxrpc_call *call = from_timer(call, t, timer);
51 51
52 _enter("%d", call->debug_id); 52 _enter("%d", call->debug_id);
53 53
@@ -114,8 +114,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
114 goto nomem_2; 114 goto nomem_2;
115 115
116 mutex_init(&call->user_mutex); 116 mutex_init(&call->user_mutex);
117 setup_timer(&call->timer, rxrpc_call_timer_expired, 117 timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
118 (unsigned long)call);
119 INIT_WORK(&call->processor, &rxrpc_process_call); 118 INIT_WORK(&call->processor, &rxrpc_process_call);
120 INIT_LIST_HEAD(&call->link); 119 INIT_LIST_HEAD(&call->link);
121 INIT_LIST_HEAD(&call->chan_wait_link); 120 INIT_LIST_HEAD(&call->chan_wait_link);
@@ -124,6 +123,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
124 INIT_LIST_HEAD(&call->sock_link); 123 INIT_LIST_HEAD(&call->sock_link);
125 init_waitqueue_head(&call->waitq); 124 init_waitqueue_head(&call->waitq);
126 spin_lock_init(&call->lock); 125 spin_lock_init(&call->lock);
126 spin_lock_init(&call->notify_lock);
127 rwlock_init(&call->state_lock); 127 rwlock_init(&call->state_lock);
128 atomic_set(&call->usage, 1); 128 atomic_set(&call->usage, 1);
129 call->debug_id = atomic_inc_return(&rxrpc_debug_id); 129 call->debug_id = atomic_inc_return(&rxrpc_debug_id);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index e56e23ed2229..1b592073ec96 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -298,8 +298,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
298 298
299 write_unlock(&call->state_lock); 299 write_unlock(&call->state_lock);
300 if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { 300 if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) {
301 rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true,
302 rxrpc_propose_ack_client_tx_end);
303 trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); 301 trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
304 } else { 302 } else {
305 trace_rxrpc_transmit(call, rxrpc_transmit_end); 303 trace_rxrpc_transmit(call, rxrpc_transmit_end);
@@ -1125,6 +1123,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
1125 case RXRPC_PACKET_TYPE_BUSY: 1123 case RXRPC_PACKET_TYPE_BUSY:
1126 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) 1124 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
1127 goto discard; 1125 goto discard;
1126 /* Fall through */
1128 1127
1129 case RXRPC_PACKET_TYPE_DATA: 1128 case RXRPC_PACKET_TYPE_DATA:
1130 if (sp->hdr.callNumber == 0) 1129 if (sp->hdr.callNumber == 0)
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 71e6f713fbe7..f47659c7b224 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -35,7 +35,8 @@ struct rxrpc_abort_buffer {
35/* 35/*
36 * Fill out an ACK packet. 36 * Fill out an ACK packet.
37 */ 37 */
38static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, 38static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
39 struct rxrpc_call *call,
39 struct rxrpc_ack_buffer *pkt, 40 struct rxrpc_ack_buffer *pkt,
40 rxrpc_seq_t *_hard_ack, 41 rxrpc_seq_t *_hard_ack,
41 rxrpc_seq_t *_top, 42 rxrpc_seq_t *_top,
@@ -77,8 +78,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
77 } while (before_eq(seq, top)); 78 } while (before_eq(seq, top));
78 } 79 }
79 80
80 mtu = call->conn->params.peer->if_mtu; 81 mtu = conn->params.peer->if_mtu;
81 mtu -= call->conn->params.peer->hdrsize; 82 mtu -= conn->params.peer->hdrsize;
82 jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; 83 jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
83 pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); 84 pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
84 pkt->ackinfo.maxMTU = htonl(mtu); 85 pkt->ackinfo.maxMTU = htonl(mtu);
@@ -148,7 +149,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
148 } 149 }
149 call->ackr_reason = 0; 150 call->ackr_reason = 0;
150 } 151 }
151 n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason); 152 n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
152 153
153 spin_unlock_bh(&call->lock); 154 spin_unlock_bh(&call->lock);
154 155
@@ -221,6 +222,16 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
221 rxrpc_serial_t serial; 222 rxrpc_serial_t serial;
222 int ret; 223 int ret;
223 224
225 /* Don't bother sending aborts for a client call once the server has
226 * hard-ACK'd all of its request data. After that point, we're not
227 * going to stop the operation proceeding, and whilst we might limit
228 * the reply, it's not worth it if we can send a new call on the same
229 * channel instead, thereby closing off this call.
230 */
231 if (rxrpc_is_client_call(call) &&
232 test_bit(RXRPC_CALL_TX_LAST, &call->flags))
233 return 0;
234
224 spin_lock_bh(&call->lock); 235 spin_lock_bh(&call->lock);
225 if (call->conn) 236 if (call->conn)
226 conn = rxrpc_get_connection_maybe(call->conn); 237 conn = rxrpc_get_connection_maybe(call->conn);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 5787f97f5330..d02a99f37f5f 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -411,3 +411,16 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
411 *_srx = call->peer->srx; 411 *_srx = call->peer->srx;
412} 412}
413EXPORT_SYMBOL(rxrpc_kernel_get_peer); 413EXPORT_SYMBOL(rxrpc_kernel_get_peer);
414
415/**
416 * rxrpc_kernel_get_rtt - Get a call's peer RTT
417 * @sock: The socket on which the call is in progress.
418 * @call: The call to query
419 *
420 * Get the call's peer RTT.
421 */
422u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call)
423{
424 return call->peer->rtt;
425}
426EXPORT_SYMBOL(rxrpc_kernel_get_rtt);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index bdece21f313d..8510a98b87e1 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -40,7 +40,9 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
40 sk = &rx->sk; 40 sk = &rx->sk;
41 if (rx && sk->sk_state < RXRPC_CLOSE) { 41 if (rx && sk->sk_state < RXRPC_CLOSE) {
42 if (call->notify_rx) { 42 if (call->notify_rx) {
43 spin_lock_bh(&call->notify_lock);
43 call->notify_rx(sk, call, call->user_call_ID); 44 call->notify_rx(sk, call, call->user_call_ID);
45 spin_unlock_bh(&call->notify_lock);
44 } else { 46 } else {
45 write_lock_bh(&rx->recvmsg_lock); 47 write_lock_bh(&rx->recvmsg_lock);
46 if (list_empty(&call->recvmsg_link)) { 48 if (list_empty(&call->recvmsg_link)) {
@@ -607,6 +609,7 @@ wait_error:
607 * @_offset: The running offset into the buffer. 609 * @_offset: The running offset into the buffer.
608 * @want_more: True if more data is expected to be read 610 * @want_more: True if more data is expected to be read
609 * @_abort: Where the abort code is stored if -ECONNABORTED is returned 611 * @_abort: Where the abort code is stored if -ECONNABORTED is returned
612 * @_service: Where to store the actual service ID (may be upgraded)
610 * 613 *
611 * Allow a kernel service to receive data and pick up information about the 614 * Allow a kernel service to receive data and pick up information about the
612 * state of a call. Returns 0 if got what was asked for and there's more 615 * state of a call. Returns 0 if got what was asked for and there's more
@@ -624,7 +627,7 @@ wait_error:
624 */ 627 */
625int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, 628int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
626 void *buf, size_t size, size_t *_offset, 629 void *buf, size_t size, size_t *_offset,
627 bool want_more, u32 *_abort) 630 bool want_more, u32 *_abort, u16 *_service)
628{ 631{
629 struct iov_iter iter; 632 struct iov_iter iter;
630 struct kvec iov; 633 struct kvec iov;
@@ -680,6 +683,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
680read_phase_complete: 683read_phase_complete:
681 ret = 1; 684 ret = 1;
682out: 685out:
686 if (_service)
687 *_service = call->service_id;
683 mutex_unlock(&call->user_mutex); 688 mutex_unlock(&call->user_mutex);
684 _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); 689 _leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
685 return ret; 690 return ret;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 9ea6f972767e..7d2595582c09 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -38,12 +38,86 @@ struct rxrpc_send_params {
38}; 38};
39 39
40/* 40/*
41 * Wait for space to appear in the Tx queue or a signal to occur.
42 */
43static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
44 struct rxrpc_call *call,
45 long *timeo)
46{
47 for (;;) {
48 set_current_state(TASK_INTERRUPTIBLE);
49 if (call->tx_top - call->tx_hard_ack <
50 min_t(unsigned int, call->tx_winsize,
51 call->cong_cwnd + call->cong_extra))
52 return 0;
53
54 if (call->state >= RXRPC_CALL_COMPLETE)
55 return call->error;
56
57 if (signal_pending(current))
58 return sock_intr_errno(*timeo);
59
60 trace_rxrpc_transmit(call, rxrpc_transmit_wait);
61 mutex_unlock(&call->user_mutex);
62 *timeo = schedule_timeout(*timeo);
63 if (mutex_lock_interruptible(&call->user_mutex) < 0)
64 return sock_intr_errno(*timeo);
65 }
66}
67
68/*
69 * Wait for space to appear in the Tx queue uninterruptibly, but with
70 * a timeout of 2*RTT if no progress was made and a signal occurred.
71 */
72static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
73 struct rxrpc_call *call)
74{
75 rxrpc_seq_t tx_start, tx_win;
76 signed long rtt2, timeout;
77 u64 rtt;
78
79 rtt = READ_ONCE(call->peer->rtt);
80 rtt2 = nsecs_to_jiffies64(rtt) * 2;
81 if (rtt2 < 1)
82 rtt2 = 1;
83
84 timeout = rtt2;
85 tx_start = READ_ONCE(call->tx_hard_ack);
86
87 for (;;) {
88 set_current_state(TASK_UNINTERRUPTIBLE);
89
90 tx_win = READ_ONCE(call->tx_hard_ack);
91 if (call->tx_top - tx_win <
92 min_t(unsigned int, call->tx_winsize,
93 call->cong_cwnd + call->cong_extra))
94 return 0;
95
96 if (call->state >= RXRPC_CALL_COMPLETE)
97 return call->error;
98
99 if (timeout == 0 &&
100 tx_win == tx_start && signal_pending(current))
101 return -EINTR;
102
103 if (tx_win != tx_start) {
104 timeout = rtt2;
105 tx_start = tx_win;
106 }
107
108 trace_rxrpc_transmit(call, rxrpc_transmit_wait);
109 timeout = schedule_timeout(timeout);
110 }
111}
112
113/*
41 * wait for space to appear in the transmit/ACK window 114 * wait for space to appear in the transmit/ACK window
42 * - caller holds the socket locked 115 * - caller holds the socket locked
43 */ 116 */
44static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, 117static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
45 struct rxrpc_call *call, 118 struct rxrpc_call *call,
46 long *timeo) 119 long *timeo,
120 bool waitall)
47{ 121{
48 DECLARE_WAITQUEUE(myself, current); 122 DECLARE_WAITQUEUE(myself, current);
49 int ret; 123 int ret;
@@ -53,30 +127,10 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
53 127
54 add_wait_queue(&call->waitq, &myself); 128 add_wait_queue(&call->waitq, &myself);
55 129
56 for (;;) { 130 if (waitall)
57 set_current_state(TASK_INTERRUPTIBLE); 131 ret = rxrpc_wait_for_tx_window_nonintr(rx, call);
58 ret = 0; 132 else
59 if (call->tx_top - call->tx_hard_ack < 133 ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
60 min_t(unsigned int, call->tx_winsize,
61 call->cong_cwnd + call->cong_extra))
62 break;
63 if (call->state >= RXRPC_CALL_COMPLETE) {
64 ret = call->error;
65 break;
66 }
67 if (signal_pending(current)) {
68 ret = sock_intr_errno(*timeo);
69 break;
70 }
71
72 trace_rxrpc_transmit(call, rxrpc_transmit_wait);
73 mutex_unlock(&call->user_mutex);
74 *timeo = schedule_timeout(*timeo);
75 if (mutex_lock_interruptible(&call->user_mutex) < 0) {
76 ret = sock_intr_errno(*timeo);
77 break;
78 }
79 }
80 134
81 remove_wait_queue(&call->waitq, &myself); 135 remove_wait_queue(&call->waitq, &myself);
82 set_current_state(TASK_RUNNING); 136 set_current_state(TASK_RUNNING);
@@ -166,6 +220,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
166 ktime_get_real()); 220 ktime_get_real());
167 if (!last) 221 if (!last)
168 break; 222 break;
223 /* Fall through */
169 case RXRPC_CALL_SERVER_SEND_REPLY: 224 case RXRPC_CALL_SERVER_SEND_REPLY:
170 call->state = RXRPC_CALL_SERVER_AWAIT_ACK; 225 call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
171 rxrpc_notify_end_tx(rx, call, notify_end_tx); 226 rxrpc_notify_end_tx(rx, call, notify_end_tx);
@@ -254,7 +309,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
254 if (msg->msg_flags & MSG_DONTWAIT) 309 if (msg->msg_flags & MSG_DONTWAIT)
255 goto maybe_error; 310 goto maybe_error;
256 ret = rxrpc_wait_for_tx_window(rx, call, 311 ret = rxrpc_wait_for_tx_window(rx, call,
257 &timeo); 312 &timeo,
313 msg->msg_flags & MSG_WAITALL);
258 if (ret < 0) 314 if (ret < 0)
259 goto maybe_error; 315 goto maybe_error;
260 } 316 }
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index e70ed26485a2..c03d86a7775e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -172,6 +172,17 @@ config NET_SCH_TBF
172 To compile this code as a module, choose M here: the 172 To compile this code as a module, choose M here: the
173 module will be called sch_tbf. 173 module will be called sch_tbf.
174 174
175config NET_SCH_CBS
176 tristate "Credit Based Shaper (CBS)"
177 ---help---
178 Say Y here if you want to use the Credit Based Shaper (CBS) packet
179 scheduling algorithm.
180
181 See the top of <file:net/sched/sch_cbs.c> for more details.
182
183 To compile this code as a module, choose M here: the
184 module will be called sch_cbs.
185
175config NET_SCH_GRED 186config NET_SCH_GRED
176 tristate "Generic Random Early Detection (GRED)" 187 tristate "Generic Random Early Detection (GRED)"
177 ---help--- 188 ---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 9e43a4721ef8..5b635447e3f8 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
53obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o 53obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o
54obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o 54obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o
55obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o 55obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o
56obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o
56 57
57obj-$(CONFIG_NET_CLS_U32) += cls_u32.o 58obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
58obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o 59obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8f2c63514956..4d33a50a8a6d 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -21,6 +21,8 @@
21#include <linux/kmod.h> 21#include <linux/kmod.h>
22#include <linux/err.h> 22#include <linux/err.h>
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/rhashtable.h>
25#include <linux/list.h>
24#include <net/net_namespace.h> 26#include <net/net_namespace.h>
25#include <net/sock.h> 27#include <net/sock.h>
26#include <net/sch_generic.h> 28#include <net/sch_generic.h>
@@ -1251,8 +1253,227 @@ out_module_put:
1251 return skb->len; 1253 return skb->len;
1252} 1254}
1253 1255
1256struct tcf_action_net {
1257 struct rhashtable egdev_ht;
1258};
1259
1260static unsigned int tcf_action_net_id;
1261
1262struct tcf_action_egdev_cb {
1263 struct list_head list;
1264 tc_setup_cb_t *cb;
1265 void *cb_priv;
1266};
1267
1268struct tcf_action_egdev {
1269 struct rhash_head ht_node;
1270 const struct net_device *dev;
1271 unsigned int refcnt;
1272 struct list_head cb_list;
1273};
1274
1275static const struct rhashtable_params tcf_action_egdev_ht_params = {
1276 .key_offset = offsetof(struct tcf_action_egdev, dev),
1277 .head_offset = offsetof(struct tcf_action_egdev, ht_node),
1278 .key_len = sizeof(const struct net_device *),
1279};
1280
1281static struct tcf_action_egdev *
1282tcf_action_egdev_lookup(const struct net_device *dev)
1283{
1284 struct net *net = dev_net(dev);
1285 struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
1286
1287 return rhashtable_lookup_fast(&tan->egdev_ht, &dev,
1288 tcf_action_egdev_ht_params);
1289}
1290
1291static struct tcf_action_egdev *
1292tcf_action_egdev_get(const struct net_device *dev)
1293{
1294 struct tcf_action_egdev *egdev;
1295 struct tcf_action_net *tan;
1296
1297 egdev = tcf_action_egdev_lookup(dev);
1298 if (egdev)
1299 goto inc_ref;
1300
1301 egdev = kzalloc(sizeof(*egdev), GFP_KERNEL);
1302 if (!egdev)
1303 return NULL;
1304 INIT_LIST_HEAD(&egdev->cb_list);
1305 egdev->dev = dev;
1306 tan = net_generic(dev_net(dev), tcf_action_net_id);
1307 rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node,
1308 tcf_action_egdev_ht_params);
1309
1310inc_ref:
1311 egdev->refcnt++;
1312 return egdev;
1313}
1314
1315static void tcf_action_egdev_put(struct tcf_action_egdev *egdev)
1316{
1317 struct tcf_action_net *tan;
1318
1319 if (--egdev->refcnt)
1320 return;
1321 tan = net_generic(dev_net(egdev->dev), tcf_action_net_id);
1322 rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node,
1323 tcf_action_egdev_ht_params);
1324 kfree(egdev);
1325}
1326
1327static struct tcf_action_egdev_cb *
1328tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev,
1329 tc_setup_cb_t *cb, void *cb_priv)
1330{
1331 struct tcf_action_egdev_cb *egdev_cb;
1332
1333 list_for_each_entry(egdev_cb, &egdev->cb_list, list)
1334 if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv)
1335 return egdev_cb;
1336 return NULL;
1337}
1338
1339static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev,
1340 enum tc_setup_type type,
1341 void *type_data, bool err_stop)
1342{
1343 struct tcf_action_egdev_cb *egdev_cb;
1344 int ok_count = 0;
1345 int err;
1346
1347 list_for_each_entry(egdev_cb, &egdev->cb_list, list) {
1348 err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv);
1349 if (err) {
1350 if (err_stop)
1351 return err;
1352 } else {
1353 ok_count++;
1354 }
1355 }
1356 return ok_count;
1357}
1358
1359static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev,
1360 tc_setup_cb_t *cb, void *cb_priv)
1361{
1362 struct tcf_action_egdev_cb *egdev_cb;
1363
1364 egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
1365 if (WARN_ON(egdev_cb))
1366 return -EEXIST;
1367 egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL);
1368 if (!egdev_cb)
1369 return -ENOMEM;
1370 egdev_cb->cb = cb;
1371 egdev_cb->cb_priv = cb_priv;
1372 list_add(&egdev_cb->list, &egdev->cb_list);
1373 return 0;
1374}
1375
1376static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev,
1377 tc_setup_cb_t *cb, void *cb_priv)
1378{
1379 struct tcf_action_egdev_cb *egdev_cb;
1380
1381 egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
1382 if (WARN_ON(!egdev_cb))
1383 return;
1384 list_del(&egdev_cb->list);
1385 kfree(egdev_cb);
1386}
1387
1388static int __tc_setup_cb_egdev_register(const struct net_device *dev,
1389 tc_setup_cb_t *cb, void *cb_priv)
1390{
1391 struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev);
1392 int err;
1393
1394 if (!egdev)
1395 return -ENOMEM;
1396 err = tcf_action_egdev_cb_add(egdev, cb, cb_priv);
1397 if (err)
1398 goto err_cb_add;
1399 return 0;
1400
1401err_cb_add:
1402 tcf_action_egdev_put(egdev);
1403 return err;
1404}
1405int tc_setup_cb_egdev_register(const struct net_device *dev,
1406 tc_setup_cb_t *cb, void *cb_priv)
1407{
1408 int err;
1409
1410 rtnl_lock();
1411 err = __tc_setup_cb_egdev_register(dev, cb, cb_priv);
1412 rtnl_unlock();
1413 return err;
1414}
1415EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register);
1416
1417static void __tc_setup_cb_egdev_unregister(const struct net_device *dev,
1418 tc_setup_cb_t *cb, void *cb_priv)
1419{
1420 struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
1421
1422 if (WARN_ON(!egdev))
1423 return;
1424 tcf_action_egdev_cb_del(egdev, cb, cb_priv);
1425 tcf_action_egdev_put(egdev);
1426}
1427void tc_setup_cb_egdev_unregister(const struct net_device *dev,
1428 tc_setup_cb_t *cb, void *cb_priv)
1429{
1430 rtnl_lock();
1431 __tc_setup_cb_egdev_unregister(dev, cb, cb_priv);
1432 rtnl_unlock();
1433}
1434EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister);
1435
1436int tc_setup_cb_egdev_call(const struct net_device *dev,
1437 enum tc_setup_type type, void *type_data,
1438 bool err_stop)
1439{
1440 struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
1441
1442 if (!egdev)
1443 return 0;
1444 return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop);
1445}
1446EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);
1447
1448static __net_init int tcf_action_net_init(struct net *net)
1449{
1450 struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
1451
1452 return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params);
1453}
1454
1455static void __net_exit tcf_action_net_exit(struct net *net)
1456{
1457 struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
1458
1459 rhashtable_destroy(&tan->egdev_ht);
1460}
1461
1462static struct pernet_operations tcf_action_net_ops = {
1463 .init = tcf_action_net_init,
1464 .exit = tcf_action_net_exit,
1465 .id = &tcf_action_net_id,
1466 .size = sizeof(struct tcf_action_net),
1467};
1468
1254static int __init tc_action_init(void) 1469static int __init tc_action_init(void)
1255{ 1470{
1471 int err;
1472
1473 err = register_pernet_subsys(&tcf_action_net_ops);
1474 if (err)
1475 return err;
1476
1256 rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0); 1477 rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
1257 rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0); 1478 rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
1258 rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, 1479 rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index c0c707eb2c96..5ef8ce8c83d4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -49,11 +49,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
49 filter = rcu_dereference(prog->filter); 49 filter = rcu_dereference(prog->filter);
50 if (at_ingress) { 50 if (at_ingress) {
51 __skb_push(skb, skb->mac_len); 51 __skb_push(skb, skb->mac_len);
52 bpf_compute_data_end(skb); 52 bpf_compute_data_pointers(skb);
53 filter_res = BPF_PROG_RUN(filter, skb); 53 filter_res = BPF_PROG_RUN(filter, skb);
54 __skb_pull(skb, skb->mac_len); 54 __skb_pull(skb, skb->mac_len);
55 } else { 55 } else {
56 bpf_compute_data_end(skb); 56 bpf_compute_data_pointers(skb);
57 filter_res = BPF_PROG_RUN(filter, skb); 57 filter_res = BPF_PROG_RUN(filter, skb);
58 } 58 }
59 rcu_read_unlock(); 59 rcu_read_unlock();
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 8ccd35825b6b..3007cb1310ea 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -248,6 +248,22 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len)
248 return ret; 248 return ret;
249} 249}
250 250
251#ifdef CONFIG_MODULES
252static const char *ife_meta_id2name(u32 metaid)
253{
254 switch (metaid) {
255 case IFE_META_SKBMARK:
256 return "skbmark";
257 case IFE_META_PRIO:
258 return "skbprio";
259 case IFE_META_TCINDEX:
260 return "tcindex";
261 default:
262 return "unknown";
263 }
264}
265#endif
266
251/* called when adding new meta information 267/* called when adding new meta information
252 * under ife->tcf_lock for existing action 268 * under ife->tcf_lock for existing action
253*/ 269*/
@@ -263,7 +279,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
263 if (exists) 279 if (exists)
264 spin_unlock_bh(&ife->tcf_lock); 280 spin_unlock_bh(&ife->tcf_lock);
265 rtnl_unlock(); 281 rtnl_unlock();
266 request_module("ifemeta%u", metaid); 282 request_module("ife-meta-%s", ife_meta_id2name(metaid));
267 rtnl_lock(); 283 rtnl_lock();
268 if (exists) 284 if (exists)
269 spin_lock_bh(&ife->tcf_lock); 285 spin_lock_bh(&ife->tcf_lock);
@@ -392,10 +408,14 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind)
392static void tcf_ife_cleanup(struct tc_action *a, int bind) 408static void tcf_ife_cleanup(struct tc_action *a, int bind)
393{ 409{
394 struct tcf_ife_info *ife = to_ife(a); 410 struct tcf_ife_info *ife = to_ife(a);
411 struct tcf_ife_params *p;
395 412
396 spin_lock_bh(&ife->tcf_lock); 413 spin_lock_bh(&ife->tcf_lock);
397 _tcf_ife_cleanup(a, bind); 414 _tcf_ife_cleanup(a, bind);
398 spin_unlock_bh(&ife->tcf_lock); 415 spin_unlock_bh(&ife->tcf_lock);
416
417 p = rcu_dereference_protected(ife->params, 1);
418 kfree_rcu(p, rcu);
399} 419}
400 420
401/* under ife->tcf_lock for existing action */ 421/* under ife->tcf_lock for existing action */
@@ -432,6 +452,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
432 struct tc_action_net *tn = net_generic(net, ife_net_id); 452 struct tc_action_net *tn = net_generic(net, ife_net_id);
433 struct nlattr *tb[TCA_IFE_MAX + 1]; 453 struct nlattr *tb[TCA_IFE_MAX + 1];
434 struct nlattr *tb2[IFE_META_MAX + 1]; 454 struct nlattr *tb2[IFE_META_MAX + 1];
455 struct tcf_ife_params *p, *p_old;
435 struct tcf_ife_info *ife; 456 struct tcf_ife_info *ife;
436 u16 ife_type = ETH_P_IFE; 457 u16 ife_type = ETH_P_IFE;
437 struct tc_ife *parm; 458 struct tc_ife *parm;
@@ -450,24 +471,41 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
450 471
451 parm = nla_data(tb[TCA_IFE_PARMS]); 472 parm = nla_data(tb[TCA_IFE_PARMS]);
452 473
474 /* IFE_DECODE is 0 and indicates the opposite of IFE_ENCODE because
475 * they cannot run as the same time. Check on all other values which
476 * are not supported right now.
477 */
478 if (parm->flags & ~IFE_ENCODE)
479 return -EINVAL;
480
481 p = kzalloc(sizeof(*p), GFP_KERNEL);
482 if (!p)
483 return -ENOMEM;
484
453 exists = tcf_idr_check(tn, parm->index, a, bind); 485 exists = tcf_idr_check(tn, parm->index, a, bind);
454 if (exists && bind) 486 if (exists && bind) {
487 kfree(p);
455 return 0; 488 return 0;
489 }
456 490
457 if (!exists) { 491 if (!exists) {
458 ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops, 492 ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
459 bind, false); 493 bind, true);
460 if (ret) 494 if (ret) {
495 kfree(p);
461 return ret; 496 return ret;
497 }
462 ret = ACT_P_CREATED; 498 ret = ACT_P_CREATED;
463 } else { 499 } else {
464 tcf_idr_release(*a, bind); 500 tcf_idr_release(*a, bind);
465 if (!ovr) 501 if (!ovr) {
502 kfree(p);
466 return -EEXIST; 503 return -EEXIST;
504 }
467 } 505 }
468 506
469 ife = to_ife(*a); 507 ife = to_ife(*a);
470 ife->flags = parm->flags; 508 p->flags = parm->flags;
471 509
472 if (parm->flags & IFE_ENCODE) { 510 if (parm->flags & IFE_ENCODE) {
473 if (tb[TCA_IFE_TYPE]) 511 if (tb[TCA_IFE_TYPE])
@@ -478,24 +516,25 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
478 saddr = nla_data(tb[TCA_IFE_SMAC]); 516 saddr = nla_data(tb[TCA_IFE_SMAC]);
479 } 517 }
480 518
481 if (exists)
482 spin_lock_bh(&ife->tcf_lock);
483 ife->tcf_action = parm->action; 519 ife->tcf_action = parm->action;
484 520
485 if (parm->flags & IFE_ENCODE) { 521 if (parm->flags & IFE_ENCODE) {
486 if (daddr) 522 if (daddr)
487 ether_addr_copy(ife->eth_dst, daddr); 523 ether_addr_copy(p->eth_dst, daddr);
488 else 524 else
489 eth_zero_addr(ife->eth_dst); 525 eth_zero_addr(p->eth_dst);
490 526
491 if (saddr) 527 if (saddr)
492 ether_addr_copy(ife->eth_src, saddr); 528 ether_addr_copy(p->eth_src, saddr);
493 else 529 else
494 eth_zero_addr(ife->eth_src); 530 eth_zero_addr(p->eth_src);
495 531
496 ife->eth_type = ife_type; 532 p->eth_type = ife_type;
497 } 533 }
498 534
535 if (exists)
536 spin_lock_bh(&ife->tcf_lock);
537
499 if (ret == ACT_P_CREATED) 538 if (ret == ACT_P_CREATED)
500 INIT_LIST_HEAD(&ife->metalist); 539 INIT_LIST_HEAD(&ife->metalist);
501 540
@@ -511,6 +550,7 @@ metadata_parse_err:
511 550
512 if (exists) 551 if (exists)
513 spin_unlock_bh(&ife->tcf_lock); 552 spin_unlock_bh(&ife->tcf_lock);
553 kfree(p);
514 return err; 554 return err;
515 } 555 }
516 556
@@ -531,6 +571,7 @@ metadata_parse_err:
531 571
532 if (exists) 572 if (exists)
533 spin_unlock_bh(&ife->tcf_lock); 573 spin_unlock_bh(&ife->tcf_lock);
574 kfree(p);
534 return err; 575 return err;
535 } 576 }
536 } 577 }
@@ -538,6 +579,11 @@ metadata_parse_err:
538 if (exists) 579 if (exists)
539 spin_unlock_bh(&ife->tcf_lock); 580 spin_unlock_bh(&ife->tcf_lock);
540 581
582 p_old = rtnl_dereference(ife->params);
583 rcu_assign_pointer(ife->params, p);
584 if (p_old)
585 kfree_rcu(p_old, rcu);
586
541 if (ret == ACT_P_CREATED) 587 if (ret == ACT_P_CREATED)
542 tcf_idr_insert(tn, *a); 588 tcf_idr_insert(tn, *a);
543 589
@@ -549,12 +595,13 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
549{ 595{
550 unsigned char *b = skb_tail_pointer(skb); 596 unsigned char *b = skb_tail_pointer(skb);
551 struct tcf_ife_info *ife = to_ife(a); 597 struct tcf_ife_info *ife = to_ife(a);
598 struct tcf_ife_params *p = rtnl_dereference(ife->params);
552 struct tc_ife opt = { 599 struct tc_ife opt = {
553 .index = ife->tcf_index, 600 .index = ife->tcf_index,
554 .refcnt = ife->tcf_refcnt - ref, 601 .refcnt = ife->tcf_refcnt - ref,
555 .bindcnt = ife->tcf_bindcnt - bind, 602 .bindcnt = ife->tcf_bindcnt - bind,
556 .action = ife->tcf_action, 603 .action = ife->tcf_action,
557 .flags = ife->flags, 604 .flags = p->flags,
558 }; 605 };
559 struct tcf_t t; 606 struct tcf_t t;
560 607
@@ -565,17 +612,17 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
565 if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD)) 612 if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD))
566 goto nla_put_failure; 613 goto nla_put_failure;
567 614
568 if (!is_zero_ether_addr(ife->eth_dst)) { 615 if (!is_zero_ether_addr(p->eth_dst)) {
569 if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, ife->eth_dst)) 616 if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, p->eth_dst))
570 goto nla_put_failure; 617 goto nla_put_failure;
571 } 618 }
572 619
573 if (!is_zero_ether_addr(ife->eth_src)) { 620 if (!is_zero_ether_addr(p->eth_src)) {
574 if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, ife->eth_src)) 621 if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, p->eth_src))
575 goto nla_put_failure; 622 goto nla_put_failure;
576 } 623 }
577 624
578 if (nla_put(skb, TCA_IFE_TYPE, 2, &ife->eth_type)) 625 if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type))
579 goto nla_put_failure; 626 goto nla_put_failure;
580 627
581 if (dump_metalist(skb, ife)) { 628 if (dump_metalist(skb, ife)) {
@@ -617,19 +664,15 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
617 u8 *tlv_data; 664 u8 *tlv_data;
618 u16 metalen; 665 u16 metalen;
619 666
620 spin_lock(&ife->tcf_lock); 667 bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
621 bstats_update(&ife->tcf_bstats, skb);
622 tcf_lastuse_update(&ife->tcf_tm); 668 tcf_lastuse_update(&ife->tcf_tm);
623 spin_unlock(&ife->tcf_lock);
624 669
625 if (skb_at_tc_ingress(skb)) 670 if (skb_at_tc_ingress(skb))
626 skb_push(skb, skb->dev->hard_header_len); 671 skb_push(skb, skb->dev->hard_header_len);
627 672
628 tlv_data = ife_decode(skb, &metalen); 673 tlv_data = ife_decode(skb, &metalen);
629 if (unlikely(!tlv_data)) { 674 if (unlikely(!tlv_data)) {
630 spin_lock(&ife->tcf_lock); 675 qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
631 ife->tcf_qstats.drops++;
632 spin_unlock(&ife->tcf_lock);
633 return TC_ACT_SHOT; 676 return TC_ACT_SHOT;
634 } 677 }
635 678
@@ -647,14 +690,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
647 */ 690 */
648 pr_info_ratelimited("Unknown metaid %d dlen %d\n", 691 pr_info_ratelimited("Unknown metaid %d dlen %d\n",
649 mtype, dlen); 692 mtype, dlen);
650 ife->tcf_qstats.overlimits++; 693 qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
651 } 694 }
652 } 695 }
653 696
654 if (WARN_ON(tlv_data != ifehdr_end)) { 697 if (WARN_ON(tlv_data != ifehdr_end)) {
655 spin_lock(&ife->tcf_lock); 698 qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
656 ife->tcf_qstats.drops++;
657 spin_unlock(&ife->tcf_lock);
658 return TC_ACT_SHOT; 699 return TC_ACT_SHOT;
659 } 700 }
660 701
@@ -683,7 +724,7 @@ static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife)
683} 724}
684 725
685static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, 726static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
686 struct tcf_result *res) 727 struct tcf_result *res, struct tcf_ife_params *p)
687{ 728{
688 struct tcf_ife_info *ife = to_ife(a); 729 struct tcf_ife_info *ife = to_ife(a);
689 int action = ife->tcf_action; 730 int action = ife->tcf_action;
@@ -706,23 +747,20 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
706 exceed_mtu = true; 747 exceed_mtu = true;
707 } 748 }
708 749
709 spin_lock(&ife->tcf_lock); 750 bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
710 bstats_update(&ife->tcf_bstats, skb);
711 tcf_lastuse_update(&ife->tcf_tm); 751 tcf_lastuse_update(&ife->tcf_tm);
712 752
713 if (!metalen) { /* no metadata to send */ 753 if (!metalen) { /* no metadata to send */
714 /* abuse overlimits to count when we allow packet 754 /* abuse overlimits to count when we allow packet
715 * with no metadata 755 * with no metadata
716 */ 756 */
717 ife->tcf_qstats.overlimits++; 757 qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
718 spin_unlock(&ife->tcf_lock);
719 return action; 758 return action;
720 } 759 }
721 /* could be stupid policy setup or mtu config 760 /* could be stupid policy setup or mtu config
722 * so lets be conservative.. */ 761 * so lets be conservative.. */
723 if ((action == TC_ACT_SHOT) || exceed_mtu) { 762 if ((action == TC_ACT_SHOT) || exceed_mtu) {
724 ife->tcf_qstats.drops++; 763 qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
725 spin_unlock(&ife->tcf_lock);
726 return TC_ACT_SHOT; 764 return TC_ACT_SHOT;
727 } 765 }
728 766
@@ -731,6 +769,8 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
731 769
732 ife_meta = ife_encode(skb, metalen); 770 ife_meta = ife_encode(skb, metalen);
733 771
772 spin_lock(&ife->tcf_lock);
773
734 /* XXX: we dont have a clever way of telling encode to 774 /* XXX: we dont have a clever way of telling encode to
735 * not repeat some of the computations that are done by 775 * not repeat some of the computations that are done by
736 * ops->presence_check... 776 * ops->presence_check...
@@ -742,25 +782,24 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
742 } 782 }
743 if (err < 0) { 783 if (err < 0) {
744 /* too corrupt to keep around if overwritten */ 784 /* too corrupt to keep around if overwritten */
745 ife->tcf_qstats.drops++;
746 spin_unlock(&ife->tcf_lock); 785 spin_unlock(&ife->tcf_lock);
786 qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
747 return TC_ACT_SHOT; 787 return TC_ACT_SHOT;
748 } 788 }
749 skboff += err; 789 skboff += err;
750 } 790 }
791 spin_unlock(&ife->tcf_lock);
751 oethh = (struct ethhdr *)skb->data; 792 oethh = (struct ethhdr *)skb->data;
752 793
753 if (!is_zero_ether_addr(ife->eth_src)) 794 if (!is_zero_ether_addr(p->eth_src))
754 ether_addr_copy(oethh->h_source, ife->eth_src); 795 ether_addr_copy(oethh->h_source, p->eth_src);
755 if (!is_zero_ether_addr(ife->eth_dst)) 796 if (!is_zero_ether_addr(p->eth_dst))
756 ether_addr_copy(oethh->h_dest, ife->eth_dst); 797 ether_addr_copy(oethh->h_dest, p->eth_dst);
757 oethh->h_proto = htons(ife->eth_type); 798 oethh->h_proto = htons(p->eth_type);
758 799
759 if (skb_at_tc_ingress(skb)) 800 if (skb_at_tc_ingress(skb))
760 skb_pull(skb, skb->dev->hard_header_len); 801 skb_pull(skb, skb->dev->hard_header_len);
761 802
762 spin_unlock(&ife->tcf_lock);
763
764 return action; 803 return action;
765} 804}
766 805
@@ -768,21 +807,19 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
768 struct tcf_result *res) 807 struct tcf_result *res)
769{ 808{
770 struct tcf_ife_info *ife = to_ife(a); 809 struct tcf_ife_info *ife = to_ife(a);
810 struct tcf_ife_params *p;
811 int ret;
812
813 rcu_read_lock();
814 p = rcu_dereference(ife->params);
815 if (p->flags & IFE_ENCODE) {
816 ret = tcf_ife_encode(skb, a, res, p);
817 rcu_read_unlock();
818 return ret;
819 }
820 rcu_read_unlock();
771 821
772 if (ife->flags & IFE_ENCODE) 822 return tcf_ife_decode(skb, a, res);
773 return tcf_ife_encode(skb, a, res);
774
775 if (!(ife->flags & IFE_ENCODE))
776 return tcf_ife_decode(skb, a, res);
777
778 pr_info_ratelimited("unknown failure(policy neither de/encode\n");
779 spin_lock(&ife->tcf_lock);
780 bstats_update(&ife->tcf_bstats, skb);
781 tcf_lastuse_update(&ife->tcf_tm);
782 ife->tcf_qstats.drops++;
783 spin_unlock(&ife->tcf_lock);
784
785 return TC_ACT_SHOT;
786} 823}
787 824
788static int tcf_ife_walker(struct net *net, struct sk_buff *skb, 825static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 82892170ce4f..1e3f10e5da99 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -76,4 +76,4 @@ module_exit(ifemark_cleanup_module);
76MODULE_AUTHOR("Jamal Hadi Salim(2015)"); 76MODULE_AUTHOR("Jamal Hadi Salim(2015)");
77MODULE_DESCRIPTION("Inter-FE skb mark metadata module"); 77MODULE_DESCRIPTION("Inter-FE skb mark metadata module");
78MODULE_LICENSE("GPL"); 78MODULE_LICENSE("GPL");
79MODULE_ALIAS_IFE_META(IFE_META_SKBMARK); 79MODULE_ALIAS_IFE_META("skbmark");
diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c
index 26bf4d86030b..4033f9fc4d4a 100644
--- a/net/sched/act_meta_skbprio.c
+++ b/net/sched/act_meta_skbprio.c
@@ -73,4 +73,4 @@ module_exit(ifeprio_cleanup_module);
73MODULE_AUTHOR("Jamal Hadi Salim(2015)"); 73MODULE_AUTHOR("Jamal Hadi Salim(2015)");
74MODULE_DESCRIPTION("Inter-FE skb prio metadata action"); 74MODULE_DESCRIPTION("Inter-FE skb prio metadata action");
75MODULE_LICENSE("GPL"); 75MODULE_LICENSE("GPL");
76MODULE_ALIAS_IFE_META(IFE_META_PRIO); 76MODULE_ALIAS_IFE_META("skbprio");
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 3b35774ce890..2ea1f26c9e96 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -76,4 +76,4 @@ module_exit(ifetc_index_cleanup_module);
76MODULE_AUTHOR("Jamal Hadi Salim(2016)"); 76MODULE_AUTHOR("Jamal Hadi Salim(2016)");
77MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module"); 77MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module");
78MODULE_LICENSE("GPL"); 78MODULE_LICENSE("GPL");
79MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX); 79MODULE_ALIAS_IFE_META("tcindex");
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 416627c66f08..8b3e59388480 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -140,6 +140,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
140 m->tcfm_eaction = parm->eaction; 140 m->tcfm_eaction = parm->eaction;
141 if (dev != NULL) { 141 if (dev != NULL) {
142 m->tcfm_ifindex = parm->ifindex; 142 m->tcfm_ifindex = parm->ifindex;
143 m->net = net;
143 if (ret != ACT_P_CREATED) 144 if (ret != ACT_P_CREATED)
144 dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); 145 dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
145 dev_hold(dev); 146 dev_hold(dev);
@@ -313,15 +314,11 @@ static struct notifier_block mirred_device_notifier = {
313 .notifier_call = mirred_device_event, 314 .notifier_call = mirred_device_event,
314}; 315};
315 316
316static int tcf_mirred_device(const struct tc_action *a, struct net *net, 317static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
317 struct net_device **mirred_dev)
318{ 318{
319 int ifindex = tcf_mirred_ifindex(a); 319 struct tcf_mirred *m = to_mirred(a);
320 320
321 *mirred_dev = __dev_get_by_index(net, ifindex); 321 return __dev_get_by_index(m->net, m->tcfm_ifindex);
322 if (!*mirred_dev)
323 return -EINVAL;
324 return 0;
325} 322}
326 323
327static struct tc_action_ops act_mirred_ops = { 324static struct tc_action_ops act_mirred_ops = {
@@ -336,7 +333,7 @@ static struct tc_action_ops act_mirred_ops = {
336 .walk = tcf_mirred_walker, 333 .walk = tcf_mirred_walker,
337 .lookup = tcf_mirred_search, 334 .lookup = tcf_mirred_search,
338 .size = sizeof(struct tcf_mirred), 335 .size = sizeof(struct tcf_mirred),
339 .get_dev = tcf_mirred_device, 336 .get_dev = tcf_mirred_get_dev,
340}; 337};
341 338
342static __net_init int mirred_init_net(struct net *net) 339static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 16eb067a8d8f..97f717a13ad5 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -26,14 +26,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
26 struct tcf_result *res) 26 struct tcf_result *res)
27{ 27{
28 struct tcf_vlan *v = to_vlan(a); 28 struct tcf_vlan *v = to_vlan(a);
29 struct tcf_vlan_params *p;
29 int action; 30 int action;
30 int err; 31 int err;
31 u16 tci; 32 u16 tci;
32 33
33 spin_lock(&v->tcf_lock);
34 tcf_lastuse_update(&v->tcf_tm); 34 tcf_lastuse_update(&v->tcf_tm);
35 bstats_update(&v->tcf_bstats, skb); 35 bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
36 action = v->tcf_action;
37 36
38 /* Ensure 'data' points at mac_header prior calling vlan manipulating 37 /* Ensure 'data' points at mac_header prior calling vlan manipulating
39 * functions. 38 * functions.
@@ -41,15 +40,21 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
41 if (skb_at_tc_ingress(skb)) 40 if (skb_at_tc_ingress(skb))
42 skb_push_rcsum(skb, skb->mac_len); 41 skb_push_rcsum(skb, skb->mac_len);
43 42
44 switch (v->tcfv_action) { 43 rcu_read_lock();
44
45 action = READ_ONCE(v->tcf_action);
46
47 p = rcu_dereference(v->vlan_p);
48
49 switch (p->tcfv_action) {
45 case TCA_VLAN_ACT_POP: 50 case TCA_VLAN_ACT_POP:
46 err = skb_vlan_pop(skb); 51 err = skb_vlan_pop(skb);
47 if (err) 52 if (err)
48 goto drop; 53 goto drop;
49 break; 54 break;
50 case TCA_VLAN_ACT_PUSH: 55 case TCA_VLAN_ACT_PUSH:
51 err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid | 56 err = skb_vlan_push(skb, p->tcfv_push_proto, p->tcfv_push_vid |
52 (v->tcfv_push_prio << VLAN_PRIO_SHIFT)); 57 (p->tcfv_push_prio << VLAN_PRIO_SHIFT));
53 if (err) 58 if (err)
54 goto drop; 59 goto drop;
55 break; 60 break;
@@ -68,14 +73,14 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
68 goto drop; 73 goto drop;
69 } 74 }
70 /* replace the vid */ 75 /* replace the vid */
71 tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid; 76 tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid;
72 /* replace prio bits, if tcfv_push_prio specified */ 77 /* replace prio bits, if tcfv_push_prio specified */
73 if (v->tcfv_push_prio) { 78 if (p->tcfv_push_prio) {
74 tci &= ~VLAN_PRIO_MASK; 79 tci &= ~VLAN_PRIO_MASK;
75 tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT; 80 tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT;
76 } 81 }
77 /* put updated tci as hwaccel tag */ 82 /* put updated tci as hwaccel tag */
78 __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci); 83 __vlan_hwaccel_put_tag(skb, p->tcfv_push_proto, tci);
79 break; 84 break;
80 default: 85 default:
81 BUG(); 86 BUG();
@@ -85,12 +90,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
85 90
86drop: 91drop:
87 action = TC_ACT_SHOT; 92 action = TC_ACT_SHOT;
88 v->tcf_qstats.drops++; 93 qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
94
89unlock: 95unlock:
96 rcu_read_unlock();
90 if (skb_at_tc_ingress(skb)) 97 if (skb_at_tc_ingress(skb))
91 skb_pull_rcsum(skb, skb->mac_len); 98 skb_pull_rcsum(skb, skb->mac_len);
92 99
93 spin_unlock(&v->tcf_lock);
94 return action; 100 return action;
95} 101}
96 102
@@ -107,6 +113,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
107{ 113{
108 struct tc_action_net *tn = net_generic(net, vlan_net_id); 114 struct tc_action_net *tn = net_generic(net, vlan_net_id);
109 struct nlattr *tb[TCA_VLAN_MAX + 1]; 115 struct nlattr *tb[TCA_VLAN_MAX + 1];
116 struct tcf_vlan_params *p, *p_old;
110 struct tc_vlan *parm; 117 struct tc_vlan *parm;
111 struct tcf_vlan *v; 118 struct tcf_vlan *v;
112 int action; 119 int action;
@@ -172,7 +179,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
172 179
173 if (!exists) { 180 if (!exists) {
174 ret = tcf_idr_create(tn, parm->index, est, a, 181 ret = tcf_idr_create(tn, parm->index, est, a,
175 &act_vlan_ops, bind, false); 182 &act_vlan_ops, bind, true);
176 if (ret) 183 if (ret)
177 return ret; 184 return ret;
178 185
@@ -185,46 +192,67 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
185 192
186 v = to_vlan(*a); 193 v = to_vlan(*a);
187 194
188 spin_lock_bh(&v->tcf_lock); 195 ASSERT_RTNL();
189 196 p = kzalloc(sizeof(*p), GFP_KERNEL);
190 v->tcfv_action = action; 197 if (!p) {
191 v->tcfv_push_vid = push_vid; 198 if (ovr)
192 v->tcfv_push_prio = push_prio; 199 tcf_idr_release(*a, bind);
193 v->tcfv_push_proto = push_proto; 200 return -ENOMEM;
201 }
194 202
195 v->tcf_action = parm->action; 203 v->tcf_action = parm->action;
196 204
197 spin_unlock_bh(&v->tcf_lock); 205 p_old = rtnl_dereference(v->vlan_p);
206
207 p->tcfv_action = action;
208 p->tcfv_push_vid = push_vid;
209 p->tcfv_push_prio = push_prio;
210 p->tcfv_push_proto = push_proto;
211
212 rcu_assign_pointer(v->vlan_p, p);
213
214 if (p_old)
215 kfree_rcu(p_old, rcu);
198 216
199 if (ret == ACT_P_CREATED) 217 if (ret == ACT_P_CREATED)
200 tcf_idr_insert(tn, *a); 218 tcf_idr_insert(tn, *a);
201 return ret; 219 return ret;
202} 220}
203 221
222static void tcf_vlan_cleanup(struct tc_action *a, int bind)
223{
224 struct tcf_vlan *v = to_vlan(a);
225 struct tcf_vlan_params *p;
226
227 p = rcu_dereference_protected(v->vlan_p, 1);
228 kfree_rcu(p, rcu);
229}
230
204static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, 231static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
205 int bind, int ref) 232 int bind, int ref)
206{ 233{
207 unsigned char *b = skb_tail_pointer(skb); 234 unsigned char *b = skb_tail_pointer(skb);
208 struct tcf_vlan *v = to_vlan(a); 235 struct tcf_vlan *v = to_vlan(a);
236 struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p);
209 struct tc_vlan opt = { 237 struct tc_vlan opt = {
210 .index = v->tcf_index, 238 .index = v->tcf_index,
211 .refcnt = v->tcf_refcnt - ref, 239 .refcnt = v->tcf_refcnt - ref,
212 .bindcnt = v->tcf_bindcnt - bind, 240 .bindcnt = v->tcf_bindcnt - bind,
213 .action = v->tcf_action, 241 .action = v->tcf_action,
214 .v_action = v->tcfv_action, 242 .v_action = p->tcfv_action,
215 }; 243 };
216 struct tcf_t t; 244 struct tcf_t t;
217 245
218 if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) 246 if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt))
219 goto nla_put_failure; 247 goto nla_put_failure;
220 248
221 if ((v->tcfv_action == TCA_VLAN_ACT_PUSH || 249 if ((p->tcfv_action == TCA_VLAN_ACT_PUSH ||
222 v->tcfv_action == TCA_VLAN_ACT_MODIFY) && 250 p->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
223 (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || 251 (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) ||
224 nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, 252 nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
225 v->tcfv_push_proto) || 253 p->tcfv_push_proto) ||
226 (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, 254 (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY,
227 v->tcfv_push_prio)))) 255 p->tcfv_push_prio))))
228 goto nla_put_failure; 256 goto nla_put_failure;
229 257
230 tcf_tm_dump(&t, &v->tcf_tm); 258 tcf_tm_dump(&t, &v->tcf_tm);
@@ -260,6 +288,7 @@ static struct tc_action_ops act_vlan_ops = {
260 .act = tcf_vlan, 288 .act = tcf_vlan,
261 .dump = tcf_vlan_dump, 289 .dump = tcf_vlan_dump,
262 .init = tcf_vlan_init, 290 .init = tcf_vlan_init,
291 .cleanup = tcf_vlan_cleanup,
263 .walk = tcf_vlan_walker, 292 .walk = tcf_vlan_walker,
264 .lookup = tcf_vlan_search, 293 .lookup = tcf_vlan_search,
265 .size = sizeof(struct tcf_vlan), 294 .size = sizeof(struct tcf_vlan),
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ecbb019efcbd..ab255b421781 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -195,12 +195,19 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
195 return chain; 195 return chain;
196} 196}
197 197
198static void tcf_chain_head_change(struct tcf_chain *chain,
199 struct tcf_proto *tp_head)
200{
201 if (chain->chain_head_change)
202 chain->chain_head_change(tp_head,
203 chain->chain_head_change_priv);
204}
205
198static void tcf_chain_flush(struct tcf_chain *chain) 206static void tcf_chain_flush(struct tcf_chain *chain)
199{ 207{
200 struct tcf_proto *tp; 208 struct tcf_proto *tp;
201 209
202 if (chain->p_filter_chain) 210 tcf_chain_head_change(chain, NULL);
203 RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
204 while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { 211 while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
205 RCU_INIT_POINTER(chain->filter_chain, tp->next); 212 RCU_INIT_POINTER(chain->filter_chain, tp->next);
206 tcf_chain_put(chain); 213 tcf_chain_put(chain);
@@ -242,15 +249,35 @@ void tcf_chain_put(struct tcf_chain *chain)
242} 249}
243EXPORT_SYMBOL(tcf_chain_put); 250EXPORT_SYMBOL(tcf_chain_put);
244 251
245static void 252static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
246tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain, 253 struct tcf_block_ext_info *ei,
247 struct tcf_proto __rcu **p_filter_chain) 254 enum tc_block_command command)
248{ 255{
249 chain->p_filter_chain = p_filter_chain; 256 struct net_device *dev = q->dev_queue->dev;
257 struct tc_block_offload bo = {};
258
259 if (!dev->netdev_ops->ndo_setup_tc)
260 return;
261 bo.command = command;
262 bo.binder_type = ei->binder_type;
263 bo.block = block;
264 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
250} 265}
251 266
252int tcf_block_get(struct tcf_block **p_block, 267static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
253 struct tcf_proto __rcu **p_filter_chain) 268 struct tcf_block_ext_info *ei)
269{
270 tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
271}
272
273static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
274 struct tcf_block_ext_info *ei)
275{
276 tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
277}
278
279int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
280 struct tcf_block_ext_info *ei)
254{ 281{
255 struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL); 282 struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
256 struct tcf_chain *chain; 283 struct tcf_chain *chain;
@@ -259,13 +286,20 @@ int tcf_block_get(struct tcf_block **p_block,
259 if (!block) 286 if (!block)
260 return -ENOMEM; 287 return -ENOMEM;
261 INIT_LIST_HEAD(&block->chain_list); 288 INIT_LIST_HEAD(&block->chain_list);
289 INIT_LIST_HEAD(&block->cb_list);
290
262 /* Create chain 0 by default, it has to be always present. */ 291 /* Create chain 0 by default, it has to be always present. */
263 chain = tcf_chain_create(block, 0); 292 chain = tcf_chain_create(block, 0);
264 if (!chain) { 293 if (!chain) {
265 err = -ENOMEM; 294 err = -ENOMEM;
266 goto err_chain_create; 295 goto err_chain_create;
267 } 296 }
268 tcf_chain_filter_chain_ptr_set(chain, p_filter_chain); 297 WARN_ON(!ei->chain_head_change);
298 chain->chain_head_change = ei->chain_head_change;
299 chain->chain_head_change_priv = ei->chain_head_change_priv;
300 block->net = qdisc_net(q);
301 block->q = q;
302 tcf_block_offload_bind(block, q, ei);
269 *p_block = block; 303 *p_block = block;
270 return 0; 304 return 0;
271 305
@@ -273,6 +307,26 @@ err_chain_create:
273 kfree(block); 307 kfree(block);
274 return err; 308 return err;
275} 309}
310EXPORT_SYMBOL(tcf_block_get_ext);
311
312static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
313{
314 struct tcf_proto __rcu **p_filter_chain = priv;
315
316 rcu_assign_pointer(*p_filter_chain, tp_head);
317}
318
319int tcf_block_get(struct tcf_block **p_block,
320 struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
321{
322 struct tcf_block_ext_info ei = {
323 .chain_head_change = tcf_chain_head_change_dflt,
324 .chain_head_change_priv = p_filter_chain,
325 };
326
327 WARN_ON(!p_filter_chain);
328 return tcf_block_get_ext(p_block, q, &ei);
329}
276EXPORT_SYMBOL(tcf_block_get); 330EXPORT_SYMBOL(tcf_block_get);
277 331
278static void tcf_block_put_final(struct work_struct *work) 332static void tcf_block_put_final(struct work_struct *work)
@@ -292,25 +346,140 @@ static void tcf_block_put_final(struct work_struct *work)
292 * actions should be all removed after flushing. However, filters are now 346 * actions should be all removed after flushing. However, filters are now
293 * destroyed in tc filter workqueue with RTNL lock, they can not race here. 347 * destroyed in tc filter workqueue with RTNL lock, they can not race here.
294 */ 348 */
295void tcf_block_put(struct tcf_block *block) 349void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
350 struct tcf_block_ext_info *ei)
296{ 351{
297 struct tcf_chain *chain, *tmp; 352 struct tcf_chain *chain, *tmp;
298 353
299 if (!block)
300 return;
301
302 list_for_each_entry_safe(chain, tmp, &block->chain_list, list) 354 list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
303 tcf_chain_flush(chain); 355 tcf_chain_flush(chain);
304 356
357 tcf_block_offload_unbind(block, q, ei);
358
305 INIT_WORK(&block->work, tcf_block_put_final); 359 INIT_WORK(&block->work, tcf_block_put_final);
306 /* Wait for RCU callbacks to release the reference count and make 360 /* Wait for existing RCU callbacks to cool down, make sure their works
307 * sure their works have been queued before this. 361 * have been queued before this. We can not flush pending works here
362 * because we are holding the RTNL lock.
308 */ 363 */
309 rcu_barrier(); 364 rcu_barrier();
310 tcf_queue_work(&block->work); 365 tcf_queue_work(&block->work);
311} 366}
367EXPORT_SYMBOL(tcf_block_put_ext);
368
369void tcf_block_put(struct tcf_block *block)
370{
371 struct tcf_block_ext_info ei = {0, };
372
373 if (!block)
374 return;
375 tcf_block_put_ext(block, block->q, &ei);
376}
377
312EXPORT_SYMBOL(tcf_block_put); 378EXPORT_SYMBOL(tcf_block_put);
313 379
380struct tcf_block_cb {
381 struct list_head list;
382 tc_setup_cb_t *cb;
383 void *cb_ident;
384 void *cb_priv;
385 unsigned int refcnt;
386};
387
388void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
389{
390 return block_cb->cb_priv;
391}
392EXPORT_SYMBOL(tcf_block_cb_priv);
393
394struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
395 tc_setup_cb_t *cb, void *cb_ident)
396{ struct tcf_block_cb *block_cb;
397
398 list_for_each_entry(block_cb, &block->cb_list, list)
399 if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
400 return block_cb;
401 return NULL;
402}
403EXPORT_SYMBOL(tcf_block_cb_lookup);
404
405void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
406{
407 block_cb->refcnt++;
408}
409EXPORT_SYMBOL(tcf_block_cb_incref);
410
411unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
412{
413 return --block_cb->refcnt;
414}
415EXPORT_SYMBOL(tcf_block_cb_decref);
416
417struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
418 tc_setup_cb_t *cb, void *cb_ident,
419 void *cb_priv)
420{
421 struct tcf_block_cb *block_cb;
422
423 block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
424 if (!block_cb)
425 return NULL;
426 block_cb->cb = cb;
427 block_cb->cb_ident = cb_ident;
428 block_cb->cb_priv = cb_priv;
429 list_add(&block_cb->list, &block->cb_list);
430 return block_cb;
431}
432EXPORT_SYMBOL(__tcf_block_cb_register);
433
434int tcf_block_cb_register(struct tcf_block *block,
435 tc_setup_cb_t *cb, void *cb_ident,
436 void *cb_priv)
437{
438 struct tcf_block_cb *block_cb;
439
440 block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
441 return block_cb ? 0 : -ENOMEM;
442}
443EXPORT_SYMBOL(tcf_block_cb_register);
444
445void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
446{
447 list_del(&block_cb->list);
448 kfree(block_cb);
449}
450EXPORT_SYMBOL(__tcf_block_cb_unregister);
451
452void tcf_block_cb_unregister(struct tcf_block *block,
453 tc_setup_cb_t *cb, void *cb_ident)
454{
455 struct tcf_block_cb *block_cb;
456
457 block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
458 if (!block_cb)
459 return;
460 __tcf_block_cb_unregister(block_cb);
461}
462EXPORT_SYMBOL(tcf_block_cb_unregister);
463
464static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
465 void *type_data, bool err_stop)
466{
467 struct tcf_block_cb *block_cb;
468 int ok_count = 0;
469 int err;
470
471 list_for_each_entry(block_cb, &block->cb_list, list) {
472 err = block_cb->cb(type, type_data, block_cb->cb_priv);
473 if (err) {
474 if (err_stop)
475 return err;
476 } else {
477 ok_count++;
478 }
479 }
480 return ok_count;
481}
482
314/* Main classifier routine: scans classifier chain attached 483/* Main classifier routine: scans classifier chain attached
315 * to this qdisc, (optionally) tests for protocol and asks 484 * to this qdisc, (optionally) tests for protocol and asks
316 * specific classifiers. 485 * specific classifiers.
@@ -379,9 +548,8 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
379 struct tcf_chain_info *chain_info, 548 struct tcf_chain_info *chain_info,
380 struct tcf_proto *tp) 549 struct tcf_proto *tp)
381{ 550{
382 if (chain->p_filter_chain && 551 if (*chain_info->pprev == chain->filter_chain)
383 *chain_info->pprev == chain->filter_chain) 552 tcf_chain_head_change(chain, tp);
384 rcu_assign_pointer(*chain->p_filter_chain, tp);
385 RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); 553 RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
386 rcu_assign_pointer(*chain_info->pprev, tp); 554 rcu_assign_pointer(*chain_info->pprev, tp);
387 tcf_chain_hold(chain); 555 tcf_chain_hold(chain);
@@ -393,8 +561,8 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
393{ 561{
394 struct tcf_proto *next = rtnl_dereference(chain_info->next); 562 struct tcf_proto *next = rtnl_dereference(chain_info->next);
395 563
396 if (chain->p_filter_chain && tp == chain->filter_chain) 564 if (tp == chain->filter_chain)
397 RCU_INIT_POINTER(*chain->p_filter_chain, next); 565 tcf_chain_head_change(chain, next);
398 RCU_INIT_POINTER(*chain_info->pprev, next); 566 RCU_INIT_POINTER(*chain_info->pprev, next);
399 tcf_chain_put(chain); 567 tcf_chain_put(chain);
400} 568}
@@ -427,8 +595,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
427} 595}
428 596
429static int tcf_fill_node(struct net *net, struct sk_buff *skb, 597static int tcf_fill_node(struct net *net, struct sk_buff *skb,
430 struct tcf_proto *tp, void *fh, u32 portid, 598 struct tcf_proto *tp, struct Qdisc *q, u32 parent,
431 u32 seq, u16 flags, int event) 599 void *fh, u32 portid, u32 seq, u16 flags, int event)
432{ 600{
433 struct tcmsg *tcm; 601 struct tcmsg *tcm;
434 struct nlmsghdr *nlh; 602 struct nlmsghdr *nlh;
@@ -441,8 +609,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
441 tcm->tcm_family = AF_UNSPEC; 609 tcm->tcm_family = AF_UNSPEC;
442 tcm->tcm__pad1 = 0; 610 tcm->tcm__pad1 = 0;
443 tcm->tcm__pad2 = 0; 611 tcm->tcm__pad2 = 0;
444 tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; 612 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
445 tcm->tcm_parent = tp->classid; 613 tcm->tcm_parent = parent;
446 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); 614 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
447 if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) 615 if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
448 goto nla_put_failure; 616 goto nla_put_failure;
@@ -465,6 +633,7 @@ nla_put_failure:
465 633
466static int tfilter_notify(struct net *net, struct sk_buff *oskb, 634static int tfilter_notify(struct net *net, struct sk_buff *oskb,
467 struct nlmsghdr *n, struct tcf_proto *tp, 635 struct nlmsghdr *n, struct tcf_proto *tp,
636 struct Qdisc *q, u32 parent,
468 void *fh, int event, bool unicast) 637 void *fh, int event, bool unicast)
469{ 638{
470 struct sk_buff *skb; 639 struct sk_buff *skb;
@@ -474,7 +643,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
474 if (!skb) 643 if (!skb)
475 return -ENOBUFS; 644 return -ENOBUFS;
476 645
477 if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 646 if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
478 n->nlmsg_flags, event) <= 0) { 647 n->nlmsg_flags, event) <= 0) {
479 kfree_skb(skb); 648 kfree_skb(skb);
480 return -EINVAL; 649 return -EINVAL;
@@ -489,6 +658,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
489 658
490static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, 659static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
491 struct nlmsghdr *n, struct tcf_proto *tp, 660 struct nlmsghdr *n, struct tcf_proto *tp,
661 struct Qdisc *q, u32 parent,
492 void *fh, bool unicast, bool *last) 662 void *fh, bool unicast, bool *last)
493{ 663{
494 struct sk_buff *skb; 664 struct sk_buff *skb;
@@ -499,7 +669,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
499 if (!skb) 669 if (!skb)
500 return -ENOBUFS; 670 return -ENOBUFS;
501 671
502 if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 672 if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
503 n->nlmsg_flags, RTM_DELTFILTER) <= 0) { 673 n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
504 kfree_skb(skb); 674 kfree_skb(skb);
505 return -EINVAL; 675 return -EINVAL;
@@ -519,6 +689,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
519} 689}
520 690
521static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, 691static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
692 struct Qdisc *q, u32 parent,
522 struct nlmsghdr *n, 693 struct nlmsghdr *n,
523 struct tcf_chain *chain, int event) 694 struct tcf_chain *chain, int event)
524{ 695{
@@ -526,7 +697,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
526 697
527 for (tp = rtnl_dereference(chain->filter_chain); 698 for (tp = rtnl_dereference(chain->filter_chain);
528 tp; tp = rtnl_dereference(tp->next)) 699 tp; tp = rtnl_dereference(tp->next))
529 tfilter_notify(net, oskb, n, tp, 0, event, false); 700 tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false);
530} 701}
531 702
532/* Add/change/delete/get a filter node */ 703/* Add/change/delete/get a filter node */
@@ -645,7 +816,8 @@ replay:
645 } 816 }
646 817
647 if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { 818 if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
648 tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER); 819 tfilter_notify_chain(net, skb, q, parent, n,
820 chain, RTM_DELTFILTER);
649 tcf_chain_flush(chain); 821 tcf_chain_flush(chain);
650 err = 0; 822 err = 0;
651 goto errout; 823 goto errout;
@@ -692,7 +864,7 @@ replay:
692 if (!fh) { 864 if (!fh) {
693 if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { 865 if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
694 tcf_chain_tp_remove(chain, &chain_info, tp); 866 tcf_chain_tp_remove(chain, &chain_info, tp);
695 tfilter_notify(net, skb, n, tp, fh, 867 tfilter_notify(net, skb, n, tp, q, parent, fh,
696 RTM_DELTFILTER, false); 868 RTM_DELTFILTER, false);
697 tcf_proto_destroy(tp); 869 tcf_proto_destroy(tp);
698 err = 0; 870 err = 0;
@@ -717,8 +889,8 @@ replay:
717 } 889 }
718 break; 890 break;
719 case RTM_DELTFILTER: 891 case RTM_DELTFILTER:
720 err = tfilter_del_notify(net, skb, n, tp, fh, false, 892 err = tfilter_del_notify(net, skb, n, tp, q, parent,
721 &last); 893 fh, false, &last);
722 if (err) 894 if (err)
723 goto errout; 895 goto errout;
724 if (last) { 896 if (last) {
@@ -727,7 +899,7 @@ replay:
727 } 899 }
728 goto errout; 900 goto errout;
729 case RTM_GETTFILTER: 901 case RTM_GETTFILTER:
730 err = tfilter_notify(net, skb, n, tp, fh, 902 err = tfilter_notify(net, skb, n, tp, q, parent, fh,
731 RTM_NEWTFILTER, true); 903 RTM_NEWTFILTER, true);
732 goto errout; 904 goto errout;
733 default: 905 default:
@@ -741,7 +913,8 @@ replay:
741 if (err == 0) { 913 if (err == 0) {
742 if (tp_created) 914 if (tp_created)
743 tcf_chain_tp_insert(chain, &chain_info, tp); 915 tcf_chain_tp_insert(chain, &chain_info, tp);
744 tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); 916 tfilter_notify(net, skb, n, tp, q, parent, fh,
917 RTM_NEWTFILTER, false);
745 } else { 918 } else {
746 if (tp_created) 919 if (tp_created)
747 tcf_proto_destroy(tp); 920 tcf_proto_destroy(tp);
@@ -760,6 +933,8 @@ struct tcf_dump_args {
760 struct tcf_walker w; 933 struct tcf_walker w;
761 struct sk_buff *skb; 934 struct sk_buff *skb;
762 struct netlink_callback *cb; 935 struct netlink_callback *cb;
936 struct Qdisc *q;
937 u32 parent;
763}; 938};
764 939
765static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) 940static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
@@ -767,13 +942,14 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
767 struct tcf_dump_args *a = (void *)arg; 942 struct tcf_dump_args *a = (void *)arg;
768 struct net *net = sock_net(a->skb->sk); 943 struct net *net = sock_net(a->skb->sk);
769 944
770 return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, 945 return tcf_fill_node(net, a->skb, tp, a->q, a->parent,
946 n, NETLINK_CB(a->cb->skb).portid,
771 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, 947 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
772 RTM_NEWTFILTER); 948 RTM_NEWTFILTER);
773} 949}
774 950
775static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, 951static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
776 struct netlink_callback *cb, 952 struct sk_buff *skb, struct netlink_callback *cb,
777 long index_start, long *p_index) 953 long index_start, long *p_index)
778{ 954{
779 struct net *net = sock_net(skb->sk); 955 struct net *net = sock_net(skb->sk);
@@ -795,7 +971,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
795 memset(&cb->args[1], 0, 971 memset(&cb->args[1], 0,
796 sizeof(cb->args) - sizeof(cb->args[0])); 972 sizeof(cb->args) - sizeof(cb->args[0]));
797 if (cb->args[1] == 0) { 973 if (cb->args[1] == 0) {
798 if (tcf_fill_node(net, skb, tp, 0, 974 if (tcf_fill_node(net, skb, tp, q, parent, 0,
799 NETLINK_CB(cb->skb).portid, 975 NETLINK_CB(cb->skb).portid,
800 cb->nlh->nlmsg_seq, NLM_F_MULTI, 976 cb->nlh->nlmsg_seq, NLM_F_MULTI,
801 RTM_NEWTFILTER) <= 0) 977 RTM_NEWTFILTER) <= 0)
@@ -808,6 +984,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
808 arg.w.fn = tcf_node_dump; 984 arg.w.fn = tcf_node_dump;
809 arg.skb = skb; 985 arg.skb = skb;
810 arg.cb = cb; 986 arg.cb = cb;
987 arg.q = q;
988 arg.parent = parent;
811 arg.w.stop = 0; 989 arg.w.stop = 0;
812 arg.w.skip = cb->args[1] - 1; 990 arg.w.skip = cb->args[1] - 1;
813 arg.w.count = 0; 991 arg.w.count = 0;
@@ -833,6 +1011,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
833 const struct Qdisc_class_ops *cops; 1011 const struct Qdisc_class_ops *cops;
834 long index_start; 1012 long index_start;
835 long index; 1013 long index;
1014 u32 parent;
836 int err; 1015 int err;
837 1016
838 if (nlmsg_len(cb->nlh) < sizeof(*tcm)) 1017 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
@@ -846,10 +1025,13 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
846 if (!dev) 1025 if (!dev)
847 return skb->len; 1026 return skb->len;
848 1027
849 if (!tcm->tcm_parent) 1028 parent = tcm->tcm_parent;
1029 if (!parent) {
850 q = dev->qdisc; 1030 q = dev->qdisc;
851 else 1031 parent = q->handle;
1032 } else {
852 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); 1033 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
1034 }
853 if (!q) 1035 if (!q)
854 goto out; 1036 goto out;
855 cops = q->ops->cl_ops; 1037 cops = q->ops->cl_ops;
@@ -873,7 +1055,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
873 if (tca[TCA_CHAIN] && 1055 if (tca[TCA_CHAIN] &&
874 nla_get_u32(tca[TCA_CHAIN]) != chain->index) 1056 nla_get_u32(tca[TCA_CHAIN]) != chain->index)
875 continue; 1057 continue;
876 if (!tcf_chain_dump(chain, skb, cb, index_start, &index)) 1058 if (!tcf_chain_dump(chain, q, parent, skb, cb,
1059 index_start, &index))
877 break; 1060 break;
878 } 1061 }
879 1062
@@ -1015,29 +1198,56 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
1015} 1198}
1016EXPORT_SYMBOL(tcf_exts_dump_stats); 1199EXPORT_SYMBOL(tcf_exts_dump_stats);
1017 1200
1018int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, 1201static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
1019 struct net_device **hw_dev) 1202 enum tc_setup_type type,
1203 void *type_data, bool err_stop)
1020{ 1204{
1205 int ok_count = 0;
1021#ifdef CONFIG_NET_CLS_ACT 1206#ifdef CONFIG_NET_CLS_ACT
1022 const struct tc_action *a; 1207 const struct tc_action *a;
1023 LIST_HEAD(actions); 1208 struct net_device *dev;
1209 int i, ret;
1024 1210
1025 if (!tcf_exts_has_actions(exts)) 1211 if (!tcf_exts_has_actions(exts))
1026 return -EINVAL; 1212 return 0;
1027 1213
1028 tcf_exts_to_list(exts, &actions); 1214 for (i = 0; i < exts->nr_actions; i++) {
1029 list_for_each_entry(a, &actions, list) { 1215 a = exts->actions[i];
1030 if (a->ops->get_dev) { 1216 if (!a->ops->get_dev)
1031 a->ops->get_dev(a, dev_net(dev), hw_dev); 1217 continue;
1032 break; 1218 dev = a->ops->get_dev(a);
1033 } 1219 if (!dev)
1220 continue;
1221 ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
1222 if (ret < 0)
1223 return ret;
1224 ok_count += ret;
1034 } 1225 }
1035 if (*hw_dev)
1036 return 0;
1037#endif 1226#endif
1038 return -EOPNOTSUPP; 1227 return ok_count;
1228}
1229
1230int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
1231 enum tc_setup_type type, void *type_data, bool err_stop)
1232{
1233 int ok_count;
1234 int ret;
1235
1236 ret = tcf_block_cb_call(block, type, type_data, err_stop);
1237 if (ret < 0)
1238 return ret;
1239 ok_count = ret;
1240
1241 if (!exts)
1242 return ok_count;
1243 ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
1244 if (ret < 0)
1245 return ret;
1246 ok_count += ret;
1247
1248 return ok_count;
1039} 1249}
1040EXPORT_SYMBOL(tcf_exts_get_dev); 1250EXPORT_SYMBOL(tc_setup_cb_call);
1041 1251
1042static int __init tc_filter_init(void) 1252static int __init tc_filter_init(void)
1043{ 1253{
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index e43c56d5b96a..5f169ded347e 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -17,13 +17,14 @@
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/rtnetlink.h> 18#include <linux/rtnetlink.h>
19#include <linux/skbuff.h> 19#include <linux/skbuff.h>
20#include <linux/idr.h>
20#include <net/netlink.h> 21#include <net/netlink.h>
21#include <net/act_api.h> 22#include <net/act_api.h>
22#include <net/pkt_cls.h> 23#include <net/pkt_cls.h>
23 24
24struct basic_head { 25struct basic_head {
25 u32 hgenerator;
26 struct list_head flist; 26 struct list_head flist;
27 struct idr handle_idr;
27 struct rcu_head rcu; 28 struct rcu_head rcu;
28}; 29};
29 30
@@ -81,6 +82,7 @@ static int basic_init(struct tcf_proto *tp)
81 if (head == NULL) 82 if (head == NULL)
82 return -ENOBUFS; 83 return -ENOBUFS;
83 INIT_LIST_HEAD(&head->flist); 84 INIT_LIST_HEAD(&head->flist);
85 idr_init(&head->handle_idr);
84 rcu_assign_pointer(tp->root, head); 86 rcu_assign_pointer(tp->root, head);
85 return 0; 87 return 0;
86} 88}
@@ -118,11 +120,13 @@ static void basic_destroy(struct tcf_proto *tp)
118 list_for_each_entry_safe(f, n, &head->flist, link) { 120 list_for_each_entry_safe(f, n, &head->flist, link) {
119 list_del_rcu(&f->link); 121 list_del_rcu(&f->link);
120 tcf_unbind_filter(tp, &f->res); 122 tcf_unbind_filter(tp, &f->res);
123 idr_remove_ext(&head->handle_idr, f->handle);
121 if (tcf_exts_get_net(&f->exts)) 124 if (tcf_exts_get_net(&f->exts))
122 call_rcu(&f->rcu, basic_delete_filter); 125 call_rcu(&f->rcu, basic_delete_filter);
123 else 126 else
124 __basic_delete_filter(f); 127 __basic_delete_filter(f);
125 } 128 }
129 idr_destroy(&head->handle_idr);
126 kfree_rcu(head, rcu); 130 kfree_rcu(head, rcu);
127} 131}
128 132
@@ -133,6 +137,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
133 137
134 list_del_rcu(&f->link); 138 list_del_rcu(&f->link);
135 tcf_unbind_filter(tp, &f->res); 139 tcf_unbind_filter(tp, &f->res);
140 idr_remove_ext(&head->handle_idr, f->handle);
136 tcf_exts_get_net(&f->exts); 141 tcf_exts_get_net(&f->exts);
137 call_rcu(&f->rcu, basic_delete_filter); 142 call_rcu(&f->rcu, basic_delete_filter);
138 *last = list_empty(&head->flist); 143 *last = list_empty(&head->flist);
@@ -177,6 +182,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
177 struct nlattr *tb[TCA_BASIC_MAX + 1]; 182 struct nlattr *tb[TCA_BASIC_MAX + 1];
178 struct basic_filter *fold = (struct basic_filter *) *arg; 183 struct basic_filter *fold = (struct basic_filter *) *arg;
179 struct basic_filter *fnew; 184 struct basic_filter *fnew;
185 unsigned long idr_index;
180 186
181 if (tca[TCA_OPTIONS] == NULL) 187 if (tca[TCA_OPTIONS] == NULL)
182 return -EINVAL; 188 return -EINVAL;
@@ -199,33 +205,33 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
199 if (err < 0) 205 if (err < 0)
200 goto errout; 206 goto errout;
201 207
202 err = -EINVAL;
203 if (handle) { 208 if (handle) {
204 fnew->handle = handle; 209 fnew->handle = handle;
205 } else if (fold) { 210 if (!fold) {
206 fnew->handle = fold->handle; 211 err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
212 handle, handle + 1, GFP_KERNEL);
213 if (err)
214 goto errout;
215 }
207 } else { 216 } else {
208 unsigned int i = 0x80000000; 217 err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
209 do { 218 1, 0x7FFFFFFF, GFP_KERNEL);
210 if (++head->hgenerator == 0x7FFFFFFF) 219 if (err)
211 head->hgenerator = 1;
212 } while (--i > 0 && basic_get(tp, head->hgenerator));
213
214 if (i <= 0) {
215 pr_err("Insufficient number of handles\n");
216 goto errout; 220 goto errout;
217 } 221 fnew->handle = idr_index;
218
219 fnew->handle = head->hgenerator;
220 } 222 }
221 223
222 err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr); 224 err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr);
223 if (err < 0) 225 if (err < 0) {
226 if (!fold)
227 idr_remove_ext(&head->handle_idr, fnew->handle);
224 goto errout; 228 goto errout;
229 }
225 230
226 *arg = fnew; 231 *arg = fnew;
227 232
228 if (fold) { 233 if (fold) {
234 idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
229 list_replace_rcu(&fold->link, &fnew->link); 235 list_replace_rcu(&fold->link, &fnew->link);
230 tcf_unbind_filter(tp, &fold->res); 236 tcf_unbind_filter(tp, &fold->res);
231 tcf_exts_get_net(&fold->exts); 237 tcf_exts_get_net(&fold->exts);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 990eb4d91d54..fb680dafac5a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -17,6 +17,7 @@
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <linux/filter.h> 18#include <linux/filter.h>
19#include <linux/bpf.h> 19#include <linux/bpf.h>
20#include <linux/idr.h>
20 21
21#include <net/rtnetlink.h> 22#include <net/rtnetlink.h>
22#include <net/pkt_cls.h> 23#include <net/pkt_cls.h>
@@ -32,7 +33,7 @@ MODULE_DESCRIPTION("TC BPF based classifier");
32 33
33struct cls_bpf_head { 34struct cls_bpf_head {
34 struct list_head plist; 35 struct list_head plist;
35 u32 hgen; 36 struct idr handle_idr;
36 struct rcu_head rcu; 37 struct rcu_head rcu;
37}; 38};
38 39
@@ -102,11 +103,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
102 } else if (at_ingress) { 103 } else if (at_ingress) {
103 /* It is safe to push/pull even if skb_shared() */ 104 /* It is safe to push/pull even if skb_shared() */
104 __skb_push(skb, skb->mac_len); 105 __skb_push(skb, skb->mac_len);
105 bpf_compute_data_end(skb); 106 bpf_compute_data_pointers(skb);
106 filter_res = BPF_PROG_RUN(prog->filter, skb); 107 filter_res = BPF_PROG_RUN(prog->filter, skb);
107 __skb_pull(skb, skb->mac_len); 108 __skb_pull(skb, skb->mac_len);
108 } else { 109 } else {
109 bpf_compute_data_end(skb); 110 bpf_compute_data_pointers(skb);
110 filter_res = BPF_PROG_RUN(prog->filter, skb); 111 filter_res = BPF_PROG_RUN(prog->filter, skb);
111 } 112 }
112 113
@@ -149,7 +150,9 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
149static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, 150static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
150 enum tc_clsbpf_command cmd) 151 enum tc_clsbpf_command cmd)
151{ 152{
152 struct net_device *dev = tp->q->dev_queue->dev; 153 bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
154 struct tcf_block *block = tp->chain->block;
155 bool skip_sw = tc_skip_sw(prog->gen_flags);
153 struct tc_cls_bpf_offload cls_bpf = {}; 156 struct tc_cls_bpf_offload cls_bpf = {};
154 int err; 157 int err;
155 158
@@ -161,17 +164,25 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
161 cls_bpf.exts_integrated = prog->exts_integrated; 164 cls_bpf.exts_integrated = prog->exts_integrated;
162 cls_bpf.gen_flags = prog->gen_flags; 165 cls_bpf.gen_flags = prog->gen_flags;
163 166
164 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSBPF, &cls_bpf); 167 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
165 if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE)) 168 if (addorrep) {
166 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; 169 if (err < 0) {
170 cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
171 return err;
172 } else if (err > 0) {
173 prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
174 }
175 }
176
177 if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
178 return -EINVAL;
167 179
168 return err; 180 return 0;
169} 181}
170 182
171static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, 183static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
172 struct cls_bpf_prog *oldprog) 184 struct cls_bpf_prog *oldprog)
173{ 185{
174 struct net_device *dev = tp->q->dev_queue->dev;
175 struct cls_bpf_prog *obj = prog; 186 struct cls_bpf_prog *obj = prog;
176 enum tc_clsbpf_command cmd; 187 enum tc_clsbpf_command cmd;
177 bool skip_sw; 188 bool skip_sw;
@@ -181,7 +192,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
181 (oldprog && tc_skip_sw(oldprog->gen_flags)); 192 (oldprog && tc_skip_sw(oldprog->gen_flags));
182 193
183 if (oldprog && oldprog->offloaded) { 194 if (oldprog && oldprog->offloaded) {
184 if (tc_should_offload(dev, prog->gen_flags)) { 195 if (!tc_skip_hw(prog->gen_flags)) {
185 cmd = TC_CLSBPF_REPLACE; 196 cmd = TC_CLSBPF_REPLACE;
186 } else if (!tc_skip_sw(prog->gen_flags)) { 197 } else if (!tc_skip_sw(prog->gen_flags)) {
187 obj = oldprog; 198 obj = oldprog;
@@ -190,14 +201,14 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
190 return -EINVAL; 201 return -EINVAL;
191 } 202 }
192 } else { 203 } else {
193 if (!tc_should_offload(dev, prog->gen_flags)) 204 if (tc_skip_hw(prog->gen_flags))
194 return skip_sw ? -EINVAL : 0; 205 return skip_sw ? -EINVAL : 0;
195 cmd = TC_CLSBPF_ADD; 206 cmd = TC_CLSBPF_ADD;
196 } 207 }
197 208
198 ret = cls_bpf_offload_cmd(tp, obj, cmd); 209 ret = cls_bpf_offload_cmd(tp, obj, cmd);
199 if (ret) 210 if (ret)
200 return skip_sw ? ret : 0; 211 return ret;
201 212
202 obj->offloaded = true; 213 obj->offloaded = true;
203 if (oldprog) 214 if (oldprog)
@@ -241,6 +252,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
241 return -ENOBUFS; 252 return -ENOBUFS;
242 253
243 INIT_LIST_HEAD_RCU(&head->plist); 254 INIT_LIST_HEAD_RCU(&head->plist);
255 idr_init(&head->handle_idr);
244 rcu_assign_pointer(tp->root, head); 256 rcu_assign_pointer(tp->root, head);
245 257
246 return 0; 258 return 0;
@@ -280,6 +292,9 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
280 292
281static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) 293static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
282{ 294{
295 struct cls_bpf_head *head = rtnl_dereference(tp->root);
296
297 idr_remove_ext(&head->handle_idr, prog->handle);
283 cls_bpf_stop_offload(tp, prog); 298 cls_bpf_stop_offload(tp, prog);
284 list_del_rcu(&prog->link); 299 list_del_rcu(&prog->link);
285 tcf_unbind_filter(tp, &prog->res); 300 tcf_unbind_filter(tp, &prog->res);
@@ -306,6 +321,7 @@ static void cls_bpf_destroy(struct tcf_proto *tp)
306 list_for_each_entry_safe(prog, tmp, &head->plist, link) 321 list_for_each_entry_safe(prog, tmp, &head->plist, link)
307 __cls_bpf_delete(tp, prog); 322 __cls_bpf_delete(tp, prog);
308 323
324 idr_destroy(&head->handle_idr);
309 kfree_rcu(head, rcu); 325 kfree_rcu(head, rcu);
310} 326}
311 327
@@ -362,7 +378,7 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
362} 378}
363 379
364static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, 380static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
365 const struct tcf_proto *tp) 381 u32 gen_flags, const struct tcf_proto *tp)
366{ 382{
367 struct bpf_prog *fp; 383 struct bpf_prog *fp;
368 char *name = NULL; 384 char *name = NULL;
@@ -370,7 +386,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
370 386
371 bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); 387 bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
372 388
373 fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); 389 if (gen_flags & TCA_CLS_FLAGS_SKIP_SW)
390 fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS,
391 qdisc_dev(tp->q));
392 else
393 fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
374 if (IS_ERR(fp)) 394 if (IS_ERR(fp))
375 return PTR_ERR(fp); 395 return PTR_ERR(fp);
376 396
@@ -428,7 +448,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
428 prog->gen_flags = gen_flags; 448 prog->gen_flags = gen_flags;
429 449
430 ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : 450 ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
431 cls_bpf_prog_from_efd(tb, prog, tp); 451 cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
432 if (ret < 0) 452 if (ret < 0)
433 return ret; 453 return ret;
434 454
@@ -440,27 +460,6 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
440 return 0; 460 return 0;
441} 461}
442 462
443static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
444 struct cls_bpf_head *head)
445{
446 unsigned int i = 0x80000000;
447 u32 handle;
448
449 do {
450 if (++head->hgen == 0x7FFFFFFF)
451 head->hgen = 1;
452 } while (--i > 0 && cls_bpf_get(tp, head->hgen));
453
454 if (unlikely(i == 0)) {
455 pr_err("Insufficient number of handles\n");
456 handle = 0;
457 } else {
458 handle = head->hgen;
459 }
460
461 return handle;
462}
463
464static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, 463static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
465 struct tcf_proto *tp, unsigned long base, 464 struct tcf_proto *tp, unsigned long base,
466 u32 handle, struct nlattr **tca, 465 u32 handle, struct nlattr **tca,
@@ -470,6 +469,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
470 struct cls_bpf_prog *oldprog = *arg; 469 struct cls_bpf_prog *oldprog = *arg;
471 struct nlattr *tb[TCA_BPF_MAX + 1]; 470 struct nlattr *tb[TCA_BPF_MAX + 1];
472 struct cls_bpf_prog *prog; 471 struct cls_bpf_prog *prog;
472 unsigned long idr_index;
473 int ret; 473 int ret;
474 474
475 if (tca[TCA_OPTIONS] == NULL) 475 if (tca[TCA_OPTIONS] == NULL)
@@ -495,21 +495,30 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
495 } 495 }
496 } 496 }
497 497
498 if (handle == 0) 498 if (handle == 0) {
499 prog->handle = cls_bpf_grab_new_handle(tp, head); 499 ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
500 else 500 1, 0x7FFFFFFF, GFP_KERNEL);
501 if (ret)
502 goto errout;
503 prog->handle = idr_index;
504 } else {
505 if (!oldprog) {
506 ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
507 handle, handle + 1, GFP_KERNEL);
508 if (ret)
509 goto errout;
510 }
501 prog->handle = handle; 511 prog->handle = handle;
502 if (prog->handle == 0) {
503 ret = -EINVAL;
504 goto errout;
505 } 512 }
506 513
507 ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr); 514 ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
508 if (ret < 0) 515 if (ret < 0)
509 goto errout; 516 goto errout_idr;
510 517
511 ret = cls_bpf_offload(tp, prog, oldprog); 518 ret = cls_bpf_offload(tp, prog, oldprog);
512 if (ret) { 519 if (ret) {
520 if (!oldprog)
521 idr_remove_ext(&head->handle_idr, prog->handle);
513 __cls_bpf_delete_prog(prog); 522 __cls_bpf_delete_prog(prog);
514 return ret; 523 return ret;
515 } 524 }
@@ -518,6 +527,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
518 prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; 527 prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
519 528
520 if (oldprog) { 529 if (oldprog) {
530 idr_replace_ext(&head->handle_idr, prog, handle);
521 list_replace_rcu(&oldprog->link, &prog->link); 531 list_replace_rcu(&oldprog->link, &prog->link);
522 tcf_unbind_filter(tp, &oldprog->res); 532 tcf_unbind_filter(tp, &oldprog->res);
523 tcf_exts_get_net(&oldprog->exts); 533 tcf_exts_get_net(&oldprog->exts);
@@ -529,6 +539,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
529 *arg = prog; 539 *arg = prog;
530 return 0; 540 return 0;
531 541
542errout_idr:
543 if (!oldprog)
544 idr_remove_ext(&head->handle_idr, prog->handle);
532errout: 545errout:
533 tcf_exts_destroy(&prog->exts); 546 tcf_exts_destroy(&prog->exts);
534 kfree(prog); 547 kfree(prog);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 85f765cff697..25c2a888e1f0 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -348,9 +348,9 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
348 return -1; 348 return -1;
349} 349}
350 350
351static void flow_perturbation(unsigned long arg) 351static void flow_perturbation(struct timer_list *t)
352{ 352{
353 struct flow_filter *f = (struct flow_filter *)arg; 353 struct flow_filter *f = from_timer(f, t, perturb_timer);
354 354
355 get_random_bytes(&f->hashrnd, 4); 355 get_random_bytes(&f->hashrnd, 4);
356 if (f->perturb_period) 356 if (f->perturb_period)
@@ -510,8 +510,11 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
510 perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; 510 perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
511 } 511 }
512 512
513 if (TC_H_MAJ(baseclass) == 0) 513 if (TC_H_MAJ(baseclass) == 0) {
514 baseclass = TC_H_MAKE(tp->q->handle, baseclass); 514 struct Qdisc *q = tcf_block_q(tp->chain->block);
515
516 baseclass = TC_H_MAKE(q->handle, baseclass);
517 }
515 if (TC_H_MIN(baseclass) == 0) 518 if (TC_H_MIN(baseclass) == 0)
516 baseclass = TC_H_MAKE(baseclass, 1); 519 baseclass = TC_H_MAKE(baseclass, 1);
517 520
@@ -521,8 +524,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
521 get_random_bytes(&fnew->hashrnd, 4); 524 get_random_bytes(&fnew->hashrnd, 4);
522 } 525 }
523 526
524 setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation, 527 timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE);
525 (unsigned long)fnew);
526 528
527 netif_keep_dst(qdisc_dev(tp->q)); 529 netif_keep_dst(qdisc_dev(tp->q));
528 530
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7a838d1c1c00..543a3e875d05 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -155,37 +155,12 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
155 struct cls_fl_filter *f; 155 struct cls_fl_filter *f;
156 struct fl_flow_key skb_key; 156 struct fl_flow_key skb_key;
157 struct fl_flow_key skb_mkey; 157 struct fl_flow_key skb_mkey;
158 struct ip_tunnel_info *info;
159 158
160 if (!atomic_read(&head->ht.nelems)) 159 if (!atomic_read(&head->ht.nelems))
161 return -1; 160 return -1;
162 161
163 fl_clear_masked_range(&skb_key, &head->mask); 162 fl_clear_masked_range(&skb_key, &head->mask);
164 163
165 info = skb_tunnel_info(skb);
166 if (info) {
167 struct ip_tunnel_key *key = &info->key;
168
169 switch (ip_tunnel_info_af(info)) {
170 case AF_INET:
171 skb_key.enc_control.addr_type =
172 FLOW_DISSECTOR_KEY_IPV4_ADDRS;
173 skb_key.enc_ipv4.src = key->u.ipv4.src;
174 skb_key.enc_ipv4.dst = key->u.ipv4.dst;
175 break;
176 case AF_INET6:
177 skb_key.enc_control.addr_type =
178 FLOW_DISSECTOR_KEY_IPV6_ADDRS;
179 skb_key.enc_ipv6.src = key->u.ipv6.src;
180 skb_key.enc_ipv6.dst = key->u.ipv6.dst;
181 break;
182 }
183
184 skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
185 skb_key.enc_tp.src = key->tp_src;
186 skb_key.enc_tp.dst = key->tp_dst;
187 }
188
189 skb_key.indev_ifindex = skb->skb_iif; 164 skb_key.indev_ifindex = skb->skb_iif;
190 /* skb_flow_dissect() does not set n_proto in case an unknown protocol, 165 /* skb_flow_dissect() does not set n_proto in case an unknown protocol,
191 * so do it rather here. 166 * so do it rather here.
@@ -245,17 +220,14 @@ static void fl_destroy_filter(struct rcu_head *head)
245static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) 220static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
246{ 221{
247 struct tc_cls_flower_offload cls_flower = {}; 222 struct tc_cls_flower_offload cls_flower = {};
248 struct net_device *dev = f->hw_dev; 223 struct tcf_block *block = tp->chain->block;
249
250 if (!tc_can_offload(dev))
251 return;
252 224
253 tc_cls_common_offload_init(&cls_flower.common, tp); 225 tc_cls_common_offload_init(&cls_flower.common, tp);
254 cls_flower.command = TC_CLSFLOWER_DESTROY; 226 cls_flower.command = TC_CLSFLOWER_DESTROY;
255 cls_flower.cookie = (unsigned long) f; 227 cls_flower.cookie = (unsigned long) f;
256 cls_flower.egress_dev = f->hw_dev != tp->q->dev_queue->dev;
257 228
258 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower); 229 tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
230 &cls_flower, false);
259} 231}
260 232
261static int fl_hw_replace_filter(struct tcf_proto *tp, 233static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -263,22 +235,11 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
263 struct fl_flow_key *mask, 235 struct fl_flow_key *mask,
264 struct cls_fl_filter *f) 236 struct cls_fl_filter *f)
265{ 237{
266 struct net_device *dev = tp->q->dev_queue->dev;
267 struct tc_cls_flower_offload cls_flower = {}; 238 struct tc_cls_flower_offload cls_flower = {};
239 struct tcf_block *block = tp->chain->block;
240 bool skip_sw = tc_skip_sw(f->flags);
268 int err; 241 int err;
269 242
270 if (!tc_can_offload(dev)) {
271 if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
272 (f->hw_dev && !tc_can_offload(f->hw_dev))) {
273 f->hw_dev = dev;
274 return tc_skip_sw(f->flags) ? -EINVAL : 0;
275 }
276 dev = f->hw_dev;
277 cls_flower.egress_dev = true;
278 } else {
279 f->hw_dev = dev;
280 }
281
282 tc_cls_common_offload_init(&cls_flower.common, tp); 243 tc_cls_common_offload_init(&cls_flower.common, tp);
283 cls_flower.command = TC_CLSFLOWER_REPLACE; 244 cls_flower.command = TC_CLSFLOWER_REPLACE;
284 cls_flower.cookie = (unsigned long) f; 245 cls_flower.cookie = (unsigned long) f;
@@ -286,33 +247,36 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
286 cls_flower.mask = mask; 247 cls_flower.mask = mask;
287 cls_flower.key = &f->mkey; 248 cls_flower.key = &f->mkey;
288 cls_flower.exts = &f->exts; 249 cls_flower.exts = &f->exts;
250 cls_flower.classid = f->res.classid;
289 251
290 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, 252 err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
291 &cls_flower); 253 &cls_flower, skip_sw);
292 if (!err) 254 if (err < 0) {
255 fl_hw_destroy_filter(tp, f);
256 return err;
257 } else if (err > 0) {
293 f->flags |= TCA_CLS_FLAGS_IN_HW; 258 f->flags |= TCA_CLS_FLAGS_IN_HW;
259 }
260
261 if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
262 return -EINVAL;
294 263
295 if (tc_skip_sw(f->flags))
296 return err;
297 return 0; 264 return 0;
298} 265}
299 266
300static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) 267static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
301{ 268{
302 struct tc_cls_flower_offload cls_flower = {}; 269 struct tc_cls_flower_offload cls_flower = {};
303 struct net_device *dev = f->hw_dev; 270 struct tcf_block *block = tp->chain->block;
304
305 if (!tc_can_offload(dev))
306 return;
307 271
308 tc_cls_common_offload_init(&cls_flower.common, tp); 272 tc_cls_common_offload_init(&cls_flower.common, tp);
309 cls_flower.command = TC_CLSFLOWER_STATS; 273 cls_flower.command = TC_CLSFLOWER_STATS;
310 cls_flower.cookie = (unsigned long) f; 274 cls_flower.cookie = (unsigned long) f;
311 cls_flower.exts = &f->exts; 275 cls_flower.exts = &f->exts;
312 cls_flower.egress_dev = f->hw_dev != tp->q->dev_queue->dev; 276 cls_flower.classid = f->res.classid;
313 277
314 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, 278 tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
315 &cls_flower); 279 &cls_flower, false);
316} 280}
317 281
318static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) 282static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 7f45e5ab8afc..20f0de1a960a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -28,6 +28,7 @@
28#include <net/netlink.h> 28#include <net/netlink.h>
29#include <net/act_api.h> 29#include <net/act_api.h>
30#include <net/pkt_cls.h> 30#include <net/pkt_cls.h>
31#include <net/sch_generic.h>
31 32
32#define HTSIZE 256 33#define HTSIZE 256
33 34
@@ -86,9 +87,11 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
86 } 87 }
87 } 88 }
88 } else { 89 } else {
90 struct Qdisc *q = tcf_block_q(tp->chain->block);
91
89 /* Old method: classify the packet using its skb mark. */ 92 /* Old method: classify the packet using its skb mark. */
90 if (id && (TC_H_MAJ(id) == 0 || 93 if (id && (TC_H_MAJ(id) == 0 ||
91 !(TC_H_MAJ(id ^ tp->q->handle)))) { 94 !(TC_H_MAJ(id ^ q->handle)))) {
92 res->classid = id; 95 res->classid = id;
93 res->class = 0; 96 res->class = 0;
94 return 0; 97 return 0;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 3684153cd8a9..66d4e0099158 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -69,12 +69,27 @@ static void mall_destroy_rcu(struct rcu_head *rcu)
69 tcf_queue_work(&head->work); 69 tcf_queue_work(&head->work);
70} 70}
71 71
72static void mall_destroy_hw_filter(struct tcf_proto *tp,
73 struct cls_mall_head *head,
74 unsigned long cookie)
75{
76 struct tc_cls_matchall_offload cls_mall = {};
77 struct tcf_block *block = tp->chain->block;
78
79 tc_cls_common_offload_init(&cls_mall.common, tp);
80 cls_mall.command = TC_CLSMATCHALL_DESTROY;
81 cls_mall.cookie = cookie;
82
83 tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false);
84}
85
72static int mall_replace_hw_filter(struct tcf_proto *tp, 86static int mall_replace_hw_filter(struct tcf_proto *tp,
73 struct cls_mall_head *head, 87 struct cls_mall_head *head,
74 unsigned long cookie) 88 unsigned long cookie)
75{ 89{
76 struct net_device *dev = tp->q->dev_queue->dev;
77 struct tc_cls_matchall_offload cls_mall = {}; 90 struct tc_cls_matchall_offload cls_mall = {};
91 struct tcf_block *block = tp->chain->block;
92 bool skip_sw = tc_skip_sw(head->flags);
78 int err; 93 int err;
79 94
80 tc_cls_common_offload_init(&cls_mall.common, tp); 95 tc_cls_common_offload_init(&cls_mall.common, tp);
@@ -82,37 +97,29 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
82 cls_mall.exts = &head->exts; 97 cls_mall.exts = &head->exts;
83 cls_mall.cookie = cookie; 98 cls_mall.cookie = cookie;
84 99
85 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, 100 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
86 &cls_mall); 101 &cls_mall, skip_sw);
87 if (!err) 102 if (err < 0) {
103 mall_destroy_hw_filter(tp, head, cookie);
104 return err;
105 } else if (err > 0) {
88 head->flags |= TCA_CLS_FLAGS_IN_HW; 106 head->flags |= TCA_CLS_FLAGS_IN_HW;
107 }
89 108
90 return err; 109 if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
91} 110 return -EINVAL;
92
93static void mall_destroy_hw_filter(struct tcf_proto *tp,
94 struct cls_mall_head *head,
95 unsigned long cookie)
96{
97 struct net_device *dev = tp->q->dev_queue->dev;
98 struct tc_cls_matchall_offload cls_mall = {};
99
100 tc_cls_common_offload_init(&cls_mall.common, tp);
101 cls_mall.command = TC_CLSMATCHALL_DESTROY;
102 cls_mall.cookie = cookie;
103 111
104 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, &cls_mall); 112 return 0;
105} 113}
106 114
107static void mall_destroy(struct tcf_proto *tp) 115static void mall_destroy(struct tcf_proto *tp)
108{ 116{
109 struct cls_mall_head *head = rtnl_dereference(tp->root); 117 struct cls_mall_head *head = rtnl_dereference(tp->root);
110 struct net_device *dev = tp->q->dev_queue->dev;
111 118
112 if (!head) 119 if (!head)
113 return; 120 return;
114 121
115 if (tc_should_offload(dev, head->flags)) 122 if (!tc_skip_hw(head->flags))
116 mall_destroy_hw_filter(tp, head, (unsigned long) head); 123 mall_destroy_hw_filter(tp, head, (unsigned long) head);
117 124
118 if (tcf_exts_get_net(&head->exts)) 125 if (tcf_exts_get_net(&head->exts))
@@ -155,7 +162,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
155 void **arg, bool ovr) 162 void **arg, bool ovr)
156{ 163{
157 struct cls_mall_head *head = rtnl_dereference(tp->root); 164 struct cls_mall_head *head = rtnl_dereference(tp->root);
158 struct net_device *dev = tp->q->dev_queue->dev;
159 struct nlattr *tb[TCA_MATCHALL_MAX + 1]; 165 struct nlattr *tb[TCA_MATCHALL_MAX + 1];
160 struct cls_mall_head *new; 166 struct cls_mall_head *new;
161 u32 flags = 0; 167 u32 flags = 0;
@@ -195,14 +201,10 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
195 if (err) 201 if (err)
196 goto err_set_parms; 202 goto err_set_parms;
197 203
198 if (tc_should_offload(dev, flags)) { 204 if (!tc_skip_hw(new->flags)) {
199 err = mall_replace_hw_filter(tp, new, (unsigned long) new); 205 err = mall_replace_hw_filter(tp, new, (unsigned long) new);
200 if (err) { 206 if (err)
201 if (tc_skip_sw(flags)) 207 goto err_replace_hw_filter;
202 goto err_replace_hw_filter;
203 else
204 err = 0;
205 }
206 } 208 }
207 209
208 if (!tc_in_hw(new->flags)) 210 if (!tc_in_hw(new->flags))
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index a76937ee0b2d..67467ae24c97 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -13,6 +13,7 @@
13#include <net/act_api.h> 13#include <net/act_api.h>
14#include <net/netlink.h> 14#include <net/netlink.h>
15#include <net/pkt_cls.h> 15#include <net/pkt_cls.h>
16#include <net/sch_generic.h>
16 17
17/* 18/*
18 * Passing parameters to the root seems to be done more awkwardly than really 19 * Passing parameters to the root seems to be done more awkwardly than really
@@ -96,9 +97,11 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
96 97
97 f = tcindex_lookup(p, key); 98 f = tcindex_lookup(p, key);
98 if (!f) { 99 if (!f) {
100 struct Qdisc *q = tcf_block_q(tp->chain->block);
101
99 if (!p->fall_through) 102 if (!p->fall_through)
100 return -1; 103 return -1;
101 res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key); 104 res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
102 res->class = 0; 105 res->class = 0;
103 pr_debug("alg 0x%x\n", res->classid); 106 pr_debug("alg 0x%x\n", res->classid);
104 return 0; 107 return 0;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index b58eccb21f03..ac152b4f4247 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -46,6 +46,7 @@
46#include <net/act_api.h> 46#include <net/act_api.h>
47#include <net/pkt_cls.h> 47#include <net/pkt_cls.h>
48#include <linux/netdevice.h> 48#include <linux/netdevice.h>
49#include <linux/idr.h>
49 50
50struct tc_u_knode { 51struct tc_u_knode {
51 struct tc_u_knode __rcu *next; 52 struct tc_u_knode __rcu *next;
@@ -85,6 +86,7 @@ struct tc_u_hnode {
85 struct tc_u_common *tp_c; 86 struct tc_u_common *tp_c;
86 int refcnt; 87 int refcnt;
87 unsigned int divisor; 88 unsigned int divisor;
89 struct idr handle_idr;
88 struct rcu_head rcu; 90 struct rcu_head rcu;
89 /* The 'ht' field MUST be the last field in structure to allow for 91 /* The 'ht' field MUST be the last field in structure to allow for
90 * more entries allocated at end of structure. 92 * more entries allocated at end of structure.
@@ -94,9 +96,9 @@ struct tc_u_hnode {
94 96
95struct tc_u_common { 97struct tc_u_common {
96 struct tc_u_hnode __rcu *hlist; 98 struct tc_u_hnode __rcu *hlist;
97 struct Qdisc *q; 99 struct tcf_block *block;
98 int refcnt; 100 int refcnt;
99 u32 hgenerator; 101 struct idr handle_idr;
100 struct hlist_node hnode; 102 struct hlist_node hnode;
101 struct rcu_head rcu; 103 struct rcu_head rcu;
102}; 104};
@@ -314,19 +316,19 @@ static void *u32_get(struct tcf_proto *tp, u32 handle)
314 return u32_lookup_key(ht, handle); 316 return u32_lookup_key(ht, handle);
315} 317}
316 318
317static u32 gen_new_htid(struct tc_u_common *tp_c) 319static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
318{ 320{
319 int i = 0x800; 321 unsigned long idr_index;
322 int err;
320 323
321 /* hgenerator only used inside rtnl lock it is safe to increment 324 /* This is only used inside rtnl lock it is safe to increment
322 * without read _copy_ update semantics 325 * without read _copy_ update semantics
323 */ 326 */
324 do { 327 err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index,
325 if (++tp_c->hgenerator == 0x7FF) 328 1, 0x7FF, GFP_KERNEL);
326 tp_c->hgenerator = 1; 329 if (err)
327 } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); 330 return 0;
328 331 return (u32)(idr_index | 0x800) << 20;
329 return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
330} 332}
331 333
332static struct hlist_head *tc_u_common_hash; 334static struct hlist_head *tc_u_common_hash;
@@ -336,11 +338,7 @@ static struct hlist_head *tc_u_common_hash;
336 338
337static unsigned int tc_u_hash(const struct tcf_proto *tp) 339static unsigned int tc_u_hash(const struct tcf_proto *tp)
338{ 340{
339 struct net_device *dev = tp->q->dev_queue->dev; 341 return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
340 u32 qhandle = tp->q->handle;
341 int ifindex = dev->ifindex;
342
343 return hash_64((u64)ifindex << 32 | qhandle, U32_HASH_SHIFT);
344} 342}
345 343
346static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) 344static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
@@ -350,7 +348,7 @@ static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
350 348
351 h = tc_u_hash(tp); 349 h = tc_u_hash(tp);
352 hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) { 350 hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
353 if (tc->q == tp->q) 351 if (tc->block == tp->chain->block)
354 return tc; 352 return tc;
355 } 353 }
356 return NULL; 354 return NULL;
@@ -369,8 +367,9 @@ static int u32_init(struct tcf_proto *tp)
369 return -ENOBUFS; 367 return -ENOBUFS;
370 368
371 root_ht->refcnt++; 369 root_ht->refcnt++;
372 root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000; 370 root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
373 root_ht->prio = tp->prio; 371 root_ht->prio = tp->prio;
372 idr_init(&root_ht->handle_idr);
374 373
375 if (tp_c == NULL) { 374 if (tp_c == NULL) {
376 tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); 375 tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
@@ -378,8 +377,9 @@ static int u32_init(struct tcf_proto *tp)
378 kfree(root_ht); 377 kfree(root_ht);
379 return -ENOBUFS; 378 return -ENOBUFS;
380 } 379 }
381 tp_c->q = tp->q; 380 tp_c->block = tp->chain->block;
382 INIT_HLIST_NODE(&tp_c->hnode); 381 INIT_HLIST_NODE(&tp_c->hnode);
382 idr_init(&tp_c->handle_idr);
383 383
384 h = tc_u_hash(tp); 384 h = tc_u_hash(tp);
385 hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]); 385 hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
@@ -487,71 +487,69 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
487 return 0; 487 return 0;
488} 488}
489 489
490static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) 490static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
491{ 491{
492 struct net_device *dev = tp->q->dev_queue->dev; 492 struct tcf_block *block = tp->chain->block;
493 struct tc_cls_u32_offload cls_u32 = {}; 493 struct tc_cls_u32_offload cls_u32 = {};
494 494
495 if (!tc_should_offload(dev, 0))
496 return;
497
498 tc_cls_common_offload_init(&cls_u32.common, tp); 495 tc_cls_common_offload_init(&cls_u32.common, tp);
499 cls_u32.command = TC_CLSU32_DELETE_KNODE; 496 cls_u32.command = TC_CLSU32_DELETE_HNODE;
500 cls_u32.knode.handle = handle; 497 cls_u32.hnode.divisor = h->divisor;
498 cls_u32.hnode.handle = h->handle;
499 cls_u32.hnode.prio = h->prio;
501 500
502 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); 501 tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
503} 502}
504 503
505static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, 504static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
506 u32 flags) 505 u32 flags)
507{ 506{
508 struct net_device *dev = tp->q->dev_queue->dev; 507 struct tcf_block *block = tp->chain->block;
509 struct tc_cls_u32_offload cls_u32 = {}; 508 struct tc_cls_u32_offload cls_u32 = {};
509 bool skip_sw = tc_skip_sw(flags);
510 bool offloaded = false;
510 int err; 511 int err;
511 512
512 if (!tc_should_offload(dev, flags))
513 return tc_skip_sw(flags) ? -EINVAL : 0;
514
515 tc_cls_common_offload_init(&cls_u32.common, tp); 513 tc_cls_common_offload_init(&cls_u32.common, tp);
516 cls_u32.command = TC_CLSU32_NEW_HNODE; 514 cls_u32.command = TC_CLSU32_NEW_HNODE;
517 cls_u32.hnode.divisor = h->divisor; 515 cls_u32.hnode.divisor = h->divisor;
518 cls_u32.hnode.handle = h->handle; 516 cls_u32.hnode.handle = h->handle;
519 cls_u32.hnode.prio = h->prio; 517 cls_u32.hnode.prio = h->prio;
520 518
521 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); 519 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
522 if (tc_skip_sw(flags)) 520 if (err < 0) {
521 u32_clear_hw_hnode(tp, h);
523 return err; 522 return err;
523 } else if (err > 0) {
524 offloaded = true;
525 }
526
527 if (skip_sw && !offloaded)
528 return -EINVAL;
524 529
525 return 0; 530 return 0;
526} 531}
527 532
528static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) 533static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
529{ 534{
530 struct net_device *dev = tp->q->dev_queue->dev; 535 struct tcf_block *block = tp->chain->block;
531 struct tc_cls_u32_offload cls_u32 = {}; 536 struct tc_cls_u32_offload cls_u32 = {};
532 537
533 if (!tc_should_offload(dev, 0))
534 return;
535
536 tc_cls_common_offload_init(&cls_u32.common, tp); 538 tc_cls_common_offload_init(&cls_u32.common, tp);
537 cls_u32.command = TC_CLSU32_DELETE_HNODE; 539 cls_u32.command = TC_CLSU32_DELETE_KNODE;
538 cls_u32.hnode.divisor = h->divisor; 540 cls_u32.knode.handle = handle;
539 cls_u32.hnode.handle = h->handle;
540 cls_u32.hnode.prio = h->prio;
541 541
542 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); 542 tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
543} 543}
544 544
545static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, 545static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
546 u32 flags) 546 u32 flags)
547{ 547{
548 struct net_device *dev = tp->q->dev_queue->dev; 548 struct tcf_block *block = tp->chain->block;
549 struct tc_cls_u32_offload cls_u32 = {}; 549 struct tc_cls_u32_offload cls_u32 = {};
550 bool skip_sw = tc_skip_sw(flags);
550 int err; 551 int err;
551 552
552 if (!tc_should_offload(dev, flags))
553 return tc_skip_sw(flags) ? -EINVAL : 0;
554
555 tc_cls_common_offload_init(&cls_u32.common, tp); 553 tc_cls_common_offload_init(&cls_u32.common, tp);
556 cls_u32.command = TC_CLSU32_REPLACE_KNODE; 554 cls_u32.command = TC_CLSU32_REPLACE_KNODE;
557 cls_u32.knode.handle = n->handle; 555 cls_u32.knode.handle = n->handle;
@@ -568,13 +566,16 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
568 if (n->ht_down) 566 if (n->ht_down)
569 cls_u32.knode.link_handle = n->ht_down->handle; 567 cls_u32.knode.link_handle = n->ht_down->handle;
570 568
571 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32); 569 err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
572 570 if (err < 0) {
573 if (!err) 571 u32_remove_hw_knode(tp, n->handle);
572 return err;
573 } else if (err > 0) {
574 n->flags |= TCA_CLS_FLAGS_IN_HW; 574 n->flags |= TCA_CLS_FLAGS_IN_HW;
575 }
575 576
576 if (tc_skip_sw(flags)) 577 if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
577 return err; 578 return -EINVAL;
578 579
579 return 0; 580 return 0;
580} 581}
@@ -590,6 +591,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
590 rtnl_dereference(n->next)); 591 rtnl_dereference(n->next));
591 tcf_unbind_filter(tp, &n->res); 592 tcf_unbind_filter(tp, &n->res);
592 u32_remove_hw_knode(tp, n->handle); 593 u32_remove_hw_knode(tp, n->handle);
594 idr_remove_ext(&ht->handle_idr, n->handle);
593 if (tcf_exts_get_net(&n->exts)) 595 if (tcf_exts_get_net(&n->exts))
594 call_rcu(&n->rcu, u32_delete_key_freepf_rcu); 596 call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
595 else 597 else
@@ -614,6 +616,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
614 hn = &phn->next, phn = rtnl_dereference(*hn)) { 616 hn = &phn->next, phn = rtnl_dereference(*hn)) {
615 if (phn == ht) { 617 if (phn == ht) {
616 u32_clear_hw_hnode(tp, ht); 618 u32_clear_hw_hnode(tp, ht);
619 idr_destroy(&ht->handle_idr);
620 idr_remove_ext(&tp_c->handle_idr, ht->handle);
617 RCU_INIT_POINTER(*hn, ht->next); 621 RCU_INIT_POINTER(*hn, ht->next);
618 kfree_rcu(ht, rcu); 622 kfree_rcu(ht, rcu);
619 return 0; 623 return 0;
@@ -661,6 +665,7 @@ static void u32_destroy(struct tcf_proto *tp)
661 kfree_rcu(ht, rcu); 665 kfree_rcu(ht, rcu);
662 } 666 }
663 667
668 idr_destroy(&tp_c->handle_idr);
664 kfree(tp_c); 669 kfree(tp_c);
665 } 670 }
666 671
@@ -729,27 +734,21 @@ ret:
729 return ret; 734 return ret;
730} 735}
731 736
732#define NR_U32_NODE (1<<12) 737static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
733static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
734{ 738{
735 struct tc_u_knode *n; 739 unsigned long idr_index;
736 unsigned long i; 740 u32 start = htid | 0x800;
737 unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long), 741 u32 max = htid | 0xFFF;
738 GFP_KERNEL); 742 u32 min = htid;
739 if (!bitmap) 743
740 return handle | 0xFFF; 744 if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
741 745 start, max + 1, GFP_KERNEL)) {
742 for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]); 746 if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
743 n; 747 min + 1, max + 1, GFP_KERNEL))
744 n = rtnl_dereference(n->next)) 748 return max;
745 set_bit(TC_U32_NODE(n->handle), bitmap); 749 }
746
747 i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
748 if (i >= NR_U32_NODE)
749 i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
750 750
751 kfree(bitmap); 751 return (u32)idr_index;
752 return handle | (i >= NR_U32_NODE ? 0xFFF : i);
753} 752}
754 753
755static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { 754static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -834,6 +833,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
834 if (pins->handle == n->handle) 833 if (pins->handle == n->handle)
835 break; 834 break;
836 835
836 idr_replace_ext(&ht->handle_idr, n, n->handle);
837 RCU_INIT_POINTER(n->next, pins->next); 837 RCU_INIT_POINTER(n->next, pins->next);
838 rcu_assign_pointer(*ins, n); 838 rcu_assign_pointer(*ins, n);
839} 839}
@@ -966,22 +966,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
966 return -EINVAL; 966 return -EINVAL;
967 if (TC_U32_KEY(handle)) 967 if (TC_U32_KEY(handle))
968 return -EINVAL; 968 return -EINVAL;
969 if (handle == 0) {
970 handle = gen_new_htid(tp->data);
971 if (handle == 0)
972 return -ENOMEM;
973 }
974 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); 969 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
975 if (ht == NULL) 970 if (ht == NULL)
976 return -ENOBUFS; 971 return -ENOBUFS;
972 if (handle == 0) {
973 handle = gen_new_htid(tp->data, ht);
974 if (handle == 0) {
975 kfree(ht);
976 return -ENOMEM;
977 }
978 } else {
979 err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL,
980 handle, handle + 1, GFP_KERNEL);
981 if (err) {
982 kfree(ht);
983 return err;
984 }
985 }
977 ht->tp_c = tp_c; 986 ht->tp_c = tp_c;
978 ht->refcnt = 1; 987 ht->refcnt = 1;
979 ht->divisor = divisor; 988 ht->divisor = divisor;
980 ht->handle = handle; 989 ht->handle = handle;
981 ht->prio = tp->prio; 990 ht->prio = tp->prio;
991 idr_init(&ht->handle_idr);
982 992
983 err = u32_replace_hw_hnode(tp, ht, flags); 993 err = u32_replace_hw_hnode(tp, ht, flags);
984 if (err) { 994 if (err) {
995 idr_remove_ext(&tp_c->handle_idr, handle);
985 kfree(ht); 996 kfree(ht);
986 return err; 997 return err;
987 } 998 }
@@ -1015,24 +1026,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
1015 if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid)) 1026 if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
1016 return -EINVAL; 1027 return -EINVAL;
1017 handle = htid | TC_U32_NODE(handle); 1028 handle = htid | TC_U32_NODE(handle);
1029 err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
1030 handle, handle + 1,
1031 GFP_KERNEL);
1032 if (err)
1033 return err;
1018 } else 1034 } else
1019 handle = gen_new_kid(ht, htid); 1035 handle = gen_new_kid(ht, htid);
1020 1036
1021 if (tb[TCA_U32_SEL] == NULL) 1037 if (tb[TCA_U32_SEL] == NULL) {
1022 return -EINVAL; 1038 err = -EINVAL;
1039 goto erridr;
1040 }
1023 1041
1024 s = nla_data(tb[TCA_U32_SEL]); 1042 s = nla_data(tb[TCA_U32_SEL]);
1025 1043
1026 n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); 1044 n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
1027 if (n == NULL) 1045 if (n == NULL) {
1028 return -ENOBUFS; 1046 err = -ENOBUFS;
1047 goto erridr;
1048 }
1029 1049
1030#ifdef CONFIG_CLS_U32_PERF 1050#ifdef CONFIG_CLS_U32_PERF
1031 size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64); 1051 size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
1032 n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt)); 1052 n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
1033 if (!n->pf) { 1053 if (!n->pf) {
1034 kfree(n); 1054 err = -ENOBUFS;
1035 return -ENOBUFS; 1055 goto errfree;
1036 } 1056 }
1037#endif 1057#endif
1038 1058
@@ -1095,9 +1115,12 @@ errhw:
1095errout: 1115errout:
1096 tcf_exts_destroy(&n->exts); 1116 tcf_exts_destroy(&n->exts);
1097#ifdef CONFIG_CLS_U32_PERF 1117#ifdef CONFIG_CLS_U32_PERF
1118errfree:
1098 free_percpu(n->pf); 1119 free_percpu(n->pf);
1099#endif 1120#endif
1100 kfree(n); 1121 kfree(n);
1122erridr:
1123 idr_remove_ext(&ht->handle_idr, handle);
1101 return err; 1124 return err;
1102} 1125}
1103 1126
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 03b677bc0700..1331a4c2d8ff 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -178,7 +178,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
178 struct tcf_ematch_hdr *em_hdr = nla_data(nla); 178 struct tcf_ematch_hdr *em_hdr = nla_data(nla);
179 int data_len = nla_len(nla) - sizeof(*em_hdr); 179 int data_len = nla_len(nla) - sizeof(*em_hdr);
180 void *data = (void *) em_hdr + sizeof(*em_hdr); 180 void *data = (void *) em_hdr + sizeof(*em_hdr);
181 struct net *net = dev_net(qdisc_dev(tp->q)); 181 struct net *net = tp->chain->block->net;
182 182
183 if (!TCF_EM_REL_VALID(em_hdr->flags)) 183 if (!TCF_EM_REL_VALID(em_hdr->flags))
184 goto errout; 184 goto errout;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 22bc6fc48311..b6c4f536876b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1502,7 +1502,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1502 int s_idx, s_q_idx; 1502 int s_idx, s_q_idx;
1503 struct net_device *dev; 1503 struct net_device *dev;
1504 const struct nlmsghdr *nlh = cb->nlh; 1504 const struct nlmsghdr *nlh = cb->nlh;
1505 struct tcmsg *tcm = nlmsg_data(nlh);
1506 struct nlattr *tca[TCA_MAX + 1]; 1505 struct nlattr *tca[TCA_MAX + 1];
1507 int err; 1506 int err;
1508 1507
@@ -1512,7 +1511,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1512 idx = 0; 1511 idx = 0;
1513 ASSERT_RTNL(); 1512 ASSERT_RTNL();
1514 1513
1515 err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); 1514 err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
1516 if (err < 0) 1515 if (err < 0)
1517 return err; 1516 return err;
1518 1517
@@ -1664,9 +1663,11 @@ static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1664 struct tcf_bind_args *a = (void *)arg; 1663 struct tcf_bind_args *a = (void *)arg;
1665 1664
1666 if (tp->ops->bind_class) { 1665 if (tp->ops->bind_class) {
1667 tcf_tree_lock(tp); 1666 struct Qdisc *q = tcf_block_q(tp->chain->block);
1667
1668 sch_tree_lock(q);
1668 tp->ops->bind_class(n, a->classid, a->cl); 1669 tp->ops->bind_class(n, a->classid, a->cl);
1669 tcf_tree_unlock(tp); 1670 sch_tree_unlock(q);
1670 } 1671 }
1671 return 0; 1672 return 0;
1672} 1673}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index c5fcdf1a58a0..2dbd249c0b2f 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -281,7 +281,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
281 goto err_out; 281 goto err_out;
282 } 282 }
283 283
284 error = tcf_block_get(&flow->block, &flow->filter_list); 284 error = tcf_block_get(&flow->block, &flow->filter_list, sch);
285 if (error) { 285 if (error) {
286 kfree(flow); 286 kfree(flow);
287 goto err_out; 287 goto err_out;
@@ -546,7 +546,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
546 p->link.q = &noop_qdisc; 546 p->link.q = &noop_qdisc;
547 pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); 547 pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
548 548
549 err = tcf_block_get(&p->link.block, &p->link.filter_list); 549 err = tcf_block_get(&p->link.block, &p->link.filter_list, sch);
550 if (err) 550 if (err)
551 return err; 551 return err;
552 552
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index dcef97fa8047..6361be7881f1 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -255,6 +255,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
255 case TC_ACT_STOLEN: 255 case TC_ACT_STOLEN:
256 case TC_ACT_TRAP: 256 case TC_ACT_TRAP:
257 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 257 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
258 /* fall through */
258 case TC_ACT_SHOT: 259 case TC_ACT_SHOT:
259 return NULL; 260 return NULL;
260 case TC_ACT_RECLASSIFY: 261 case TC_ACT_RECLASSIFY:
@@ -1566,7 +1567,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1566 if (cl == NULL) 1567 if (cl == NULL)
1567 goto failure; 1568 goto failure;
1568 1569
1569 err = tcf_block_get(&cl->block, &cl->filter_list); 1570 err = tcf_block_get(&cl->block, &cl->filter_list, sch);
1570 if (err) { 1571 if (err) {
1571 kfree(cl); 1572 kfree(cl);
1572 return err; 1573 return err;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
new file mode 100644
index 000000000000..7a72980c1509
--- /dev/null
+++ b/net/sched/sch_cbs.c
@@ -0,0 +1,373 @@
1/*
2 * net/sched/sch_cbs.c Credit Based Shaper
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com>
10 *
11 */
12
13/* Credit Based Shaper (CBS)
14 * =========================
15 *
16 * This is a simple rate-limiting shaper aimed at TSN applications on
17 * systems with known traffic workloads.
18 *
19 * Its algorithm is defined by the IEEE 802.1Q-2014 Specification,
20 * Section 8.6.8.2, and explained in more detail in the Annex L of the
21 * same specification.
22 *
23 * There are four tunables to be considered:
24 *
25 * 'idleslope': Idleslope is the rate of credits that is
26 * accumulated (in kilobits per second) when there is at least
27 * one packet waiting for transmission. Packets are transmitted
28 * when the current value of credits is equal or greater than
29 * zero. When there is no packet to be transmitted the amount of
30 * credits is set to zero. This is the main tunable of the CBS
31 * algorithm.
32 *
33 * 'sendslope':
34 * Sendslope is the rate of credits that is depleted (it should be a
35 * negative number of kilobits per second) when a transmission is
36 * ocurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section
37 * 8.6.8.2 item g):
38 *
39 * sendslope = idleslope - port_transmit_rate
40 *
41 * 'hicredit': Hicredit defines the maximum amount of credits (in
42 * bytes) that can be accumulated. Hicredit depends on the
43 * characteristics of interfering traffic,
44 * 'max_interference_size' is the maximum size of any burst of
45 * traffic that can delay the transmission of a frame that is
46 * available for transmission for this traffic class, (IEEE
47 * 802.1Q-2014 Annex L, Equation L-3):
48 *
49 * hicredit = max_interference_size * (idleslope / port_transmit_rate)
50 *
51 * 'locredit': Locredit is the minimum amount of credits that can
52 * be reached. It is a function of the traffic flowing through
53 * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2):
54 *
55 * locredit = max_frame_size * (sendslope / port_transmit_rate)
56 */
57
58#include <linux/module.h>
59#include <linux/types.h>
60#include <linux/kernel.h>
61#include <linux/string.h>
62#include <linux/errno.h>
63#include <linux/skbuff.h>
64#include <net/netlink.h>
65#include <net/sch_generic.h>
66#include <net/pkt_sched.h>
67
68#define BYTES_PER_KBIT (1000LL / 8)
69
70struct cbs_sched_data {
71 bool offload;
72 int queue;
73 s64 port_rate; /* in bytes/s */
74 s64 last; /* timestamp in ns */
75 s64 credits; /* in bytes */
76 s32 locredit; /* in bytes */
77 s32 hicredit; /* in bytes */
78 s64 sendslope; /* in bytes/s */
79 s64 idleslope; /* in bytes/s */
80 struct qdisc_watchdog watchdog;
81 int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch);
82 struct sk_buff *(*dequeue)(struct Qdisc *sch);
83};
84
85static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch)
86{
87 return qdisc_enqueue_tail(skb, sch);
88}
89
90static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch)
91{
92 struct cbs_sched_data *q = qdisc_priv(sch);
93
94 if (sch->q.qlen == 0 && q->credits > 0) {
95 /* We need to stop accumulating credits when there's
96 * no enqueued packets and q->credits is positive.
97 */
98 q->credits = 0;
99 q->last = ktime_get_ns();
100 }
101
102 return qdisc_enqueue_tail(skb, sch);
103}
104
105static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
106 struct sk_buff **to_free)
107{
108 struct cbs_sched_data *q = qdisc_priv(sch);
109
110 return q->enqueue(skb, sch);
111}
112
113/* timediff is in ns, slope is in bytes/s */
114static s64 timediff_to_credits(s64 timediff, s64 slope)
115{
116 return div64_s64(timediff * slope, NSEC_PER_SEC);
117}
118
119static s64 delay_from_credits(s64 credits, s64 slope)
120{
121 if (unlikely(slope == 0))
122 return S64_MAX;
123
124 return div64_s64(-credits * NSEC_PER_SEC, slope);
125}
126
127static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate)
128{
129 if (unlikely(port_rate == 0))
130 return S64_MAX;
131
132 return div64_s64(len * slope, port_rate);
133}
134
135static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
136{
137 struct cbs_sched_data *q = qdisc_priv(sch);
138 s64 now = ktime_get_ns();
139 struct sk_buff *skb;
140 s64 credits;
141 int len;
142
143 if (q->credits < 0) {
144 credits = timediff_to_credits(now - q->last, q->idleslope);
145
146 credits = q->credits + credits;
147 q->credits = min_t(s64, credits, q->hicredit);
148
149 if (q->credits < 0) {
150 s64 delay;
151
152 delay = delay_from_credits(q->credits, q->idleslope);
153 qdisc_watchdog_schedule_ns(&q->watchdog, now + delay);
154
155 q->last = now;
156
157 return NULL;
158 }
159 }
160
161 skb = qdisc_dequeue_head(sch);
162 if (!skb)
163 return NULL;
164
165 len = qdisc_pkt_len(skb);
166
167 /* As sendslope is a negative number, this will decrease the
168 * amount of q->credits.
169 */
170 credits = credits_from_len(len, q->sendslope, q->port_rate);
171 credits += q->credits;
172
173 q->credits = max_t(s64, credits, q->locredit);
174 q->last = now;
175
176 return skb;
177}
178
179static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch)
180{
181 return qdisc_dequeue_head(sch);
182}
183
184static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
185{
186 struct cbs_sched_data *q = qdisc_priv(sch);
187
188 return q->dequeue(sch);
189}
190
191static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
192 [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) },
193};
194
195static void cbs_disable_offload(struct net_device *dev,
196 struct cbs_sched_data *q)
197{
198 struct tc_cbs_qopt_offload cbs = { };
199 const struct net_device_ops *ops;
200 int err;
201
202 if (!q->offload)
203 return;
204
205 q->enqueue = cbs_enqueue_soft;
206 q->dequeue = cbs_dequeue_soft;
207
208 ops = dev->netdev_ops;
209 if (!ops->ndo_setup_tc)
210 return;
211
212 cbs.queue = q->queue;
213 cbs.enable = 0;
214
215 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
216 if (err < 0)
217 pr_warn("Couldn't disable CBS offload for queue %d\n",
218 cbs.queue);
219}
220
221static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
222 const struct tc_cbs_qopt *opt)
223{
224 const struct net_device_ops *ops = dev->netdev_ops;
225 struct tc_cbs_qopt_offload cbs = { };
226 int err;
227
228 if (!ops->ndo_setup_tc)
229 return -EOPNOTSUPP;
230
231 cbs.queue = q->queue;
232
233 cbs.enable = 1;
234 cbs.hicredit = opt->hicredit;
235 cbs.locredit = opt->locredit;
236 cbs.idleslope = opt->idleslope;
237 cbs.sendslope = opt->sendslope;
238
239 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
240 if (err < 0)
241 return err;
242
243 q->enqueue = cbs_enqueue_offload;
244 q->dequeue = cbs_dequeue_offload;
245
246 return 0;
247}
248
249static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
250{
251 struct cbs_sched_data *q = qdisc_priv(sch);
252 struct net_device *dev = qdisc_dev(sch);
253 struct nlattr *tb[TCA_CBS_MAX + 1];
254 struct tc_cbs_qopt *qopt;
255 int err;
256
257 err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
258 if (err < 0)
259 return err;
260
261 if (!tb[TCA_CBS_PARMS])
262 return -EINVAL;
263
264 qopt = nla_data(tb[TCA_CBS_PARMS]);
265
266 if (!qopt->offload) {
267 struct ethtool_link_ksettings ecmd;
268 s64 link_speed;
269
270 if (!__ethtool_get_link_ksettings(dev, &ecmd))
271 link_speed = ecmd.base.speed;
272 else
273 link_speed = SPEED_1000;
274
275 q->port_rate = link_speed * 1000 * BYTES_PER_KBIT;
276
277 cbs_disable_offload(dev, q);
278 } else {
279 err = cbs_enable_offload(dev, q, qopt);
280 if (err < 0)
281 return err;
282 }
283
284 /* Everything went OK, save the parameters used. */
285 q->hicredit = qopt->hicredit;
286 q->locredit = qopt->locredit;
287 q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
288 q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
289 q->offload = qopt->offload;
290
291 return 0;
292}
293
294static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
295{
296 struct cbs_sched_data *q = qdisc_priv(sch);
297 struct net_device *dev = qdisc_dev(sch);
298
299 if (!opt)
300 return -EINVAL;
301
302 q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
303
304 q->enqueue = cbs_enqueue_soft;
305 q->dequeue = cbs_dequeue_soft;
306
307 qdisc_watchdog_init(&q->watchdog, sch);
308
309 return cbs_change(sch, opt);
310}
311
312static void cbs_destroy(struct Qdisc *sch)
313{
314 struct cbs_sched_data *q = qdisc_priv(sch);
315 struct net_device *dev = qdisc_dev(sch);
316
317 qdisc_watchdog_cancel(&q->watchdog);
318
319 cbs_disable_offload(dev, q);
320}
321
322static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
323{
324 struct cbs_sched_data *q = qdisc_priv(sch);
325 struct tc_cbs_qopt opt = { };
326 struct nlattr *nest;
327
328 nest = nla_nest_start(skb, TCA_OPTIONS);
329 if (!nest)
330 goto nla_put_failure;
331
332 opt.hicredit = q->hicredit;
333 opt.locredit = q->locredit;
334 opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
335 opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
336 opt.offload = q->offload;
337
338 if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
339 goto nla_put_failure;
340
341 return nla_nest_end(skb, nest);
342
343nla_put_failure:
344 nla_nest_cancel(skb, nest);
345 return -1;
346}
347
348static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
349 .id = "cbs",
350 .priv_size = sizeof(struct cbs_sched_data),
351 .enqueue = cbs_enqueue,
352 .dequeue = cbs_dequeue,
353 .peek = qdisc_peek_dequeued,
354 .init = cbs_init,
355 .reset = qdisc_reset_queue,
356 .destroy = cbs_destroy,
357 .change = cbs_change,
358 .dump = cbs_dump,
359 .owner = THIS_MODULE,
360};
361
362static int __init cbs_module_init(void)
363{
364 return register_qdisc(&cbs_qdisc_ops);
365}
366
367static void __exit cbs_module_exit(void)
368{
369 unregister_qdisc(&cbs_qdisc_ops);
370}
371module_init(cbs_module_init)
372module_exit(cbs_module_exit)
373MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 2d0e8d4bdc29..5bbcef3dcd8c 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -321,6 +321,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
321 case TC_ACT_STOLEN: 321 case TC_ACT_STOLEN:
322 case TC_ACT_TRAP: 322 case TC_ACT_TRAP:
323 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 323 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
324 /* fall through */
324 case TC_ACT_SHOT: 325 case TC_ACT_SHOT:
325 return NULL; 326 return NULL;
326 } 327 }
@@ -412,7 +413,7 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
412 struct drr_sched *q = qdisc_priv(sch); 413 struct drr_sched *q = qdisc_priv(sch);
413 int err; 414 int err;
414 415
415 err = tcf_block_get(&q->block, &q->filter_list); 416 err = tcf_block_get(&q->block, &q->filter_list, sch);
416 if (err) 417 if (err)
417 return err; 418 return err;
418 err = qdisc_class_hash_init(&q->clhash); 419 err = qdisc_class_hash_init(&q->clhash);
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 2836c80c7aa5..fb4fb71c68cf 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -344,7 +344,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
344 if (!opt) 344 if (!opt)
345 goto errout; 345 goto errout;
346 346
347 err = tcf_block_get(&p->block, &p->filter_list); 347 err = tcf_block_get(&p->block, &p->filter_list, sch);
348 if (err) 348 if (err)
349 return err; 349 return err;
350 350
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index de3b57ceca7b..0305d791ea94 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -105,6 +105,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
105 case TC_ACT_QUEUED: 105 case TC_ACT_QUEUED:
106 case TC_ACT_TRAP: 106 case TC_ACT_TRAP:
107 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 107 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
108 /* fall through */
108 case TC_ACT_SHOT: 109 case TC_ACT_SHOT:
109 return 0; 110 return 0;
110 } 111 }
@@ -481,7 +482,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
481 return err; 482 return err;
482 } 483 }
483 484
484 err = tcf_block_get(&q->block, &q->filter_list); 485 err = tcf_block_get(&q->block, &q->filter_list, sch);
485 if (err) 486 if (err)
486 return err; 487 return err;
487 488
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index bf8c81e07c70..3839cbbdc32b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -288,9 +288,9 @@ unsigned long dev_trans_start(struct net_device *dev)
288} 288}
289EXPORT_SYMBOL(dev_trans_start); 289EXPORT_SYMBOL(dev_trans_start);
290 290
291static void dev_watchdog(unsigned long arg) 291static void dev_watchdog(struct timer_list *t)
292{ 292{
293 struct net_device *dev = (struct net_device *)arg; 293 struct net_device *dev = from_timer(dev, t, watchdog_timer);
294 294
295 netif_tx_lock(dev); 295 netif_tx_lock(dev);
296 if (!qdisc_tx_is_noop(dev)) { 296 if (!qdisc_tx_is_noop(dev)) {
@@ -603,8 +603,14 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
603 struct Qdisc *sch; 603 struct Qdisc *sch;
604 unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size; 604 unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
605 int err = -ENOBUFS; 605 int err = -ENOBUFS;
606 struct net_device *dev = dev_queue->dev; 606 struct net_device *dev;
607
608 if (!dev_queue) {
609 err = -EINVAL;
610 goto errout;
611 }
607 612
613 dev = dev_queue->dev;
608 p = kzalloc_node(size, GFP_KERNEL, 614 p = kzalloc_node(size, GFP_KERNEL,
609 netdev_queue_numa_node_read(dev_queue)); 615 netdev_queue_numa_node_read(dev_queue));
610 616
@@ -689,10 +695,8 @@ void qdisc_reset(struct Qdisc *qdisc)
689} 695}
690EXPORT_SYMBOL(qdisc_reset); 696EXPORT_SYMBOL(qdisc_reset);
691 697
692static void qdisc_rcu_free(struct rcu_head *head) 698static void qdisc_free(struct Qdisc *qdisc)
693{ 699{
694 struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
695
696 if (qdisc_is_percpu_stats(qdisc)) { 700 if (qdisc_is_percpu_stats(qdisc)) {
697 free_percpu(qdisc->cpu_bstats); 701 free_percpu(qdisc->cpu_bstats);
698 free_percpu(qdisc->cpu_qstats); 702 free_percpu(qdisc->cpu_qstats);
@@ -725,11 +729,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
725 729
726 kfree_skb_list(qdisc->gso_skb); 730 kfree_skb_list(qdisc->gso_skb);
727 kfree_skb(qdisc->skb_bad_txq); 731 kfree_skb(qdisc->skb_bad_txq);
728 /* 732 qdisc_free(qdisc);
729 * gen_estimator est_timer() might access qdisc->q.lock,
730 * wait a RCU grace period before freeing qdisc.
731 */
732 call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
733} 733}
734EXPORT_SYMBOL(qdisc_destroy); 734EXPORT_SYMBOL(qdisc_destroy);
735 735
@@ -960,7 +960,7 @@ void dev_init_scheduler(struct net_device *dev)
960 if (dev_ingress_queue(dev)) 960 if (dev_ingress_queue(dev))
961 dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); 961 dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
962 962
963 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); 963 timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
964} 964}
965 965
966static void shutdown_scheduler_queue(struct net_device *dev, 966static void shutdown_scheduler_queue(struct net_device *dev,
@@ -1024,3 +1024,49 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
1024 } 1024 }
1025} 1025}
1026EXPORT_SYMBOL(psched_ratecfg_precompute); 1026EXPORT_SYMBOL(psched_ratecfg_precompute);
1027
1028static void mini_qdisc_rcu_func(struct rcu_head *head)
1029{
1030}
1031
1032void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
1033 struct tcf_proto *tp_head)
1034{
1035 struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
1036 struct mini_Qdisc *miniq;
1037
1038 if (!tp_head) {
1039 RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
1040 return;
1041 }
1042
1043 miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
1044 &miniqp->miniq1 : &miniqp->miniq2;
1045
1046 /* We need to make sure that readers won't see the miniq
1047 * we are about to modify. So wait until previous call_rcu_bh callback
1048 * is done.
1049 */
1050 rcu_barrier_bh();
1051 miniq->filter_list = tp_head;
1052 rcu_assign_pointer(*miniqp->p_miniq, miniq);
1053
1054 if (miniq_old)
1055 /* This is counterpart of the rcu barrier above. We need to
1056 * block potential new user of miniq_old until all readers
1057 * are not seeing it.
1058 */
1059 call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func);
1060}
1061EXPORT_SYMBOL(mini_qdisc_pair_swap);
1062
1063void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
1064 struct mini_Qdisc __rcu **p_miniq)
1065{
1066 miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
1067 miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
1068 miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
1069 miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
1070 miniqp->p_miniq = p_miniq;
1071}
1072EXPORT_SYMBOL(mini_qdisc_pair_init);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3f88b75488b0..d04068a97d81 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1033,7 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1033 if (cl == NULL) 1033 if (cl == NULL)
1034 return -ENOBUFS; 1034 return -ENOBUFS;
1035 1035
1036 err = tcf_block_get(&cl->block, &cl->filter_list); 1036 err = tcf_block_get(&cl->block, &cl->filter_list, sch);
1037 if (err) { 1037 if (err) {
1038 kfree(cl); 1038 kfree(cl);
1039 return err; 1039 return err;
@@ -1144,6 +1144,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
1144 case TC_ACT_STOLEN: 1144 case TC_ACT_STOLEN:
1145 case TC_ACT_TRAP: 1145 case TC_ACT_TRAP:
1146 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 1146 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
1147 /* fall through */
1147 case TC_ACT_SHOT: 1148 case TC_ACT_SHOT:
1148 return NULL; 1149 return NULL;
1149 } 1150 }
@@ -1405,7 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1405 return err; 1406 return err;
1406 q->eligible = RB_ROOT; 1407 q->eligible = RB_ROOT;
1407 1408
1408 err = tcf_block_get(&q->root.block, &q->root.filter_list); 1409 err = tcf_block_get(&q->root.block, &q->root.filter_list, sch);
1409 if (err) 1410 if (err)
1410 return err; 1411 return err;
1411 1412
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 7e148376ba52..fa0380730ff0 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -142,6 +142,7 @@ struct htb_class {
142 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ 142 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
143 143
144 unsigned int drops ____cacheline_aligned_in_smp; 144 unsigned int drops ____cacheline_aligned_in_smp;
145 unsigned int overlimits;
145}; 146};
146 147
147struct htb_level { 148struct htb_level {
@@ -243,6 +244,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
243 case TC_ACT_STOLEN: 244 case TC_ACT_STOLEN:
244 case TC_ACT_TRAP: 245 case TC_ACT_TRAP:
245 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 246 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
247 /* fall through */
246 case TC_ACT_SHOT: 248 case TC_ACT_SHOT:
247 return NULL; 249 return NULL;
248 } 250 }
@@ -533,6 +535,9 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
533 if (new_mode == cl->cmode) 535 if (new_mode == cl->cmode)
534 return; 536 return;
535 537
538 if (new_mode == HTB_CANT_SEND)
539 cl->overlimits++;
540
536 if (cl->prio_activity) { /* not necessary: speed optimization */ 541 if (cl->prio_activity) { /* not necessary: speed optimization */
537 if (cl->cmode != HTB_CANT_SEND) 542 if (cl->cmode != HTB_CANT_SEND)
538 htb_deactivate_prios(q, cl); 543 htb_deactivate_prios(q, cl);
@@ -1026,7 +1031,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1026 if (!opt) 1031 if (!opt)
1027 return -EINVAL; 1032 return -EINVAL;
1028 1033
1029 err = tcf_block_get(&q->block, &q->filter_list); 1034 err = tcf_block_get(&q->block, &q->filter_list, sch);
1030 if (err) 1035 if (err)
1031 return err; 1036 return err;
1032 1037
@@ -1143,6 +1148,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1143 struct htb_class *cl = (struct htb_class *)arg; 1148 struct htb_class *cl = (struct htb_class *)arg;
1144 struct gnet_stats_queue qs = { 1149 struct gnet_stats_queue qs = {
1145 .drops = cl->drops, 1150 .drops = cl->drops,
1151 .overlimits = cl->overlimits,
1146 }; 1152 };
1147 __u32 qlen = 0; 1153 __u32 qlen = 0;
1148 1154
@@ -1388,7 +1394,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1388 if (!cl) 1394 if (!cl)
1389 goto failure; 1395 goto failure;
1390 1396
1391 err = tcf_block_get(&cl->block, &cl->filter_list); 1397 err = tcf_block_get(&cl->block, &cl->filter_list, sch);
1392 if (err) { 1398 if (err) {
1393 kfree(cl); 1399 kfree(cl);
1394 goto failure; 1400 goto failure;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 44de4ee51ce9..5ecc38f35d47 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -20,6 +20,8 @@
20 20
21struct ingress_sched_data { 21struct ingress_sched_data {
22 struct tcf_block *block; 22 struct tcf_block *block;
23 struct tcf_block_ext_info block_info;
24 struct mini_Qdisc_pair miniqp;
23}; 25};
24 26
25static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) 27static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
@@ -53,13 +55,26 @@ static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl)
53 return q->block; 55 return q->block;
54} 56}
55 57
58static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv)
59{
60 struct mini_Qdisc_pair *miniqp = priv;
61
62 mini_qdisc_pair_swap(miniqp, tp_head);
63}
64
56static int ingress_init(struct Qdisc *sch, struct nlattr *opt) 65static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
57{ 66{
58 struct ingress_sched_data *q = qdisc_priv(sch); 67 struct ingress_sched_data *q = qdisc_priv(sch);
59 struct net_device *dev = qdisc_dev(sch); 68 struct net_device *dev = qdisc_dev(sch);
60 int err; 69 int err;
61 70
62 err = tcf_block_get(&q->block, &dev->ingress_cl_list); 71 mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
72
73 q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
74 q->block_info.chain_head_change = clsact_chain_head_change;
75 q->block_info.chain_head_change_priv = &q->miniqp;
76
77 err = tcf_block_get_ext(&q->block, sch, &q->block_info);
63 if (err) 78 if (err)
64 return err; 79 return err;
65 80
@@ -73,7 +88,7 @@ static void ingress_destroy(struct Qdisc *sch)
73{ 88{
74 struct ingress_sched_data *q = qdisc_priv(sch); 89 struct ingress_sched_data *q = qdisc_priv(sch);
75 90
76 tcf_block_put(q->block); 91 tcf_block_put_ext(q->block, sch, &q->block_info);
77 net_dec_ingress_queue(); 92 net_dec_ingress_queue();
78} 93}
79 94
@@ -114,6 +129,10 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
114struct clsact_sched_data { 129struct clsact_sched_data {
115 struct tcf_block *ingress_block; 130 struct tcf_block *ingress_block;
116 struct tcf_block *egress_block; 131 struct tcf_block *egress_block;
132 struct tcf_block_ext_info ingress_block_info;
133 struct tcf_block_ext_info egress_block_info;
134 struct mini_Qdisc_pair miniqp_ingress;
135 struct mini_Qdisc_pair miniqp_egress;
117}; 136};
118 137
119static unsigned long clsact_find(struct Qdisc *sch, u32 classid) 138static unsigned long clsact_find(struct Qdisc *sch, u32 classid)
@@ -153,13 +172,25 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
153 struct net_device *dev = qdisc_dev(sch); 172 struct net_device *dev = qdisc_dev(sch);
154 int err; 173 int err;
155 174
156 err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list); 175 mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
176
177 q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
178 q->ingress_block_info.chain_head_change = clsact_chain_head_change;
179 q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress;
180
181 err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info);
157 if (err) 182 if (err)
158 return err; 183 return err;
159 184
160 err = tcf_block_get(&q->egress_block, &dev->egress_cl_list); 185 mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
186
187 q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
188 q->egress_block_info.chain_head_change = clsact_chain_head_change;
189 q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
190
191 err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
161 if (err) 192 if (err)
162 return err; 193 goto err_egress_block_get;
163 194
164 net_inc_ingress_queue(); 195 net_inc_ingress_queue();
165 net_inc_egress_queue(); 196 net_inc_egress_queue();
@@ -167,14 +198,18 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
167 sch->flags |= TCQ_F_CPUSTATS; 198 sch->flags |= TCQ_F_CPUSTATS;
168 199
169 return 0; 200 return 0;
201
202err_egress_block_get:
203 tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
204 return err;
170} 205}
171 206
172static void clsact_destroy(struct Qdisc *sch) 207static void clsact_destroy(struct Qdisc *sch)
173{ 208{
174 struct clsact_sched_data *q = qdisc_priv(sch); 209 struct clsact_sched_data *q = qdisc_priv(sch);
175 210
176 tcf_block_put(q->egress_block); 211 tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
177 tcf_block_put(q->ingress_block); 212 tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
178 213
179 net_dec_ingress_queue(); 214 net_dec_ingress_queue();
180 net_dec_egress_queue(); 215 net_dec_egress_queue();
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f3a3e507422b..213b586a06a0 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -130,15 +130,7 @@ static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
130static struct netdev_queue *mq_select_queue(struct Qdisc *sch, 130static struct netdev_queue *mq_select_queue(struct Qdisc *sch,
131 struct tcmsg *tcm) 131 struct tcmsg *tcm)
132{ 132{
133 unsigned int ntx = TC_H_MIN(tcm->tcm_parent); 133 return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
134 struct netdev_queue *dev_queue = mq_queue_get(sch, ntx);
135
136 if (!dev_queue) {
137 struct net_device *dev = qdisc_dev(sch);
138
139 return netdev_get_tx_queue(dev, 0);
140 }
141 return dev_queue;
142} 134}
143 135
144static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, 136static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 6bcdfe6e7b63..b85885a9d8a1 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -18,10 +18,16 @@
18#include <net/netlink.h> 18#include <net/netlink.h>
19#include <net/pkt_sched.h> 19#include <net/pkt_sched.h>
20#include <net/sch_generic.h> 20#include <net/sch_generic.h>
21#include <net/pkt_cls.h>
21 22
22struct mqprio_sched { 23struct mqprio_sched {
23 struct Qdisc **qdiscs; 24 struct Qdisc **qdiscs;
25 u16 mode;
26 u16 shaper;
24 int hw_offload; 27 int hw_offload;
28 u32 flags;
29 u64 min_rate[TC_QOPT_MAX_QUEUE];
30 u64 max_rate[TC_QOPT_MAX_QUEUE];
25}; 31};
26 32
27static void mqprio_destroy(struct Qdisc *sch) 33static void mqprio_destroy(struct Qdisc *sch)
@@ -39,9 +45,18 @@ static void mqprio_destroy(struct Qdisc *sch)
39 } 45 }
40 46
41 if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) { 47 if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
42 struct tc_mqprio_qopt mqprio = {}; 48 struct tc_mqprio_qopt_offload mqprio = { { 0 } };
43 49
44 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, &mqprio); 50 switch (priv->mode) {
51 case TC_MQPRIO_MODE_DCB:
52 case TC_MQPRIO_MODE_CHANNEL:
53 dev->netdev_ops->ndo_setup_tc(dev,
54 TC_SETUP_QDISC_MQPRIO,
55 &mqprio);
56 break;
57 default:
58 return;
59 }
45 } else { 60 } else {
46 netdev_set_num_tc(dev, 0); 61 netdev_set_num_tc(dev, 0);
47 } 62 }
@@ -97,6 +112,26 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
97 return 0; 112 return 0;
98} 113}
99 114
115static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = {
116 [TCA_MQPRIO_MODE] = { .len = sizeof(u16) },
117 [TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) },
118 [TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED },
119 [TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED },
120};
121
122static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
123 const struct nla_policy *policy, int len)
124{
125 int nested_len = nla_len(nla) - NLA_ALIGN(len);
126
127 if (nested_len >= nla_attr_size(0))
128 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
129 nested_len, policy, NULL);
130
131 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
132 return 0;
133}
134
100static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) 135static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
101{ 136{
102 struct net_device *dev = qdisc_dev(sch); 137 struct net_device *dev = qdisc_dev(sch);
@@ -105,6 +140,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
105 struct Qdisc *qdisc; 140 struct Qdisc *qdisc;
106 int i, err = -EOPNOTSUPP; 141 int i, err = -EOPNOTSUPP;
107 struct tc_mqprio_qopt *qopt = NULL; 142 struct tc_mqprio_qopt *qopt = NULL;
143 struct nlattr *tb[TCA_MQPRIO_MAX + 1];
144 struct nlattr *attr;
145 int rem;
146 int len;
108 147
109 BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); 148 BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
110 BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); 149 BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
@@ -115,6 +154,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
115 if (!netif_is_multiqueue(dev)) 154 if (!netif_is_multiqueue(dev))
116 return -EOPNOTSUPP; 155 return -EOPNOTSUPP;
117 156
157 /* make certain can allocate enough classids to handle queues */
158 if (dev->num_tx_queues >= TC_H_MIN_PRIORITY)
159 return -ENOMEM;
160
118 if (!opt || nla_len(opt) < sizeof(*qopt)) 161 if (!opt || nla_len(opt) < sizeof(*qopt))
119 return -EINVAL; 162 return -EINVAL;
120 163
@@ -122,6 +165,59 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
122 if (mqprio_parse_opt(dev, qopt)) 165 if (mqprio_parse_opt(dev, qopt))
123 return -EINVAL; 166 return -EINVAL;
124 167
168 len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
169 if (len > 0) {
170 err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
171 sizeof(*qopt));
172 if (err < 0)
173 return err;
174
175 if (!qopt->hw)
176 return -EINVAL;
177
178 if (tb[TCA_MQPRIO_MODE]) {
179 priv->flags |= TC_MQPRIO_F_MODE;
180 priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
181 }
182
183 if (tb[TCA_MQPRIO_SHAPER]) {
184 priv->flags |= TC_MQPRIO_F_SHAPER;
185 priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
186 }
187
188 if (tb[TCA_MQPRIO_MIN_RATE64]) {
189 if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
190 return -EINVAL;
191 i = 0;
192 nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
193 rem) {
194 if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
195 return -EINVAL;
196 if (i >= qopt->num_tc)
197 break;
198 priv->min_rate[i] = *(u64 *)nla_data(attr);
199 i++;
200 }
201 priv->flags |= TC_MQPRIO_F_MIN_RATE;
202 }
203
204 if (tb[TCA_MQPRIO_MAX_RATE64]) {
205 if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
206 return -EINVAL;
207 i = 0;
208 nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
209 rem) {
210 if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
211 return -EINVAL;
212 if (i >= qopt->num_tc)
213 break;
214 priv->max_rate[i] = *(u64 *)nla_data(attr);
215 i++;
216 }
217 priv->flags |= TC_MQPRIO_F_MAX_RATE;
218 }
219 }
220
125 /* pre-allocate qdisc, attachment can't fail */ 221 /* pre-allocate qdisc, attachment can't fail */
126 priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), 222 priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
127 GFP_KERNEL); 223 GFP_KERNEL);
@@ -146,14 +242,36 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
146 * supplied and verified mapping 242 * supplied and verified mapping
147 */ 243 */
148 if (qopt->hw) { 244 if (qopt->hw) {
149 struct tc_mqprio_qopt mqprio = *qopt; 245 struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
150 246
151 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, 247 switch (priv->mode) {
248 case TC_MQPRIO_MODE_DCB:
249 if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
250 return -EINVAL;
251 break;
252 case TC_MQPRIO_MODE_CHANNEL:
253 mqprio.flags = priv->flags;
254 if (priv->flags & TC_MQPRIO_F_MODE)
255 mqprio.mode = priv->mode;
256 if (priv->flags & TC_MQPRIO_F_SHAPER)
257 mqprio.shaper = priv->shaper;
258 if (priv->flags & TC_MQPRIO_F_MIN_RATE)
259 for (i = 0; i < mqprio.qopt.num_tc; i++)
260 mqprio.min_rate[i] = priv->min_rate[i];
261 if (priv->flags & TC_MQPRIO_F_MAX_RATE)
262 for (i = 0; i < mqprio.qopt.num_tc; i++)
263 mqprio.max_rate[i] = priv->max_rate[i];
264 break;
265 default:
266 return -EINVAL;
267 }
268 err = dev->netdev_ops->ndo_setup_tc(dev,
269 TC_SETUP_QDISC_MQPRIO,
152 &mqprio); 270 &mqprio);
153 if (err) 271 if (err)
154 return err; 272 return err;
155 273
156 priv->hw_offload = mqprio.hw; 274 priv->hw_offload = mqprio.qopt.hw;
157 } else { 275 } else {
158 netdev_set_num_tc(dev, qopt->num_tc); 276 netdev_set_num_tc(dev, qopt->num_tc);
159 for (i = 0; i < qopt->num_tc; i++) 277 for (i = 0; i < qopt->num_tc; i++)
@@ -193,7 +311,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
193 unsigned long cl) 311 unsigned long cl)
194{ 312{
195 struct net_device *dev = qdisc_dev(sch); 313 struct net_device *dev = qdisc_dev(sch);
196 unsigned long ntx = cl - 1 - netdev_get_num_tc(dev); 314 unsigned long ntx = cl - 1;
197 315
198 if (ntx >= dev->num_tx_queues) 316 if (ntx >= dev->num_tx_queues)
199 return NULL; 317 return NULL;
@@ -223,11 +341,51 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
223 return 0; 341 return 0;
224} 342}
225 343
344static int dump_rates(struct mqprio_sched *priv,
345 struct tc_mqprio_qopt *opt, struct sk_buff *skb)
346{
347 struct nlattr *nest;
348 int i;
349
350 if (priv->flags & TC_MQPRIO_F_MIN_RATE) {
351 nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64);
352 if (!nest)
353 goto nla_put_failure;
354
355 for (i = 0; i < opt->num_tc; i++) {
356 if (nla_put(skb, TCA_MQPRIO_MIN_RATE64,
357 sizeof(priv->min_rate[i]),
358 &priv->min_rate[i]))
359 goto nla_put_failure;
360 }
361 nla_nest_end(skb, nest);
362 }
363
364 if (priv->flags & TC_MQPRIO_F_MAX_RATE) {
365 nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64);
366 if (!nest)
367 goto nla_put_failure;
368
369 for (i = 0; i < opt->num_tc; i++) {
370 if (nla_put(skb, TCA_MQPRIO_MAX_RATE64,
371 sizeof(priv->max_rate[i]),
372 &priv->max_rate[i]))
373 goto nla_put_failure;
374 }
375 nla_nest_end(skb, nest);
376 }
377 return 0;
378
379nla_put_failure:
380 nla_nest_cancel(skb, nest);
381 return -1;
382}
383
226static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) 384static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
227{ 385{
228 struct net_device *dev = qdisc_dev(sch); 386 struct net_device *dev = qdisc_dev(sch);
229 struct mqprio_sched *priv = qdisc_priv(sch); 387 struct mqprio_sched *priv = qdisc_priv(sch);
230 unsigned char *b = skb_tail_pointer(skb); 388 struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
231 struct tc_mqprio_qopt opt = { 0 }; 389 struct tc_mqprio_qopt opt = { 0 };
232 struct Qdisc *qdisc; 390 struct Qdisc *qdisc;
233 unsigned int i; 391 unsigned int i;
@@ -258,12 +416,25 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
258 opt.offset[i] = dev->tc_to_txq[i].offset; 416 opt.offset[i] = dev->tc_to_txq[i].offset;
259 } 417 }
260 418
261 if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) 419 if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
420 goto nla_put_failure;
421
422 if ((priv->flags & TC_MQPRIO_F_MODE) &&
423 nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode))
262 goto nla_put_failure; 424 goto nla_put_failure;
263 425
264 return skb->len; 426 if ((priv->flags & TC_MQPRIO_F_SHAPER) &&
427 nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper))
428 goto nla_put_failure;
429
430 if ((priv->flags & TC_MQPRIO_F_MIN_RATE ||
431 priv->flags & TC_MQPRIO_F_MAX_RATE) &&
432 (dump_rates(priv, &opt, skb) != 0))
433 goto nla_put_failure;
434
435 return nla_nest_end(skb, nla);
265nla_put_failure: 436nla_put_failure:
266 nlmsg_trim(skb, b); 437 nlmsg_trim(skb, nla);
267 return -1; 438 return -1;
268} 439}
269 440
@@ -282,38 +453,35 @@ static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
282 struct net_device *dev = qdisc_dev(sch); 453 struct net_device *dev = qdisc_dev(sch);
283 unsigned int ntx = TC_H_MIN(classid); 454 unsigned int ntx = TC_H_MIN(classid);
284 455
285 if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev)) 456 /* There are essentially two regions here that have valid classid
286 return 0; 457 * values. The first region will have a classid value of 1 through
287 return ntx; 458 * num_tx_queues. All of these are backed by actual Qdiscs.
459 */
460 if (ntx < TC_H_MIN_PRIORITY)
461 return (ntx <= dev->num_tx_queues) ? ntx : 0;
462
463 /* The second region represents the hardware traffic classes. These
464 * are represented by classid values of TC_H_MIN_PRIORITY through
465 * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1
466 */
467 return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0;
288} 468}
289 469
290static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, 470static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
291 struct sk_buff *skb, struct tcmsg *tcm) 471 struct sk_buff *skb, struct tcmsg *tcm)
292{ 472{
293 struct net_device *dev = qdisc_dev(sch); 473 if (cl < TC_H_MIN_PRIORITY) {
474 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
475 struct net_device *dev = qdisc_dev(sch);
476 int tc = netdev_txq_to_tc(dev, cl - 1);
294 477
295 if (cl <= netdev_get_num_tc(dev)) { 478 tcm->tcm_parent = (tc < 0) ? 0 :
479 TC_H_MAKE(TC_H_MAJ(sch->handle),
480 TC_H_MIN(tc + TC_H_MIN_PRIORITY));
481 tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
482 } else {
296 tcm->tcm_parent = TC_H_ROOT; 483 tcm->tcm_parent = TC_H_ROOT;
297 tcm->tcm_info = 0; 484 tcm->tcm_info = 0;
298 } else {
299 int i;
300 struct netdev_queue *dev_queue;
301
302 dev_queue = mqprio_queue_get(sch, cl);
303 tcm->tcm_parent = 0;
304 for (i = 0; i < netdev_get_num_tc(dev); i++) {
305 struct netdev_tc_txq tc = dev->tc_to_txq[i];
306 int q_idx = cl - netdev_get_num_tc(dev);
307
308 if (q_idx > tc.offset &&
309 q_idx <= tc.offset + tc.count) {
310 tcm->tcm_parent =
311 TC_H_MAKE(TC_H_MAJ(sch->handle),
312 TC_H_MIN(i + 1));
313 break;
314 }
315 }
316 tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
317 } 485 }
318 tcm->tcm_handle |= TC_H_MIN(cl); 486 tcm->tcm_handle |= TC_H_MIN(cl);
319 return 0; 487 return 0;
@@ -324,15 +492,14 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
324 __releases(d->lock) 492 __releases(d->lock)
325 __acquires(d->lock) 493 __acquires(d->lock)
326{ 494{
327 struct net_device *dev = qdisc_dev(sch); 495 if (cl >= TC_H_MIN_PRIORITY) {
328
329 if (cl <= netdev_get_num_tc(dev)) {
330 int i; 496 int i;
331 __u32 qlen = 0; 497 __u32 qlen = 0;
332 struct Qdisc *qdisc; 498 struct Qdisc *qdisc;
333 struct gnet_stats_queue qstats = {0}; 499 struct gnet_stats_queue qstats = {0};
334 struct gnet_stats_basic_packed bstats = {0}; 500 struct gnet_stats_basic_packed bstats = {0};
335 struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1]; 501 struct net_device *dev = qdisc_dev(sch);
502 struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
336 503
337 /* Drop lock here it will be reclaimed before touching 504 /* Drop lock here it will be reclaimed before touching
338 * statistics this is required because the d->lock we 505 * statistics this is required because the d->lock we
@@ -385,17 +552,36 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
385 552
386 /* Walk hierarchy with a virtual class per tc */ 553 /* Walk hierarchy with a virtual class per tc */
387 arg->count = arg->skip; 554 arg->count = arg->skip;
388 for (ntx = arg->skip; 555 for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) {
389 ntx < dev->num_tx_queues + netdev_get_num_tc(dev); 556 if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) {
390 ntx++) { 557 arg->stop = 1;
558 return;
559 }
560 arg->count++;
561 }
562
563 /* Pad the values and skip over unused traffic classes */
564 if (ntx < TC_MAX_QUEUE) {
565 arg->count = TC_MAX_QUEUE;
566 ntx = TC_MAX_QUEUE;
567 }
568
569 /* Reset offset, sort out remaining per-queue qdiscs */
570 for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) {
391 if (arg->fn(sch, ntx + 1, arg) < 0) { 571 if (arg->fn(sch, ntx + 1, arg) < 0) {
392 arg->stop = 1; 572 arg->stop = 1;
393 break; 573 return;
394 } 574 }
395 arg->count++; 575 arg->count++;
396 } 576 }
397} 577}
398 578
579static struct netdev_queue *mqprio_select_queue(struct Qdisc *sch,
580 struct tcmsg *tcm)
581{
582 return mqprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
583}
584
399static const struct Qdisc_class_ops mqprio_class_ops = { 585static const struct Qdisc_class_ops mqprio_class_ops = {
400 .graft = mqprio_graft, 586 .graft = mqprio_graft,
401 .leaf = mqprio_leaf, 587 .leaf = mqprio_leaf,
@@ -403,6 +589,7 @@ static const struct Qdisc_class_ops mqprio_class_ops = {
403 .walk = mqprio_walk, 589 .walk = mqprio_walk,
404 .dump = mqprio_dump_class, 590 .dump = mqprio_dump_class,
405 .dump_stats = mqprio_dump_class_stats, 591 .dump_stats = mqprio_dump_class_stats,
592 .select_queue = mqprio_select_queue,
406}; 593};
407 594
408static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { 595static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index ff4fc3e0facd..012216386c0b 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -54,6 +54,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
54 case TC_ACT_QUEUED: 54 case TC_ACT_QUEUED:
55 case TC_ACT_TRAP: 55 case TC_ACT_TRAP:
56 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 56 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
57 /* fall through */
57 case TC_ACT_SHOT: 58 case TC_ACT_SHOT:
58 return NULL; 59 return NULL;
59 } 60 }
@@ -245,7 +246,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
245 if (opt == NULL) 246 if (opt == NULL)
246 return -EINVAL; 247 return -EINVAL;
247 248
248 err = tcf_block_get(&q->block, &q->filter_list); 249 err = tcf_block_get(&q->block, &q->filter_list, sch);
249 if (err) 250 if (err)
250 return err; 251 return err;
251 252
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b1266e75ca43..dd70924cbcdf 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -77,8 +77,8 @@ struct netem_sched_data {
77 77
78 struct qdisc_watchdog watchdog; 78 struct qdisc_watchdog watchdog;
79 79
80 psched_tdiff_t latency; 80 s64 latency;
81 psched_tdiff_t jitter; 81 s64 jitter;
82 82
83 u32 loss; 83 u32 loss;
84 u32 ecn; 84 u32 ecn;
@@ -135,6 +135,13 @@ struct netem_sched_data {
135 u32 a5; /* p23 used only in 4-states */ 135 u32 a5; /* p23 used only in 4-states */
136 } clg; 136 } clg;
137 137
138 struct tc_netem_slot slot_config;
139 struct slotstate {
140 u64 slot_next;
141 s32 packets_left;
142 s32 bytes_left;
143 } slot;
144
138}; 145};
139 146
140/* Time stamp put into socket buffer control block 147/* Time stamp put into socket buffer control block
@@ -145,16 +152,9 @@ struct netem_sched_data {
145 * we save skb->tstamp value in skb->cb[] before destroying it. 152 * we save skb->tstamp value in skb->cb[] before destroying it.
146 */ 153 */
147struct netem_skb_cb { 154struct netem_skb_cb {
148 psched_time_t time_to_send; 155 u64 time_to_send;
149 ktime_t tstamp_save;
150}; 156};
151 157
152
153static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
154{
155 return rb_entry(rb, struct sk_buff, rbnode);
156}
157
158static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) 158static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
159{ 159{
160 /* we assume we can use skb next/prev/tstamp as storage for rb_node */ 160 /* we assume we can use skb next/prev/tstamp as storage for rb_node */
@@ -312,11 +312,11 @@ static bool loss_event(struct netem_sched_data *q)
312 * std deviation sigma. Uses table lookup to approximate the desired 312 * std deviation sigma. Uses table lookup to approximate the desired
313 * distribution, and a uniformly-distributed pseudo-random source. 313 * distribution, and a uniformly-distributed pseudo-random source.
314 */ 314 */
315static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, 315static s64 tabledist(s64 mu, s32 sigma,
316 struct crndstate *state, 316 struct crndstate *state,
317 const struct disttable *dist) 317 const struct disttable *dist)
318{ 318{
319 psched_tdiff_t x; 319 s64 x;
320 long t; 320 long t;
321 u32 rnd; 321 u32 rnd;
322 322
@@ -327,7 +327,7 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
327 327
328 /* default uniform distribution */ 328 /* default uniform distribution */
329 if (dist == NULL) 329 if (dist == NULL)
330 return (rnd % (2*sigma)) - sigma + mu; 330 return (rnd % (2 * sigma)) - sigma + mu;
331 331
332 t = dist->table[rnd % dist->size]; 332 t = dist->table[rnd % dist->size];
333 x = (sigma % NETEM_DIST_SCALE) * t; 333 x = (sigma % NETEM_DIST_SCALE) * t;
@@ -339,10 +339,8 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
339 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; 339 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
340} 340}
341 341
342static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q) 342static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
343{ 343{
344 u64 ticks;
345
346 len += q->packet_overhead; 344 len += q->packet_overhead;
347 345
348 if (q->cell_size) { 346 if (q->cell_size) {
@@ -353,21 +351,19 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
353 len = cells * (q->cell_size + q->cell_overhead); 351 len = cells * (q->cell_size + q->cell_overhead);
354 } 352 }
355 353
356 ticks = (u64)len * NSEC_PER_SEC; 354 return div64_u64(len * NSEC_PER_SEC, q->rate);
357
358 do_div(ticks, q->rate);
359 return PSCHED_NS2TICKS(ticks);
360} 355}
361 356
362static void tfifo_reset(struct Qdisc *sch) 357static void tfifo_reset(struct Qdisc *sch)
363{ 358{
364 struct netem_sched_data *q = qdisc_priv(sch); 359 struct netem_sched_data *q = qdisc_priv(sch);
365 struct rb_node *p; 360 struct rb_node *p = rb_first(&q->t_root);
366 361
367 while ((p = rb_first(&q->t_root))) { 362 while (p) {
368 struct sk_buff *skb = netem_rb_to_skb(p); 363 struct sk_buff *skb = rb_to_skb(p);
369 364
370 rb_erase(p, &q->t_root); 365 p = rb_next(p);
366 rb_erase(&skb->rbnode, &q->t_root);
371 rtnl_kfree_skbs(skb, skb); 367 rtnl_kfree_skbs(skb, skb);
372 } 368 }
373} 369}
@@ -375,14 +371,14 @@ static void tfifo_reset(struct Qdisc *sch)
375static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) 371static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
376{ 372{
377 struct netem_sched_data *q = qdisc_priv(sch); 373 struct netem_sched_data *q = qdisc_priv(sch);
378 psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; 374 u64 tnext = netem_skb_cb(nskb)->time_to_send;
379 struct rb_node **p = &q->t_root.rb_node, *parent = NULL; 375 struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
380 376
381 while (*p) { 377 while (*p) {
382 struct sk_buff *skb; 378 struct sk_buff *skb;
383 379
384 parent = *p; 380 parent = *p;
385 skb = netem_rb_to_skb(parent); 381 skb = rb_to_skb(parent);
386 if (tnext >= netem_skb_cb(skb)->time_to_send) 382 if (tnext >= netem_skb_cb(skb)->time_to_send)
387 p = &parent->rb_right; 383 p = &parent->rb_right;
388 else 384 else
@@ -521,13 +517,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
521 if (q->gap == 0 || /* not doing reordering */ 517 if (q->gap == 0 || /* not doing reordering */
522 q->counter < q->gap - 1 || /* inside last reordering gap */ 518 q->counter < q->gap - 1 || /* inside last reordering gap */
523 q->reorder < get_crandom(&q->reorder_cor)) { 519 q->reorder < get_crandom(&q->reorder_cor)) {
524 psched_time_t now; 520 u64 now;
525 psched_tdiff_t delay; 521 s64 delay;
526 522
527 delay = tabledist(q->latency, q->jitter, 523 delay = tabledist(q->latency, q->jitter,
528 &q->delay_cor, q->delay_dist); 524 &q->delay_cor, q->delay_dist);
529 525
530 now = psched_get_time(); 526 now = ktime_get_ns();
531 527
532 if (q->rate) { 528 if (q->rate) {
533 struct netem_skb_cb *last = NULL; 529 struct netem_skb_cb *last = NULL;
@@ -538,7 +534,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
538 struct sk_buff *t_skb; 534 struct sk_buff *t_skb;
539 struct netem_skb_cb *t_last; 535 struct netem_skb_cb *t_last;
540 536
541 t_skb = netem_rb_to_skb(rb_last(&q->t_root)); 537 t_skb = skb_rb_last(&q->t_root);
542 t_last = netem_skb_cb(t_skb); 538 t_last = netem_skb_cb(t_skb);
543 if (!last || 539 if (!last ||
544 t_last->time_to_send > last->time_to_send) { 540 t_last->time_to_send > last->time_to_send) {
@@ -553,15 +549,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
553 * from delay. 549 * from delay.
554 */ 550 */
555 delay -= last->time_to_send - now; 551 delay -= last->time_to_send - now;
556 delay = max_t(psched_tdiff_t, 0, delay); 552 delay = max_t(s64, 0, delay);
557 now = last->time_to_send; 553 now = last->time_to_send;
558 } 554 }
559 555
560 delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q); 556 delay += packet_time_ns(qdisc_pkt_len(skb), q);
561 } 557 }
562 558
563 cb->time_to_send = now + delay; 559 cb->time_to_send = now + delay;
564 cb->tstamp_save = skb->tstamp;
565 ++q->counter; 560 ++q->counter;
566 tfifo_enqueue(skb, sch); 561 tfifo_enqueue(skb, sch);
567 } else { 562 } else {
@@ -569,7 +564,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
569 * Do re-ordering by putting one out of N packets at the front 564 * Do re-ordering by putting one out of N packets at the front
570 * of the queue. 565 * of the queue.
571 */ 566 */
572 cb->time_to_send = psched_get_time(); 567 cb->time_to_send = ktime_get_ns();
573 q->counter = 0; 568 q->counter = 0;
574 569
575 netem_enqueue_skb_head(&sch->q, skb); 570 netem_enqueue_skb_head(&sch->q, skb);
@@ -600,6 +595,20 @@ finish_segs:
600 return NET_XMIT_SUCCESS; 595 return NET_XMIT_SUCCESS;
601} 596}
602 597
598/* Delay the next round with a new future slot with a
599 * correct number of bytes and packets.
600 */
601
602static void get_slot_next(struct netem_sched_data *q, u64 now)
603{
604 q->slot.slot_next = now + q->slot_config.min_delay +
605 (prandom_u32() *
606 (q->slot_config.max_delay -
607 q->slot_config.min_delay) >> 32);
608 q->slot.packets_left = q->slot_config.max_packets;
609 q->slot.bytes_left = q->slot_config.max_bytes;
610}
611
603static struct sk_buff *netem_dequeue(struct Qdisc *sch) 612static struct sk_buff *netem_dequeue(struct Qdisc *sch)
604{ 613{
605 struct netem_sched_data *q = qdisc_priv(sch); 614 struct netem_sched_data *q = qdisc_priv(sch);
@@ -616,20 +625,26 @@ deliver:
616 } 625 }
617 p = rb_first(&q->t_root); 626 p = rb_first(&q->t_root);
618 if (p) { 627 if (p) {
619 psched_time_t time_to_send; 628 u64 time_to_send;
629 u64 now = ktime_get_ns();
620 630
621 skb = netem_rb_to_skb(p); 631 skb = rb_to_skb(p);
622 632
623 /* if more time remaining? */ 633 /* if more time remaining? */
624 time_to_send = netem_skb_cb(skb)->time_to_send; 634 time_to_send = netem_skb_cb(skb)->time_to_send;
625 if (time_to_send <= psched_get_time()) { 635 if (q->slot.slot_next && q->slot.slot_next < time_to_send)
626 rb_erase(p, &q->t_root); 636 get_slot_next(q, now);
627 637
638 if (time_to_send <= now && q->slot.slot_next <= now) {
639 rb_erase(p, &q->t_root);
628 sch->q.qlen--; 640 sch->q.qlen--;
629 qdisc_qstats_backlog_dec(sch, skb); 641 qdisc_qstats_backlog_dec(sch, skb);
630 skb->next = NULL; 642 skb->next = NULL;
631 skb->prev = NULL; 643 skb->prev = NULL;
632 skb->tstamp = netem_skb_cb(skb)->tstamp_save; 644 /* skb->dev shares skb->rbnode area,
645 * we need to restore its value.
646 */
647 skb->dev = qdisc_dev(sch);
633 648
634#ifdef CONFIG_NET_CLS_ACT 649#ifdef CONFIG_NET_CLS_ACT
635 /* 650 /*
@@ -640,6 +655,14 @@ deliver:
640 skb->tstamp = 0; 655 skb->tstamp = 0;
641#endif 656#endif
642 657
658 if (q->slot.slot_next) {
659 q->slot.packets_left--;
660 q->slot.bytes_left -= qdisc_pkt_len(skb);
661 if (q->slot.packets_left <= 0 ||
662 q->slot.bytes_left <= 0)
663 get_slot_next(q, now);
664 }
665
643 if (q->qdisc) { 666 if (q->qdisc) {
644 unsigned int pkt_len = qdisc_pkt_len(skb); 667 unsigned int pkt_len = qdisc_pkt_len(skb);
645 struct sk_buff *to_free = NULL; 668 struct sk_buff *to_free = NULL;
@@ -663,7 +686,10 @@ deliver:
663 if (skb) 686 if (skb)
664 goto deliver; 687 goto deliver;
665 } 688 }
666 qdisc_watchdog_schedule(&q->watchdog, time_to_send); 689
690 qdisc_watchdog_schedule_ns(&q->watchdog,
691 max(time_to_send,
692 q->slot.slot_next));
667 } 693 }
668 694
669 if (q->qdisc) { 695 if (q->qdisc) {
@@ -694,6 +720,7 @@ static void dist_free(struct disttable *d)
694 * Distribution data is a variable size payload containing 720 * Distribution data is a variable size payload containing
695 * signed 16 bit values. 721 * signed 16 bit values.
696 */ 722 */
723
697static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) 724static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
698{ 725{
699 struct netem_sched_data *q = qdisc_priv(sch); 726 struct netem_sched_data *q = qdisc_priv(sch);
@@ -724,6 +751,23 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
724 return 0; 751 return 0;
725} 752}
726 753
754static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
755{
756 const struct tc_netem_slot *c = nla_data(attr);
757
758 q->slot_config = *c;
759 if (q->slot_config.max_packets == 0)
760 q->slot_config.max_packets = INT_MAX;
761 if (q->slot_config.max_bytes == 0)
762 q->slot_config.max_bytes = INT_MAX;
763 q->slot.packets_left = q->slot_config.max_packets;
764 q->slot.bytes_left = q->slot_config.max_bytes;
765 if (q->slot_config.min_delay | q->slot_config.max_delay)
766 q->slot.slot_next = ktime_get_ns();
767 else
768 q->slot.slot_next = 0;
769}
770
727static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr) 771static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
728{ 772{
729 const struct tc_netem_corr *c = nla_data(attr); 773 const struct tc_netem_corr *c = nla_data(attr);
@@ -825,6 +869,9 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
825 [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, 869 [TCA_NETEM_LOSS] = { .type = NLA_NESTED },
826 [TCA_NETEM_ECN] = { .type = NLA_U32 }, 870 [TCA_NETEM_ECN] = { .type = NLA_U32 },
827 [TCA_NETEM_RATE64] = { .type = NLA_U64 }, 871 [TCA_NETEM_RATE64] = { .type = NLA_U64 },
872 [TCA_NETEM_LATENCY64] = { .type = NLA_S64 },
873 [TCA_NETEM_JITTER64] = { .type = NLA_S64 },
874 [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) },
828}; 875};
829 876
830static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, 877static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -892,8 +939,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
892 939
893 sch->limit = qopt->limit; 940 sch->limit = qopt->limit;
894 941
895 q->latency = qopt->latency; 942 q->latency = PSCHED_TICKS2NS(qopt->latency);
896 q->jitter = qopt->jitter; 943 q->jitter = PSCHED_TICKS2NS(qopt->jitter);
897 q->limit = qopt->limit; 944 q->limit = qopt->limit;
898 q->gap = qopt->gap; 945 q->gap = qopt->gap;
899 q->counter = 0; 946 q->counter = 0;
@@ -922,9 +969,18 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
922 q->rate = max_t(u64, q->rate, 969 q->rate = max_t(u64, q->rate,
923 nla_get_u64(tb[TCA_NETEM_RATE64])); 970 nla_get_u64(tb[TCA_NETEM_RATE64]));
924 971
972 if (tb[TCA_NETEM_LATENCY64])
973 q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
974
975 if (tb[TCA_NETEM_JITTER64])
976 q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
977
925 if (tb[TCA_NETEM_ECN]) 978 if (tb[TCA_NETEM_ECN])
926 q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); 979 q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
927 980
981 if (tb[TCA_NETEM_SLOT])
982 get_slot(q, tb[TCA_NETEM_SLOT]);
983
928 return ret; 984 return ret;
929} 985}
930 986
@@ -1014,9 +1070,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
1014 struct tc_netem_reorder reorder; 1070 struct tc_netem_reorder reorder;
1015 struct tc_netem_corrupt corrupt; 1071 struct tc_netem_corrupt corrupt;
1016 struct tc_netem_rate rate; 1072 struct tc_netem_rate rate;
1073 struct tc_netem_slot slot;
1017 1074
1018 qopt.latency = q->latency; 1075 qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
1019 qopt.jitter = q->jitter; 1076 UINT_MAX);
1077 qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
1078 UINT_MAX);
1020 qopt.limit = q->limit; 1079 qopt.limit = q->limit;
1021 qopt.loss = q->loss; 1080 qopt.loss = q->loss;
1022 qopt.gap = q->gap; 1081 qopt.gap = q->gap;
@@ -1024,6 +1083,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
1024 if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) 1083 if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
1025 goto nla_put_failure; 1084 goto nla_put_failure;
1026 1085
1086 if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
1087 goto nla_put_failure;
1088
1089 if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
1090 goto nla_put_failure;
1091
1027 cor.delay_corr = q->delay_cor.rho; 1092 cor.delay_corr = q->delay_cor.rho;
1028 cor.loss_corr = q->loss_cor.rho; 1093 cor.loss_corr = q->loss_cor.rho;
1029 cor.dup_corr = q->dup_cor.rho; 1094 cor.dup_corr = q->dup_cor.rho;
@@ -1060,6 +1125,16 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
1060 if (dump_loss_model(q, skb) != 0) 1125 if (dump_loss_model(q, skb) != 0)
1061 goto nla_put_failure; 1126 goto nla_put_failure;
1062 1127
1128 if (q->slot_config.min_delay | q->slot_config.max_delay) {
1129 slot = q->slot_config;
1130 if (slot.max_packets == INT_MAX)
1131 slot.max_packets = 0;
1132 if (slot.max_bytes == INT_MAX)
1133 slot.max_bytes = 0;
1134 if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
1135 goto nla_put_failure;
1136 }
1137
1063 return nla_nest_end(skb, nla); 1138 return nla_nest_end(skb, nla);
1064 1139
1065nla_put_failure: 1140nla_put_failure:
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 6c2791d6102d..776c694c77c7 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -74,6 +74,7 @@ struct pie_sched_data {
74 struct pie_vars vars; 74 struct pie_vars vars;
75 struct pie_stats stats; 75 struct pie_stats stats;
76 struct timer_list adapt_timer; 76 struct timer_list adapt_timer;
77 struct Qdisc *sch;
77}; 78};
78 79
79static void pie_params_init(struct pie_params *params) 80static void pie_params_init(struct pie_params *params)
@@ -422,10 +423,10 @@ static void calculate_probability(struct Qdisc *sch)
422 pie_vars_init(&q->vars); 423 pie_vars_init(&q->vars);
423} 424}
424 425
425static void pie_timer(unsigned long arg) 426static void pie_timer(struct timer_list *t)
426{ 427{
427 struct Qdisc *sch = (struct Qdisc *)arg; 428 struct pie_sched_data *q = from_timer(q, t, adapt_timer);
428 struct pie_sched_data *q = qdisc_priv(sch); 429 struct Qdisc *sch = q->sch;
429 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); 430 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
430 431
431 spin_lock(root_lock); 432 spin_lock(root_lock);
@@ -446,7 +447,8 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt)
446 pie_vars_init(&q->vars); 447 pie_vars_init(&q->vars);
447 sch->limit = q->params.limit; 448 sch->limit = q->params.limit;
448 449
449 setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); 450 q->sch = sch;
451 timer_setup(&q->adapt_timer, pie_timer, 0);
450 452
451 if (opt) { 453 if (opt) {
452 int err = pie_change(sch, opt); 454 int err = pie_change(sch, opt);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2dd6c68ae91e..2c79559a0d31 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -50,6 +50,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
50 case TC_ACT_QUEUED: 50 case TC_ACT_QUEUED:
51 case TC_ACT_TRAP: 51 case TC_ACT_TRAP:
52 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 52 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
53 /* fall through */
53 case TC_ACT_SHOT: 54 case TC_ACT_SHOT:
54 return NULL; 55 return NULL;
55 } 56 }
@@ -212,7 +213,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
212 if (!opt) 213 if (!opt)
213 return -EINVAL; 214 return -EINVAL;
214 215
215 err = tcf_block_get(&q->block, &q->filter_list); 216 err = tcf_block_get(&q->block, &q->filter_list, sch);
216 if (err) 217 if (err)
217 return err; 218 return err;
218 219
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 6ddfd4991108..6962b37a3ad3 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -709,6 +709,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
709 case TC_ACT_STOLEN: 709 case TC_ACT_STOLEN:
710 case TC_ACT_TRAP: 710 case TC_ACT_TRAP:
711 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 711 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
712 /* fall through */
712 case TC_ACT_SHOT: 713 case TC_ACT_SHOT:
713 return NULL; 714 return NULL;
714 } 715 }
@@ -1419,7 +1420,7 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1419 int i, j, err; 1420 int i, j, err;
1420 u32 max_cl_shift, maxbudg_shift, max_classes; 1421 u32 max_cl_shift, maxbudg_shift, max_classes;
1421 1422
1422 err = tcf_block_get(&q->block, &q->filter_list); 1423 err = tcf_block_get(&q->block, &q->filter_list, sch);
1423 if (err) 1424 if (err)
1424 return err; 1425 return err;
1425 1426
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 93b9d70a9b28..7f8ea9e297c3 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -19,6 +19,7 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/skbuff.h> 20#include <linux/skbuff.h>
21#include <net/pkt_sched.h> 21#include <net/pkt_sched.h>
22#include <net/pkt_cls.h>
22#include <net/inet_ecn.h> 23#include <net/inet_ecn.h>
23#include <net/red.h> 24#include <net/red.h>
24 25
@@ -40,6 +41,7 @@ struct red_sched_data {
40 u32 limit; /* HARD maximal queue length */ 41 u32 limit; /* HARD maximal queue length */
41 unsigned char flags; 42 unsigned char flags;
42 struct timer_list adapt_timer; 43 struct timer_list adapt_timer;
44 struct Qdisc *sch;
43 struct red_parms parms; 45 struct red_parms parms;
44 struct red_vars vars; 46 struct red_vars vars;
45 struct red_stats stats; 47 struct red_stats stats;
@@ -147,11 +149,37 @@ static void red_reset(struct Qdisc *sch)
147 red_restart(&q->vars); 149 red_restart(&q->vars);
148} 150}
149 151
152static int red_offload(struct Qdisc *sch, bool enable)
153{
154 struct red_sched_data *q = qdisc_priv(sch);
155 struct net_device *dev = qdisc_dev(sch);
156 struct tc_red_qopt_offload opt = {
157 .handle = sch->handle,
158 .parent = sch->parent,
159 };
160
161 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
162 return -EOPNOTSUPP;
163
164 if (enable) {
165 opt.command = TC_RED_REPLACE;
166 opt.set.min = q->parms.qth_min >> q->parms.Wlog;
167 opt.set.max = q->parms.qth_max >> q->parms.Wlog;
168 opt.set.probability = q->parms.max_P;
169 opt.set.is_ecn = red_use_ecn(q);
170 } else {
171 opt.command = TC_RED_DESTROY;
172 }
173
174 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
175}
176
150static void red_destroy(struct Qdisc *sch) 177static void red_destroy(struct Qdisc *sch)
151{ 178{
152 struct red_sched_data *q = qdisc_priv(sch); 179 struct red_sched_data *q = qdisc_priv(sch);
153 180
154 del_timer_sync(&q->adapt_timer); 181 del_timer_sync(&q->adapt_timer);
182 red_offload(sch, false);
155 qdisc_destroy(q->qdisc); 183 qdisc_destroy(q->qdisc);
156} 184}
157 185
@@ -218,13 +246,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
218 red_start_of_idle_period(&q->vars); 246 red_start_of_idle_period(&q->vars);
219 247
220 sch_tree_unlock(sch); 248 sch_tree_unlock(sch);
249 red_offload(sch, true);
221 return 0; 250 return 0;
222} 251}
223 252
224static inline void red_adaptative_timer(unsigned long arg) 253static inline void red_adaptative_timer(struct timer_list *t)
225{ 254{
226 struct Qdisc *sch = (struct Qdisc *)arg; 255 struct red_sched_data *q = from_timer(q, t, adapt_timer);
227 struct red_sched_data *q = qdisc_priv(sch); 256 struct Qdisc *sch = q->sch;
228 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); 257 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
229 258
230 spin_lock(root_lock); 259 spin_lock(root_lock);
@@ -238,10 +267,40 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
238 struct red_sched_data *q = qdisc_priv(sch); 267 struct red_sched_data *q = qdisc_priv(sch);
239 268
240 q->qdisc = &noop_qdisc; 269 q->qdisc = &noop_qdisc;
241 setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch); 270 q->sch = sch;
271 timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
242 return red_change(sch, opt); 272 return red_change(sch, opt);
243} 273}
244 274
275static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
276{
277 struct net_device *dev = qdisc_dev(sch);
278 struct tc_red_qopt_offload hw_stats = {
279 .command = TC_RED_STATS,
280 .handle = sch->handle,
281 .parent = sch->parent,
282 {
283 .stats.bstats = &sch->bstats,
284 .stats.qstats = &sch->qstats,
285 },
286 };
287 int err;
288
289 opt->flags &= ~TC_RED_OFFLOADED;
290 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
291 return 0;
292
293 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
294 &hw_stats);
295 if (err == -EOPNOTSUPP)
296 return 0;
297
298 if (!err)
299 opt->flags |= TC_RED_OFFLOADED;
300
301 return err;
302}
303
245static int red_dump(struct Qdisc *sch, struct sk_buff *skb) 304static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
246{ 305{
247 struct red_sched_data *q = qdisc_priv(sch); 306 struct red_sched_data *q = qdisc_priv(sch);
@@ -255,8 +314,13 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
255 .Plog = q->parms.Plog, 314 .Plog = q->parms.Plog,
256 .Scell_log = q->parms.Scell_log, 315 .Scell_log = q->parms.Scell_log,
257 }; 316 };
317 int err;
258 318
259 sch->qstats.backlog = q->qdisc->qstats.backlog; 319 sch->qstats.backlog = q->qdisc->qstats.backlog;
320 err = red_dump_offload(sch, &opt);
321 if (err)
322 goto nla_put_failure;
323
260 opts = nla_nest_start(skb, TCA_OPTIONS); 324 opts = nla_nest_start(skb, TCA_OPTIONS);
261 if (opts == NULL) 325 if (opts == NULL)
262 goto nla_put_failure; 326 goto nla_put_failure;
@@ -273,6 +337,7 @@ nla_put_failure:
273static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 337static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
274{ 338{
275 struct red_sched_data *q = qdisc_priv(sch); 339 struct red_sched_data *q = qdisc_priv(sch);
340 struct net_device *dev = qdisc_dev(sch);
276 struct tc_red_xstats st = { 341 struct tc_red_xstats st = {
277 .early = q->stats.prob_drop + q->stats.forced_drop, 342 .early = q->stats.prob_drop + q->stats.forced_drop,
278 .pdrop = q->stats.pdrop, 343 .pdrop = q->stats.pdrop,
@@ -280,6 +345,26 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
280 .marked = q->stats.prob_mark + q->stats.forced_mark, 345 .marked = q->stats.prob_mark + q->stats.forced_mark,
281 }; 346 };
282 347
348 if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) {
349 struct red_stats hw_stats = {0};
350 struct tc_red_qopt_offload hw_stats_request = {
351 .command = TC_RED_XSTATS,
352 .handle = sch->handle,
353 .parent = sch->parent,
354 {
355 .xstats = &hw_stats,
356 },
357 };
358 if (!dev->netdev_ops->ndo_setup_tc(dev,
359 TC_SETUP_QDISC_RED,
360 &hw_stats_request)) {
361 st.early += hw_stats.prob_drop + hw_stats.forced_drop;
362 st.pdrop += hw_stats.pdrop;
363 st.other += hw_stats.other;
364 st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
365 }
366 }
367
283 return gnet_stats_copy_app(d, &st, sizeof(st)); 368 return gnet_stats_copy_app(d, &st, sizeof(st));
284} 369}
285 370
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index cc39e170b4aa..0678debdd856 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -268,6 +268,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
268 case TC_ACT_QUEUED: 268 case TC_ACT_QUEUED:
269 case TC_ACT_TRAP: 269 case TC_ACT_TRAP:
270 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 270 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
271 /* fall through */
271 case TC_ACT_SHOT: 272 case TC_ACT_SHOT:
272 return false; 273 return false;
273 } 274 }
@@ -553,7 +554,7 @@ static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
553 struct sfb_sched_data *q = qdisc_priv(sch); 554 struct sfb_sched_data *q = qdisc_priv(sch);
554 int err; 555 int err;
555 556
556 err = tcf_block_get(&q->block, &q->filter_list); 557 err = tcf_block_get(&q->block, &q->filter_list, sch);
557 if (err) 558 if (err)
558 return err; 559 return err;
559 560
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 74ea863b8240..890f4a4564e7 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -145,6 +145,7 @@ struct sfq_sched_data {
145 int perturb_period; 145 int perturb_period;
146 unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ 146 unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
147 struct timer_list perturb_timer; 147 struct timer_list perturb_timer;
148 struct Qdisc *sch;
148}; 149};
149 150
150/* 151/*
@@ -189,6 +190,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
189 case TC_ACT_QUEUED: 190 case TC_ACT_QUEUED:
190 case TC_ACT_TRAP: 191 case TC_ACT_TRAP:
191 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 192 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
193 /* fall through */
192 case TC_ACT_SHOT: 194 case TC_ACT_SHOT:
193 return 0; 195 return 0;
194 } 196 }
@@ -604,10 +606,10 @@ drop:
604 qdisc_tree_reduce_backlog(sch, dropped, drop_len); 606 qdisc_tree_reduce_backlog(sch, dropped, drop_len);
605} 607}
606 608
607static void sfq_perturbation(unsigned long arg) 609static void sfq_perturbation(struct timer_list *t)
608{ 610{
609 struct Qdisc *sch = (struct Qdisc *)arg; 611 struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
610 struct sfq_sched_data *q = qdisc_priv(sch); 612 struct Qdisc *sch = q->sch;
611 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); 613 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
612 614
613 spin_lock(root_lock); 615 spin_lock(root_lock);
@@ -722,10 +724,9 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
722 int i; 724 int i;
723 int err; 725 int err;
724 726
725 setup_deferrable_timer(&q->perturb_timer, sfq_perturbation, 727 timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
726 (unsigned long)sch);
727 728
728 err = tcf_block_get(&q->block, &q->filter_list); 729 err = tcf_block_get(&q->block, &q->filter_list, sch);
729 if (err) 730 if (err)
730 return err; 731 return err;
731 732
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 8c434af3e68f..1ca84a288443 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -13,7 +13,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
13 inqueue.o outqueue.o ulpqueue.o \ 13 inqueue.o outqueue.o ulpqueue.o \
14 tsnmap.o bind_addr.o socket.o primitive.o \ 14 tsnmap.o bind_addr.o socket.o primitive.o \
15 output.o input.o debug.o stream.o auth.o \ 15 output.o input.o debug.o stream.o auth.o \
16 offload.o 16 offload.o stream_sched.o stream_sched_prio.o \
17 stream_sched_rr.o
17 18
18sctp_probe-y := probe.o 19sctp_probe-y := probe.o
19 20
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index dfb9651e818b..69394f4d6091 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -149,8 +149,7 @@ static struct sctp_association *sctp_association_init(
149 149
150 /* Initializes the timers */ 150 /* Initializes the timers */
151 for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) 151 for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
152 setup_timer(&asoc->timers[i], sctp_timer_events[i], 152 timer_setup(&asoc->timers[i], sctp_timer_events[i], 0);
153 (unsigned long)asoc);
154 153
155 /* Pull default initialization values from the sock options. 154 /* Pull default initialization values from the sock options.
156 * Note: This assumes that the values have already been 155 * Note: This assumes that the values have already been
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 3afac275ee82..7b261afc47b9 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -311,10 +311,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
311 311
312 if (chunk->sent_count) { 312 if (chunk->sent_count) {
313 chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; 313 chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
314 streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++; 314 streamout->ext->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
315 } else { 315 } else {
316 chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; 316 chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
317 streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; 317 streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
318 } 318 }
319 return 1; 319 return 1;
320 } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && 320 } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
@@ -323,7 +323,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
323 &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; 323 &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream];
324 324
325 chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; 325 chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
326 streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++; 326 streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
327 return 1; 327 return 1;
328 } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && 328 } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
329 chunk->msg->expires_at && 329 chunk->msg->expires_at &&
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index a6dfa86c0201..3b18085e3b10 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -807,9 +807,10 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
807 addr->v6.sin6_flowinfo = 0; 807 addr->v6.sin6_flowinfo = 0;
808 addr->v6.sin6_port = sh->source; 808 addr->v6.sin6_port = sh->source;
809 addr->v6.sin6_addr = ipv6_hdr(skb)->saddr; 809 addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
810 if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) { 810 if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
811 addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb); 811 addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb);
812 } 812 else
813 addr->v6.sin6_scope_id = 0;
813 } 814 }
814 815
815 *addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr); 816 *addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr);
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 2966ff400755..4db012aa25f7 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -50,6 +50,7 @@
50 50
51#include <net/sctp/sctp.h> 51#include <net/sctp/sctp.h>
52#include <net/sctp/sm.h> 52#include <net/sctp/sm.h>
53#include <net/sctp/stream_sched.h>
53 54
54/* Declare internal functions here. */ 55/* Declare internal functions here. */
55static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn); 56static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn);
@@ -72,32 +73,38 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
72 73
73/* Add data to the front of the queue. */ 74/* Add data to the front of the queue. */
74static inline void sctp_outq_head_data(struct sctp_outq *q, 75static inline void sctp_outq_head_data(struct sctp_outq *q,
75 struct sctp_chunk *ch) 76 struct sctp_chunk *ch)
76{ 77{
78 struct sctp_stream_out_ext *oute;
79 __u16 stream;
80
77 list_add(&ch->list, &q->out_chunk_list); 81 list_add(&ch->list, &q->out_chunk_list);
78 q->out_qlen += ch->skb->len; 82 q->out_qlen += ch->skb->len;
83
84 stream = sctp_chunk_stream_no(ch);
85 oute = q->asoc->stream.out[stream].ext;
86 list_add(&ch->stream_list, &oute->outq);
79} 87}
80 88
81/* Take data from the front of the queue. */ 89/* Take data from the front of the queue. */
82static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q) 90static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q)
83{ 91{
84 struct sctp_chunk *ch = NULL; 92 return q->sched->dequeue(q);
85
86 if (!list_empty(&q->out_chunk_list)) {
87 struct list_head *entry = q->out_chunk_list.next;
88
89 ch = list_entry(entry, struct sctp_chunk, list);
90 list_del_init(entry);
91 q->out_qlen -= ch->skb->len;
92 }
93 return ch;
94} 93}
94
95/* Add data chunk to the end of the queue. */ 95/* Add data chunk to the end of the queue. */
96static inline void sctp_outq_tail_data(struct sctp_outq *q, 96static inline void sctp_outq_tail_data(struct sctp_outq *q,
97 struct sctp_chunk *ch) 97 struct sctp_chunk *ch)
98{ 98{
99 struct sctp_stream_out_ext *oute;
100 __u16 stream;
101
99 list_add_tail(&ch->list, &q->out_chunk_list); 102 list_add_tail(&ch->list, &q->out_chunk_list);
100 q->out_qlen += ch->skb->len; 103 q->out_qlen += ch->skb->len;
104
105 stream = sctp_chunk_stream_no(ch);
106 oute = q->asoc->stream.out[stream].ext;
107 list_add_tail(&ch->stream_list, &oute->outq);
101} 108}
102 109
103/* 110/*
@@ -207,6 +214,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
207 INIT_LIST_HEAD(&q->retransmit); 214 INIT_LIST_HEAD(&q->retransmit);
208 INIT_LIST_HEAD(&q->sacked); 215 INIT_LIST_HEAD(&q->sacked);
209 INIT_LIST_HEAD(&q->abandoned); 216 INIT_LIST_HEAD(&q->abandoned);
217 sctp_sched_set_sched(asoc, SCTP_SS_FCFS);
210} 218}
211 219
212/* Free the outqueue structure and any related pending chunks. 220/* Free the outqueue structure and any related pending chunks.
@@ -258,6 +266,7 @@ static void __sctp_outq_teardown(struct sctp_outq *q)
258 266
259 /* Throw away any leftover data chunks. */ 267 /* Throw away any leftover data chunks. */
260 while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { 268 while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
269 sctp_sched_dequeue_done(q, chunk);
261 270
262 /* Mark as send failure. */ 271 /* Mark as send failure. */
263 sctp_chunk_fail(chunk, q->error); 272 sctp_chunk_fail(chunk, q->error);
@@ -366,7 +375,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
366 streamout = &asoc->stream.out[chk->sinfo.sinfo_stream]; 375 streamout = &asoc->stream.out[chk->sinfo.sinfo_stream];
367 asoc->sent_cnt_removable--; 376 asoc->sent_cnt_removable--;
368 asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; 377 asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
369 streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; 378 streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
370 379
371 if (!chk->tsn_gap_acked) { 380 if (!chk->tsn_gap_acked) {
372 if (chk->transport) 381 if (chk->transport)
@@ -391,20 +400,21 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
391 struct sctp_outq *q = &asoc->outqueue; 400 struct sctp_outq *q = &asoc->outqueue;
392 struct sctp_chunk *chk, *temp; 401 struct sctp_chunk *chk, *temp;
393 402
403 q->sched->unsched_all(&asoc->stream);
404
394 list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { 405 list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
395 if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || 406 if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
396 chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) 407 chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
397 continue; 408 continue;
398 409
399 list_del_init(&chk->list); 410 sctp_sched_dequeue_common(q, chk);
400 q->out_qlen -= chk->skb->len;
401 asoc->sent_cnt_removable--; 411 asoc->sent_cnt_removable--;
402 asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; 412 asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
403 if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) { 413 if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) {
404 struct sctp_stream_out *streamout = 414 struct sctp_stream_out *streamout =
405 &asoc->stream.out[chk->sinfo.sinfo_stream]; 415 &asoc->stream.out[chk->sinfo.sinfo_stream];
406 416
407 streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; 417 streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
408 } 418 }
409 419
410 msg_len -= SCTP_DATA_SNDSIZE(chk) + 420 msg_len -= SCTP_DATA_SNDSIZE(chk) +
@@ -415,6 +425,8 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
415 break; 425 break;
416 } 426 }
417 427
428 q->sched->sched_all(&asoc->stream);
429
418 return msg_len; 430 return msg_len;
419} 431}
420 432
@@ -1033,22 +1045,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
1033 while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { 1045 while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
1034 __u32 sid = ntohs(chunk->subh.data_hdr->stream); 1046 __u32 sid = ntohs(chunk->subh.data_hdr->stream);
1035 1047
1036 /* RFC 2960 6.5 Every DATA chunk MUST carry a valid
1037 * stream identifier.
1038 */
1039 if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) {
1040
1041 /* Mark as failed send. */
1042 sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
1043 if (asoc->peer.prsctp_capable &&
1044 SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
1045 asoc->sent_cnt_removable--;
1046 sctp_chunk_free(chunk);
1047 continue;
1048 }
1049
1050 /* Has this chunk expired? */ 1048 /* Has this chunk expired? */
1051 if (sctp_chunk_abandoned(chunk)) { 1049 if (sctp_chunk_abandoned(chunk)) {
1050 sctp_sched_dequeue_done(q, chunk);
1052 sctp_chunk_fail(chunk, 0); 1051 sctp_chunk_fail(chunk, 0);
1053 sctp_chunk_free(chunk); 1052 sctp_chunk_free(chunk);
1054 continue; 1053 continue;
@@ -1070,6 +1069,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
1070 new_transport = asoc->peer.active_path; 1069 new_transport = asoc->peer.active_path;
1071 if (new_transport->state == SCTP_UNCONFIRMED) { 1070 if (new_transport->state == SCTP_UNCONFIRMED) {
1072 WARN_ONCE(1, "Attempt to send packet on unconfirmed path."); 1071 WARN_ONCE(1, "Attempt to send packet on unconfirmed path.");
1072 sctp_sched_dequeue_done(q, chunk);
1073 sctp_chunk_fail(chunk, 0); 1073 sctp_chunk_fail(chunk, 0);
1074 sctp_chunk_free(chunk); 1074 sctp_chunk_free(chunk);
1075 continue; 1075 continue;
@@ -1133,6 +1133,11 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
1133 else 1133 else
1134 asoc->stats.oodchunks++; 1134 asoc->stats.oodchunks++;
1135 1135
1136 /* Only now it's safe to consider this
1137 * chunk as sent, sched-wise.
1138 */
1139 sctp_sched_dequeue_done(q, chunk);
1140
1136 break; 1141 break;
1137 1142
1138 default: 1143 default:
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index fcd80feb293f..f5172c21349b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -622,9 +622,9 @@ static void sctp_v4_ecn_capable(struct sock *sk)
622 INET_ECN_xmit(sk); 622 INET_ECN_xmit(sk);
623} 623}
624 624
625static void sctp_addr_wq_timeout_handler(unsigned long arg) 625static void sctp_addr_wq_timeout_handler(struct timer_list *t)
626{ 626{
627 struct net *net = (struct net *)arg; 627 struct net *net = from_timer(net, t, sctp.addr_wq_timer);
628 struct sctp_sockaddr_entry *addrw, *temp; 628 struct sctp_sockaddr_entry *addrw, *temp;
629 struct sctp_sock *sp; 629 struct sctp_sock *sp;
630 630
@@ -1304,8 +1304,7 @@ static int __net_init sctp_defaults_init(struct net *net)
1304 INIT_LIST_HEAD(&net->sctp.auto_asconf_splist); 1304 INIT_LIST_HEAD(&net->sctp.auto_asconf_splist);
1305 spin_lock_init(&net->sctp.addr_wq_lock); 1305 spin_lock_init(&net->sctp.addr_wq_lock);
1306 net->sctp.addr_wq_timer.expires = 0; 1306 net->sctp.addr_wq_timer.expires = 0;
1307 setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, 1307 timer_setup(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, 0);
1308 (unsigned long)net);
1309 1308
1310 return 0; 1309 return 0;
1311 1310
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 514465b03829..9bf575f2e8ed 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3594,8 +3594,8 @@ struct sctp_chunk *sctp_make_strreset_req(
3594 __u16 stream_num, __be16 *stream_list, 3594 __u16 stream_num, __be16 *stream_list,
3595 bool out, bool in) 3595 bool out, bool in)
3596{ 3596{
3597 __u16 stream_len = stream_num * sizeof(__u16);
3597 struct sctp_strreset_outreq outreq; 3598 struct sctp_strreset_outreq outreq;
3598 __u16 stream_len = stream_num * 2;
3599 struct sctp_strreset_inreq inreq; 3599 struct sctp_strreset_inreq inreq;
3600 struct sctp_chunk *retval; 3600 struct sctp_chunk *retval;
3601 __u16 outlen, inlen; 3601 __u16 outlen, inlen;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index e2d9a4b49c9c..df94d77401e7 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -50,6 +50,7 @@
50#include <net/sock.h> 50#include <net/sock.h>
51#include <net/sctp/sctp.h> 51#include <net/sctp/sctp.h>
52#include <net/sctp/sm.h> 52#include <net/sctp/sm.h>
53#include <net/sctp/stream_sched.h>
53 54
54static int sctp_cmd_interpreter(enum sctp_event event_type, 55static int sctp_cmd_interpreter(enum sctp_event event_type,
55 union sctp_subtype subtype, 56 union sctp_subtype subtype,
@@ -242,9 +243,10 @@ nomem:
242/* When the T3-RTX timer expires, it calls this function to create the 243/* When the T3-RTX timer expires, it calls this function to create the
243 * relevant state machine event. 244 * relevant state machine event.
244 */ 245 */
245void sctp_generate_t3_rtx_event(unsigned long peer) 246void sctp_generate_t3_rtx_event(struct timer_list *t)
246{ 247{
247 struct sctp_transport *transport = (struct sctp_transport *) peer; 248 struct sctp_transport *transport =
249 from_timer(transport, t, T3_rtx_timer);
248 struct sctp_association *asoc = transport->asoc; 250 struct sctp_association *asoc = transport->asoc;
249 struct sock *sk = asoc->base.sk; 251 struct sock *sk = asoc->base.sk;
250 struct net *net = sock_net(sk); 252 struct net *net = sock_net(sk);
@@ -318,50 +320,63 @@ out_unlock:
318 sctp_association_put(asoc); 320 sctp_association_put(asoc);
319} 321}
320 322
321static void sctp_generate_t1_cookie_event(unsigned long data) 323static void sctp_generate_t1_cookie_event(struct timer_list *t)
322{ 324{
323 struct sctp_association *asoc = (struct sctp_association *) data; 325 struct sctp_association *asoc =
326 from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_COOKIE]);
327
324 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_COOKIE); 328 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_COOKIE);
325} 329}
326 330
327static void sctp_generate_t1_init_event(unsigned long data) 331static void sctp_generate_t1_init_event(struct timer_list *t)
328{ 332{
329 struct sctp_association *asoc = (struct sctp_association *) data; 333 struct sctp_association *asoc =
334 from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_INIT]);
335
330 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_INIT); 336 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_INIT);
331} 337}
332 338
333static void sctp_generate_t2_shutdown_event(unsigned long data) 339static void sctp_generate_t2_shutdown_event(struct timer_list *t)
334{ 340{
335 struct sctp_association *asoc = (struct sctp_association *) data; 341 struct sctp_association *asoc =
342 from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN]);
343
336 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T2_SHUTDOWN); 344 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T2_SHUTDOWN);
337} 345}
338 346
339static void sctp_generate_t4_rto_event(unsigned long data) 347static void sctp_generate_t4_rto_event(struct timer_list *t)
340{ 348{
341 struct sctp_association *asoc = (struct sctp_association *) data; 349 struct sctp_association *asoc =
350 from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T4_RTO]);
351
342 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T4_RTO); 352 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T4_RTO);
343} 353}
344 354
345static void sctp_generate_t5_shutdown_guard_event(unsigned long data) 355static void sctp_generate_t5_shutdown_guard_event(struct timer_list *t)
346{ 356{
347 struct sctp_association *asoc = (struct sctp_association *)data; 357 struct sctp_association *asoc =
358 from_timer(asoc, t,
359 timers[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]);
360
348 sctp_generate_timeout_event(asoc, 361 sctp_generate_timeout_event(asoc,
349 SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD); 362 SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD);
350 363
351} /* sctp_generate_t5_shutdown_guard_event() */ 364} /* sctp_generate_t5_shutdown_guard_event() */
352 365
353static void sctp_generate_autoclose_event(unsigned long data) 366static void sctp_generate_autoclose_event(struct timer_list *t)
354{ 367{
355 struct sctp_association *asoc = (struct sctp_association *) data; 368 struct sctp_association *asoc =
369 from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]);
370
356 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_AUTOCLOSE); 371 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_AUTOCLOSE);
357} 372}
358 373
359/* Generate a heart beat event. If the sock is busy, reschedule. Make 374/* Generate a heart beat event. If the sock is busy, reschedule. Make
360 * sure that the transport is still valid. 375 * sure that the transport is still valid.
361 */ 376 */
362void sctp_generate_heartbeat_event(unsigned long data) 377void sctp_generate_heartbeat_event(struct timer_list *t)
363{ 378{
364 struct sctp_transport *transport = (struct sctp_transport *) data; 379 struct sctp_transport *transport = from_timer(transport, t, hb_timer);
365 struct sctp_association *asoc = transport->asoc; 380 struct sctp_association *asoc = transport->asoc;
366 struct sock *sk = asoc->base.sk; 381 struct sock *sk = asoc->base.sk;
367 struct net *net = sock_net(sk); 382 struct net *net = sock_net(sk);
@@ -404,9 +419,10 @@ out_unlock:
404/* Handle the timeout of the ICMP protocol unreachable timer. Trigger 419/* Handle the timeout of the ICMP protocol unreachable timer. Trigger
405 * the correct state machine transition that will close the association. 420 * the correct state machine transition that will close the association.
406 */ 421 */
407void sctp_generate_proto_unreach_event(unsigned long data) 422void sctp_generate_proto_unreach_event(struct timer_list *t)
408{ 423{
409 struct sctp_transport *transport = (struct sctp_transport *)data; 424 struct sctp_transport *transport =
425 from_timer(transport, t, proto_unreach_timer);
410 struct sctp_association *asoc = transport->asoc; 426 struct sctp_association *asoc = transport->asoc;
411 struct sock *sk = asoc->base.sk; 427 struct sock *sk = asoc->base.sk;
412 struct net *net = sock_net(sk); 428 struct net *net = sock_net(sk);
@@ -438,9 +454,10 @@ out_unlock:
438} 454}
439 455
440 /* Handle the timeout of the RE-CONFIG timer. */ 456 /* Handle the timeout of the RE-CONFIG timer. */
441void sctp_generate_reconf_event(unsigned long data) 457void sctp_generate_reconf_event(struct timer_list *t)
442{ 458{
443 struct sctp_transport *transport = (struct sctp_transport *)data; 459 struct sctp_transport *transport =
460 from_timer(transport, t, reconf_timer);
444 struct sctp_association *asoc = transport->asoc; 461 struct sctp_association *asoc = transport->asoc;
445 struct sock *sk = asoc->base.sk; 462 struct sock *sk = asoc->base.sk;
446 struct net *net = sock_net(sk); 463 struct net *net = sock_net(sk);
@@ -470,24 +487,27 @@ out_unlock:
470} 487}
471 488
472/* Inject a SACK Timeout event into the state machine. */ 489/* Inject a SACK Timeout event into the state machine. */
473static void sctp_generate_sack_event(unsigned long data) 490static void sctp_generate_sack_event(struct timer_list *t)
474{ 491{
475 struct sctp_association *asoc = (struct sctp_association *)data; 492 struct sctp_association *asoc =
493 from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_SACK]);
494
476 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK); 495 sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK);
477} 496}
478 497
479sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = { 498sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
480 NULL, 499 [SCTP_EVENT_TIMEOUT_NONE] = NULL,
481 sctp_generate_t1_cookie_event, 500 [SCTP_EVENT_TIMEOUT_T1_COOKIE] = sctp_generate_t1_cookie_event,
482 sctp_generate_t1_init_event, 501 [SCTP_EVENT_TIMEOUT_T1_INIT] = sctp_generate_t1_init_event,
483 sctp_generate_t2_shutdown_event, 502 [SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = sctp_generate_t2_shutdown_event,
484 NULL, 503 [SCTP_EVENT_TIMEOUT_T3_RTX] = NULL,
485 sctp_generate_t4_rto_event, 504 [SCTP_EVENT_TIMEOUT_T4_RTO] = sctp_generate_t4_rto_event,
486 sctp_generate_t5_shutdown_guard_event, 505 [SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD] =
487 NULL, 506 sctp_generate_t5_shutdown_guard_event,
488 NULL, 507 [SCTP_EVENT_TIMEOUT_HEARTBEAT] = NULL,
489 sctp_generate_sack_event, 508 [SCTP_EVENT_TIMEOUT_RECONF] = NULL,
490 sctp_generate_autoclose_event, 509 [SCTP_EVENT_TIMEOUT_SACK] = sctp_generate_sack_event,
510 [SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sctp_generate_autoclose_event,
491}; 511};
492 512
493 513
@@ -1089,6 +1109,8 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc,
1089 1109
1090 list_for_each_entry(chunk, &msg->chunks, frag_list) 1110 list_for_each_entry(chunk, &msg->chunks, frag_list)
1091 sctp_outq_tail(&asoc->outqueue, chunk, gfp); 1111 sctp_outq_tail(&asoc->outqueue, chunk, gfp);
1112
1113 asoc->outqueue.sched->enqueue(&asoc->outqueue, msg);
1092} 1114}
1093 1115
1094 1116
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6f45d1713452..3204a9b29407 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -79,12 +79,13 @@
79#include <net/sock.h> 79#include <net/sock.h>
80#include <net/sctp/sctp.h> 80#include <net/sctp/sctp.h>
81#include <net/sctp/sm.h> 81#include <net/sctp/sm.h>
82#include <net/sctp/stream_sched.h>
82 83
83/* Forward declarations for internal helper functions. */ 84/* Forward declarations for internal helper functions. */
84static int sctp_writeable(struct sock *sk); 85static int sctp_writeable(struct sock *sk);
85static void sctp_wfree(struct sk_buff *skb); 86static void sctp_wfree(struct sk_buff *skb);
86static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p, 87static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
87 size_t msg_len); 88 size_t msg_len, struct sock **orig_sk);
88static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); 89static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
89static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); 90static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
90static int sctp_wait_for_accept(struct sock *sk, long timeo); 91static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -1957,14 +1958,28 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
1957 goto out_free; 1958 goto out_free;
1958 } 1959 }
1959 1960
1961 /* Allocate sctp_stream_out_ext if not already done */
1962 if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
1963 err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
1964 if (err)
1965 goto out_free;
1966 }
1967
1960 if (sctp_wspace(asoc) < msg_len) 1968 if (sctp_wspace(asoc) < msg_len)
1961 sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); 1969 sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
1962 1970
1963 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1971 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1964 if (!sctp_wspace(asoc)) { 1972 if (!sctp_wspace(asoc)) {
1965 err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); 1973 /* sk can be changed by peel off when waiting for buf. */
1966 if (err) 1974 err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
1975 if (err) {
1976 if (err == -ESRCH) {
1977 /* asoc is already dead. */
1978 new_asoc = NULL;
1979 err = -EPIPE;
1980 }
1967 goto out_free; 1981 goto out_free;
1982 }
1968 } 1983 }
1969 1984
1970 /* If an address is passed with the sendto/sendmsg call, it is used 1985 /* If an address is passed with the sendto/sendmsg call, it is used
@@ -3125,9 +3140,9 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
3125 */ 3140 */
3126static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen) 3141static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
3127{ 3142{
3143 struct sctp_sock *sp = sctp_sk(sk);
3128 struct sctp_assoc_value params; 3144 struct sctp_assoc_value params;
3129 struct sctp_association *asoc; 3145 struct sctp_association *asoc;
3130 struct sctp_sock *sp = sctp_sk(sk);
3131 int val; 3146 int val;
3132 3147
3133 if (optlen == sizeof(int)) { 3148 if (optlen == sizeof(int)) {
@@ -3143,26 +3158,35 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
3143 if (copy_from_user(&params, optval, optlen)) 3158 if (copy_from_user(&params, optval, optlen))
3144 return -EFAULT; 3159 return -EFAULT;
3145 val = params.assoc_value; 3160 val = params.assoc_value;
3146 } else 3161 } else {
3147 return -EINVAL; 3162 return -EINVAL;
3163 }
3148 3164
3149 if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN))) 3165 if (val) {
3150 return -EINVAL; 3166 int min_len, max_len;
3151 3167
3152 asoc = sctp_id2assoc(sk, params.assoc_id); 3168 min_len = SCTP_DEFAULT_MINSEGMENT - sp->pf->af->net_header_len;
3153 if (!asoc && params.assoc_id && sctp_style(sk, UDP)) 3169 min_len -= sizeof(struct sctphdr) +
3154 return -EINVAL; 3170 sizeof(struct sctp_data_chunk);
3171
3172 max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk);
3173
3174 if (val < min_len || val > max_len)
3175 return -EINVAL;
3176 }
3155 3177
3178 asoc = sctp_id2assoc(sk, params.assoc_id);
3156 if (asoc) { 3179 if (asoc) {
3157 if (val == 0) { 3180 if (val == 0) {
3158 val = asoc->pathmtu; 3181 val = asoc->pathmtu - sp->pf->af->net_header_len;
3159 val -= sp->pf->af->net_header_len;
3160 val -= sizeof(struct sctphdr) + 3182 val -= sizeof(struct sctphdr) +
3161 sizeof(struct sctp_data_chunk); 3183 sizeof(struct sctp_data_chunk);
3162 } 3184 }
3163 asoc->user_frag = val; 3185 asoc->user_frag = val;
3164 asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); 3186 asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
3165 } else { 3187 } else {
3188 if (params.assoc_id && sctp_style(sk, UDP))
3189 return -EINVAL;
3166 sp->user_frag = val; 3190 sp->user_frag = val;
3167 } 3191 }
3168 3192
@@ -3937,6 +3961,64 @@ out:
3937 return retval; 3961 return retval;
3938} 3962}
3939 3963
3964static int sctp_setsockopt_scheduler(struct sock *sk,
3965 char __user *optval,
3966 unsigned int optlen)
3967{
3968 struct sctp_association *asoc;
3969 struct sctp_assoc_value params;
3970 int retval = -EINVAL;
3971
3972 if (optlen < sizeof(params))
3973 goto out;
3974
3975 optlen = sizeof(params);
3976 if (copy_from_user(&params, optval, optlen)) {
3977 retval = -EFAULT;
3978 goto out;
3979 }
3980
3981 if (params.assoc_value > SCTP_SS_MAX)
3982 goto out;
3983
3984 asoc = sctp_id2assoc(sk, params.assoc_id);
3985 if (!asoc)
3986 goto out;
3987
3988 retval = sctp_sched_set_sched(asoc, params.assoc_value);
3989
3990out:
3991 return retval;
3992}
3993
3994static int sctp_setsockopt_scheduler_value(struct sock *sk,
3995 char __user *optval,
3996 unsigned int optlen)
3997{
3998 struct sctp_association *asoc;
3999 struct sctp_stream_value params;
4000 int retval = -EINVAL;
4001
4002 if (optlen < sizeof(params))
4003 goto out;
4004
4005 optlen = sizeof(params);
4006 if (copy_from_user(&params, optval, optlen)) {
4007 retval = -EFAULT;
4008 goto out;
4009 }
4010
4011 asoc = sctp_id2assoc(sk, params.assoc_id);
4012 if (!asoc)
4013 goto out;
4014
4015 retval = sctp_sched_set_value(asoc, params.stream_id,
4016 params.stream_value, GFP_KERNEL);
4017
4018out:
4019 return retval;
4020}
4021
3940/* API 6.2 setsockopt(), getsockopt() 4022/* API 6.2 setsockopt(), getsockopt()
3941 * 4023 *
3942 * Applications use setsockopt() and getsockopt() to set or retrieve 4024 * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4118,6 +4200,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
4118 case SCTP_ADD_STREAMS: 4200 case SCTP_ADD_STREAMS:
4119 retval = sctp_setsockopt_add_streams(sk, optval, optlen); 4201 retval = sctp_setsockopt_add_streams(sk, optval, optlen);
4120 break; 4202 break;
4203 case SCTP_STREAM_SCHEDULER:
4204 retval = sctp_setsockopt_scheduler(sk, optval, optlen);
4205 break;
4206 case SCTP_STREAM_SCHEDULER_VALUE:
4207 retval = sctp_setsockopt_scheduler_value(sk, optval, optlen);
4208 break;
4121 default: 4209 default:
4122 retval = -ENOPROTOOPT; 4210 retval = -ENOPROTOOPT;
4123 break; 4211 break;
@@ -4943,12 +5031,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
4943 if (!asoc) 5031 if (!asoc)
4944 return -EINVAL; 5032 return -EINVAL;
4945 5033
4946 /* If there is a thread waiting on more sndbuf space for
4947 * sending on this asoc, it cannot be peeled.
4948 */
4949 if (waitqueue_active(&asoc->wait))
4950 return -EBUSY;
4951
4952 /* An association cannot be branched off from an already peeled-off 5034 /* An association cannot be branched off from an already peeled-off
4953 * socket, nor is this supported for tcp style sockets. 5035 * socket, nor is this supported for tcp style sockets.
4954 */ 5036 */
@@ -6679,7 +6761,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
6679 char __user *optval, 6761 char __user *optval,
6680 int __user *optlen) 6762 int __user *optlen)
6681{ 6763{
6682 struct sctp_stream_out *streamout; 6764 struct sctp_stream_out_ext *streamoute;
6683 struct sctp_association *asoc; 6765 struct sctp_association *asoc;
6684 struct sctp_prstatus params; 6766 struct sctp_prstatus params;
6685 int retval = -EINVAL; 6767 int retval = -EINVAL;
@@ -6702,21 +6784,29 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
6702 if (!asoc || params.sprstat_sid >= asoc->stream.outcnt) 6784 if (!asoc || params.sprstat_sid >= asoc->stream.outcnt)
6703 goto out; 6785 goto out;
6704 6786
6705 streamout = &asoc->stream.out[params.sprstat_sid]; 6787 streamoute = asoc->stream.out[params.sprstat_sid].ext;
6788 if (!streamoute) {
6789 /* Not allocated yet, means all stats are 0 */
6790 params.sprstat_abandoned_unsent = 0;
6791 params.sprstat_abandoned_sent = 0;
6792 retval = 0;
6793 goto out;
6794 }
6795
6706 if (policy == SCTP_PR_SCTP_NONE) { 6796 if (policy == SCTP_PR_SCTP_NONE) {
6707 params.sprstat_abandoned_unsent = 0; 6797 params.sprstat_abandoned_unsent = 0;
6708 params.sprstat_abandoned_sent = 0; 6798 params.sprstat_abandoned_sent = 0;
6709 for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { 6799 for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
6710 params.sprstat_abandoned_unsent += 6800 params.sprstat_abandoned_unsent +=
6711 streamout->abandoned_unsent[policy]; 6801 streamoute->abandoned_unsent[policy];
6712 params.sprstat_abandoned_sent += 6802 params.sprstat_abandoned_sent +=
6713 streamout->abandoned_sent[policy]; 6803 streamoute->abandoned_sent[policy];
6714 } 6804 }
6715 } else { 6805 } else {
6716 params.sprstat_abandoned_unsent = 6806 params.sprstat_abandoned_unsent =
6717 streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)]; 6807 streamoute->abandoned_unsent[__SCTP_PR_INDEX(policy)];
6718 params.sprstat_abandoned_sent = 6808 params.sprstat_abandoned_sent =
6719 streamout->abandoned_sent[__SCTP_PR_INDEX(policy)]; 6809 streamoute->abandoned_sent[__SCTP_PR_INDEX(policy)];
6720 } 6810 }
6721 6811
6722 if (put_user(len, optlen) || copy_to_user(optval, &params, len)) { 6812 if (put_user(len, optlen) || copy_to_user(optval, &params, len)) {
@@ -6812,6 +6902,85 @@ out:
6812 return retval; 6902 return retval;
6813} 6903}
6814 6904
6905static int sctp_getsockopt_scheduler(struct sock *sk, int len,
6906 char __user *optval,
6907 int __user *optlen)
6908{
6909 struct sctp_assoc_value params;
6910 struct sctp_association *asoc;
6911 int retval = -EFAULT;
6912
6913 if (len < sizeof(params)) {
6914 retval = -EINVAL;
6915 goto out;
6916 }
6917
6918 len = sizeof(params);
6919 if (copy_from_user(&params, optval, len))
6920 goto out;
6921
6922 asoc = sctp_id2assoc(sk, params.assoc_id);
6923 if (!asoc) {
6924 retval = -EINVAL;
6925 goto out;
6926 }
6927
6928 params.assoc_value = sctp_sched_get_sched(asoc);
6929
6930 if (put_user(len, optlen))
6931 goto out;
6932
6933 if (copy_to_user(optval, &params, len))
6934 goto out;
6935
6936 retval = 0;
6937
6938out:
6939 return retval;
6940}
6941
6942static int sctp_getsockopt_scheduler_value(struct sock *sk, int len,
6943 char __user *optval,
6944 int __user *optlen)
6945{
6946 struct sctp_stream_value params;
6947 struct sctp_association *asoc;
6948 int retval = -EFAULT;
6949
6950 if (len < sizeof(params)) {
6951 retval = -EINVAL;
6952 goto out;
6953 }
6954
6955 len = sizeof(params);
6956 if (copy_from_user(&params, optval, len))
6957 goto out;
6958
6959 asoc = sctp_id2assoc(sk, params.assoc_id);
6960 if (!asoc) {
6961 retval = -EINVAL;
6962 goto out;
6963 }
6964
6965 retval = sctp_sched_get_value(asoc, params.stream_id,
6966 &params.stream_value);
6967 if (retval)
6968 goto out;
6969
6970 if (put_user(len, optlen)) {
6971 retval = -EFAULT;
6972 goto out;
6973 }
6974
6975 if (copy_to_user(optval, &params, len)) {
6976 retval = -EFAULT;
6977 goto out;
6978 }
6979
6980out:
6981 return retval;
6982}
6983
6815static int sctp_getsockopt(struct sock *sk, int level, int optname, 6984static int sctp_getsockopt(struct sock *sk, int level, int optname,
6816 char __user *optval, int __user *optlen) 6985 char __user *optval, int __user *optlen)
6817{ 6986{
@@ -6994,6 +7163,14 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
6994 retval = sctp_getsockopt_enable_strreset(sk, len, optval, 7163 retval = sctp_getsockopt_enable_strreset(sk, len, optval,
6995 optlen); 7164 optlen);
6996 break; 7165 break;
7166 case SCTP_STREAM_SCHEDULER:
7167 retval = sctp_getsockopt_scheduler(sk, len, optval,
7168 optlen);
7169 break;
7170 case SCTP_STREAM_SCHEDULER_VALUE:
7171 retval = sctp_getsockopt_scheduler_value(sk, len, optval,
7172 optlen);
7173 break;
6997 default: 7174 default:
6998 retval = -ENOPROTOOPT; 7175 retval = -ENOPROTOOPT;
6999 break; 7176 break;
@@ -7822,7 +7999,7 @@ void sctp_sock_rfree(struct sk_buff *skb)
7822 7999
7823/* Helper function to wait for space in the sndbuf. */ 8000/* Helper function to wait for space in the sndbuf. */
7824static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, 8001static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
7825 size_t msg_len) 8002 size_t msg_len, struct sock **orig_sk)
7826{ 8003{
7827 struct sock *sk = asoc->base.sk; 8004 struct sock *sk = asoc->base.sk;
7828 int err = 0; 8005 int err = 0;
@@ -7839,10 +8016,11 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
7839 for (;;) { 8016 for (;;) {
7840 prepare_to_wait_exclusive(&asoc->wait, &wait, 8017 prepare_to_wait_exclusive(&asoc->wait, &wait,
7841 TASK_INTERRUPTIBLE); 8018 TASK_INTERRUPTIBLE);
8019 if (asoc->base.dead)
8020 goto do_dead;
7842 if (!*timeo_p) 8021 if (!*timeo_p)
7843 goto do_nonblock; 8022 goto do_nonblock;
7844 if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING || 8023 if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING)
7845 asoc->base.dead)
7846 goto do_error; 8024 goto do_error;
7847 if (signal_pending(current)) 8025 if (signal_pending(current))
7848 goto do_interrupted; 8026 goto do_interrupted;
@@ -7855,11 +8033,17 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
7855 release_sock(sk); 8033 release_sock(sk);
7856 current_timeo = schedule_timeout(current_timeo); 8034 current_timeo = schedule_timeout(current_timeo);
7857 lock_sock(sk); 8035 lock_sock(sk);
8036 if (sk != asoc->base.sk) {
8037 release_sock(sk);
8038 sk = asoc->base.sk;
8039 lock_sock(sk);
8040 }
7858 8041
7859 *timeo_p = current_timeo; 8042 *timeo_p = current_timeo;
7860 } 8043 }
7861 8044
7862out: 8045out:
8046 *orig_sk = sk;
7863 finish_wait(&asoc->wait, &wait); 8047 finish_wait(&asoc->wait, &wait);
7864 8048
7865 /* Release the association's refcnt. */ 8049 /* Release the association's refcnt. */
@@ -7867,6 +8051,10 @@ out:
7867 8051
7868 return err; 8052 return err;
7869 8053
8054do_dead:
8055 err = -ESRCH;
8056 goto out;
8057
7870do_error: 8058do_error:
7871 err = -EPIPE; 8059 err = -EPIPE;
7872 goto out; 8060 goto out;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index fa8371ff05c4..a11db21dc8a0 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -32,44 +32,181 @@
32 * Xin Long <lucien.xin@gmail.com> 32 * Xin Long <lucien.xin@gmail.com>
33 */ 33 */
34 34
35#include <linux/list.h>
35#include <net/sctp/sctp.h> 36#include <net/sctp/sctp.h>
36#include <net/sctp/sm.h> 37#include <net/sctp/sm.h>
38#include <net/sctp/stream_sched.h>
39
40/* Migrates chunks from stream queues to new stream queues if needed,
41 * but not across associations. Also, removes those chunks to streams
42 * higher than the new max.
43 */
44static void sctp_stream_outq_migrate(struct sctp_stream *stream,
45 struct sctp_stream *new, __u16 outcnt)
46{
47 struct sctp_association *asoc;
48 struct sctp_chunk *ch, *temp;
49 struct sctp_outq *outq;
50 int i;
51
52 asoc = container_of(stream, struct sctp_association, stream);
53 outq = &asoc->outqueue;
54
55 list_for_each_entry_safe(ch, temp, &outq->out_chunk_list, list) {
56 __u16 sid = sctp_chunk_stream_no(ch);
57
58 if (sid < outcnt)
59 continue;
60
61 sctp_sched_dequeue_common(outq, ch);
62 /* No need to call dequeue_done here because
63 * the chunks are not scheduled by now.
64 */
65
66 /* Mark as failed send. */
67 sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM);
68 if (asoc->peer.prsctp_capable &&
69 SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags))
70 asoc->sent_cnt_removable--;
71
72 sctp_chunk_free(ch);
73 }
74
75 if (new) {
76 /* Here we actually move the old ext stuff into the new
77 * buffer, because we want to keep it. Then
78 * sctp_stream_update will swap ->out pointers.
79 */
80 for (i = 0; i < outcnt; i++) {
81 kfree(new->out[i].ext);
82 new->out[i].ext = stream->out[i].ext;
83 stream->out[i].ext = NULL;
84 }
85 }
86
87 for (i = outcnt; i < stream->outcnt; i++)
88 kfree(stream->out[i].ext);
89}
90
91static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
92 gfp_t gfp)
93{
94 struct sctp_stream_out *out;
95
96 out = kmalloc_array(outcnt, sizeof(*out), gfp);
97 if (!out)
98 return -ENOMEM;
99
100 if (stream->out) {
101 memcpy(out, stream->out, min(outcnt, stream->outcnt) *
102 sizeof(*out));
103 kfree(stream->out);
104 }
105
106 if (outcnt > stream->outcnt)
107 memset(out + stream->outcnt, 0,
108 (outcnt - stream->outcnt) * sizeof(*out));
109
110 stream->out = out;
111
112 return 0;
113}
114
115static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt,
116 gfp_t gfp)
117{
118 struct sctp_stream_in *in;
119
120 in = kmalloc_array(incnt, sizeof(*stream->in), gfp);
121
122 if (!in)
123 return -ENOMEM;
124
125 if (stream->in) {
126 memcpy(in, stream->in, min(incnt, stream->incnt) *
127 sizeof(*in));
128 kfree(stream->in);
129 }
130
131 if (incnt > stream->incnt)
132 memset(in + stream->incnt, 0,
133 (incnt - stream->incnt) * sizeof(*in));
134
135 stream->in = in;
136
137 return 0;
138}
37 139
38int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, 140int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
39 gfp_t gfp) 141 gfp_t gfp)
40{ 142{
41 int i; 143 struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
144 int i, ret = 0;
145
146 gfp |= __GFP_NOWARN;
42 147
43 /* Initial stream->out size may be very big, so free it and alloc 148 /* Initial stream->out size may be very big, so free it and alloc
44 * a new one with new outcnt to save memory. 149 * a new one with new outcnt to save memory if needed.
45 */ 150 */
46 kfree(stream->out); 151 if (outcnt == stream->outcnt)
152 goto in;
47 153
48 stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp); 154 /* Filter out chunks queued on streams that won't exist anymore */
49 if (!stream->out) 155 sched->unsched_all(stream);
50 return -ENOMEM; 156 sctp_stream_outq_migrate(stream, NULL, outcnt);
157 sched->sched_all(stream);
158
159 i = sctp_stream_alloc_out(stream, outcnt, gfp);
160 if (i)
161 return i;
51 162
52 stream->outcnt = outcnt; 163 stream->outcnt = outcnt;
53 for (i = 0; i < stream->outcnt; i++) 164 for (i = 0; i < stream->outcnt; i++)
54 stream->out[i].state = SCTP_STREAM_OPEN; 165 stream->out[i].state = SCTP_STREAM_OPEN;
55 166
167 sched->init(stream);
168
169in:
56 if (!incnt) 170 if (!incnt)
57 return 0; 171 goto out;
58 172
59 stream->in = kcalloc(incnt, sizeof(*stream->in), gfp); 173 i = sctp_stream_alloc_in(stream, incnt, gfp);
60 if (!stream->in) { 174 if (i) {
61 kfree(stream->out); 175 ret = -ENOMEM;
62 stream->out = NULL; 176 goto free;
63 return -ENOMEM;
64 } 177 }
65 178
66 stream->incnt = incnt; 179 stream->incnt = incnt;
180 goto out;
67 181
68 return 0; 182free:
183 sched->free(stream);
184 kfree(stream->out);
185 stream->out = NULL;
186out:
187 return ret;
188}
189
190int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
191{
192 struct sctp_stream_out_ext *soute;
193
194 soute = kzalloc(sizeof(*soute), GFP_KERNEL);
195 if (!soute)
196 return -ENOMEM;
197 stream->out[sid].ext = soute;
198
199 return sctp_sched_init_sid(stream, sid, GFP_KERNEL);
69} 200}
70 201
71void sctp_stream_free(struct sctp_stream *stream) 202void sctp_stream_free(struct sctp_stream *stream)
72{ 203{
204 struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
205 int i;
206
207 sched->free(stream);
208 for (i = 0; i < stream->outcnt; i++)
209 kfree(stream->out[i].ext);
73 kfree(stream->out); 210 kfree(stream->out);
74 kfree(stream->in); 211 kfree(stream->in);
75} 212}
@@ -87,6 +224,10 @@ void sctp_stream_clear(struct sctp_stream *stream)
87 224
88void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) 225void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
89{ 226{
227 struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
228
229 sched->unsched_all(stream);
230 sctp_stream_outq_migrate(stream, new, new->outcnt);
90 sctp_stream_free(stream); 231 sctp_stream_free(stream);
91 232
92 stream->out = new->out; 233 stream->out = new->out;
@@ -94,6 +235,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
94 stream->outcnt = new->outcnt; 235 stream->outcnt = new->outcnt;
95 stream->incnt = new->incnt; 236 stream->incnt = new->incnt;
96 237
238 sched->sched_all(stream);
239
97 new->out = NULL; 240 new->out = NULL;
98 new->in = NULL; 241 new->in = NULL;
99} 242}
@@ -139,15 +282,31 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
139 282
140 str_nums = params->srs_number_streams; 283 str_nums = params->srs_number_streams;
141 str_list = params->srs_stream_list; 284 str_list = params->srs_stream_list;
142 if (out && str_nums) 285 if (str_nums) {
143 for (i = 0; i < str_nums; i++) 286 int param_len = 0;
144 if (str_list[i] >= stream->outcnt)
145 goto out;
146 287
147 if (in && str_nums) 288 if (out) {
148 for (i = 0; i < str_nums; i++) 289 for (i = 0; i < str_nums; i++)
149 if (str_list[i] >= stream->incnt) 290 if (str_list[i] >= stream->outcnt)
150 goto out; 291 goto out;
292
293 param_len = str_nums * sizeof(__u16) +
294 sizeof(struct sctp_strreset_outreq);
295 }
296
297 if (in) {
298 for (i = 0; i < str_nums; i++)
299 if (str_list[i] >= stream->incnt)
300 goto out;
301
302 param_len += str_nums * sizeof(__u16) +
303 sizeof(struct sctp_strreset_inreq);
304 }
305
306 if (param_len > SCTP_MAX_CHUNK_LEN -
307 sizeof(struct sctp_reconf_chunk))
308 goto out;
309 }
151 310
152 nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL); 311 nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL);
153 if (!nstr_list) { 312 if (!nstr_list) {
@@ -250,7 +409,7 @@ int sctp_send_add_streams(struct sctp_association *asoc,
250{ 409{
251 struct sctp_stream *stream = &asoc->stream; 410 struct sctp_stream *stream = &asoc->stream;
252 struct sctp_chunk *chunk = NULL; 411 struct sctp_chunk *chunk = NULL;
253 int retval = -ENOMEM; 412 int retval;
254 __u32 outcnt, incnt; 413 __u32 outcnt, incnt;
255 __u16 out, in; 414 __u16 out, in;
256 415
@@ -276,20 +435,16 @@ int sctp_send_add_streams(struct sctp_association *asoc,
276 } 435 }
277 436
278 if (out) { 437 if (out) {
279 struct sctp_stream_out *streamout; 438 retval = sctp_stream_alloc_out(stream, outcnt, GFP_KERNEL);
280 439 if (retval)
281 streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
282 GFP_KERNEL);
283 if (!streamout)
284 goto out; 440 goto out;
285
286 memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
287 stream->out = streamout;
288 } 441 }
289 442
290 chunk = sctp_make_strreset_addstrm(asoc, out, in); 443 chunk = sctp_make_strreset_addstrm(asoc, out, in);
291 if (!chunk) 444 if (!chunk) {
445 retval = -ENOMEM;
292 goto out; 446 goto out;
447 }
293 448
294 asoc->strreset_chunk = chunk; 449 asoc->strreset_chunk = chunk;
295 sctp_chunk_hold(asoc->strreset_chunk); 450 sctp_chunk_hold(asoc->strreset_chunk);
@@ -609,7 +764,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
609 struct sctp_strreset_addstrm *addstrm = param.v; 764 struct sctp_strreset_addstrm *addstrm = param.v;
610 struct sctp_stream *stream = &asoc->stream; 765 struct sctp_stream *stream = &asoc->stream;
611 __u32 result = SCTP_STRRESET_DENIED; 766 __u32 result = SCTP_STRRESET_DENIED;
612 struct sctp_stream_in *streamin;
613 __u32 request_seq, incnt; 767 __u32 request_seq, incnt;
614 __u16 in, i; 768 __u16 in, i;
615 769
@@ -656,13 +810,9 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
656 if (!in || incnt > SCTP_MAX_STREAM) 810 if (!in || incnt > SCTP_MAX_STREAM)
657 goto out; 811 goto out;
658 812
659 streamin = krealloc(stream->in, incnt * sizeof(*streamin), 813 if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
660 GFP_ATOMIC);
661 if (!streamin)
662 goto out; 814 goto out;
663 815
664 memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
665 stream->in = streamin;
666 stream->incnt = incnt; 816 stream->incnt = incnt;
667 817
668 result = SCTP_STRRESET_PERFORMED; 818 result = SCTP_STRRESET_PERFORMED;
@@ -684,10 +834,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
684 struct sctp_strreset_addstrm *addstrm = param.v; 834 struct sctp_strreset_addstrm *addstrm = param.v;
685 struct sctp_stream *stream = &asoc->stream; 835 struct sctp_stream *stream = &asoc->stream;
686 __u32 result = SCTP_STRRESET_DENIED; 836 __u32 result = SCTP_STRRESET_DENIED;
687 struct sctp_stream_out *streamout;
688 struct sctp_chunk *chunk = NULL; 837 struct sctp_chunk *chunk = NULL;
689 __u32 request_seq, outcnt; 838 __u32 request_seq, outcnt;
690 __u16 out, i; 839 __u16 out, i;
840 int ret;
691 841
692 request_seq = ntohl(addstrm->request_seq); 842 request_seq = ntohl(addstrm->request_seq);
693 if (TSN_lt(asoc->strreset_inseq, request_seq) || 843 if (TSN_lt(asoc->strreset_inseq, request_seq) ||
@@ -716,14 +866,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
716 if (!out || outcnt > SCTP_MAX_STREAM) 866 if (!out || outcnt > SCTP_MAX_STREAM)
717 goto out; 867 goto out;
718 868
719 streamout = krealloc(stream->out, outcnt * sizeof(*streamout), 869 ret = sctp_stream_alloc_out(stream, outcnt, GFP_ATOMIC);
720 GFP_ATOMIC); 870 if (ret)
721 if (!streamout)
722 goto out; 871 goto out;
723 872
724 memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
725 stream->out = streamout;
726
727 chunk = sctp_make_strreset_addstrm(asoc, out, 0); 873 chunk = sctp_make_strreset_addstrm(asoc, out, 0);
728 if (!chunk) 874 if (!chunk)
729 goto out; 875 goto out;
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
new file mode 100644
index 000000000000..0b83ec51e43b
--- /dev/null
+++ b/net/sctp/stream_sched.c
@@ -0,0 +1,275 @@
1/* SCTP kernel implementation
2 * (C) Copyright Red Hat Inc. 2017
3 *
4 * This file is part of the SCTP kernel implementation
5 *
6 * These functions manipulate sctp stream queue/scheduling.
7 *
8 * This SCTP implementation is free software;
9 * you can redistribute it and/or modify it under the terms of
10 * the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * This SCTP implementation is distributed in the hope that it
15 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
16 * ************************
17 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 * See the GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with GNU CC; see the file COPYING. If not, see
22 * <http://www.gnu.org/licenses/>.
23 *
24 * Please send any bug reports or fixes you make to the
25 * email addresched(es):
26 * lksctp developers <linux-sctp@vger.kernel.org>
27 *
28 * Written or modified by:
29 * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
30 */
31
32#include <linux/list.h>
33#include <net/sctp/sctp.h>
34#include <net/sctp/sm.h>
35#include <net/sctp/stream_sched.h>
36
37/* First Come First Serve (a.k.a. FIFO)
38 * RFC DRAFT ndata Section 3.1
39 */
40static int sctp_sched_fcfs_set(struct sctp_stream *stream, __u16 sid,
41 __u16 value, gfp_t gfp)
42{
43 return 0;
44}
45
46static int sctp_sched_fcfs_get(struct sctp_stream *stream, __u16 sid,
47 __u16 *value)
48{
49 *value = 0;
50 return 0;
51}
52
53static int sctp_sched_fcfs_init(struct sctp_stream *stream)
54{
55 return 0;
56}
57
58static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid,
59 gfp_t gfp)
60{
61 return 0;
62}
63
64static void sctp_sched_fcfs_free(struct sctp_stream *stream)
65{
66}
67
68static void sctp_sched_fcfs_enqueue(struct sctp_outq *q,
69 struct sctp_datamsg *msg)
70{
71}
72
73static struct sctp_chunk *sctp_sched_fcfs_dequeue(struct sctp_outq *q)
74{
75 struct sctp_stream *stream = &q->asoc->stream;
76 struct sctp_chunk *ch = NULL;
77 struct list_head *entry;
78
79 if (list_empty(&q->out_chunk_list))
80 goto out;
81
82 if (stream->out_curr) {
83 ch = list_entry(stream->out_curr->ext->outq.next,
84 struct sctp_chunk, stream_list);
85 } else {
86 entry = q->out_chunk_list.next;
87 ch = list_entry(entry, struct sctp_chunk, list);
88 }
89
90 sctp_sched_dequeue_common(q, ch);
91
92out:
93 return ch;
94}
95
96static void sctp_sched_fcfs_dequeue_done(struct sctp_outq *q,
97 struct sctp_chunk *chunk)
98{
99}
100
101static void sctp_sched_fcfs_sched_all(struct sctp_stream *stream)
102{
103}
104
105static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream)
106{
107}
108
109static struct sctp_sched_ops sctp_sched_fcfs = {
110 .set = sctp_sched_fcfs_set,
111 .get = sctp_sched_fcfs_get,
112 .init = sctp_sched_fcfs_init,
113 .init_sid = sctp_sched_fcfs_init_sid,
114 .free = sctp_sched_fcfs_free,
115 .enqueue = sctp_sched_fcfs_enqueue,
116 .dequeue = sctp_sched_fcfs_dequeue,
117 .dequeue_done = sctp_sched_fcfs_dequeue_done,
118 .sched_all = sctp_sched_fcfs_sched_all,
119 .unsched_all = sctp_sched_fcfs_unsched_all,
120};
121
122/* API to other parts of the stack */
123
124extern struct sctp_sched_ops sctp_sched_prio;
125extern struct sctp_sched_ops sctp_sched_rr;
126
127static struct sctp_sched_ops *sctp_sched_ops[] = {
128 &sctp_sched_fcfs,
129 &sctp_sched_prio,
130 &sctp_sched_rr,
131};
132
133int sctp_sched_set_sched(struct sctp_association *asoc,
134 enum sctp_sched_type sched)
135{
136 struct sctp_sched_ops *n = sctp_sched_ops[sched];
137 struct sctp_sched_ops *old = asoc->outqueue.sched;
138 struct sctp_datamsg *msg = NULL;
139 struct sctp_chunk *ch;
140 int i, ret = 0;
141
142 if (old == n)
143 return ret;
144
145 if (sched > SCTP_SS_MAX)
146 return -EINVAL;
147
148 if (old) {
149 old->free(&asoc->stream);
150
151 /* Give the next scheduler a clean slate. */
152 for (i = 0; i < asoc->stream.outcnt; i++) {
153 void *p = asoc->stream.out[i].ext;
154
155 if (!p)
156 continue;
157
158 p += offsetofend(struct sctp_stream_out_ext, outq);
159 memset(p, 0, sizeof(struct sctp_stream_out_ext) -
160 offsetofend(struct sctp_stream_out_ext, outq));
161 }
162 }
163
164 asoc->outqueue.sched = n;
165 n->init(&asoc->stream);
166 for (i = 0; i < asoc->stream.outcnt; i++) {
167 if (!asoc->stream.out[i].ext)
168 continue;
169
170 ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
171 if (ret)
172 goto err;
173 }
174
175 /* We have to requeue all chunks already queued. */
176 list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
177 if (ch->msg == msg)
178 continue;
179 msg = ch->msg;
180 n->enqueue(&asoc->outqueue, msg);
181 }
182
183 return ret;
184
185err:
186 n->free(&asoc->stream);
187 asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */
188
189 return ret;
190}
191
192int sctp_sched_get_sched(struct sctp_association *asoc)
193{
194 int i;
195
196 for (i = 0; i <= SCTP_SS_MAX; i++)
197 if (asoc->outqueue.sched == sctp_sched_ops[i])
198 return i;
199
200 return 0;
201}
202
203int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid,
204 __u16 value, gfp_t gfp)
205{
206 if (sid >= asoc->stream.outcnt)
207 return -EINVAL;
208
209 if (!asoc->stream.out[sid].ext) {
210 int ret;
211
212 ret = sctp_stream_init_ext(&asoc->stream, sid);
213 if (ret)
214 return ret;
215 }
216
217 return asoc->outqueue.sched->set(&asoc->stream, sid, value, gfp);
218}
219
220int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
221 __u16 *value)
222{
223 if (sid >= asoc->stream.outcnt)
224 return -EINVAL;
225
226 if (!asoc->stream.out[sid].ext)
227 return 0;
228
229 return asoc->outqueue.sched->get(&asoc->stream, sid, value);
230}
231
232void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch)
233{
234 if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) {
235 struct sctp_stream_out *sout;
236 __u16 sid;
237
238 /* datamsg is not finish, so save it as current one,
239 * in case application switch scheduler or a higher
240 * priority stream comes in.
241 */
242 sid = sctp_chunk_stream_no(ch);
243 sout = &q->asoc->stream.out[sid];
244 q->asoc->stream.out_curr = sout;
245 return;
246 }
247
248 q->asoc->stream.out_curr = NULL;
249 q->sched->dequeue_done(q, ch);
250}
251
252/* Auxiliary functions for the schedulers */
253void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch)
254{
255 list_del_init(&ch->list);
256 list_del_init(&ch->stream_list);
257 q->out_qlen -= ch->skb->len;
258}
259
260int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp)
261{
262 struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
263
264 INIT_LIST_HEAD(&stream->out[sid].ext->outq);
265 return sched->init_sid(stream, sid, gfp);
266}
267
268struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream)
269{
270 struct sctp_association *asoc;
271
272 asoc = container_of(stream, struct sctp_association, stream);
273
274 return asoc->outqueue.sched;
275}
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
new file mode 100644
index 000000000000..384dbf3c8760
--- /dev/null
+++ b/net/sctp/stream_sched_prio.c
@@ -0,0 +1,347 @@
1/* SCTP kernel implementation
2 * (C) Copyright Red Hat Inc. 2017
3 *
4 * This file is part of the SCTP kernel implementation
5 *
6 * These functions manipulate sctp stream queue/scheduling.
7 *
8 * This SCTP implementation is free software;
9 * you can redistribute it and/or modify it under the terms of
10 * the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * This SCTP implementation is distributed in the hope that it
15 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
16 * ************************
17 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 * See the GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with GNU CC; see the file COPYING. If not, see
22 * <http://www.gnu.org/licenses/>.
23 *
24 * Please send any bug reports or fixes you make to the
25 * email addresched(es):
26 * lksctp developers <linux-sctp@vger.kernel.org>
27 *
28 * Written or modified by:
29 * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
30 */
31
32#include <linux/list.h>
33#include <net/sctp/sctp.h>
34#include <net/sctp/sm.h>
35#include <net/sctp/stream_sched.h>
36
37/* Priority handling
38 * RFC DRAFT ndata section 3.4
39 */
40
41static void sctp_sched_prio_unsched_all(struct sctp_stream *stream);
42
43static struct sctp_stream_priorities *sctp_sched_prio_new_head(
44 struct sctp_stream *stream, int prio, gfp_t gfp)
45{
46 struct sctp_stream_priorities *p;
47
48 p = kmalloc(sizeof(*p), gfp);
49 if (!p)
50 return NULL;
51
52 INIT_LIST_HEAD(&p->prio_sched);
53 INIT_LIST_HEAD(&p->active);
54 p->next = NULL;
55 p->prio = prio;
56
57 return p;
58}
59
60static struct sctp_stream_priorities *sctp_sched_prio_get_head(
61 struct sctp_stream *stream, int prio, gfp_t gfp)
62{
63 struct sctp_stream_priorities *p;
64 int i;
65
66 /* Look into scheduled priorities first, as they are sorted and
67 * we can find it fast IF it's scheduled.
68 */
69 list_for_each_entry(p, &stream->prio_list, prio_sched) {
70 if (p->prio == prio)
71 return p;
72 if (p->prio > prio)
73 break;
74 }
75
76 /* No luck. So we search on all streams now. */
77 for (i = 0; i < stream->outcnt; i++) {
78 if (!stream->out[i].ext)
79 continue;
80
81 p = stream->out[i].ext->prio_head;
82 if (!p)
83 /* Means all other streams won't be initialized
84 * as well.
85 */
86 break;
87 if (p->prio == prio)
88 return p;
89 }
90
91 /* If not even there, allocate a new one. */
92 return sctp_sched_prio_new_head(stream, prio, gfp);
93}
94
95static void sctp_sched_prio_next_stream(struct sctp_stream_priorities *p)
96{
97 struct list_head *pos;
98
99 pos = p->next->prio_list.next;
100 if (pos == &p->active)
101 pos = pos->next;
102 p->next = list_entry(pos, struct sctp_stream_out_ext, prio_list);
103}
104
105static bool sctp_sched_prio_unsched(struct sctp_stream_out_ext *soute)
106{
107 bool scheduled = false;
108
109 if (!list_empty(&soute->prio_list)) {
110 struct sctp_stream_priorities *prio_head = soute->prio_head;
111
112 /* Scheduled */
113 scheduled = true;
114
115 if (prio_head->next == soute)
116 /* Try to move to the next stream */
117 sctp_sched_prio_next_stream(prio_head);
118
119 list_del_init(&soute->prio_list);
120
121 /* Also unsched the priority if this was the last stream */
122 if (list_empty(&prio_head->active)) {
123 list_del_init(&prio_head->prio_sched);
124 /* If there is no stream left, clear next */
125 prio_head->next = NULL;
126 }
127 }
128
129 return scheduled;
130}
131
132static void sctp_sched_prio_sched(struct sctp_stream *stream,
133 struct sctp_stream_out_ext *soute)
134{
135 struct sctp_stream_priorities *prio, *prio_head;
136
137 prio_head = soute->prio_head;
138
139 /* Nothing to do if already scheduled */
140 if (!list_empty(&soute->prio_list))
141 return;
142
143 /* Schedule the stream. If there is a next, we schedule the new
144 * one before it, so it's the last in round robin order.
145 * If there isn't, we also have to schedule the priority.
146 */
147 if (prio_head->next) {
148 list_add(&soute->prio_list, prio_head->next->prio_list.prev);
149 return;
150 }
151
152 list_add(&soute->prio_list, &prio_head->active);
153 prio_head->next = soute;
154
155 list_for_each_entry(prio, &stream->prio_list, prio_sched) {
156 if (prio->prio > prio_head->prio) {
157 list_add(&prio_head->prio_sched, prio->prio_sched.prev);
158 return;
159 }
160 }
161
162 list_add_tail(&prio_head->prio_sched, &stream->prio_list);
163}
164
165static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid,
166 __u16 prio, gfp_t gfp)
167{
168 struct sctp_stream_out *sout = &stream->out[sid];
169 struct sctp_stream_out_ext *soute = sout->ext;
170 struct sctp_stream_priorities *prio_head, *old;
171 bool reschedule = false;
172 int i;
173
174 prio_head = sctp_sched_prio_get_head(stream, prio, gfp);
175 if (!prio_head)
176 return -ENOMEM;
177
178 reschedule = sctp_sched_prio_unsched(soute);
179 old = soute->prio_head;
180 soute->prio_head = prio_head;
181 if (reschedule)
182 sctp_sched_prio_sched(stream, soute);
183
184 if (!old)
185 /* Happens when we set the priority for the first time */
186 return 0;
187
188 for (i = 0; i < stream->outcnt; i++) {
189 soute = stream->out[i].ext;
190 if (soute && soute->prio_head == old)
191 /* It's still in use, nothing else to do here. */
192 return 0;
193 }
194
195 /* No hits, we are good to free it. */
196 kfree(old);
197
198 return 0;
199}
200
201static int sctp_sched_prio_get(struct sctp_stream *stream, __u16 sid,
202 __u16 *value)
203{
204 *value = stream->out[sid].ext->prio_head->prio;
205 return 0;
206}
207
208static int sctp_sched_prio_init(struct sctp_stream *stream)
209{
210 INIT_LIST_HEAD(&stream->prio_list);
211
212 return 0;
213}
214
215static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid,
216 gfp_t gfp)
217{
218 INIT_LIST_HEAD(&stream->out[sid].ext->prio_list);
219 return sctp_sched_prio_set(stream, sid, 0, gfp);
220}
221
222static void sctp_sched_prio_free(struct sctp_stream *stream)
223{
224 struct sctp_stream_priorities *prio, *n;
225 LIST_HEAD(list);
226 int i;
227
228 /* As we don't keep a list of priorities, to avoid multiple
229 * frees we have to do it in 3 steps:
230 * 1. unsched everyone, so the lists are free to use in 2.
231 * 2. build the list of the priorities
232 * 3. free the list
233 */
234 sctp_sched_prio_unsched_all(stream);
235 for (i = 0; i < stream->outcnt; i++) {
236 if (!stream->out[i].ext)
237 continue;
238 prio = stream->out[i].ext->prio_head;
239 if (prio && list_empty(&prio->prio_sched))
240 list_add(&prio->prio_sched, &list);
241 }
242 list_for_each_entry_safe(prio, n, &list, prio_sched) {
243 list_del_init(&prio->prio_sched);
244 kfree(prio);
245 }
246}
247
248static void sctp_sched_prio_enqueue(struct sctp_outq *q,
249 struct sctp_datamsg *msg)
250{
251 struct sctp_stream *stream;
252 struct sctp_chunk *ch;
253 __u16 sid;
254
255 ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
256 sid = sctp_chunk_stream_no(ch);
257 stream = &q->asoc->stream;
258 sctp_sched_prio_sched(stream, stream->out[sid].ext);
259}
260
261static struct sctp_chunk *sctp_sched_prio_dequeue(struct sctp_outq *q)
262{
263 struct sctp_stream *stream = &q->asoc->stream;
264 struct sctp_stream_priorities *prio;
265 struct sctp_stream_out_ext *soute;
266 struct sctp_chunk *ch = NULL;
267
268 /* Bail out quickly if queue is empty */
269 if (list_empty(&q->out_chunk_list))
270 goto out;
271
272 /* Find which chunk is next. It's easy, it's either the current
273 * one or the first chunk on the next active stream.
274 */
275 if (stream->out_curr) {
276 soute = stream->out_curr->ext;
277 } else {
278 prio = list_entry(stream->prio_list.next,
279 struct sctp_stream_priorities, prio_sched);
280 soute = prio->next;
281 }
282 ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
283 sctp_sched_dequeue_common(q, ch);
284
285out:
286 return ch;
287}
288
289static void sctp_sched_prio_dequeue_done(struct sctp_outq *q,
290 struct sctp_chunk *ch)
291{
292 struct sctp_stream_priorities *prio;
293 struct sctp_stream_out_ext *soute;
294 __u16 sid;
295
296 /* Last chunk on that msg, move to the next stream on
297 * this priority.
298 */
299 sid = sctp_chunk_stream_no(ch);
300 soute = q->asoc->stream.out[sid].ext;
301 prio = soute->prio_head;
302
303 sctp_sched_prio_next_stream(prio);
304
305 if (list_empty(&soute->outq))
306 sctp_sched_prio_unsched(soute);
307}
308
309static void sctp_sched_prio_sched_all(struct sctp_stream *stream)
310{
311 struct sctp_association *asoc;
312 struct sctp_stream_out *sout;
313 struct sctp_chunk *ch;
314
315 asoc = container_of(stream, struct sctp_association, stream);
316 list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
317 __u16 sid;
318
319 sid = sctp_chunk_stream_no(ch);
320 sout = &stream->out[sid];
321 if (sout->ext)
322 sctp_sched_prio_sched(stream, sout->ext);
323 }
324}
325
326static void sctp_sched_prio_unsched_all(struct sctp_stream *stream)
327{
328 struct sctp_stream_priorities *p, *tmp;
329 struct sctp_stream_out_ext *soute, *souttmp;
330
331 list_for_each_entry_safe(p, tmp, &stream->prio_list, prio_sched)
332 list_for_each_entry_safe(soute, souttmp, &p->active, prio_list)
333 sctp_sched_prio_unsched(soute);
334}
335
336struct sctp_sched_ops sctp_sched_prio = {
337 .set = sctp_sched_prio_set,
338 .get = sctp_sched_prio_get,
339 .init = sctp_sched_prio_init,
340 .init_sid = sctp_sched_prio_init_sid,
341 .free = sctp_sched_prio_free,
342 .enqueue = sctp_sched_prio_enqueue,
343 .dequeue = sctp_sched_prio_dequeue,
344 .dequeue_done = sctp_sched_prio_dequeue_done,
345 .sched_all = sctp_sched_prio_sched_all,
346 .unsched_all = sctp_sched_prio_unsched_all,
347};
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
new file mode 100644
index 000000000000..7612a438c5b9
--- /dev/null
+++ b/net/sctp/stream_sched_rr.c
@@ -0,0 +1,201 @@
1/* SCTP kernel implementation
2 * (C) Copyright Red Hat Inc. 2017
3 *
4 * This file is part of the SCTP kernel implementation
5 *
6 * These functions manipulate sctp stream queue/scheduling.
7 *
8 * This SCTP implementation is free software;
9 * you can redistribute it and/or modify it under the terms of
10 * the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * This SCTP implementation is distributed in the hope that it
15 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
16 * ************************
17 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 * See the GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with GNU CC; see the file COPYING. If not, see
22 * <http://www.gnu.org/licenses/>.
23 *
24 * Please send any bug reports or fixes you make to the
25 * email addresched(es):
26 * lksctp developers <linux-sctp@vger.kernel.org>
27 *
28 * Written or modified by:
29 * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
30 */
31
32#include <linux/list.h>
33#include <net/sctp/sctp.h>
34#include <net/sctp/sm.h>
35#include <net/sctp/stream_sched.h>
36
37/* Priority handling
38 * RFC DRAFT ndata section 3.2
39 */
40static void sctp_sched_rr_unsched_all(struct sctp_stream *stream);
41
42static void sctp_sched_rr_next_stream(struct sctp_stream *stream)
43{
44 struct list_head *pos;
45
46 pos = stream->rr_next->rr_list.next;
47 if (pos == &stream->rr_list)
48 pos = pos->next;
49 stream->rr_next = list_entry(pos, struct sctp_stream_out_ext, rr_list);
50}
51
52static void sctp_sched_rr_unsched(struct sctp_stream *stream,
53 struct sctp_stream_out_ext *soute)
54{
55 if (stream->rr_next == soute)
56 /* Try to move to the next stream */
57 sctp_sched_rr_next_stream(stream);
58
59 list_del_init(&soute->rr_list);
60
61 /* If we have no other stream queued, clear next */
62 if (list_empty(&stream->rr_list))
63 stream->rr_next = NULL;
64}
65
66static void sctp_sched_rr_sched(struct sctp_stream *stream,
67 struct sctp_stream_out_ext *soute)
68{
69 if (!list_empty(&soute->rr_list))
70 /* Already scheduled. */
71 return;
72
73 /* Schedule the stream */
74 list_add_tail(&soute->rr_list, &stream->rr_list);
75
76 if (!stream->rr_next)
77 stream->rr_next = soute;
78}
79
80static int sctp_sched_rr_set(struct sctp_stream *stream, __u16 sid,
81 __u16 prio, gfp_t gfp)
82{
83 return 0;
84}
85
86static int sctp_sched_rr_get(struct sctp_stream *stream, __u16 sid,
87 __u16 *value)
88{
89 return 0;
90}
91
92static int sctp_sched_rr_init(struct sctp_stream *stream)
93{
94 INIT_LIST_HEAD(&stream->rr_list);
95 stream->rr_next = NULL;
96
97 return 0;
98}
99
100static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid,
101 gfp_t gfp)
102{
103 INIT_LIST_HEAD(&stream->out[sid].ext->rr_list);
104
105 return 0;
106}
107
108static void sctp_sched_rr_free(struct sctp_stream *stream)
109{
110 sctp_sched_rr_unsched_all(stream);
111}
112
113static void sctp_sched_rr_enqueue(struct sctp_outq *q,
114 struct sctp_datamsg *msg)
115{
116 struct sctp_stream *stream;
117 struct sctp_chunk *ch;
118 __u16 sid;
119
120 ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
121 sid = sctp_chunk_stream_no(ch);
122 stream = &q->asoc->stream;
123 sctp_sched_rr_sched(stream, stream->out[sid].ext);
124}
125
126static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q)
127{
128 struct sctp_stream *stream = &q->asoc->stream;
129 struct sctp_stream_out_ext *soute;
130 struct sctp_chunk *ch = NULL;
131
132 /* Bail out quickly if queue is empty */
133 if (list_empty(&q->out_chunk_list))
134 goto out;
135
136 /* Find which chunk is next */
137 if (stream->out_curr)
138 soute = stream->out_curr->ext;
139 else
140 soute = stream->rr_next;
141 ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
142
143 sctp_sched_dequeue_common(q, ch);
144
145out:
146 return ch;
147}
148
149static void sctp_sched_rr_dequeue_done(struct sctp_outq *q,
150 struct sctp_chunk *ch)
151{
152 struct sctp_stream_out_ext *soute;
153 __u16 sid;
154
155 /* Last chunk on that msg, move to the next stream */
156 sid = sctp_chunk_stream_no(ch);
157 soute = q->asoc->stream.out[sid].ext;
158
159 sctp_sched_rr_next_stream(&q->asoc->stream);
160
161 if (list_empty(&soute->outq))
162 sctp_sched_rr_unsched(&q->asoc->stream, soute);
163}
164
165static void sctp_sched_rr_sched_all(struct sctp_stream *stream)
166{
167 struct sctp_association *asoc;
168 struct sctp_stream_out_ext *soute;
169 struct sctp_chunk *ch;
170
171 asoc = container_of(stream, struct sctp_association, stream);
172 list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
173 __u16 sid;
174
175 sid = sctp_chunk_stream_no(ch);
176 soute = stream->out[sid].ext;
177 if (soute)
178 sctp_sched_rr_sched(stream, soute);
179 }
180}
181
182static void sctp_sched_rr_unsched_all(struct sctp_stream *stream)
183{
184 struct sctp_stream_out_ext *soute, *tmp;
185
186 list_for_each_entry_safe(soute, tmp, &stream->rr_list, rr_list)
187 sctp_sched_rr_unsched(stream, soute);
188}
189
190struct sctp_sched_ops sctp_sched_rr = {
191 .set = sctp_sched_rr_set,
192 .get = sctp_sched_rr_get,
193 .init = sctp_sched_rr_init,
194 .init_sid = sctp_sched_rr_init_sid,
195 .free = sctp_sched_rr_free,
196 .enqueue = sctp_sched_rr_enqueue,
197 .dequeue = sctp_sched_rr_dequeue,
198 .dequeue_done = sctp_sched_rr_dequeue_done,
199 .sched_all = sctp_sched_rr_sched_all,
200 .unsched_all = sctp_sched_rr_unsched_all,
201};
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 2d9bd3776bc8..1e5a22430cf5 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -87,14 +87,11 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
87 INIT_LIST_HEAD(&peer->send_ready); 87 INIT_LIST_HEAD(&peer->send_ready);
88 INIT_LIST_HEAD(&peer->transports); 88 INIT_LIST_HEAD(&peer->transports);
89 89
90 setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 90 timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0);
91 (unsigned long)peer); 91 timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0);
92 setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, 92 timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0);
93 (unsigned long)peer); 93 timer_setup(&peer->proto_unreach_timer,
94 setup_timer(&peer->reconf_timer, sctp_generate_reconf_event, 94 sctp_generate_proto_unreach_event, 0);
95 (unsigned long)peer);
96 setup_timer(&peer->proto_unreach_timer,
97 sctp_generate_proto_unreach_event, (unsigned long)peer);
98 95
99 /* Initialize the 64-bit random nonce sent with heartbeat. */ 96 /* Initialize the 64-bit random nonce sent with heartbeat. */
100 get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); 97 get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 745f145d4c4d..6451c5013e06 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -390,6 +390,12 @@ static int smc_connect_rdma(struct smc_sock *smc)
390 int rc = 0; 390 int rc = 0;
391 u8 ibport; 391 u8 ibport;
392 392
393 if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
394 /* peer has not signalled SMC-capability */
395 smc->use_fallback = true;
396 goto out_connected;
397 }
398
393 /* IPSec connections opt out of SMC-R optimizations */ 399 /* IPSec connections opt out of SMC-R optimizations */
394 if (using_ipsec(smc)) { 400 if (using_ipsec(smc)) {
395 reason_code = SMC_CLC_DECL_IPSEC; 401 reason_code = SMC_CLC_DECL_IPSEC;
@@ -555,6 +561,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
555 } 561 }
556 562
557 smc_copy_sock_settings_to_clc(smc); 563 smc_copy_sock_settings_to_clc(smc);
564 tcp_sk(smc->clcsock->sk)->syn_smc = 1;
558 rc = kernel_connect(smc->clcsock, addr, alen, flags); 565 rc = kernel_connect(smc->clcsock, addr, alen, flags);
559 if (rc) 566 if (rc)
560 goto out; 567 goto out;
@@ -759,6 +766,12 @@ static void smc_listen_work(struct work_struct *work)
759 u8 prefix_len; 766 u8 prefix_len;
760 u8 ibport; 767 u8 ibport;
761 768
769 /* check if peer is smc capable */
770 if (!tcp_sk(newclcsock->sk)->syn_smc) {
771 new_smc->use_fallback = true;
772 goto out_connected;
773 }
774
762 /* do inband token exchange - 775 /* do inband token exchange -
763 *wait for and receive SMC Proposal CLC message 776 *wait for and receive SMC Proposal CLC message
764 */ 777 */
@@ -808,7 +821,7 @@ static void smc_listen_work(struct work_struct *work)
808 rc = local_contact; 821 rc = local_contact;
809 if (rc == -ENOMEM) 822 if (rc == -ENOMEM)
810 reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ 823 reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
811 goto decline_rdma; 824 goto decline_rdma_unlock;
812 } 825 }
813 link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; 826 link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
814 827
@@ -816,7 +829,7 @@ static void smc_listen_work(struct work_struct *work)
816 rc = smc_buf_create(new_smc); 829 rc = smc_buf_create(new_smc);
817 if (rc) { 830 if (rc) {
818 reason_code = SMC_CLC_DECL_MEM; 831 reason_code = SMC_CLC_DECL_MEM;
819 goto decline_rdma; 832 goto decline_rdma_unlock;
820 } 833 }
821 834
822 smc_close_init(new_smc); 835 smc_close_init(new_smc);
@@ -831,7 +844,7 @@ static void smc_listen_work(struct work_struct *work)
831 buf_desc->mr_rx[SMC_SINGLE_LINK]); 844 buf_desc->mr_rx[SMC_SINGLE_LINK]);
832 if (rc) { 845 if (rc) {
833 reason_code = SMC_CLC_DECL_INTERR; 846 reason_code = SMC_CLC_DECL_INTERR;
834 goto decline_rdma; 847 goto decline_rdma_unlock;
835 } 848 }
836 } 849 }
837 } 850 }
@@ -839,15 +852,15 @@ static void smc_listen_work(struct work_struct *work)
839 852
840 rc = smc_clc_send_accept(new_smc, local_contact); 853 rc = smc_clc_send_accept(new_smc, local_contact);
841 if (rc) 854 if (rc)
842 goto out_err; 855 goto out_err_unlock;
843 856
844 /* receive SMC Confirm CLC message */ 857 /* receive SMC Confirm CLC message */
845 reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), 858 reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
846 SMC_CLC_CONFIRM); 859 SMC_CLC_CONFIRM);
847 if (reason_code < 0) 860 if (reason_code < 0)
848 goto out_err; 861 goto out_err_unlock;
849 if (reason_code > 0) 862 if (reason_code > 0)
850 goto decline_rdma; 863 goto decline_rdma_unlock;
851 smc_conn_save_peer_info(new_smc, &cclc); 864 smc_conn_save_peer_info(new_smc, &cclc);
852 if (local_contact == SMC_FIRST_CONTACT) 865 if (local_contact == SMC_FIRST_CONTACT)
853 smc_link_save_peer_info(link, &cclc); 866 smc_link_save_peer_info(link, &cclc);
@@ -855,34 +868,34 @@ static void smc_listen_work(struct work_struct *work)
855 rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc); 868 rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
856 if (rc) { 869 if (rc) {
857 reason_code = SMC_CLC_DECL_INTERR; 870 reason_code = SMC_CLC_DECL_INTERR;
858 goto decline_rdma; 871 goto decline_rdma_unlock;
859 } 872 }
860 873
861 if (local_contact == SMC_FIRST_CONTACT) { 874 if (local_contact == SMC_FIRST_CONTACT) {
862 rc = smc_ib_ready_link(link); 875 rc = smc_ib_ready_link(link);
863 if (rc) { 876 if (rc) {
864 reason_code = SMC_CLC_DECL_INTERR; 877 reason_code = SMC_CLC_DECL_INTERR;
865 goto decline_rdma; 878 goto decline_rdma_unlock;
866 } 879 }
867 /* QP confirmation over RoCE fabric */ 880 /* QP confirmation over RoCE fabric */
868 reason_code = smc_serv_conf_first_link(new_smc); 881 reason_code = smc_serv_conf_first_link(new_smc);
869 if (reason_code < 0) { 882 if (reason_code < 0) {
870 /* peer is not aware of a problem */ 883 /* peer is not aware of a problem */
871 rc = reason_code; 884 rc = reason_code;
872 goto out_err; 885 goto out_err_unlock;
873 } 886 }
874 if (reason_code > 0) 887 if (reason_code > 0)
875 goto decline_rdma; 888 goto decline_rdma_unlock;
876 } 889 }
877 890
878 smc_tx_init(new_smc); 891 smc_tx_init(new_smc);
892 mutex_unlock(&smc_create_lgr_pending);
879 893
880out_connected: 894out_connected:
881 sk_refcnt_debug_inc(newsmcsk); 895 sk_refcnt_debug_inc(newsmcsk);
882 if (newsmcsk->sk_state == SMC_INIT) 896 if (newsmcsk->sk_state == SMC_INIT)
883 newsmcsk->sk_state = SMC_ACTIVE; 897 newsmcsk->sk_state = SMC_ACTIVE;
884enqueue: 898enqueue:
885 mutex_unlock(&smc_create_lgr_pending);
886 lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); 899 lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
887 if (lsmc->sk.sk_state == SMC_LISTEN) { 900 if (lsmc->sk.sk_state == SMC_LISTEN) {
888 smc_accept_enqueue(&lsmc->sk, newsmcsk); 901 smc_accept_enqueue(&lsmc->sk, newsmcsk);
@@ -896,6 +909,8 @@ enqueue:
896 sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ 909 sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
897 return; 910 return;
898 911
912decline_rdma_unlock:
913 mutex_unlock(&smc_create_lgr_pending);
899decline_rdma: 914decline_rdma:
900 /* RDMA setup failed, switch back to TCP */ 915 /* RDMA setup failed, switch back to TCP */
901 smc_conn_free(&new_smc->conn); 916 smc_conn_free(&new_smc->conn);
@@ -907,6 +922,8 @@ decline_rdma:
907 } 922 }
908 goto out_connected; 923 goto out_connected;
909 924
925out_err_unlock:
926 mutex_unlock(&smc_create_lgr_pending);
910out_err: 927out_err:
911 newsmcsk->sk_state = SMC_CLOSED; 928 newsmcsk->sk_state = SMC_CLOSED;
912 smc_conn_free(&new_smc->conn); 929 smc_conn_free(&new_smc->conn);
@@ -963,6 +980,7 @@ static int smc_listen(struct socket *sock, int backlog)
963 * them to the clc socket -- copy smc socket options to clc socket 980 * them to the clc socket -- copy smc socket options to clc socket
964 */ 981 */
965 smc_copy_sock_settings_to_clc(smc); 982 smc_copy_sock_settings_to_clc(smc);
983 tcp_sk(smc->clcsock->sk)->syn_smc = 1;
966 984
967 rc = kernel_listen(smc->clcsock, backlog); 985 rc = kernel_listen(smc->clcsock, backlog);
968 if (rc) 986 if (rc)
@@ -1405,6 +1423,7 @@ static int __init smc_init(void)
1405 goto out_sock; 1423 goto out_sock;
1406 } 1424 }
1407 1425
1426 static_branch_enable(&tcp_have_smc);
1408 return 0; 1427 return 0;
1409 1428
1410out_sock: 1429out_sock:
@@ -1429,6 +1448,7 @@ static void __exit smc_exit(void)
1429 list_del_init(&lgr->list); 1448 list_del_init(&lgr->list);
1430 smc_lgr_free(lgr); /* free link group */ 1449 smc_lgr_free(lgr); /* free link group */
1431 } 1450 }
1451 static_branch_disable(&tcp_have_smc);
1432 smc_ib_unregister_client(); 1452 smc_ib_unregister_client();
1433 sock_unregister(PF_SMC); 1453 sock_unregister(PF_SMC);
1434 proto_unregister(&smc_proto); 1454 proto_unregister(&smc_proto);
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 535e72cfc64b..87f7bede6eab 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -63,10 +63,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
63 bh_unlock_sock(&smc->sk); 63 bh_unlock_sock(&smc->sk);
64} 64}
65 65
66int smc_cdc_get_free_slot(struct smc_link *link, 66int smc_cdc_get_free_slot(struct smc_connection *conn,
67 struct smc_wr_buf **wr_buf, 67 struct smc_wr_buf **wr_buf,
68 struct smc_cdc_tx_pend **pend) 68 struct smc_cdc_tx_pend **pend)
69{ 69{
70 struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
71
70 return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, 72 return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
71 (struct smc_wr_tx_pend_priv **)pend); 73 (struct smc_wr_tx_pend_priv **)pend);
72} 74}
@@ -119,8 +121,7 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
119 struct smc_wr_buf *wr_buf; 121 struct smc_wr_buf *wr_buf;
120 int rc; 122 int rc;
121 123
122 rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf, 124 rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
123 &pend);
124 if (rc) 125 if (rc)
125 return rc; 126 return rc;
126 127
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index bea61dadaf36..149ceda1b088 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -207,7 +207,8 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
207 207
208struct smc_cdc_tx_pend; 208struct smc_cdc_tx_pend;
209 209
210int smc_cdc_get_free_slot(struct smc_link *link, struct smc_wr_buf **wr_buf, 210int smc_cdc_get_free_slot(struct smc_connection *conn,
211 struct smc_wr_buf **wr_buf,
211 struct smc_cdc_tx_pend **pend); 212 struct smc_cdc_tx_pend **pend);
212void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); 213void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
213int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, 214int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 2427a1f3d0d1..48615d2ac4aa 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -361,7 +361,8 @@ static void smc_close_passive_work(struct work_struct *work)
361 case SMC_PEERCLOSEWAIT1: 361 case SMC_PEERCLOSEWAIT1:
362 if (rxflags->peer_done_writing) 362 if (rxflags->peer_done_writing)
363 sk->sk_state = SMC_PEERCLOSEWAIT2; 363 sk->sk_state = SMC_PEERCLOSEWAIT2;
364 /* fall through to check for closing */ 364 /* fall through */
365 /* to check for closing */
365 case SMC_PEERCLOSEWAIT2: 366 case SMC_PEERCLOSEWAIT2:
366 case SMC_PEERFINCLOSEWAIT: 367 case SMC_PEERFINCLOSEWAIT:
367 if (!smc_cdc_rxed_any_close(&smc->conn)) 368 if (!smc_cdc_rxed_any_close(&smc->conn))
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 413e3868fbf3..2578fbd95664 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -381,10 +381,14 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
381 if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, 381 if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
382 &gattr)) 382 &gattr))
383 continue; 383 continue;
384 if (gattr.ndev && 384 if (gattr.ndev) {
385 (vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id)) { 385 if (is_vlan_dev(gattr.ndev) &&
386 lnk->gid = gid; 386 vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
387 return 0; 387 lnk->gid = gid;
388 dev_put(gattr.ndev);
389 return 0;
390 }
391 dev_put(gattr.ndev);
388 } 392 }
389 } 393 }
390 return -ENODEV; 394 return -ENODEV;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 9033b8a36fe1..90f1a7f9085c 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -370,26 +370,17 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
370 370
371static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) 371static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
372{ 372{
373 struct net_device *ndev; 373 struct ib_gid_attr gattr;
374 int rc; 374 int rc;
375 375
376 rc = ib_query_gid(smcibdev->ibdev, ibport, 0, 376 rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
377 &smcibdev->gid[ibport - 1], NULL); 377 &smcibdev->gid[ibport - 1], &gattr);
378 /* the SMC protocol requires specification of the roce MAC address; 378 if (rc || !gattr.ndev)
379 * if net_device cannot be determined, it can be derived from gid 0 379 return -ENODEV;
380 */ 380
381 ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport); 381 memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
382 if (ndev) { 382 dev_put(gattr.ndev);
383 memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN); 383 return 0;
384 dev_put(ndev);
385 } else if (!rc) {
386 memcpy(&smcibdev->mac[ibport - 1][0],
387 &smcibdev->gid[ibport - 1].raw[8], 3);
388 memcpy(&smcibdev->mac[ibport - 1][3],
389 &smcibdev->gid[ibport - 1].raw[13], 3);
390 smcibdev->mac[ibport - 1][0] &= ~0x02;
391 }
392 return rc;
393} 384}
394 385
395/* Create an identifier unique for this instance of SMC-R. 386/* Create an identifier unique for this instance of SMC-R.
@@ -420,6 +411,7 @@ int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
420 &smcibdev->pattr[ibport - 1]); 411 &smcibdev->pattr[ibport - 1]);
421 if (rc) 412 if (rc)
422 goto out; 413 goto out;
414 /* the SMC protocol requires specification of the RoCE MAC address */
423 rc = smc_ib_fill_gid_and_mac(smcibdev, ibport); 415 rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
424 if (rc) 416 if (rc)
425 goto out; 417 goto out;
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 710ab3fbf607..c48dc2d5fd3a 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -397,8 +397,7 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
397 int rc; 397 int rc;
398 398
399 spin_lock_bh(&conn->send_lock); 399 spin_lock_bh(&conn->send_lock);
400 rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf, 400 rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
401 &pend);
402 if (rc < 0) { 401 if (rc < 0) {
403 if (rc == -EBUSY) { 402 if (rc == -EBUSY) {
404 struct smc_sock *smc = 403 struct smc_sock *smc =
@@ -467,8 +466,7 @@ void smc_tx_consumer_update(struct smc_connection *conn)
467 ((to_confirm > conn->rmbe_update_limit) && 466 ((to_confirm > conn->rmbe_update_limit) &&
468 ((to_confirm > (conn->rmbe_size / 2)) || 467 ((to_confirm > (conn->rmbe_size / 2)) ||
469 conn->local_rx_ctrl.prod_flags.write_blocked))) { 468 conn->local_rx_ctrl.prod_flags.write_blocked))) {
470 rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], 469 rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
471 &wr_buf, &pend);
472 if (!rc) 470 if (!rc)
473 rc = smc_cdc_msg_send(conn, wr_buf, pend); 471 rc = smc_cdc_msg_send(conn, wr_buf, pend);
474 if (rc < 0) { 472 if (rc < 0) {
diff --git a/net/socket.c b/net/socket.c
index c729625eb5d3..42d8e9c9ccd5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -568,7 +568,6 @@ struct socket *sock_alloc(void)
568 568
569 sock = SOCKET_I(inode); 569 sock = SOCKET_I(inode);
570 570
571 kmemcheck_annotate_bitfield(sock, type);
572 inode->i_ino = get_next_ino(); 571 inode->i_ino = get_next_ino();
573 inode->i_mode = S_IFSOCK | S_IRWXUGO; 572 inode->i_mode = S_IFSOCK | S_IRWXUGO;
574 inode->i_uid = current_fsuid(); 573 inode->i_uid = current_fsuid();
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 7b1ee5a0b03c..73165e9ca5bf 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -855,11 +855,13 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
855 return stat; 855 return stat;
856 if (integ_len > buf->len) 856 if (integ_len > buf->len)
857 return stat; 857 return stat;
858 if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) 858 if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) {
859 BUG(); 859 WARN_ON_ONCE(1);
860 return stat;
861 }
860 /* copy out mic... */ 862 /* copy out mic... */
861 if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) 863 if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
862 BUG(); 864 return stat;
863 if (mic.len > RPC_MAX_AUTH_SIZE) 865 if (mic.len > RPC_MAX_AUTH_SIZE)
864 return stat; 866 return stat;
865 mic.data = kmalloc(mic.len, GFP_KERNEL); 867 mic.data = kmalloc(mic.len, GFP_KERNEL);
@@ -1611,8 +1613,10 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
1611 BUG_ON(integ_len % 4); 1613 BUG_ON(integ_len % 4);
1612 *p++ = htonl(integ_len); 1614 *p++ = htonl(integ_len);
1613 *p++ = htonl(gc->gc_seq); 1615 *p++ = htonl(gc->gc_seq);
1614 if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) 1616 if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) {
1615 BUG(); 1617 WARN_ON_ONCE(1);
1618 goto out_err;
1619 }
1616 if (resbuf->tail[0].iov_base == NULL) { 1620 if (resbuf->tail[0].iov_base == NULL) {
1617 if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE) 1621 if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
1618 goto out_err; 1622 goto out_err;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 2ad827db2704..a801da812f86 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1491,7 +1491,6 @@ rpc_restart_call(struct rpc_task *task)
1491} 1491}
1492EXPORT_SYMBOL_GPL(rpc_restart_call); 1492EXPORT_SYMBOL_GPL(rpc_restart_call);
1493 1493
1494#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
1495const char 1494const char
1496*rpc_proc_name(const struct rpc_task *task) 1495*rpc_proc_name(const struct rpc_task *task)
1497{ 1496{
@@ -1505,7 +1504,6 @@ const char
1505 } else 1504 } else
1506 return "no proc"; 1505 return "no proc";
1507} 1506}
1508#endif
1509 1507
1510/* 1508/*
1511 * 0. Initial state 1509 * 0. Initial state
@@ -1519,6 +1517,7 @@ call_start(struct rpc_task *task)
1519 struct rpc_clnt *clnt = task->tk_client; 1517 struct rpc_clnt *clnt = task->tk_client;
1520 int idx = task->tk_msg.rpc_proc->p_statidx; 1518 int idx = task->tk_msg.rpc_proc->p_statidx;
1521 1519
1520 trace_rpc_request(task);
1522 dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, 1521 dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
1523 clnt->cl_program->name, clnt->cl_vers, 1522 clnt->cl_program->name, clnt->cl_vers,
1524 rpc_proc_name(task), 1523 rpc_proc_name(task),
@@ -1586,6 +1585,7 @@ call_reserveresult(struct rpc_task *task)
1586 switch (status) { 1585 switch (status) {
1587 case -ENOMEM: 1586 case -ENOMEM:
1588 rpc_delay(task, HZ >> 2); 1587 rpc_delay(task, HZ >> 2);
1588 /* fall through */
1589 case -EAGAIN: /* woken up; retry */ 1589 case -EAGAIN: /* woken up; retry */
1590 task->tk_action = call_retry_reserve; 1590 task->tk_action = call_retry_reserve;
1591 return; 1591 return;
@@ -1647,10 +1647,13 @@ call_refreshresult(struct rpc_task *task)
1647 /* Use rate-limiting and a max number of retries if refresh 1647 /* Use rate-limiting and a max number of retries if refresh
1648 * had status 0 but failed to update the cred. 1648 * had status 0 but failed to update the cred.
1649 */ 1649 */
1650 /* fall through */
1650 case -ETIMEDOUT: 1651 case -ETIMEDOUT:
1651 rpc_delay(task, 3*HZ); 1652 rpc_delay(task, 3*HZ);
1653 /* fall through */
1652 case -EAGAIN: 1654 case -EAGAIN:
1653 status = -EACCES; 1655 status = -EACCES;
1656 /* fall through */
1654 case -EKEYEXPIRED: 1657 case -EKEYEXPIRED:
1655 if (!task->tk_cred_retry) 1658 if (!task->tk_cred_retry)
1656 break; 1659 break;
@@ -1911,6 +1914,7 @@ call_connect_status(struct rpc_task *task)
1911 task->tk_action = call_bind; 1914 task->tk_action = call_bind;
1912 return; 1915 return;
1913 } 1916 }
1917 /* fall through */
1914 case -ECONNRESET: 1918 case -ECONNRESET:
1915 case -ECONNABORTED: 1919 case -ECONNABORTED:
1916 case -ENETUNREACH: 1920 case -ENETUNREACH:
@@ -1924,6 +1928,7 @@ call_connect_status(struct rpc_task *task)
1924 break; 1928 break;
1925 /* retry with existing socket, after a delay */ 1929 /* retry with existing socket, after a delay */
1926 rpc_delay(task, 3*HZ); 1930 rpc_delay(task, 3*HZ);
1931 /* fall through */
1927 case -EAGAIN: 1932 case -EAGAIN:
1928 /* Check for timeouts before looping back to call_bind */ 1933 /* Check for timeouts before looping back to call_bind */
1929 case -ETIMEDOUT: 1934 case -ETIMEDOUT:
@@ -2025,6 +2030,7 @@ call_transmit_status(struct rpc_task *task)
2025 rpc_exit(task, task->tk_status); 2030 rpc_exit(task, task->tk_status);
2026 break; 2031 break;
2027 } 2032 }
2033 /* fall through */
2028 case -ECONNRESET: 2034 case -ECONNRESET:
2029 case -ECONNABORTED: 2035 case -ECONNABORTED:
2030 case -EADDRINUSE: 2036 case -EADDRINUSE:
@@ -2145,6 +2151,7 @@ call_status(struct rpc_task *task)
2145 * were a timeout. 2151 * were a timeout.
2146 */ 2152 */
2147 rpc_delay(task, 3*HZ); 2153 rpc_delay(task, 3*HZ);
2154 /* fall through */
2148 case -ETIMEDOUT: 2155 case -ETIMEDOUT:
2149 task->tk_action = call_timeout; 2156 task->tk_action = call_timeout;
2150 break; 2157 break;
@@ -2152,14 +2159,17 @@ call_status(struct rpc_task *task)
2152 case -ECONNRESET: 2159 case -ECONNRESET:
2153 case -ECONNABORTED: 2160 case -ECONNABORTED:
2154 rpc_force_rebind(clnt); 2161 rpc_force_rebind(clnt);
2162 /* fall through */
2155 case -EADDRINUSE: 2163 case -EADDRINUSE:
2156 rpc_delay(task, 3*HZ); 2164 rpc_delay(task, 3*HZ);
2165 /* fall through */
2157 case -EPIPE: 2166 case -EPIPE:
2158 case -ENOTCONN: 2167 case -ENOTCONN:
2159 task->tk_action = call_bind; 2168 task->tk_action = call_bind;
2160 break; 2169 break;
2161 case -ENOBUFS: 2170 case -ENOBUFS:
2162 rpc_delay(task, HZ>>2); 2171 rpc_delay(task, HZ>>2);
2172 /* fall through */
2163 case -EAGAIN: 2173 case -EAGAIN:
2164 task->tk_action = call_transmit; 2174 task->tk_action = call_transmit;
2165 break; 2175 break;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 61a504fb1ae2..7803f3b6aa53 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1410,8 +1410,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1410 return PTR_ERR(gssd_dentry); 1410 return PTR_ERR(gssd_dentry);
1411 } 1411 }
1412 1412
1413 dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", 1413 dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n",
1414 net, NET_NAME(net)); 1414 net->ns.inum, NET_NAME(net));
1415 mutex_lock(&sn->pipefs_sb_lock); 1415 mutex_lock(&sn->pipefs_sb_lock);
1416 sn->pipefs_sb = sb; 1416 sn->pipefs_sb = sb;
1417 err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, 1417 err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
@@ -1462,8 +1462,8 @@ static void rpc_kill_sb(struct super_block *sb)
1462 goto out; 1462 goto out;
1463 } 1463 }
1464 sn->pipefs_sb = NULL; 1464 sn->pipefs_sb = NULL;
1465 dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", 1465 dprintk("RPC: sending pipefs UMOUNT notification for net %x%s\n",
1466 net, NET_NAME(net)); 1466 net->ns.inum, NET_NAME(net));
1467 blocking_notifier_call_chain(&rpc_pipefs_notifier_list, 1467 blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
1468 RPC_PIPEFS_UMOUNT, 1468 RPC_PIPEFS_UMOUNT,
1469 sb); 1469 sb);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index ea0676f199c8..c526f8fb37c9 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -216,9 +216,9 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
216 smp_wmb(); 216 smp_wmb();
217 sn->rpcb_users = 1; 217 sn->rpcb_users = 1;
218 dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " 218 dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: "
219 "%p, rpcb_local_clnt4: %p) for net %p%s\n", 219 "%p, rpcb_local_clnt4: %p) for net %x%s\n",
220 sn->rpcb_local_clnt, sn->rpcb_local_clnt4, 220 sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
221 net, (net == &init_net) ? " (init_net)" : ""); 221 net->ns.inum, (net == &init_net) ? " (init_net)" : "");
222} 222}
223 223
224/* 224/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 0cc83839c13c..b1b49edd7c4d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -44,7 +44,7 @@ static mempool_t *rpc_buffer_mempool __read_mostly;
44 44
45static void rpc_async_schedule(struct work_struct *); 45static void rpc_async_schedule(struct work_struct *);
46static void rpc_release_task(struct rpc_task *task); 46static void rpc_release_task(struct rpc_task *task);
47static void __rpc_queue_timer_fn(unsigned long ptr); 47static void __rpc_queue_timer_fn(struct timer_list *t);
48 48
49/* 49/*
50 * RPC tasks sit here while waiting for conditions to improve. 50 * RPC tasks sit here while waiting for conditions to improve.
@@ -228,7 +228,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
228 queue->maxpriority = nr_queues - 1; 228 queue->maxpriority = nr_queues - 1;
229 rpc_reset_waitqueue_priority(queue); 229 rpc_reset_waitqueue_priority(queue);
230 queue->qlen = 0; 230 queue->qlen = 0;
231 setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); 231 timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
232 INIT_LIST_HEAD(&queue->timer_list.list); 232 INIT_LIST_HEAD(&queue->timer_list.list);
233 rpc_assign_waitqueue_name(queue, qname); 233 rpc_assign_waitqueue_name(queue, qname);
234} 234}
@@ -274,10 +274,9 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
274 274
275static void rpc_set_active(struct rpc_task *task) 275static void rpc_set_active(struct rpc_task *task)
276{ 276{
277 trace_rpc_task_begin(task->tk_client, task, NULL);
278
279 rpc_task_set_debuginfo(task); 277 rpc_task_set_debuginfo(task);
280 set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); 278 set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
279 trace_rpc_task_begin(task->tk_client, task, NULL);
281} 280}
282 281
283/* 282/*
@@ -635,9 +634,9 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
635} 634}
636EXPORT_SYMBOL_GPL(rpc_wake_up_status); 635EXPORT_SYMBOL_GPL(rpc_wake_up_status);
637 636
638static void __rpc_queue_timer_fn(unsigned long ptr) 637static void __rpc_queue_timer_fn(struct timer_list *t)
639{ 638{
640 struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr; 639 struct rpc_wait_queue *queue = from_timer(queue, t, timer_list.timer);
641 struct rpc_task *task, *n; 640 struct rpc_task *task, *n;
642 unsigned long expires, now, timeo; 641 unsigned long expires, now, timeo;
643 642
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index c73de181467a..56f9eff74150 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -65,10 +65,13 @@ err_proc:
65 65
66static __net_exit void sunrpc_exit_net(struct net *net) 66static __net_exit void sunrpc_exit_net(struct net *net)
67{ 67{
68 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
69
68 rpc_pipefs_exit_net(net); 70 rpc_pipefs_exit_net(net);
69 unix_gid_cache_destroy(net); 71 unix_gid_cache_destroy(net);
70 ip_map_cache_destroy(net); 72 ip_map_cache_destroy(net);
71 rpc_proc_exit(net); 73 rpc_proc_exit(net);
74 WARN_ON_ONCE(!list_empty(&sn->all_clients));
72} 75}
73 76
74static struct pernet_operations sunrpc_net_ops = { 77static struct pernet_operations sunrpc_net_ops = {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index aa04666f929d..387cc4add6f6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -50,7 +50,7 @@ EXPORT_SYMBOL_GPL(svc_pool_map);
50static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ 50static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
51 51
52static int 52static int
53param_set_pool_mode(const char *val, struct kernel_param *kp) 53param_set_pool_mode(const char *val, const struct kernel_param *kp)
54{ 54{
55 int *ip = (int *)kp->arg; 55 int *ip = (int *)kp->arg;
56 struct svc_pool_map *m = &svc_pool_map; 56 struct svc_pool_map *m = &svc_pool_map;
@@ -80,7 +80,7 @@ out:
80} 80}
81 81
82static int 82static int
83param_get_pool_mode(char *buf, struct kernel_param *kp) 83param_get_pool_mode(char *buf, const struct kernel_param *kp)
84{ 84{
85 int *ip = (int *)kp->arg; 85 int *ip = (int *)kp->arg;
86 86
@@ -455,7 +455,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
455 serv->sv_xdrsize = xdrsize; 455 serv->sv_xdrsize = xdrsize;
456 INIT_LIST_HEAD(&serv->sv_tempsocks); 456 INIT_LIST_HEAD(&serv->sv_tempsocks);
457 INIT_LIST_HEAD(&serv->sv_permsocks); 457 INIT_LIST_HEAD(&serv->sv_permsocks);
458 init_timer(&serv->sv_temptimer); 458 timer_setup(&serv->sv_temptimer, NULL, 0);
459 spin_lock_init(&serv->sv_lock); 459 spin_lock_init(&serv->sv_lock);
460 460
461 __svc_init_bc(serv); 461 __svc_init_bc(serv);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d16a8b423c20..f9307bd6644b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -28,7 +28,7 @@ module_param(svc_rpc_per_connection_limit, uint, 0644);
28static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); 28static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
29static int svc_deferred_recv(struct svc_rqst *rqstp); 29static int svc_deferred_recv(struct svc_rqst *rqstp);
30static struct cache_deferred_req *svc_defer(struct cache_req *req); 30static struct cache_deferred_req *svc_defer(struct cache_req *req);
31static void svc_age_temp_xprts(unsigned long closure); 31static void svc_age_temp_xprts(struct timer_list *t);
32static void svc_delete_xprt(struct svc_xprt *xprt); 32static void svc_delete_xprt(struct svc_xprt *xprt);
33 33
34/* apparently the "standard" is that clients close 34/* apparently the "standard" is that clients close
@@ -250,9 +250,9 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
250 svc_xprt_received(new); 250 svc_xprt_received(new);
251} 251}
252 252
253int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, 253static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
254 struct net *net, const int family, 254 struct net *net, const int family,
255 const unsigned short port, int flags) 255 const unsigned short port, int flags)
256{ 256{
257 struct svc_xprt_class *xcl; 257 struct svc_xprt_class *xcl;
258 258
@@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
380 struct svc_pool *pool; 380 struct svc_pool *pool;
381 struct svc_rqst *rqstp = NULL; 381 struct svc_rqst *rqstp = NULL;
382 int cpu; 382 int cpu;
383 bool queued = false;
384 383
385 if (!svc_xprt_has_something_to_do(xprt)) 384 if (!svc_xprt_has_something_to_do(xprt))
386 goto out; 385 goto out;
@@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
401 400
402 atomic_long_inc(&pool->sp_stats.packets); 401 atomic_long_inc(&pool->sp_stats.packets);
403 402
404redo_search: 403 dprintk("svc: transport %p put into queue\n", xprt);
404 spin_lock_bh(&pool->sp_lock);
405 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
406 pool->sp_stats.sockets_queued++;
407 spin_unlock_bh(&pool->sp_lock);
408
405 /* find a thread for this xprt */ 409 /* find a thread for this xprt */
406 rcu_read_lock(); 410 rcu_read_lock();
407 list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { 411 list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
408 /* Do a lockless check first */ 412 if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
409 if (test_bit(RQ_BUSY, &rqstp->rq_flags))
410 continue; 413 continue;
411
412 /*
413 * Once the xprt has been queued, it can only be dequeued by
414 * the task that intends to service it. All we can do at that
415 * point is to try to wake this thread back up so that it can
416 * do so.
417 */
418 if (!queued) {
419 spin_lock_bh(&rqstp->rq_lock);
420 if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) {
421 /* already busy, move on... */
422 spin_unlock_bh(&rqstp->rq_lock);
423 continue;
424 }
425
426 /* this one will do */
427 rqstp->rq_xprt = xprt;
428 svc_xprt_get(xprt);
429 spin_unlock_bh(&rqstp->rq_lock);
430 }
431 rcu_read_unlock();
432
433 atomic_long_inc(&pool->sp_stats.threads_woken); 414 atomic_long_inc(&pool->sp_stats.threads_woken);
434 wake_up_process(rqstp->rq_task); 415 wake_up_process(rqstp->rq_task);
435 put_cpu(); 416 goto out_unlock;
436 goto out;
437 }
438 rcu_read_unlock();
439
440 /*
441 * We didn't find an idle thread to use, so we need to queue the xprt.
442 * Do so and then search again. If we find one, we can't hook this one
443 * up to it directly but we can wake the thread up in the hopes that it
444 * will pick it up once it searches for a xprt to service.
445 */
446 if (!queued) {
447 queued = true;
448 dprintk("svc: transport %p put into queue\n", xprt);
449 spin_lock_bh(&pool->sp_lock);
450 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
451 pool->sp_stats.sockets_queued++;
452 spin_unlock_bh(&pool->sp_lock);
453 goto redo_search;
454 } 417 }
418 set_bit(SP_CONGESTED, &pool->sp_flags);
455 rqstp = NULL; 419 rqstp = NULL;
420out_unlock:
421 rcu_read_unlock();
456 put_cpu(); 422 put_cpu();
457out: 423out:
458 trace_svc_xprt_do_enqueue(xprt, rqstp); 424 trace_svc_xprt_do_enqueue(xprt, rqstp);
@@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp)
721 687
722static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) 688static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
723{ 689{
724 struct svc_xprt *xprt;
725 struct svc_pool *pool = rqstp->rq_pool; 690 struct svc_pool *pool = rqstp->rq_pool;
726 long time_left = 0; 691 long time_left = 0;
727 692
728 /* rq_xprt should be clear on entry */ 693 /* rq_xprt should be clear on entry */
729 WARN_ON_ONCE(rqstp->rq_xprt); 694 WARN_ON_ONCE(rqstp->rq_xprt);
730 695
731 /* Normally we will wait up to 5 seconds for any required 696 rqstp->rq_xprt = svc_xprt_dequeue(pool);
732 * cache information to be provided. 697 if (rqstp->rq_xprt)
733 */ 698 goto out_found;
734 rqstp->rq_chandle.thread_wait = 5*HZ;
735
736 xprt = svc_xprt_dequeue(pool);
737 if (xprt) {
738 rqstp->rq_xprt = xprt;
739
740 /* As there is a shortage of threads and this request
741 * had to be queued, don't allow the thread to wait so
742 * long for cache updates.
743 */
744 rqstp->rq_chandle.thread_wait = 1*HZ;
745 clear_bit(SP_TASK_PENDING, &pool->sp_flags);
746 return xprt;
747 }
748 699
749 /* 700 /*
750 * We have to be able to interrupt this wait 701 * We have to be able to interrupt this wait
751 * to bring down the daemons ... 702 * to bring down the daemons ...
752 */ 703 */
753 set_current_state(TASK_INTERRUPTIBLE); 704 set_current_state(TASK_INTERRUPTIBLE);
705 smp_mb__before_atomic();
706 clear_bit(SP_CONGESTED, &pool->sp_flags);
754 clear_bit(RQ_BUSY, &rqstp->rq_flags); 707 clear_bit(RQ_BUSY, &rqstp->rq_flags);
755 smp_mb(); 708 smp_mb__after_atomic();
756 709
757 if (likely(rqst_should_sleep(rqstp))) 710 if (likely(rqst_should_sleep(rqstp)))
758 time_left = schedule_timeout(timeout); 711 time_left = schedule_timeout(timeout);
@@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
761 714
762 try_to_freeze(); 715 try_to_freeze();
763 716
764 spin_lock_bh(&rqstp->rq_lock);
765 set_bit(RQ_BUSY, &rqstp->rq_flags); 717 set_bit(RQ_BUSY, &rqstp->rq_flags);
766 spin_unlock_bh(&rqstp->rq_lock); 718 smp_mb__after_atomic();
767 719 rqstp->rq_xprt = svc_xprt_dequeue(pool);
768 xprt = rqstp->rq_xprt; 720 if (rqstp->rq_xprt)
769 if (xprt != NULL) 721 goto out_found;
770 return xprt;
771 722
772 if (!time_left) 723 if (!time_left)
773 atomic_long_inc(&pool->sp_stats.threads_timedout); 724 atomic_long_inc(&pool->sp_stats.threads_timedout);
@@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
775 if (signalled() || kthread_should_stop()) 726 if (signalled() || kthread_should_stop())
776 return ERR_PTR(-EINTR); 727 return ERR_PTR(-EINTR);
777 return ERR_PTR(-EAGAIN); 728 return ERR_PTR(-EAGAIN);
729out_found:
730 /* Normally we will wait up to 5 seconds for any required
731 * cache information to be provided.
732 */
733 if (!test_bit(SP_CONGESTED, &pool->sp_flags))
734 rqstp->rq_chandle.thread_wait = 5*HZ;
735 else
736 rqstp->rq_chandle.thread_wait = 1*HZ;
737 return rqstp->rq_xprt;
778} 738}
779 739
780static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) 740static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
@@ -785,8 +745,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt
785 serv->sv_tmpcnt++; 745 serv->sv_tmpcnt++;
786 if (serv->sv_temptimer.function == NULL) { 746 if (serv->sv_temptimer.function == NULL) {
787 /* setup timer to age temp transports */ 747 /* setup timer to age temp transports */
788 setup_timer(&serv->sv_temptimer, svc_age_temp_xprts, 748 serv->sv_temptimer.function = svc_age_temp_xprts;
789 (unsigned long)serv);
790 mod_timer(&serv->sv_temptimer, 749 mod_timer(&serv->sv_temptimer,
791 jiffies + svc_conn_age_period * HZ); 750 jiffies + svc_conn_age_period * HZ);
792 } 751 }
@@ -960,9 +919,9 @@ out:
960 * Timer function to close old temporary transports, using 919 * Timer function to close old temporary transports, using
961 * a mark-and-sweep algorithm. 920 * a mark-and-sweep algorithm.
962 */ 921 */
963static void svc_age_temp_xprts(unsigned long closure) 922static void svc_age_temp_xprts(struct timer_list *t)
964{ 923{
965 struct svc_serv *serv = (struct svc_serv *)closure; 924 struct svc_serv *serv = from_timer(serv, t, sv_temptimer);
966 struct svc_xprt *xprt; 925 struct svc_xprt *xprt;
967 struct list_head *le, *next; 926 struct list_head *le, *next;
968 927
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 898485e3ece4..333b9d697ae5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -696,9 +696,9 @@ xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
696} 696}
697 697
698static void 698static void
699xprt_init_autodisconnect(unsigned long data) 699xprt_init_autodisconnect(struct timer_list *t)
700{ 700{
701 struct rpc_xprt *xprt = (struct rpc_xprt *)data; 701 struct rpc_xprt *xprt = from_timer(xprt, t, timer);
702 702
703 spin_lock(&xprt->transport_lock); 703 spin_lock(&xprt->transport_lock);
704 if (!list_empty(&xprt->recv)) 704 if (!list_empty(&xprt->recv))
@@ -1139,6 +1139,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
1139 case -EAGAIN: 1139 case -EAGAIN:
1140 xprt_add_backlog(xprt, task); 1140 xprt_add_backlog(xprt, task);
1141 dprintk("RPC: waiting for request slot\n"); 1141 dprintk("RPC: waiting for request slot\n");
1142 /* fall through */
1142 default: 1143 default:
1143 task->tk_status = -EAGAIN; 1144 task->tk_status = -EAGAIN;
1144 } 1145 }
@@ -1422,10 +1423,9 @@ found:
1422 xprt->idle_timeout = 0; 1423 xprt->idle_timeout = 0;
1423 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1424 INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
1424 if (xprt_has_timer(xprt)) 1425 if (xprt_has_timer(xprt))
1425 setup_timer(&xprt->timer, xprt_init_autodisconnect, 1426 timer_setup(&xprt->timer, xprt_init_autodisconnect, 0);
1426 (unsigned long)xprt);
1427 else 1427 else
1428 init_timer(&xprt->timer); 1428 timer_setup(&xprt->timer, NULL, 0);
1429 1429
1430 if (strlen(args->servername) > RPC_MAXNETNAMELEN) { 1430 if (strlen(args->servername) > RPC_MAXNETNAMELEN) {
1431 xprt_destroy(xprt); 1431 xprt_destroy(xprt);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 823a781ec89c..8b818bb3518a 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -43,7 +43,7 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
43 req = rpcrdma_create_req(r_xprt); 43 req = rpcrdma_create_req(r_xprt);
44 if (IS_ERR(req)) 44 if (IS_ERR(req))
45 return PTR_ERR(req); 45 return PTR_ERR(req);
46 req->rl_backchannel = true; 46 __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
47 47
48 rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, 48 rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
49 DMA_TO_DEVICE, GFP_KERNEL); 49 DMA_TO_DEVICE, GFP_KERNEL);
@@ -223,8 +223,8 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
223 *p++ = xdr_zero; 223 *p++ = xdr_zero;
224 *p = xdr_zero; 224 *p = xdr_zero;
225 225
226 if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN, 226 if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
227 &rqst->rq_snd_buf, rpcrdma_noch)) 227 &rqst->rq_snd_buf, rpcrdma_noch))
228 return -EIO; 228 return -EIO;
229 return 0; 229 return 0;
230} 230}
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index fa759dd2b0f3..29fc84c7ff98 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -306,28 +306,9 @@ out_reset:
306 } 306 }
307} 307}
308 308
309/* Use a slow, safe mechanism to invalidate all memory regions
310 * that were registered for "req".
311 */
312static void
313fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
314 bool sync)
315{
316 struct rpcrdma_mw *mw;
317
318 while (!list_empty(&req->rl_registered)) {
319 mw = rpcrdma_pop_mw(&req->rl_registered);
320 if (sync)
321 fmr_op_recover_mr(mw);
322 else
323 rpcrdma_defer_mr_recovery(mw);
324 }
325}
326
327const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { 309const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
328 .ro_map = fmr_op_map, 310 .ro_map = fmr_op_map,
329 .ro_unmap_sync = fmr_op_unmap_sync, 311 .ro_unmap_sync = fmr_op_unmap_sync,
330 .ro_unmap_safe = fmr_op_unmap_safe,
331 .ro_recover_mr = fmr_op_recover_mr, 312 .ro_recover_mr = fmr_op_recover_mr,
332 .ro_open = fmr_op_open, 313 .ro_open = fmr_op_open,
333 .ro_maxpages = fmr_op_maxpages, 314 .ro_maxpages = fmr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 35d7517ef0e6..773e66e10a15 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -420,7 +420,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
420 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 420 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
421 IB_ACCESS_REMOTE_READ; 421 IB_ACCESS_REMOTE_READ;
422 422
423 rpcrdma_set_signaled(&r_xprt->rx_ep, &reg_wr->wr);
424 rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr); 423 rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
425 if (rc) 424 if (rc)
426 goto out_senderr; 425 goto out_senderr;
@@ -508,12 +507,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
508 f->fr_cqe.done = frwr_wc_localinv_wake; 507 f->fr_cqe.done = frwr_wc_localinv_wake;
509 reinit_completion(&f->fr_linv_done); 508 reinit_completion(&f->fr_linv_done);
510 509
511 /* Initialize CQ count, since there is always a signaled
512 * WR being posted here. The new cqcount depends on how
513 * many SQEs are about to be consumed.
514 */
515 rpcrdma_init_cqcount(&r_xprt->rx_ep, count);
516
517 /* Transport disconnect drains the receive CQ before it 510 /* Transport disconnect drains the receive CQ before it
518 * replaces the QP. The RPC reply handler won't call us 511 * replaces the QP. The RPC reply handler won't call us
519 * unless ri_id->qp is a valid pointer. 512 * unless ri_id->qp is a valid pointer.
@@ -546,7 +539,6 @@ reset_mrs:
546 /* Find and reset the MRs in the LOCAL_INV WRs that did not 539 /* Find and reset the MRs in the LOCAL_INV WRs that did not
547 * get posted. 540 * get posted.
548 */ 541 */
549 rpcrdma_init_cqcount(&r_xprt->rx_ep, -count);
550 while (bad_wr) { 542 while (bad_wr) {
551 f = container_of(bad_wr, struct rpcrdma_frmr, 543 f = container_of(bad_wr, struct rpcrdma_frmr,
552 fr_invwr); 544 fr_invwr);
@@ -559,28 +551,9 @@ reset_mrs:
559 goto unmap; 551 goto unmap;
560} 552}
561 553
562/* Use a slow, safe mechanism to invalidate all memory regions
563 * that were registered for "req".
564 */
565static void
566frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
567 bool sync)
568{
569 struct rpcrdma_mw *mw;
570
571 while (!list_empty(&req->rl_registered)) {
572 mw = rpcrdma_pop_mw(&req->rl_registered);
573 if (sync)
574 frwr_op_recover_mr(mw);
575 else
576 rpcrdma_defer_mr_recovery(mw);
577 }
578}
579
580const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { 554const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
581 .ro_map = frwr_op_map, 555 .ro_map = frwr_op_map,
582 .ro_unmap_sync = frwr_op_unmap_sync, 556 .ro_unmap_sync = frwr_op_unmap_sync,
583 .ro_unmap_safe = frwr_op_unmap_safe,
584 .ro_recover_mr = frwr_op_recover_mr, 557 .ro_recover_mr = frwr_op_recover_mr,
585 .ro_open = frwr_op_open, 558 .ro_open = frwr_op_open,
586 .ro_maxpages = frwr_op_maxpages, 559 .ro_maxpages = frwr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index f1889f4d4803..ed34dc0f144c 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1,4 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2017 Oracle. All rights reserved.
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -75,11 +76,11 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
75 76
76 /* Maximum Read list size */ 77 /* Maximum Read list size */
77 maxsegs += 2; /* segment for head and tail buffers */ 78 maxsegs += 2; /* segment for head and tail buffers */
78 size = maxsegs * sizeof(struct rpcrdma_read_chunk); 79 size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
79 80
80 /* Minimal Read chunk size */ 81 /* Minimal Read chunk size */
81 size += sizeof(__be32); /* segment count */ 82 size += sizeof(__be32); /* segment count */
82 size += sizeof(struct rpcrdma_segment); 83 size += rpcrdma_segment_maxsz * sizeof(__be32);
83 size += sizeof(__be32); /* list discriminator */ 84 size += sizeof(__be32); /* list discriminator */
84 85
85 dprintk("RPC: %s: max call header size = %u\n", 86 dprintk("RPC: %s: max call header size = %u\n",
@@ -102,7 +103,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
102 /* Maximum Write list size */ 103 /* Maximum Write list size */
103 maxsegs += 2; /* segment for head and tail buffers */ 104 maxsegs += 2; /* segment for head and tail buffers */
104 size = sizeof(__be32); /* segment count */ 105 size = sizeof(__be32); /* segment count */
105 size += maxsegs * sizeof(struct rpcrdma_segment); 106 size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
106 size += sizeof(__be32); /* list discriminator */ 107 size += sizeof(__be32); /* list discriminator */
107 108
108 dprintk("RPC: %s: max reply header size = %u\n", 109 dprintk("RPC: %s: max reply header size = %u\n",
@@ -511,27 +512,60 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
511 return 0; 512 return 0;
512} 513}
513 514
514/* Prepare the RPC-over-RDMA header SGE. 515/**
516 * rpcrdma_unmap_sendctx - DMA-unmap Send buffers
517 * @sc: sendctx containing SGEs to unmap
518 *
519 */
520void
521rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
522{
523 struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia;
524 struct ib_sge *sge;
525 unsigned int count;
526
527 dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
528 __func__, sc->sc_unmap_count, sc);
529
530 /* The first two SGEs contain the transport header and
531 * the inline buffer. These are always left mapped so
532 * they can be cheaply re-used.
533 */
534 sge = &sc->sc_sges[2];
535 for (count = sc->sc_unmap_count; count; ++sge, --count)
536 ib_dma_unmap_page(ia->ri_device,
537 sge->addr, sge->length, DMA_TO_DEVICE);
538
539 if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) {
540 smp_mb__after_atomic();
541 wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
542 }
543}
544
545/* Prepare an SGE for the RPC-over-RDMA transport header.
515 */ 546 */
516static bool 547static bool
517rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req, 548rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
518 u32 len) 549 u32 len)
519{ 550{
551 struct rpcrdma_sendctx *sc = req->rl_sendctx;
520 struct rpcrdma_regbuf *rb = req->rl_rdmabuf; 552 struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
521 struct ib_sge *sge = &req->rl_send_sge[0]; 553 struct ib_sge *sge = sc->sc_sges;
522 554
523 if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) { 555 if (!rpcrdma_dma_map_regbuf(ia, rb))
524 if (!__rpcrdma_dma_map_regbuf(ia, rb)) 556 goto out_regbuf;
525 return false; 557 sge->addr = rdmab_addr(rb);
526 sge->addr = rdmab_addr(rb);
527 sge->lkey = rdmab_lkey(rb);
528 }
529 sge->length = len; 558 sge->length = len;
559 sge->lkey = rdmab_lkey(rb);
530 560
531 ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, 561 ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
532 sge->length, DMA_TO_DEVICE); 562 sge->length, DMA_TO_DEVICE);
533 req->rl_send_wr.num_sge++; 563 sc->sc_wr.num_sge++;
534 return true; 564 return true;
565
566out_regbuf:
567 pr_err("rpcrdma: failed to DMA map a Send buffer\n");
568 return false;
535} 569}
536 570
537/* Prepare the Send SGEs. The head and tail iovec, and each entry 571/* Prepare the Send SGEs. The head and tail iovec, and each entry
@@ -541,10 +575,11 @@ static bool
541rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, 575rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
542 struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) 576 struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
543{ 577{
578 struct rpcrdma_sendctx *sc = req->rl_sendctx;
544 unsigned int sge_no, page_base, len, remaining; 579 unsigned int sge_no, page_base, len, remaining;
545 struct rpcrdma_regbuf *rb = req->rl_sendbuf; 580 struct rpcrdma_regbuf *rb = req->rl_sendbuf;
546 struct ib_device *device = ia->ri_device; 581 struct ib_device *device = ia->ri_device;
547 struct ib_sge *sge = req->rl_send_sge; 582 struct ib_sge *sge = sc->sc_sges;
548 u32 lkey = ia->ri_pd->local_dma_lkey; 583 u32 lkey = ia->ri_pd->local_dma_lkey;
549 struct page *page, **ppages; 584 struct page *page, **ppages;
550 585
@@ -552,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
552 * DMA-mapped. Sync the content that has changed. 587 * DMA-mapped. Sync the content that has changed.
553 */ 588 */
554 if (!rpcrdma_dma_map_regbuf(ia, rb)) 589 if (!rpcrdma_dma_map_regbuf(ia, rb))
555 return false; 590 goto out_regbuf;
556 sge_no = 1; 591 sge_no = 1;
557 sge[sge_no].addr = rdmab_addr(rb); 592 sge[sge_no].addr = rdmab_addr(rb);
558 sge[sge_no].length = xdr->head[0].iov_len; 593 sge[sge_no].length = xdr->head[0].iov_len;
@@ -607,7 +642,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
607 sge[sge_no].length = len; 642 sge[sge_no].length = len;
608 sge[sge_no].lkey = lkey; 643 sge[sge_no].lkey = lkey;
609 644
610 req->rl_mapped_sges++; 645 sc->sc_unmap_count++;
611 ppages++; 646 ppages++;
612 remaining -= len; 647 remaining -= len;
613 page_base = 0; 648 page_base = 0;
@@ -633,56 +668,61 @@ map_tail:
633 goto out_mapping_err; 668 goto out_mapping_err;
634 sge[sge_no].length = len; 669 sge[sge_no].length = len;
635 sge[sge_no].lkey = lkey; 670 sge[sge_no].lkey = lkey;
636 req->rl_mapped_sges++; 671 sc->sc_unmap_count++;
637 } 672 }
638 673
639out: 674out:
640 req->rl_send_wr.num_sge = sge_no + 1; 675 sc->sc_wr.num_sge += sge_no;
676 if (sc->sc_unmap_count)
677 __set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
641 return true; 678 return true;
642 679
680out_regbuf:
681 pr_err("rpcrdma: failed to DMA map a Send buffer\n");
682 return false;
683
643out_mapping_overflow: 684out_mapping_overflow:
685 rpcrdma_unmap_sendctx(sc);
644 pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no); 686 pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
645 return false; 687 return false;
646 688
647out_mapping_err: 689out_mapping_err:
690 rpcrdma_unmap_sendctx(sc);
648 pr_err("rpcrdma: Send mapping error\n"); 691 pr_err("rpcrdma: Send mapping error\n");
649 return false; 692 return false;
650} 693}
651 694
652bool 695/**
653rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, 696 * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
654 u32 hdrlen, struct xdr_buf *xdr, 697 * @r_xprt: controlling transport
655 enum rpcrdma_chunktype rtype) 698 * @req: context of RPC Call being marshalled
699 * @hdrlen: size of transport header, in bytes
700 * @xdr: xdr_buf containing RPC Call
701 * @rtype: chunk type being encoded
702 *
703 * Returns 0 on success; otherwise a negative errno is returned.
704 */
705int
706rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
707 struct rpcrdma_req *req, u32 hdrlen,
708 struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
656{ 709{
657 req->rl_send_wr.num_sge = 0; 710 req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
658 req->rl_mapped_sges = 0; 711 if (!req->rl_sendctx)
659 712 return -ENOBUFS;
660 if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen)) 713 req->rl_sendctx->sc_wr.num_sge = 0;
661 goto out_map; 714 req->rl_sendctx->sc_unmap_count = 0;
715 req->rl_sendctx->sc_req = req;
716 __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
717
718 if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen))
719 return -EIO;
662 720
663 if (rtype != rpcrdma_areadch) 721 if (rtype != rpcrdma_areadch)
664 if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype)) 722 if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype))
665 goto out_map; 723 return -EIO;
666
667 return true;
668
669out_map:
670 pr_err("rpcrdma: failed to DMA map a Send buffer\n");
671 return false;
672}
673
674void
675rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
676{
677 struct ib_device *device = ia->ri_device;
678 struct ib_sge *sge;
679 int count;
680 724
681 sge = &req->rl_send_sge[2]; 725 return 0;
682 for (count = req->rl_mapped_sges; count--; sge++)
683 ib_dma_unmap_page(device, sge->addr, sge->length,
684 DMA_TO_DEVICE);
685 req->rl_mapped_sges = 0;
686} 726}
687 727
688/** 728/**
@@ -833,12 +873,10 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
833 transfertypes[rtype], transfertypes[wtype], 873 transfertypes[rtype], transfertypes[wtype],
834 xdr_stream_pos(xdr)); 874 xdr_stream_pos(xdr));
835 875
836 if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, 876 ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
837 xdr_stream_pos(xdr), 877 &rqst->rq_snd_buf, rtype);
838 &rqst->rq_snd_buf, rtype)) { 878 if (ret)
839 ret = -EIO;
840 goto out_err; 879 goto out_err;
841 }
842 return 0; 880 return 0;
843 881
844out_err: 882out_err:
@@ -970,14 +1008,13 @@ rpcrdma_mark_remote_invalidation(struct list_head *mws,
970 * straightforward to check the RPC header's direction field. 1008 * straightforward to check the RPC header's direction field.
971 */ 1009 */
972static bool 1010static bool
973rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, 1011rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
974 __be32 xid, __be32 proc)
975#if defined(CONFIG_SUNRPC_BACKCHANNEL) 1012#if defined(CONFIG_SUNRPC_BACKCHANNEL)
976{ 1013{
977 struct xdr_stream *xdr = &rep->rr_stream; 1014 struct xdr_stream *xdr = &rep->rr_stream;
978 __be32 *p; 1015 __be32 *p;
979 1016
980 if (proc != rdma_msg) 1017 if (rep->rr_proc != rdma_msg)
981 return false; 1018 return false;
982 1019
983 /* Peek at stream contents without advancing. */ 1020 /* Peek at stream contents without advancing. */
@@ -992,7 +1029,7 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
992 return false; 1029 return false;
993 1030
994 /* RPC header */ 1031 /* RPC header */
995 if (*p++ != xid) 1032 if (*p++ != rep->rr_xid)
996 return false; 1033 return false;
997 if (*p != cpu_to_be32(RPC_CALL)) 1034 if (*p != cpu_to_be32(RPC_CALL))
998 return false; 1035 return false;
@@ -1212,105 +1249,170 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
1212 return -EREMOTEIO; 1249 return -EREMOTEIO;
1213} 1250}
1214 1251
1252/* Perform XID lookup, reconstruction of the RPC reply, and
1253 * RPC completion while holding the transport lock to ensure
1254 * the rep, rqst, and rq_task pointers remain stable.
1255 */
1256void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
1257{
1258 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
1259 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
1260 struct rpc_rqst *rqst = rep->rr_rqst;
1261 unsigned long cwnd;
1262 int status;
1263
1264 xprt->reestablish_timeout = 0;
1265
1266 switch (rep->rr_proc) {
1267 case rdma_msg:
1268 status = rpcrdma_decode_msg(r_xprt, rep, rqst);
1269 break;
1270 case rdma_nomsg:
1271 status = rpcrdma_decode_nomsg(r_xprt, rep);
1272 break;
1273 case rdma_error:
1274 status = rpcrdma_decode_error(r_xprt, rep, rqst);
1275 break;
1276 default:
1277 status = -EIO;
1278 }
1279 if (status < 0)
1280 goto out_badheader;
1281
1282out:
1283 spin_lock(&xprt->recv_lock);
1284 cwnd = xprt->cwnd;
1285 xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
1286 if (xprt->cwnd > cwnd)
1287 xprt_release_rqst_cong(rqst->rq_task);
1288
1289 xprt_complete_rqst(rqst->rq_task, status);
1290 xprt_unpin_rqst(rqst);
1291 spin_unlock(&xprt->recv_lock);
1292 return;
1293
1294/* If the incoming reply terminated a pending RPC, the next
1295 * RPC call will post a replacement receive buffer as it is
1296 * being marshaled.
1297 */
1298out_badheader:
1299 dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
1300 rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
1301 r_xprt->rx_stats.bad_reply_count++;
1302 status = -EIO;
1303 goto out;
1304}
1305
1306void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
1307{
1308 /* Invalidate and unmap the data payloads before waking
1309 * the waiting application. This guarantees the memory
1310 * regions are properly fenced from the server before the
1311 * application accesses the data. It also ensures proper
1312 * send flow control: waking the next RPC waits until this
1313 * RPC has relinquished all its Send Queue entries.
1314 */
1315 if (!list_empty(&req->rl_registered))
1316 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
1317 &req->rl_registered);
1318
1319 /* Ensure that any DMA mapped pages associated with
1320 * the Send of the RPC Call have been unmapped before
1321 * allowing the RPC to complete. This protects argument
1322 * memory not controlled by the RPC client from being
1323 * re-used before we're done with it.
1324 */
1325 if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
1326 r_xprt->rx_stats.reply_waits_for_send++;
1327 out_of_line_wait_on_bit(&req->rl_flags,
1328 RPCRDMA_REQ_F_TX_RESOURCES,
1329 bit_wait,
1330 TASK_UNINTERRUPTIBLE);
1331 }
1332}
1333
1334/* Reply handling runs in the poll worker thread. Anything that
1335 * might wait is deferred to a separate workqueue.
1336 */
1337void rpcrdma_deferred_completion(struct work_struct *work)
1338{
1339 struct rpcrdma_rep *rep =
1340 container_of(work, struct rpcrdma_rep, rr_work);
1341 struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
1342
1343 rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
1344 rpcrdma_release_rqst(rep->rr_rxprt, req);
1345 rpcrdma_complete_rqst(rep);
1346}
1347
1215/* Process received RPC/RDMA messages. 1348/* Process received RPC/RDMA messages.
1216 * 1349 *
1217 * Errors must result in the RPC task either being awakened, or 1350 * Errors must result in the RPC task either being awakened, or
1218 * allowed to timeout, to discover the errors at that time. 1351 * allowed to timeout, to discover the errors at that time.
1219 */ 1352 */
1220void 1353void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
1221rpcrdma_reply_handler(struct work_struct *work)
1222{ 1354{
1223 struct rpcrdma_rep *rep =
1224 container_of(work, struct rpcrdma_rep, rr_work);
1225 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; 1355 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
1226 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 1356 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
1227 struct xdr_stream *xdr = &rep->rr_stream; 1357 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1228 struct rpcrdma_req *req; 1358 struct rpcrdma_req *req;
1229 struct rpc_rqst *rqst; 1359 struct rpc_rqst *rqst;
1230 __be32 *p, xid, vers, proc; 1360 u32 credits;
1231 unsigned long cwnd; 1361 __be32 *p;
1232 int status;
1233 1362
1234 dprintk("RPC: %s: incoming rep %p\n", __func__, rep); 1363 dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
1235 1364
1236 if (rep->rr_hdrbuf.head[0].iov_len == 0) 1365 if (rep->rr_hdrbuf.head[0].iov_len == 0)
1237 goto out_badstatus; 1366 goto out_badstatus;
1238 1367
1239 xdr_init_decode(xdr, &rep->rr_hdrbuf, 1368 xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
1240 rep->rr_hdrbuf.head[0].iov_base); 1369 rep->rr_hdrbuf.head[0].iov_base);
1241 1370
1242 /* Fixed transport header fields */ 1371 /* Fixed transport header fields */
1243 p = xdr_inline_decode(xdr, 4 * sizeof(*p)); 1372 p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
1244 if (unlikely(!p)) 1373 if (unlikely(!p))
1245 goto out_shortreply; 1374 goto out_shortreply;
1246 xid = *p++; 1375 rep->rr_xid = *p++;
1247 vers = *p++; 1376 rep->rr_vers = *p++;
1248 p++; /* credits */ 1377 credits = be32_to_cpu(*p++);
1249 proc = *p++; 1378 rep->rr_proc = *p++;
1379
1380 if (rep->rr_vers != rpcrdma_version)
1381 goto out_badversion;
1250 1382
1251 if (rpcrdma_is_bcall(r_xprt, rep, xid, proc)) 1383 if (rpcrdma_is_bcall(r_xprt, rep))
1252 return; 1384 return;
1253 1385
1254 /* Match incoming rpcrdma_rep to an rpcrdma_req to 1386 /* Match incoming rpcrdma_rep to an rpcrdma_req to
1255 * get context for handling any incoming chunks. 1387 * get context for handling any incoming chunks.
1256 */ 1388 */
1257 spin_lock(&xprt->recv_lock); 1389 spin_lock(&xprt->recv_lock);
1258 rqst = xprt_lookup_rqst(xprt, xid); 1390 rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
1259 if (!rqst) 1391 if (!rqst)
1260 goto out_norqst; 1392 goto out_norqst;
1261 xprt_pin_rqst(rqst); 1393 xprt_pin_rqst(rqst);
1394
1395 if (credits == 0)
1396 credits = 1; /* don't deadlock */
1397 else if (credits > buf->rb_max_requests)
1398 credits = buf->rb_max_requests;
1399 buf->rb_credits = credits;
1400
1262 spin_unlock(&xprt->recv_lock); 1401 spin_unlock(&xprt->recv_lock);
1402
1263 req = rpcr_to_rdmar(rqst); 1403 req = rpcr_to_rdmar(rqst);
1264 req->rl_reply = rep; 1404 req->rl_reply = rep;
1405 rep->rr_rqst = rqst;
1406 clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
1265 1407
1266 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", 1408 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
1267 __func__, rep, req, be32_to_cpu(xid)); 1409 __func__, rep, req, be32_to_cpu(rep->rr_xid));
1268
1269 /* Invalidate and unmap the data payloads before waking the
1270 * waiting application. This guarantees the memory regions
1271 * are properly fenced from the server before the application
1272 * accesses the data. It also ensures proper send flow control:
1273 * waking the next RPC waits until this RPC has relinquished
1274 * all its Send Queue entries.
1275 */
1276 if (!list_empty(&req->rl_registered)) {
1277 rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
1278 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
1279 &req->rl_registered);
1280 }
1281
1282 xprt->reestablish_timeout = 0;
1283 if (vers != rpcrdma_version)
1284 goto out_badversion;
1285 1410
1286 switch (proc) { 1411 if (list_empty(&req->rl_registered) &&
1287 case rdma_msg: 1412 !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
1288 status = rpcrdma_decode_msg(r_xprt, rep, rqst); 1413 rpcrdma_complete_rqst(rep);
1289 break; 1414 else
1290 case rdma_nomsg: 1415 queue_work(rpcrdma_receive_wq, &rep->rr_work);
1291 status = rpcrdma_decode_nomsg(r_xprt, rep);
1292 break;
1293 case rdma_error:
1294 status = rpcrdma_decode_error(r_xprt, rep, rqst);
1295 break;
1296 default:
1297 status = -EIO;
1298 }
1299 if (status < 0)
1300 goto out_badheader;
1301
1302out:
1303 spin_lock(&xprt->recv_lock);
1304 cwnd = xprt->cwnd;
1305 xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
1306 if (xprt->cwnd > cwnd)
1307 xprt_release_rqst_cong(rqst->rq_task);
1308
1309 xprt_complete_rqst(rqst->rq_task, status);
1310 xprt_unpin_rqst(rqst);
1311 spin_unlock(&xprt->recv_lock);
1312 dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
1313 __func__, xprt, rqst, status);
1314 return; 1416 return;
1315 1417
1316out_badstatus: 1418out_badstatus:
@@ -1321,37 +1423,22 @@ out_badstatus:
1321 } 1423 }
1322 return; 1424 return;
1323 1425
1324/* If the incoming reply terminated a pending RPC, the next
1325 * RPC call will post a replacement receive buffer as it is
1326 * being marshaled.
1327 */
1328out_badversion: 1426out_badversion:
1329 dprintk("RPC: %s: invalid version %d\n", 1427 dprintk("RPC: %s: invalid version %d\n",
1330 __func__, be32_to_cpu(vers)); 1428 __func__, be32_to_cpu(rep->rr_vers));
1331 status = -EIO; 1429 goto repost;
1332 r_xprt->rx_stats.bad_reply_count++;
1333 goto out;
1334
1335out_badheader:
1336 dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
1337 rqst->rq_task->tk_pid, __func__, be32_to_cpu(proc));
1338 r_xprt->rx_stats.bad_reply_count++;
1339 status = -EIO;
1340 goto out;
1341 1430
1342/* The req was still available, but by the time the recv_lock 1431/* The RPC transaction has already been terminated, or the header
1343 * was acquired, the rqst and task had been released. Thus the RPC 1432 * is corrupt.
1344 * has already been terminated.
1345 */ 1433 */
1346out_norqst: 1434out_norqst:
1347 spin_unlock(&xprt->recv_lock); 1435 spin_unlock(&xprt->recv_lock);
1348 dprintk("RPC: %s: no match for incoming xid 0x%08x\n", 1436 dprintk("RPC: %s: no match for incoming xid 0x%08x\n",
1349 __func__, be32_to_cpu(xid)); 1437 __func__, be32_to_cpu(rep->rr_xid));
1350 goto repost; 1438 goto repost;
1351 1439
1352out_shortreply: 1440out_shortreply:
1353 dprintk("RPC: %s: short/invalid reply\n", __func__); 1441 dprintk("RPC: %s: short/invalid reply\n", __func__);
1354 goto repost;
1355 1442
1356/* If no pending RPC transaction was matched, post a replacement 1443/* If no pending RPC transaction was matched, post a replacement
1357 * receive buffer before returning. 1444 * receive buffer before returning.
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 992594b7cc6b..af7893501e40 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -133,6 +133,10 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
133 if (ret) 133 if (ret)
134 goto out_err; 134 goto out_err;
135 135
136 /* Bump page refcnt so Send completion doesn't release
137 * the rq_buffer before all retransmits are complete.
138 */
139 get_page(virt_to_page(rqst->rq_buffer));
136 ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0); 140 ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
137 if (ret) 141 if (ret)
138 goto out_unmap; 142 goto out_unmap;
@@ -165,7 +169,6 @@ xprt_rdma_bc_allocate(struct rpc_task *task)
165 return -EINVAL; 169 return -EINVAL;
166 } 170 }
167 171
168 /* svc_rdma_sendto releases this page */
169 page = alloc_page(RPCRDMA_DEF_GFP); 172 page = alloc_page(RPCRDMA_DEF_GFP);
170 if (!page) 173 if (!page)
171 return -ENOMEM; 174 return -ENOMEM;
@@ -184,6 +187,7 @@ xprt_rdma_bc_free(struct rpc_task *task)
184{ 187{
185 struct rpc_rqst *rqst = task->tk_rqstp; 188 struct rpc_rqst *rqst = task->tk_rqstp;
186 189
190 put_page(virt_to_page(rqst->rq_buffer));
187 kfree(rqst->rq_rbuffer); 191 kfree(rqst->rq_rbuffer);
188} 192}
189 193
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 5caf8e722a11..46ec069150d5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -290,6 +290,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
290 ib_event_msg(event->event), event->event, 290 ib_event_msg(event->event), event->event,
291 event->element.qp); 291 event->element.qp);
292 set_bit(XPT_CLOSE, &xprt->xpt_flags); 292 set_bit(XPT_CLOSE, &xprt->xpt_flags);
293 svc_xprt_enqueue(xprt);
293 break; 294 break;
294 } 295 }
295} 296}
@@ -322,8 +323,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
322 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); 323 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
323 if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) 324 if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
324 goto out; 325 goto out;
325 svc_xprt_enqueue(&xprt->sc_xprt); 326 goto out_enqueue;
326 goto out;
327 327
328flushed: 328flushed:
329 if (wc->status != IB_WC_WR_FLUSH_ERR) 329 if (wc->status != IB_WC_WR_FLUSH_ERR)
@@ -333,6 +333,8 @@ flushed:
333 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 333 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
334 svc_rdma_put_context(ctxt, 1); 334 svc_rdma_put_context(ctxt, 1);
335 335
336out_enqueue:
337 svc_xprt_enqueue(&xprt->sc_xprt);
336out: 338out:
337 svc_xprt_put(&xprt->sc_xprt); 339 svc_xprt_put(&xprt->sc_xprt);
338} 340}
@@ -358,6 +360,7 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
358 360
359 if (unlikely(wc->status != IB_WC_SUCCESS)) { 361 if (unlikely(wc->status != IB_WC_SUCCESS)) {
360 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 362 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
363 svc_xprt_enqueue(&xprt->sc_xprt);
361 if (wc->status != IB_WC_WR_FLUSH_ERR) 364 if (wc->status != IB_WC_WR_FLUSH_ERR)
362 pr_err("svcrdma: Send: %s (%u/0x%x)\n", 365 pr_err("svcrdma: Send: %s (%u/0x%x)\n",
363 ib_wc_status_msg(wc->status), 366 ib_wc_status_msg(wc->status),
@@ -569,8 +572,10 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
569 case RDMA_CM_EVENT_DEVICE_REMOVAL: 572 case RDMA_CM_EVENT_DEVICE_REMOVAL:
570 dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n", 573 dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
571 xprt, cma_id); 574 xprt, cma_id);
572 if (xprt) 575 if (xprt) {
573 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 576 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
577 svc_xprt_enqueue(&xprt->sc_xprt);
578 }
574 break; 579 break;
575 580
576 default: 581 default:
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index c84e2b644e13..646c24494ea7 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -1,4 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2017 Oracle. All rights reserved.
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -678,16 +679,14 @@ xprt_rdma_free(struct rpc_task *task)
678 struct rpc_rqst *rqst = task->tk_rqstp; 679 struct rpc_rqst *rqst = task->tk_rqstp;
679 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 680 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
680 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 681 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
681 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
682 682
683 if (req->rl_backchannel) 683 if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
684 return; 684 return;
685 685
686 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); 686 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
687 687
688 if (!list_empty(&req->rl_registered)) 688 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
689 ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task)); 689 rpcrdma_release_rqst(r_xprt, req);
690 rpcrdma_unmap_sges(ia, req);
691 rpcrdma_buffer_put(req); 690 rpcrdma_buffer_put(req);
692} 691}
693 692
@@ -728,7 +727,8 @@ xprt_rdma_send_request(struct rpc_task *task)
728 727
729 /* On retransmit, remove any previously registered chunks */ 728 /* On retransmit, remove any previously registered chunks */
730 if (unlikely(!list_empty(&req->rl_registered))) 729 if (unlikely(!list_empty(&req->rl_registered)))
731 r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); 730 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
731 &req->rl_registered);
732 732
733 rc = rpcrdma_marshal_req(r_xprt, rqst); 733 rc = rpcrdma_marshal_req(r_xprt, rqst);
734 if (rc < 0) 734 if (rc < 0)
@@ -742,6 +742,7 @@ xprt_rdma_send_request(struct rpc_task *task)
742 goto drop_connection; 742 goto drop_connection;
743 req->rl_connect_cookie = xprt->connect_cookie; 743 req->rl_connect_cookie = xprt->connect_cookie;
744 744
745 set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
745 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) 746 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
746 goto drop_connection; 747 goto drop_connection;
747 748
@@ -789,11 +790,13 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
789 r_xprt->rx_stats.failed_marshal_count, 790 r_xprt->rx_stats.failed_marshal_count,
790 r_xprt->rx_stats.bad_reply_count, 791 r_xprt->rx_stats.bad_reply_count,
791 r_xprt->rx_stats.nomsg_call_count); 792 r_xprt->rx_stats.nomsg_call_count);
792 seq_printf(seq, "%lu %lu %lu %lu\n", 793 seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
793 r_xprt->rx_stats.mrs_recovered, 794 r_xprt->rx_stats.mrs_recovered,
794 r_xprt->rx_stats.mrs_orphaned, 795 r_xprt->rx_stats.mrs_orphaned,
795 r_xprt->rx_stats.mrs_allocated, 796 r_xprt->rx_stats.mrs_allocated,
796 r_xprt->rx_stats.local_inv_needed); 797 r_xprt->rx_stats.local_inv_needed,
798 r_xprt->rx_stats.empty_sendctx_q,
799 r_xprt->rx_stats.reply_waits_for_send);
797} 800}
798 801
799static int 802static int
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 11a1fbf7e59e..710b3f77db82 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1,4 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2017 Oracle. All rights reserved.
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -49,9 +50,10 @@
49 50
50#include <linux/interrupt.h> 51#include <linux/interrupt.h>
51#include <linux/slab.h> 52#include <linux/slab.h>
52#include <linux/prefetch.h>
53#include <linux/sunrpc/addr.h> 53#include <linux/sunrpc/addr.h>
54#include <linux/sunrpc/svc_rdma.h> 54#include <linux/sunrpc/svc_rdma.h>
55
56#include <asm-generic/barrier.h>
55#include <asm/bitops.h> 57#include <asm/bitops.h>
56 58
57#include <rdma/ib_cm.h> 59#include <rdma/ib_cm.h>
@@ -73,7 +75,7 @@ static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
73static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf); 75static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
74static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); 76static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
75 77
76static struct workqueue_struct *rpcrdma_receive_wq __read_mostly; 78struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
77 79
78int 80int
79rpcrdma_alloc_wq(void) 81rpcrdma_alloc_wq(void)
@@ -126,30 +128,17 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
126static void 128static void
127rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) 129rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
128{ 130{
131 struct ib_cqe *cqe = wc->wr_cqe;
132 struct rpcrdma_sendctx *sc =
133 container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
134
129 /* WARNING: Only wr_cqe and status are reliable at this point */ 135 /* WARNING: Only wr_cqe and status are reliable at this point */
130 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) 136 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
131 pr_err("rpcrdma: Send: %s (%u/0x%x)\n", 137 pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
132 ib_wc_status_msg(wc->status), 138 ib_wc_status_msg(wc->status),
133 wc->status, wc->vendor_err); 139 wc->status, wc->vendor_err);
134}
135
136/* Perform basic sanity checking to avoid using garbage
137 * to update the credit grant value.
138 */
139static void
140rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
141{
142 struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
143 __be32 *p = rep->rr_rdmabuf->rg_base;
144 u32 credits;
145 140
146 credits = be32_to_cpup(p + 2); 141 rpcrdma_sendctx_put_locked(sc);
147 if (credits == 0)
148 credits = 1; /* don't deadlock */
149 else if (credits > buffer->rb_max_requests)
150 credits = buffer->rb_max_requests;
151
152 atomic_set(&buffer->rb_credits, credits);
153} 142}
154 143
155/** 144/**
@@ -181,11 +170,8 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
181 rdmab_addr(rep->rr_rdmabuf), 170 rdmab_addr(rep->rr_rdmabuf),
182 wc->byte_len, DMA_FROM_DEVICE); 171 wc->byte_len, DMA_FROM_DEVICE);
183 172
184 if (wc->byte_len >= RPCRDMA_HDRLEN_ERR)
185 rpcrdma_update_granted_credits(rep);
186
187out_schedule: 173out_schedule:
188 queue_work(rpcrdma_receive_wq, &rep->rr_work); 174 rpcrdma_reply_handler(rep);
189 return; 175 return;
190 176
191out_fail: 177out_fail:
@@ -295,7 +281,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
295 case RDMA_CM_EVENT_DISCONNECTED: 281 case RDMA_CM_EVENT_DISCONNECTED:
296 connstate = -ECONNABORTED; 282 connstate = -ECONNABORTED;
297connected: 283connected:
298 atomic_set(&xprt->rx_buf.rb_credits, 1); 284 xprt->rx_buf.rb_credits = 1;
299 ep->rep_connected = connstate; 285 ep->rep_connected = connstate;
300 rpcrdma_conn_func(ep); 286 rpcrdma_conn_func(ep);
301 wake_up_all(&ep->rep_connect_wait); 287 wake_up_all(&ep->rep_connect_wait);
@@ -564,16 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
564 ep->rep_attr.cap.max_recv_sge); 550 ep->rep_attr.cap.max_recv_sge);
565 551
566 /* set trigger for requesting send completion */ 552 /* set trigger for requesting send completion */
567 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; 553 ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
568 if (ep->rep_cqinit <= 2) 554 cdata->max_requests >> 2);
569 ep->rep_cqinit = 0; /* always signal? */ 555 ep->rep_send_count = ep->rep_send_batch;
570 rpcrdma_init_cqcount(ep, 0);
571 init_waitqueue_head(&ep->rep_connect_wait); 556 init_waitqueue_head(&ep->rep_connect_wait);
572 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); 557 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
573 558
574 sendcq = ib_alloc_cq(ia->ri_device, NULL, 559 sendcq = ib_alloc_cq(ia->ri_device, NULL,
575 ep->rep_attr.cap.max_send_wr + 1, 560 ep->rep_attr.cap.max_send_wr + 1,
576 0, IB_POLL_SOFTIRQ); 561 1, IB_POLL_WORKQUEUE);
577 if (IS_ERR(sendcq)) { 562 if (IS_ERR(sendcq)) {
578 rc = PTR_ERR(sendcq); 563 rc = PTR_ERR(sendcq);
579 dprintk("RPC: %s: failed to create send CQ: %i\n", 564 dprintk("RPC: %s: failed to create send CQ: %i\n",
@@ -583,7 +568,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
583 568
584 recvcq = ib_alloc_cq(ia->ri_device, NULL, 569 recvcq = ib_alloc_cq(ia->ri_device, NULL,
585 ep->rep_attr.cap.max_recv_wr + 1, 570 ep->rep_attr.cap.max_recv_wr + 1,
586 0, IB_POLL_SOFTIRQ); 571 0, IB_POLL_WORKQUEUE);
587 if (IS_ERR(recvcq)) { 572 if (IS_ERR(recvcq)) {
588 rc = PTR_ERR(recvcq); 573 rc = PTR_ERR(recvcq);
589 dprintk("RPC: %s: failed to create recv CQ: %i\n", 574 dprintk("RPC: %s: failed to create recv CQ: %i\n",
@@ -846,6 +831,168 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
846 ib_drain_qp(ia->ri_id->qp); 831 ib_drain_qp(ia->ri_id->qp);
847} 832}
848 833
834/* Fixed-size circular FIFO queue. This implementation is wait-free and
835 * lock-free.
836 *
837 * Consumer is the code path that posts Sends. This path dequeues a
838 * sendctx for use by a Send operation. Multiple consumer threads
839 * are serialized by the RPC transport lock, which allows only one
840 * ->send_request call at a time.
841 *
842 * Producer is the code path that handles Send completions. This path
843 * enqueues a sendctx that has been completed. Multiple producer
844 * threads are serialized by the ib_poll_cq() function.
845 */
846
847/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
848 * queue activity, and ib_drain_qp has flushed all remaining Send
849 * requests.
850 */
851static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
852{
853 unsigned long i;
854
855 for (i = 0; i <= buf->rb_sc_last; i++)
856 kfree(buf->rb_sc_ctxs[i]);
857 kfree(buf->rb_sc_ctxs);
858}
859
860static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia)
861{
862 struct rpcrdma_sendctx *sc;
863
864 sc = kzalloc(sizeof(*sc) +
865 ia->ri_max_send_sges * sizeof(struct ib_sge),
866 GFP_KERNEL);
867 if (!sc)
868 return NULL;
869
870 sc->sc_wr.wr_cqe = &sc->sc_cqe;
871 sc->sc_wr.sg_list = sc->sc_sges;
872 sc->sc_wr.opcode = IB_WR_SEND;
873 sc->sc_cqe.done = rpcrdma_wc_send;
874 return sc;
875}
876
877static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
878{
879 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
880 struct rpcrdma_sendctx *sc;
881 unsigned long i;
882
883 /* Maximum number of concurrent outstanding Send WRs. Capping
884 * the circular queue size stops Send Queue overflow by causing
885 * the ->send_request call to fail temporarily before too many
886 * Sends are posted.
887 */
888 i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS;
889 dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i);
890 buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
891 if (!buf->rb_sc_ctxs)
892 return -ENOMEM;
893
894 buf->rb_sc_last = i - 1;
895 for (i = 0; i <= buf->rb_sc_last; i++) {
896 sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);
897 if (!sc)
898 goto out_destroy;
899
900 sc->sc_xprt = r_xprt;
901 buf->rb_sc_ctxs[i] = sc;
902 }
903
904 return 0;
905
906out_destroy:
907 rpcrdma_sendctxs_destroy(buf);
908 return -ENOMEM;
909}
910
911/* The sendctx queue is not guaranteed to have a size that is a
912 * power of two, thus the helpers in circ_buf.h cannot be used.
913 * The other option is to use modulus (%), which can be expensive.
914 */
915static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
916 unsigned long item)
917{
918 return likely(item < buf->rb_sc_last) ? item + 1 : 0;
919}
920
921/**
922 * rpcrdma_sendctx_get_locked - Acquire a send context
923 * @buf: transport buffers from which to acquire an unused context
924 *
925 * Returns pointer to a free send completion context; or NULL if
926 * the queue is empty.
927 *
928 * Usage: Called to acquire an SGE array before preparing a Send WR.
929 *
930 * The caller serializes calls to this function (per rpcrdma_buffer),
931 * and provides an effective memory barrier that flushes the new value
932 * of rb_sc_head.
933 */
934struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
935{
936 struct rpcrdma_xprt *r_xprt;
937 struct rpcrdma_sendctx *sc;
938 unsigned long next_head;
939
940 next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);
941
942 if (next_head == READ_ONCE(buf->rb_sc_tail))
943 goto out_emptyq;
944
945 /* ORDER: item must be accessed _before_ head is updated */
946 sc = buf->rb_sc_ctxs[next_head];
947
948 /* Releasing the lock in the caller acts as a memory
949 * barrier that flushes rb_sc_head.
950 */
951 buf->rb_sc_head = next_head;
952
953 return sc;
954
955out_emptyq:
956 /* The queue is "empty" if there have not been enough Send
957 * completions recently. This is a sign the Send Queue is
958 * backing up. Cause the caller to pause and try again.
959 */
960 dprintk("RPC: %s: empty sendctx queue\n", __func__);
961 r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
962 r_xprt->rx_stats.empty_sendctx_q++;
963 return NULL;
964}
965
966/**
967 * rpcrdma_sendctx_put_locked - Release a send context
968 * @sc: send context to release
969 *
970 * Usage: Called from Send completion to return a sendctxt
971 * to the queue.
972 *
973 * The caller serializes calls to this function (per rpcrdma_buffer).
974 */
975void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
976{
977 struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
978 unsigned long next_tail;
979
980 /* Unmap SGEs of previously completed by unsignaled
981 * Sends by walking up the queue until @sc is found.
982 */
983 next_tail = buf->rb_sc_tail;
984 do {
985 next_tail = rpcrdma_sendctx_next(buf, next_tail);
986
987 /* ORDER: item must be accessed _before_ tail is updated */
988 rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);
989
990 } while (buf->rb_sc_ctxs[next_tail] != sc);
991
992 /* Paired with READ_ONCE */
993 smp_store_release(&buf->rb_sc_tail, next_tail);
994}
995
849static void 996static void
850rpcrdma_mr_recovery_worker(struct work_struct *work) 997rpcrdma_mr_recovery_worker(struct work_struct *work)
851{ 998{
@@ -941,13 +1088,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
941 spin_lock(&buffer->rb_reqslock); 1088 spin_lock(&buffer->rb_reqslock);
942 list_add(&req->rl_all, &buffer->rb_allreqs); 1089 list_add(&req->rl_all, &buffer->rb_allreqs);
943 spin_unlock(&buffer->rb_reqslock); 1090 spin_unlock(&buffer->rb_reqslock);
944 req->rl_cqe.done = rpcrdma_wc_send;
945 req->rl_buffer = &r_xprt->rx_buf; 1091 req->rl_buffer = &r_xprt->rx_buf;
946 INIT_LIST_HEAD(&req->rl_registered); 1092 INIT_LIST_HEAD(&req->rl_registered);
947 req->rl_send_wr.next = NULL;
948 req->rl_send_wr.wr_cqe = &req->rl_cqe;
949 req->rl_send_wr.sg_list = req->rl_send_sge;
950 req->rl_send_wr.opcode = IB_WR_SEND;
951 return req; 1093 return req;
952} 1094}
953 1095
@@ -974,7 +1116,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
974 1116
975 rep->rr_cqe.done = rpcrdma_wc_receive; 1117 rep->rr_cqe.done = rpcrdma_wc_receive;
976 rep->rr_rxprt = r_xprt; 1118 rep->rr_rxprt = r_xprt;
977 INIT_WORK(&rep->rr_work, rpcrdma_reply_handler); 1119 INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
978 rep->rr_recv_wr.next = NULL; 1120 rep->rr_recv_wr.next = NULL;
979 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; 1121 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
980 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 1122 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
@@ -995,7 +1137,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
995 1137
996 buf->rb_max_requests = r_xprt->rx_data.max_requests; 1138 buf->rb_max_requests = r_xprt->rx_data.max_requests;
997 buf->rb_bc_srv_max_requests = 0; 1139 buf->rb_bc_srv_max_requests = 0;
998 atomic_set(&buf->rb_credits, 1);
999 spin_lock_init(&buf->rb_mwlock); 1140 spin_lock_init(&buf->rb_mwlock);
1000 spin_lock_init(&buf->rb_lock); 1141 spin_lock_init(&buf->rb_lock);
1001 spin_lock_init(&buf->rb_recovery_lock); 1142 spin_lock_init(&buf->rb_recovery_lock);
@@ -1022,7 +1163,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1022 rc = PTR_ERR(req); 1163 rc = PTR_ERR(req);
1023 goto out; 1164 goto out;
1024 } 1165 }
1025 req->rl_backchannel = false;
1026 list_add(&req->rl_list, &buf->rb_send_bufs); 1166 list_add(&req->rl_list, &buf->rb_send_bufs);
1027 } 1167 }
1028 1168
@@ -1040,6 +1180,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1040 list_add(&rep->rr_list, &buf->rb_recv_bufs); 1180 list_add(&rep->rr_list, &buf->rb_recv_bufs);
1041 } 1181 }
1042 1182
1183 rc = rpcrdma_sendctxs_create(r_xprt);
1184 if (rc)
1185 goto out;
1186
1043 return 0; 1187 return 0;
1044out: 1188out:
1045 rpcrdma_buffer_destroy(buf); 1189 rpcrdma_buffer_destroy(buf);
@@ -1116,6 +1260,8 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1116 cancel_delayed_work_sync(&buf->rb_recovery_worker); 1260 cancel_delayed_work_sync(&buf->rb_recovery_worker);
1117 cancel_delayed_work_sync(&buf->rb_refresh_worker); 1261 cancel_delayed_work_sync(&buf->rb_refresh_worker);
1118 1262
1263 rpcrdma_sendctxs_destroy(buf);
1264
1119 while (!list_empty(&buf->rb_recv_bufs)) { 1265 while (!list_empty(&buf->rb_recv_bufs)) {
1120 struct rpcrdma_rep *rep; 1266 struct rpcrdma_rep *rep;
1121 1267
@@ -1231,7 +1377,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
1231 struct rpcrdma_buffer *buffers = req->rl_buffer; 1377 struct rpcrdma_buffer *buffers = req->rl_buffer;
1232 struct rpcrdma_rep *rep = req->rl_reply; 1378 struct rpcrdma_rep *rep = req->rl_reply;
1233 1379
1234 req->rl_send_wr.num_sge = 0;
1235 req->rl_reply = NULL; 1380 req->rl_reply = NULL;
1236 1381
1237 spin_lock(&buffers->rb_lock); 1382 spin_lock(&buffers->rb_lock);
@@ -1363,7 +1508,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1363 struct rpcrdma_ep *ep, 1508 struct rpcrdma_ep *ep,
1364 struct rpcrdma_req *req) 1509 struct rpcrdma_req *req)
1365{ 1510{
1366 struct ib_send_wr *send_wr = &req->rl_send_wr; 1511 struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
1367 struct ib_send_wr *send_wr_fail; 1512 struct ib_send_wr *send_wr_fail;
1368 int rc; 1513 int rc;
1369 1514
@@ -1377,7 +1522,14 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1377 dprintk("RPC: %s: posting %d s/g entries\n", 1522 dprintk("RPC: %s: posting %d s/g entries\n",
1378 __func__, send_wr->num_sge); 1523 __func__, send_wr->num_sge);
1379 1524
1380 rpcrdma_set_signaled(ep, send_wr); 1525 if (!ep->rep_send_count ||
1526 test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
1527 send_wr->send_flags |= IB_SEND_SIGNALED;
1528 ep->rep_send_count = ep->rep_send_batch;
1529 } else {
1530 send_wr->send_flags &= ~IB_SEND_SIGNALED;
1531 --ep->rep_send_count;
1532 }
1381 rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); 1533 rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
1382 if (rc) 1534 if (rc)
1383 goto out_postsend_err; 1535 goto out_postsend_err;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e26a97d2f922..51686d9eac5f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -1,4 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2017 Oracle. All rights reserved.
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 * 4 *
4 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -93,8 +94,8 @@ enum {
93 */ 94 */
94 95
95struct rpcrdma_ep { 96struct rpcrdma_ep {
96 atomic_t rep_cqcount; 97 unsigned int rep_send_count;
97 int rep_cqinit; 98 unsigned int rep_send_batch;
98 int rep_connected; 99 int rep_connected;
99 struct ib_qp_init_attr rep_attr; 100 struct ib_qp_init_attr rep_attr;
100 wait_queue_head_t rep_connect_wait; 101 wait_queue_head_t rep_connect_wait;
@@ -104,25 +105,6 @@ struct rpcrdma_ep {
104 struct delayed_work rep_connect_worker; 105 struct delayed_work rep_connect_worker;
105}; 106};
106 107
107static inline void
108rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count)
109{
110 atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count);
111}
112
113/* To update send queue accounting, provider must take a
114 * send completion every now and then.
115 */
116static inline void
117rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr)
118{
119 send_wr->send_flags = 0;
120 if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) {
121 rpcrdma_init_cqcount(ep, 0);
122 send_wr->send_flags = IB_SEND_SIGNALED;
123 }
124}
125
126/* Pre-allocate extra Work Requests for handling backward receives 108/* Pre-allocate extra Work Requests for handling backward receives
127 * and sends. This is a fixed value because the Work Queues are 109 * and sends. This is a fixed value because the Work Queues are
128 * allocated when the forward channel is set up. 110 * allocated when the forward channel is set up.
@@ -164,12 +146,6 @@ rdmab_lkey(struct rpcrdma_regbuf *rb)
164 return rb->rg_iov.lkey; 146 return rb->rg_iov.lkey;
165} 147}
166 148
167static inline struct rpcrdma_msg *
168rdmab_to_msg(struct rpcrdma_regbuf *rb)
169{
170 return (struct rpcrdma_msg *)rb->rg_base;
171}
172
173static inline struct ib_device * 149static inline struct ib_device *
174rdmab_device(struct rpcrdma_regbuf *rb) 150rdmab_device(struct rpcrdma_regbuf *rb)
175{ 151{
@@ -202,22 +178,24 @@ enum {
202}; 178};
203 179
204/* 180/*
205 * struct rpcrdma_rep -- this structure encapsulates state required to recv 181 * struct rpcrdma_rep -- this structure encapsulates state required
206 * and complete a reply, asychronously. It needs several pieces of 182 * to receive and complete an RPC Reply, asychronously. It needs
207 * state: 183 * several pieces of state:
208 * o recv buffer (posted to provider)
209 * o ib_sge (also donated to provider)
210 * o status of reply (length, success or not)
211 * o bookkeeping state to get run by reply handler (list, etc)
212 * 184 *
213 * These are allocated during initialization, per-transport instance. 185 * o receive buffer and ib_sge (donated to provider)
186 * o status of receive (success or not, length, inv rkey)
187 * o bookkeeping state to get run by reply handler (XDR stream)
214 * 188 *
215 * N of these are associated with a transport instance, and stored in 189 * These structures are allocated during transport initialization.
216 * struct rpcrdma_buffer. N is the max number of outstanding requests. 190 * N of these are associated with a transport instance, managed by
191 * struct rpcrdma_buffer. N is the max number of outstanding RPCs.
217 */ 192 */
218 193
219struct rpcrdma_rep { 194struct rpcrdma_rep {
220 struct ib_cqe rr_cqe; 195 struct ib_cqe rr_cqe;
196 __be32 rr_xid;
197 __be32 rr_vers;
198 __be32 rr_proc;
221 int rr_wc_flags; 199 int rr_wc_flags;
222 u32 rr_inv_rkey; 200 u32 rr_inv_rkey;
223 struct rpcrdma_regbuf *rr_rdmabuf; 201 struct rpcrdma_regbuf *rr_rdmabuf;
@@ -225,10 +203,34 @@ struct rpcrdma_rep {
225 struct work_struct rr_work; 203 struct work_struct rr_work;
226 struct xdr_buf rr_hdrbuf; 204 struct xdr_buf rr_hdrbuf;
227 struct xdr_stream rr_stream; 205 struct xdr_stream rr_stream;
206 struct rpc_rqst *rr_rqst;
228 struct list_head rr_list; 207 struct list_head rr_list;
229 struct ib_recv_wr rr_recv_wr; 208 struct ib_recv_wr rr_recv_wr;
230}; 209};
231 210
211/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
212 */
213struct rpcrdma_req;
214struct rpcrdma_xprt;
215struct rpcrdma_sendctx {
216 struct ib_send_wr sc_wr;
217 struct ib_cqe sc_cqe;
218 struct rpcrdma_xprt *sc_xprt;
219 struct rpcrdma_req *sc_req;
220 unsigned int sc_unmap_count;
221 struct ib_sge sc_sges[];
222};
223
224/* Limit the number of SGEs that can be unmapped during one
225 * Send completion. This caps the amount of work a single
226 * completion can do before returning to the provider.
227 *
228 * Setting this to zero disables Send completion batching.
229 */
230enum {
231 RPCRDMA_MAX_SEND_BATCH = 7,
232};
233
232/* 234/*
233 * struct rpcrdma_mw - external memory region metadata 235 * struct rpcrdma_mw - external memory region metadata
234 * 236 *
@@ -340,26 +342,30 @@ enum {
340struct rpcrdma_buffer; 342struct rpcrdma_buffer;
341struct rpcrdma_req { 343struct rpcrdma_req {
342 struct list_head rl_list; 344 struct list_head rl_list;
343 unsigned int rl_mapped_sges;
344 unsigned int rl_connect_cookie; 345 unsigned int rl_connect_cookie;
345 struct rpcrdma_buffer *rl_buffer; 346 struct rpcrdma_buffer *rl_buffer;
346 struct rpcrdma_rep *rl_reply; 347 struct rpcrdma_rep *rl_reply;
347 struct xdr_stream rl_stream; 348 struct xdr_stream rl_stream;
348 struct xdr_buf rl_hdrbuf; 349 struct xdr_buf rl_hdrbuf;
349 struct ib_send_wr rl_send_wr; 350 struct rpcrdma_sendctx *rl_sendctx;
350 struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES];
351 struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ 351 struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */
352 struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ 352 struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */
353 struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */ 353 struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */
354 354
355 struct ib_cqe rl_cqe;
356 struct list_head rl_all; 355 struct list_head rl_all;
357 bool rl_backchannel; 356 unsigned long rl_flags;
358 357
359 struct list_head rl_registered; /* registered segments */ 358 struct list_head rl_registered; /* registered segments */
360 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; 359 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
361}; 360};
362 361
362/* rl_flags */
363enum {
364 RPCRDMA_REQ_F_BACKCHANNEL = 0,
365 RPCRDMA_REQ_F_PENDING,
366 RPCRDMA_REQ_F_TX_RESOURCES,
367};
368
363static inline void 369static inline void
364rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) 370rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
365{ 371{
@@ -399,12 +405,17 @@ struct rpcrdma_buffer {
399 struct list_head rb_mws; 405 struct list_head rb_mws;
400 struct list_head rb_all; 406 struct list_head rb_all;
401 407
408 unsigned long rb_sc_head;
409 unsigned long rb_sc_tail;
410 unsigned long rb_sc_last;
411 struct rpcrdma_sendctx **rb_sc_ctxs;
412
402 spinlock_t rb_lock; /* protect buf lists */ 413 spinlock_t rb_lock; /* protect buf lists */
403 int rb_send_count, rb_recv_count; 414 int rb_send_count, rb_recv_count;
404 struct list_head rb_send_bufs; 415 struct list_head rb_send_bufs;
405 struct list_head rb_recv_bufs; 416 struct list_head rb_recv_bufs;
406 u32 rb_max_requests; 417 u32 rb_max_requests;
407 atomic_t rb_credits; /* most recent credit grant */ 418 u32 rb_credits; /* most recent credit grant */
408 419
409 u32 rb_bc_srv_max_requests; 420 u32 rb_bc_srv_max_requests;
410 spinlock_t rb_reqslock; /* protect rb_allreqs */ 421 spinlock_t rb_reqslock; /* protect rb_allreqs */
@@ -453,10 +464,12 @@ struct rpcrdma_stats {
453 unsigned long mrs_recovered; 464 unsigned long mrs_recovered;
454 unsigned long mrs_orphaned; 465 unsigned long mrs_orphaned;
455 unsigned long mrs_allocated; 466 unsigned long mrs_allocated;
467 unsigned long empty_sendctx_q;
456 468
457 /* accessed when receiving a reply */ 469 /* accessed when receiving a reply */
458 unsigned long long total_rdma_reply; 470 unsigned long long total_rdma_reply;
459 unsigned long long fixup_copy_count; 471 unsigned long long fixup_copy_count;
472 unsigned long reply_waits_for_send;
460 unsigned long local_inv_needed; 473 unsigned long local_inv_needed;
461 unsigned long nomsg_call_count; 474 unsigned long nomsg_call_count;
462 unsigned long bcall_count; 475 unsigned long bcall_count;
@@ -473,8 +486,6 @@ struct rpcrdma_memreg_ops {
473 struct rpcrdma_mw **); 486 struct rpcrdma_mw **);
474 void (*ro_unmap_sync)(struct rpcrdma_xprt *, 487 void (*ro_unmap_sync)(struct rpcrdma_xprt *,
475 struct list_head *); 488 struct list_head *);
476 void (*ro_unmap_safe)(struct rpcrdma_xprt *,
477 struct rpcrdma_req *, bool);
478 void (*ro_recover_mr)(struct rpcrdma_mw *); 489 void (*ro_recover_mr)(struct rpcrdma_mw *);
479 int (*ro_open)(struct rpcrdma_ia *, 490 int (*ro_open)(struct rpcrdma_ia *,
480 struct rpcrdma_ep *, 491 struct rpcrdma_ep *,
@@ -532,6 +543,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
532bool frwr_is_supported(struct rpcrdma_ia *); 543bool frwr_is_supported(struct rpcrdma_ia *);
533bool fmr_is_supported(struct rpcrdma_ia *); 544bool fmr_is_supported(struct rpcrdma_ia *);
534 545
546extern struct workqueue_struct *rpcrdma_receive_wq;
547
535/* 548/*
536 * Endpoint calls - xprtrdma/verbs.c 549 * Endpoint calls - xprtrdma/verbs.c
537 */ 550 */
@@ -554,6 +567,8 @@ struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
554void rpcrdma_destroy_req(struct rpcrdma_req *); 567void rpcrdma_destroy_req(struct rpcrdma_req *);
555int rpcrdma_buffer_create(struct rpcrdma_xprt *); 568int rpcrdma_buffer_create(struct rpcrdma_xprt *);
556void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); 569void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
570struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
571void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
557 572
558struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); 573struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
559void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); 574void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
@@ -610,12 +625,18 @@ enum rpcrdma_chunktype {
610 rpcrdma_replych 625 rpcrdma_replych
611}; 626};
612 627
613bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *, 628int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
614 u32, struct xdr_buf *, enum rpcrdma_chunktype); 629 struct rpcrdma_req *req, u32 hdrlen,
615void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *); 630 struct xdr_buf *xdr,
631 enum rpcrdma_chunktype rtype);
632void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
616int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst); 633int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
617void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); 634void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
618void rpcrdma_reply_handler(struct work_struct *work); 635void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
636void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
637void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
638 struct rpcrdma_req *req);
639void rpcrdma_deferred_completion(struct work_struct *work);
619 640
620static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) 641static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
621{ 642{
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4dad5da388d6..9cc850c2719e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -552,6 +552,7 @@ static int xs_local_send_request(struct rpc_task *task)
552 default: 552 default:
553 dprintk("RPC: sendmsg returned unrecognized error %d\n", 553 dprintk("RPC: sendmsg returned unrecognized error %d\n",
554 -status); 554 -status);
555 /* fall through */
555 case -EPIPE: 556 case -EPIPE:
556 xs_close(xprt); 557 xs_close(xprt);
557 status = -ENOTCONN; 558 status = -ENOTCONN;
@@ -1611,6 +1612,7 @@ static void xs_tcp_state_change(struct sock *sk)
1611 xprt->connect_cookie++; 1612 xprt->connect_cookie++;
1612 clear_bit(XPRT_CONNECTED, &xprt->state); 1613 clear_bit(XPRT_CONNECTED, &xprt->state);
1613 xs_tcp_force_close(xprt); 1614 xs_tcp_force_close(xprt);
1615 /* fall through */
1614 case TCP_CLOSING: 1616 case TCP_CLOSING:
1615 /* 1617 /*
1616 * If the server closed down the connection, make sure that 1618 * If the server closed down the connection, make sure that
@@ -2368,6 +2370,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
2368 switch (ret) { 2370 switch (ret) {
2369 case 0: 2371 case 0:
2370 xs_set_srcport(transport, sock); 2372 xs_set_srcport(transport, sock);
2373 /* fall through */
2371 case -EINPROGRESS: 2374 case -EINPROGRESS:
2372 /* SYN_SENT! */ 2375 /* SYN_SENT! */
2373 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) 2376 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
@@ -2419,6 +2422,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2419 default: 2422 default:
2420 printk("%s: connect returned unhandled error %d\n", 2423 printk("%s: connect returned unhandled error %d\n",
2421 __func__, status); 2424 __func__, status);
2425 /* fall through */
2422 case -EADDRNOTAVAIL: 2426 case -EADDRNOTAVAIL:
2423 /* We're probably in TIME_WAIT. Get rid of existing socket, 2427 /* We're probably in TIME_WAIT. Get rid of existing socket,
2424 * and retry 2428 * and retry
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 0531b41d1f2d..74b9d916a58b 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -345,6 +345,8 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj)
345 return sizeof(struct switchdev_obj_port_vlan); 345 return sizeof(struct switchdev_obj_port_vlan);
346 case SWITCHDEV_OBJ_ID_PORT_MDB: 346 case SWITCHDEV_OBJ_ID_PORT_MDB:
347 return sizeof(struct switchdev_obj_port_mdb); 347 return sizeof(struct switchdev_obj_port_mdb);
348 case SWITCHDEV_OBJ_ID_HOST_MDB:
349 return sizeof(struct switchdev_obj_port_mdb);
348 default: 350 default:
349 BUG(); 351 BUG();
350 } 352 }
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 2bfaa9d4b403..37bb0bfbd936 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,7 +9,7 @@ tipc-y += addr.o bcast.o bearer.o \
9 core.o link.o discover.o msg.o \ 9 core.o link.o discover.o msg.o \
10 name_distr.o subscr.o monitor.o name_table.o net.o \ 10 name_distr.o subscr.o monitor.o name_table.o net.o \
11 netlink.o netlink_compat.o node.o socket.o eth_media.o \ 11 netlink.o netlink_compat.o node.o socket.o eth_media.o \
12 server.o socket.o 12 server.o socket.o group.o
13 13
14tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o 14tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
15tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o 15tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a140dd4a84af..329325bd553e 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -258,20 +258,20 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
258static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, 258static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts,
259 struct tipc_nlist *dests, u16 *cong_link_cnt) 259 struct tipc_nlist *dests, u16 *cong_link_cnt)
260{ 260{
261 struct tipc_dest *dst, *tmp;
261 struct sk_buff_head _pkts; 262 struct sk_buff_head _pkts;
262 struct u32_item *n, *tmp; 263 u32 dnode, selector;
263 u32 dst, selector;
264 264
265 selector = msg_link_selector(buf_msg(skb_peek(pkts))); 265 selector = msg_link_selector(buf_msg(skb_peek(pkts)));
266 skb_queue_head_init(&_pkts); 266 skb_queue_head_init(&_pkts);
267 267
268 list_for_each_entry_safe(n, tmp, &dests->list, list) { 268 list_for_each_entry_safe(dst, tmp, &dests->list, list) {
269 dst = n->value; 269 dnode = dst->node;
270 if (!tipc_msg_pskb_copy(dst, pkts, &_pkts)) 270 if (!tipc_msg_pskb_copy(dnode, pkts, &_pkts))
271 return -ENOMEM; 271 return -ENOMEM;
272 272
273 /* Any other return value than -ELINKCONG is ignored */ 273 /* Any other return value than -ELINKCONG is ignored */
274 if (tipc_node_xmit(net, &_pkts, dst, selector) == -ELINKCONG) 274 if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG)
275 (*cong_link_cnt)++; 275 (*cong_link_cnt)++;
276 } 276 }
277 return 0; 277 return 0;
@@ -554,7 +554,7 @@ void tipc_nlist_add(struct tipc_nlist *nl, u32 node)
554{ 554{
555 if (node == nl->self) 555 if (node == nl->self)
556 nl->local = true; 556 nl->local = true;
557 else if (u32_push(&nl->list, node)) 557 else if (tipc_dest_push(&nl->list, node, 0))
558 nl->remote++; 558 nl->remote++;
559} 559}
560 560
@@ -562,13 +562,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node)
562{ 562{
563 if (node == nl->self) 563 if (node == nl->self)
564 nl->local = false; 564 nl->local = false;
565 else if (u32_del(&nl->list, node)) 565 else if (tipc_dest_del(&nl->list, node, 0))
566 nl->remote--; 566 nl->remote--;
567} 567}
568 568
569void tipc_nlist_purge(struct tipc_nlist *nl) 569void tipc_nlist_purge(struct tipc_nlist *nl)
570{ 570{
571 u32_list_purge(&nl->list); 571 tipc_dest_list_purge(&nl->list);
572 nl->remote = 0; 572 nl->remote = 0;
573 nl->local = 0; 573 nl->local = 0;
574} 574}
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 5cc5398be722..964342689f2c 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -132,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net)
132 return &tipc_net(net)->node_list; 132 return &tipc_net(net)->node_list;
133} 133}
134 134
135static inline struct tipc_server *tipc_topsrv(struct net *net)
136{
137 return tipc_net(net)->topsrv;
138}
139
135static inline unsigned int tipc_hashfn(u32 addr) 140static inline unsigned int tipc_hashfn(u32 addr)
136{ 141{
137 return addr & (NODE_HTABLE_SIZE - 1); 142 return addr & (NODE_HTABLE_SIZE - 1);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 02462d67d191..92e4828c6b09 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -224,9 +224,9 @@ void tipc_disc_remove_dest(struct tipc_link_req *req)
224 * 224 *
225 * Called whenever a link setup request timer associated with a bearer expires. 225 * Called whenever a link setup request timer associated with a bearer expires.
226 */ 226 */
227static void disc_timeout(unsigned long data) 227static void disc_timeout(struct timer_list *t)
228{ 228{
229 struct tipc_link_req *req = (struct tipc_link_req *)data; 229 struct tipc_link_req *req = from_timer(req, t, timer);
230 struct sk_buff *skb; 230 struct sk_buff *skb;
231 int max_delay; 231 int max_delay;
232 232
@@ -292,7 +292,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b,
292 req->num_nodes = 0; 292 req->num_nodes = 0;
293 req->timer_intv = TIPC_LINK_REQ_INIT; 293 req->timer_intv = TIPC_LINK_REQ_INIT;
294 spin_lock_init(&req->lock); 294 spin_lock_init(&req->lock);
295 setup_timer(&req->timer, disc_timeout, (unsigned long)req); 295 timer_setup(&req->timer, disc_timeout, 0);
296 mod_timer(&req->timer, jiffies + req->timer_intv); 296 mod_timer(&req->timer, jiffies + req->timer_intv);
297 b->link_req = req; 297 b->link_req = req;
298 *skb = skb_clone(req->buf, GFP_ATOMIC); 298 *skb = skb_clone(req->buf, GFP_ATOMIC);
diff --git a/net/tipc/group.c b/net/tipc/group.c
new file mode 100644
index 000000000000..12777cac638a
--- /dev/null
+++ b/net/tipc/group.c
@@ -0,0 +1,871 @@
1/*
2 * net/tipc/group.c: TIPC group messaging code
3 *
4 * Copyright (c) 2017, Ericsson AB
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#include "core.h"
37#include "addr.h"
38#include "group.h"
39#include "bcast.h"
40#include "server.h"
41#include "msg.h"
42#include "socket.h"
43#include "node.h"
44#include "name_table.h"
45#include "subscr.h"
46
47#define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1)
48#define ADV_IDLE ADV_UNIT
49#define ADV_ACTIVE (ADV_UNIT * 12)
50
51enum mbr_state {
52 MBR_QUARANTINED,
53 MBR_DISCOVERED,
54 MBR_JOINING,
55 MBR_PUBLISHED,
56 MBR_JOINED,
57 MBR_PENDING,
58 MBR_ACTIVE,
59 MBR_RECLAIMING,
60 MBR_REMITTED,
61 MBR_LEAVING
62};
63
64struct tipc_member {
65 struct rb_node tree_node;
66 struct list_head list;
67 struct list_head congested;
68 struct sk_buff *event_msg;
69 struct sk_buff_head deferredq;
70 struct tipc_group *group;
71 u32 node;
72 u32 port;
73 u32 instance;
74 enum mbr_state state;
75 u16 advertised;
76 u16 window;
77 u16 bc_rcv_nxt;
78 u16 bc_syncpt;
79 u16 bc_acked;
80 bool usr_pending;
81};
82
83struct tipc_group {
84 struct rb_root members;
85 struct list_head congested;
86 struct list_head pending;
87 struct list_head active;
88 struct list_head reclaiming;
89 struct tipc_nlist dests;
90 struct net *net;
91 int subid;
92 u32 type;
93 u32 instance;
94 u32 domain;
95 u32 scope;
96 u32 portid;
97 u16 member_cnt;
98 u16 active_cnt;
99 u16 max_active;
100 u16 bc_snd_nxt;
101 u16 bc_ackers;
102 bool loopback;
103 bool events;
104};
105
106static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
107 int mtyp, struct sk_buff_head *xmitq);
108
109static void tipc_group_decr_active(struct tipc_group *grp,
110 struct tipc_member *m)
111{
112 if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
113 grp->active_cnt--;
114}
115
116static int tipc_group_rcvbuf_limit(struct tipc_group *grp)
117{
118 int max_active, active_pool, idle_pool;
119 int mcnt = grp->member_cnt + 1;
120
121 /* Limit simultaneous reception from other members */
122 max_active = min(mcnt / 8, 64);
123 max_active = max(max_active, 16);
124 grp->max_active = max_active;
125
126 /* Reserve blocks for active and idle members */
127 active_pool = max_active * ADV_ACTIVE;
128 idle_pool = (mcnt - max_active) * ADV_IDLE;
129
130 /* Scale to bytes, considering worst-case truesize/msgsize ratio */
131 return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4;
132}
133
134u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
135{
136 return grp->bc_snd_nxt;
137}
138
139static bool tipc_group_is_enabled(struct tipc_member *m)
140{
141 return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING;
142}
143
144static bool tipc_group_is_receiver(struct tipc_member *m)
145{
146 return m && m->state >= MBR_JOINED;
147}
148
149u32 tipc_group_exclude(struct tipc_group *grp)
150{
151 if (!grp->loopback)
152 return grp->portid;
153 return 0;
154}
155
156int tipc_group_size(struct tipc_group *grp)
157{
158 return grp->member_cnt;
159}
160
161struct tipc_group *tipc_group_create(struct net *net, u32 portid,
162 struct tipc_group_req *mreq)
163{
164 struct tipc_group *grp;
165 u32 type = mreq->type;
166
167 grp = kzalloc(sizeof(*grp), GFP_ATOMIC);
168 if (!grp)
169 return NULL;
170 tipc_nlist_init(&grp->dests, tipc_own_addr(net));
171 INIT_LIST_HEAD(&grp->congested);
172 INIT_LIST_HEAD(&grp->active);
173 INIT_LIST_HEAD(&grp->pending);
174 INIT_LIST_HEAD(&grp->reclaiming);
175 grp->members = RB_ROOT;
176 grp->net = net;
177 grp->portid = portid;
178 grp->domain = addr_domain(net, mreq->scope);
179 grp->type = type;
180 grp->instance = mreq->instance;
181 grp->scope = mreq->scope;
182 grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
183 grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
184 if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid))
185 return grp;
186 kfree(grp);
187 return NULL;
188}
189
190void tipc_group_delete(struct net *net, struct tipc_group *grp)
191{
192 struct rb_root *tree = &grp->members;
193 struct tipc_member *m, *tmp;
194 struct sk_buff_head xmitq;
195
196 __skb_queue_head_init(&xmitq);
197
198 rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
199 tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
200 list_del(&m->list);
201 kfree(m);
202 }
203 tipc_node_distr_xmit(net, &xmitq);
204 tipc_nlist_purge(&grp->dests);
205 tipc_topsrv_kern_unsubscr(net, grp->subid);
206 kfree(grp);
207}
208
209struct tipc_member *tipc_group_find_member(struct tipc_group *grp,
210 u32 node, u32 port)
211{
212 struct rb_node *n = grp->members.rb_node;
213 u64 nkey, key = (u64)node << 32 | port;
214 struct tipc_member *m;
215
216 while (n) {
217 m = container_of(n, struct tipc_member, tree_node);
218 nkey = (u64)m->node << 32 | m->port;
219 if (key < nkey)
220 n = n->rb_left;
221 else if (key > nkey)
222 n = n->rb_right;
223 else
224 return m;
225 }
226 return NULL;
227}
228
229static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp,
230 u32 node, u32 port)
231{
232 struct tipc_member *m;
233
234 m = tipc_group_find_member(grp, node, port);
235 if (m && tipc_group_is_enabled(m))
236 return m;
237 return NULL;
238}
239
240static struct tipc_member *tipc_group_find_node(struct tipc_group *grp,
241 u32 node)
242{
243 struct tipc_member *m;
244 struct rb_node *n;
245
246 for (n = rb_first(&grp->members); n; n = rb_next(n)) {
247 m = container_of(n, struct tipc_member, tree_node);
248 if (m->node == node)
249 return m;
250 }
251 return NULL;
252}
253
254static void tipc_group_add_to_tree(struct tipc_group *grp,
255 struct tipc_member *m)
256{
257 u64 nkey, key = (u64)m->node << 32 | m->port;
258 struct rb_node **n, *parent = NULL;
259 struct tipc_member *tmp;
260
261 n = &grp->members.rb_node;
262 while (*n) {
263 tmp = container_of(*n, struct tipc_member, tree_node);
264 parent = *n;
265 tmp = container_of(parent, struct tipc_member, tree_node);
266 nkey = (u64)tmp->node << 32 | tmp->port;
267 if (key < nkey)
268 n = &(*n)->rb_left;
269 else if (key > nkey)
270 n = &(*n)->rb_right;
271 else
272 return;
273 }
274 rb_link_node(&m->tree_node, parent, n);
275 rb_insert_color(&m->tree_node, &grp->members);
276}
277
278static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
279 u32 node, u32 port,
280 int state)
281{
282 struct tipc_member *m;
283
284 m = kzalloc(sizeof(*m), GFP_ATOMIC);
285 if (!m)
286 return NULL;
287 INIT_LIST_HEAD(&m->list);
288 INIT_LIST_HEAD(&m->congested);
289 __skb_queue_head_init(&m->deferredq);
290 m->group = grp;
291 m->node = node;
292 m->port = port;
293 m->bc_acked = grp->bc_snd_nxt - 1;
294 grp->member_cnt++;
295 tipc_group_add_to_tree(grp, m);
296 tipc_nlist_add(&grp->dests, m->node);
297 m->state = state;
298 return m;
299}
300
301void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port)
302{
303 tipc_group_create_member(grp, node, port, MBR_DISCOVERED);
304}
305
306static void tipc_group_delete_member(struct tipc_group *grp,
307 struct tipc_member *m)
308{
309 rb_erase(&m->tree_node, &grp->members);
310 grp->member_cnt--;
311
312 /* Check if we were waiting for replicast ack from this member */
313 if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1))
314 grp->bc_ackers--;
315
316 list_del_init(&m->list);
317 list_del_init(&m->congested);
318 tipc_group_decr_active(grp, m);
319
320 /* If last member on a node, remove node from dest list */
321 if (!tipc_group_find_node(grp, m->node))
322 tipc_nlist_del(&grp->dests, m->node);
323
324 kfree(m);
325}
326
327struct tipc_nlist *tipc_group_dests(struct tipc_group *grp)
328{
329 return &grp->dests;
330}
331
332void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
333 int *scope)
334{
335 seq->type = grp->type;
336 seq->lower = grp->instance;
337 seq->upper = grp->instance;
338 *scope = grp->scope;
339}
340
341void tipc_group_update_member(struct tipc_member *m, int len)
342{
343 struct tipc_group *grp = m->group;
344 struct tipc_member *_m, *tmp;
345
346 if (!tipc_group_is_enabled(m))
347 return;
348
349 m->window -= len;
350
351 if (m->window >= ADV_IDLE)
352 return;
353
354 if (!list_empty(&m->congested))
355 return;
356
357 /* Sort member into congested members' list */
358 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
359 if (m->window > _m->window)
360 continue;
361 list_add_tail(&m->congested, &_m->congested);
362 return;
363 }
364 list_add_tail(&m->congested, &grp->congested);
365}
366
/* tipc_group_update_bc_members() - update all receiving members after a
 * group broadcast/multicast send
 * @len: message length, charged against each enabled member's send window
 * @ack: true if the message requested explicit (replicast) acknowledgment
 */
void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
{
	u16 prev = grp->bc_snd_nxt - 1;
	struct tipc_member *m;
	struct rb_node *n;

	/* Charge the send against every enabled member's window, and mark
	 * the just-sent sequence number as acked by each of them
	 */
	for (n = rb_first(&grp->members); n; n = rb_next(n)) {
		m = container_of(n, struct tipc_member, tree_node);
		if (tipc_group_is_enabled(m)) {
			tipc_group_update_member(m, len);
			m->bc_acked = prev;
		}
	}

	/* Mark number of acknowledges to expect, if any */
	if (ack)
		grp->bc_ackers = grp->member_cnt;
	grp->bc_snd_nxt++;
}
386
/* tipc_group_cong() - check if a unicast of @len bytes to member
 * (@dnode, @dport) would congest the destination
 * @mbr: returns the destination member, or NULL if not found
 * Return: true if the sender must block, false if the send can proceed
 */
bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
		     int len, struct tipc_member **mbr)
{
	struct sk_buff_head xmitq;
	struct tipc_member *m;
	int adv, state;

	m = tipc_group_find_dest(grp, dnode, dport);
	*mbr = m;
	if (!m)
		return false;
	if (m->usr_pending)
		return true;
	if (m->window >= len)
		return false;
	/* Sender will block; remember so we can wake it up later */
	m->usr_pending = true;

	/* If not fully advertised, do it now to prevent mutual blocking */
	adv = m->advertised;
	state = m->state;
	if (state < MBR_JOINED)
		return true;
	if (state == MBR_JOINED && adv == ADV_IDLE)
		return true;
	if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
		return true;
	if (state == MBR_PENDING && adv == ADV_IDLE)
		return true;
	skb_queue_head_init(&xmitq);
	tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
	tipc_node_distr_xmit(grp->net, &xmitq);
	return true;
}
420
421bool tipc_group_bc_cong(struct tipc_group *grp, int len)
422{
423 struct tipc_member *m = NULL;
424
425 /* If prev bcast was replicast, reject until all receivers have acked */
426 if (grp->bc_ackers)
427 return true;
428
429 if (list_empty(&grp->congested))
430 return false;
431
432 m = list_first_entry(&grp->congested, struct tipc_member, congested);
433 if (m->window >= len)
434 return false;
435
436 return tipc_group_cong(grp, m->node, m->port, len, &m);
437}
438
/* tipc_group_sort_msg() - sort msg into queue by bcast sequence number
 * @skb: arriving message
 * @defq: per-member deferred queue, kept ordered on group bcast seqno
 */
static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq)
{
	struct tipc_msg *_hdr, *hdr = buf_msg(skb);
	u16 bc_seqno = msg_grp_bc_seqno(hdr);
	struct sk_buff *_skb, *tmp;
	int mtyp = msg_type(hdr);

	/* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */
	if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) {
		skb_queue_walk_safe(defq, _skb, tmp) {
			_hdr = buf_msg(_skb);
			/* Insert before first queued msg with higher seqno */
			if (!less(bc_seqno, msg_grp_bc_seqno(_hdr)))
				continue;
			__skb_queue_before(defq, _skb, skb);
			return;
		}
		/* Bcast was not bypassed, - add to tail */
	}
	/* Unicasts are never bypassed, - always add to tail */
	__skb_queue_tail(defq, skb);
}
462
/* tipc_group_filter_msg() - determine if we should accept arriving message
 * Dequeues one message from @inputq, sorts it into the originating member's
 * deferred queue, then delivers or drops every message from that queue that
 * is now in sequence. May append protocol messages (ACK/ADV) to @xmitq.
 */
void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
			   struct sk_buff_head *xmitq)
{
	struct sk_buff *skb = __skb_dequeue(inputq);
	bool ack, deliver, update, leave = false;
	struct sk_buff_head *defq;
	struct tipc_member *m;
	struct tipc_msg *hdr;
	u32 node, port;
	int mtyp, blks;

	if (!skb)
		return;

	hdr = buf_msg(skb);
	node = msg_orignode(hdr);
	port = msg_origport(hdr);

	/* Accept only group messages from known, receiving members */
	if (!msg_in_group(hdr))
		goto drop;

	m = tipc_group_find_member(grp, node, port);
	if (!tipc_group_is_receiver(m))
		goto drop;

	/* Drop duplicates, i.e. bcast seqnos we have already passed */
	if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
		goto drop;

	TIPC_SKB_CB(skb)->orig_member = m->instance;
	defq = &m->deferredq;
	tipc_group_sort_msg(skb, defq);

	/* Deliver every message now in sequence at the queue head */
	while ((skb = skb_peek(defq))) {
		hdr = buf_msg(skb);
		mtyp = msg_type(hdr);
		deliver = true;
		ack = false;
		update = false;

		/* Stop at first bcast/mcast still out of sequence */
		if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
			break;

		/* Decide what to do with message */
		switch (mtyp) {
		case TIPC_GRP_MCAST_MSG:
			/* Mcast for another instance: don't deliver, but
			 * still return the advertised space it consumed
			 */
			if (msg_nameinst(hdr) != grp->instance) {
				update = true;
				deliver = false;
			}
			/* Fall thru */
		case TIPC_GRP_BCAST_MSG:
			m->bc_rcv_nxt++;
			ack = msg_grp_bc_ack_req(hdr);
			break;
		case TIPC_GRP_UCAST_MSG:
			break;
		case TIPC_GRP_MEMBER_EVT:
			if (m->state == MBR_LEAVING)
				leave = true;
			if (!grp->events)
				deliver = false;
			break;
		default:
			break;
		}

		/* Execute decisions */
		__skb_dequeue(defq);
		if (deliver)
			__skb_queue_tail(inputq, skb);
		else
			kfree_skb(skb);

		if (ack)
			tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq);

		if (leave) {
			__skb_queue_purge(defq);
			tipc_group_delete_member(grp, m);
			break;
		}
		if (!update)
			continue;

		/* Return advertised space for the undelivered mcast */
		blks = msg_blocks(hdr);
		tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
	}
	return;
drop:
	kfree_skb(skb);
}
556
/* tipc_group_update_rcv_win() - account for @blks received message blocks
 * from member (@node, @port), advance its receive-side state machine and
 * advertise more receive space when appropriate.
 * May append GRP_RECLAIM/GRP_ADV protocol messages to @xmitq.
 */
void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
			       u32 port, struct sk_buff_head *xmitq)
{
	struct list_head *active = &grp->active;
	int max_active = grp->max_active;
	/* Start reclaiming space before the active limit is reached */
	int reclaim_limit = max_active * 3 / 4;
	int active_cnt = grp->active_cnt;
	struct tipc_member *m, *rm;

	m = tipc_group_find_member(grp, node, port);
	if (!m)
		return;

	m->advertised -= blks;

	switch (m->state) {
	case MBR_JOINED:
		/* Reclaim advertised space from least active member */
		if (!list_empty(active) && active_cnt >= reclaim_limit) {
			rm = list_first_entry(active, struct tipc_member, list);
			rm->state = MBR_RECLAIMING;
			list_move_tail(&rm->list, &grp->reclaiming);
			tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
		}
		/* If max active, become pending and wait for reclaimed space */
		if (active_cnt >= max_active) {
			m->state = MBR_PENDING;
			list_add_tail(&m->list, &grp->pending);
			break;
		}
		/* Otherwise become active */
		m->state = MBR_ACTIVE;
		list_add_tail(&m->list, &grp->active);
		grp->active_cnt++;
		/* Fall through */
	case MBR_ACTIVE:
		/* Keep most recently heard-from member at list tail */
		if (!list_is_last(&m->list, &grp->active))
			list_move_tail(&m->list, &grp->active);
		if (m->advertised > (ADV_ACTIVE * 3 / 4))
			break;
		tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
		break;
	case MBR_REMITTED:
		if (m->advertised > ADV_IDLE)
			break;
		m->state = MBR_JOINED;
		/* More consumed than was advertised: re-advertise and warn */
		if (m->advertised < ADV_IDLE) {
			pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
			tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
		}
		break;
	case MBR_RECLAIMING:
	case MBR_DISCOVERED:
	case MBR_JOINING:
	case MBR_LEAVING:
	default:
		/* No advertising while in transient states */
		break;
	}
}
616
617static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
618 int mtyp, struct sk_buff_head *xmitq)
619{
620 struct tipc_msg *hdr;
621 struct sk_buff *skb;
622 int adv = 0;
623
624 skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0,
625 m->node, tipc_own_addr(grp->net),
626 m->port, grp->portid, 0);
627 if (!skb)
628 return;
629
630 if (m->state == MBR_ACTIVE)
631 adv = ADV_ACTIVE - m->advertised;
632 else if (m->state == MBR_JOINED || m->state == MBR_PENDING)
633 adv = ADV_IDLE - m->advertised;
634
635 hdr = buf_msg(skb);
636
637 if (mtyp == GRP_JOIN_MSG) {
638 msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
639 msg_set_adv_win(hdr, adv);
640 m->advertised += adv;
641 } else if (mtyp == GRP_LEAVE_MSG) {
642 msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
643 } else if (mtyp == GRP_ADV_MSG) {
644 msg_set_adv_win(hdr, adv);
645 m->advertised += adv;
646 } else if (mtyp == GRP_ACK_MSG) {
647 msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt);
648 } else if (mtyp == GRP_REMIT_MSG) {
649 msg_set_grp_remitted(hdr, m->window);
650 }
651 __skb_queue_tail(xmitq, skb);
652}
653
/* tipc_group_proto_rcv() - receive a group protocol message from a peer
 * member and update local member state accordingly.
 * @usr_wakeup: set true when a blocked user socket should be woken up
 * May queue held-back member events to @inputq and protocol replies to @xmitq.
 */
void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
			  struct tipc_msg *hdr, struct sk_buff_head *inputq,
			  struct sk_buff_head *xmitq)
{
	u32 node = msg_orignode(hdr);
	u32 port = msg_origport(hdr);
	struct tipc_member *m, *pm;
	struct tipc_msg *ehdr;
	u16 remitted, in_flight;

	if (!grp)
		return;

	m = tipc_group_find_member(grp, node, port);

	switch (msg_type(hdr)) {
	case GRP_JOIN_MSG:
		/* JOIN may arrive before the corresponding PUBLISH event */
		if (!m)
			m = tipc_group_create_member(grp, node, port,
						     MBR_QUARANTINED);
		if (!m)
			return;
		m->bc_syncpt = msg_grp_bc_syncpt(hdr);
		m->bc_rcv_nxt = m->bc_syncpt;
		m->window += msg_adv_win(hdr);

		/* Wait until PUBLISH event is received */
		if (m->state == MBR_DISCOVERED) {
			m->state = MBR_JOINING;
		} else if (m->state == MBR_PUBLISHED) {
			/* PUBLISH already seen: deliver held-back event now */
			m->state = MBR_JOINED;
			*usr_wakeup = true;
			m->usr_pending = false;
			tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
			ehdr = buf_msg(m->event_msg);
			msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
			__skb_queue_tail(inputq, m->event_msg);
		}
		if (m->window < ADV_IDLE)
			tipc_group_update_member(m, 0);
		else
			list_del_init(&m->congested);
		return;
	case GRP_LEAVE_MSG:
		if (!m)
			return;
		m->bc_syncpt = msg_grp_bc_syncpt(hdr);

		/* Wait until WITHDRAW event is received */
		if (m->state != MBR_LEAVING) {
			tipc_group_decr_active(grp, m);
			m->state = MBR_LEAVING;
			return;
		}
		/* Otherwise deliver already received WITHDRAW event */
		ehdr = buf_msg(m->event_msg);
		msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
		__skb_queue_tail(inputq, m->event_msg);
		*usr_wakeup = true;
		list_del_init(&m->congested);
		return;
	case GRP_ADV_MSG:
		/* Peer granted us more send window */
		if (!m)
			return;
		m->window += msg_adv_win(hdr);
		*usr_wakeup = m->usr_pending;
		m->usr_pending = false;
		list_del_init(&m->congested);
		return;
	case GRP_ACK_MSG:
		if (!m)
			return;
		m->bc_acked = msg_grp_bc_acked(hdr);
		/* NOTE(review): assumes an ACK only arrives while bc_ackers
		 * is non-zero; a stray ACK would wrap the counter - confirm
		 */
		if (--grp->bc_ackers)
			break;
		/* All expected replicast acks received: unblock the sender */
		*usr_wakeup = true;
		m->usr_pending = false;
		return;
	case GRP_RECLAIM_MSG:
		/* Peer wants its advertised space back: remit our window */
		if (!m)
			return;
		*usr_wakeup = m->usr_pending;
		m->usr_pending = false;
		tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
		m->window = ADV_IDLE;
		return;
	case GRP_REMIT_MSG:
		if (!m || m->state != MBR_RECLAIMING)
			return;

		list_del_init(&m->list);
		grp->active_cnt--;
		remitted = msg_grp_remitted(hdr);

		/* Messages preceding the REMIT still in receive queue */
		if (m->advertised > remitted) {
			m->state = MBR_REMITTED;
			in_flight = m->advertised - remitted;
		}
		/* All messages preceding the REMIT have been read */
		if (m->advertised <= remitted) {
			m->state = MBR_JOINED;
			in_flight = 0;
		}
		/* ..and the REMIT overtaken by more messages => re-advertise */
		if (m->advertised < remitted)
			tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);

		m->advertised = ADV_IDLE + in_flight;

		/* Set oldest pending member to active and advertise */
		if (list_empty(&grp->pending))
			return;
		pm = list_first_entry(&grp->pending, struct tipc_member, list);
		pm->state = MBR_ACTIVE;
		list_move_tail(&pm->list, &grp->active);
		grp->active_cnt++;
		if (pm->advertised <= (ADV_ACTIVE * 3 / 4))
			tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
		return;
	default:
		pr_warn("Received unknown GROUP_PROTO message\n");
	}
}
778
/* tipc_group_member_evt() - receive and handle a member up/down event
 * Converts the topology event @skb into a TIPC_GRP_MEMBER_EVT message and
 * either delivers it to @inputq or holds it back until the matching
 * JOIN/LEAVE protocol message arrives.
 * @sk_rcvbuf: returns the updated receive buffer limit for the group
 */
void tipc_group_member_evt(struct tipc_group *grp,
			   bool *usr_wakeup,
			   int *sk_rcvbuf,
			   struct sk_buff *skb,
			   struct sk_buff_head *inputq,
			   struct sk_buff_head *xmitq)
{
	struct tipc_msg *hdr = buf_msg(skb);
	struct tipc_event *evt = (void *)msg_data(hdr);
	u32 instance = evt->found_lower;
	u32 node = evt->port.node;
	u32 port = evt->port.ref;
	int event = evt->event;
	struct tipc_member *m;
	struct net *net;
	bool node_up;
	u32 self;

	if (!grp)
		goto drop;

	/* Ignore our own membership event unless loopback is wanted */
	net = grp->net;
	self = tipc_own_addr(net);
	if (!grp->loopback && node == self && port == grp->portid)
		goto drop;

	/* Convert message before delivery to user */
	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
	msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
	msg_set_type(hdr, TIPC_GRP_MEMBER_EVT);
	msg_set_origport(hdr, port);
	msg_set_orignode(hdr, node);
	msg_set_nametype(hdr, grp->type);
	msg_set_grp_evt(hdr, event);

	m = tipc_group_find_member(grp, node, port);

	if (event == TIPC_PUBLISHED) {
		if (!m)
			m = tipc_group_create_member(grp, node, port,
						     MBR_DISCOVERED);
		if (!m)
			goto drop;

		/* Hold back event if JOIN message not yet received */
		if (m->state == MBR_DISCOVERED) {
			m->event_msg = skb;
			m->state = MBR_PUBLISHED;
		} else {
			/* JOIN already seen: deliver the event immediately */
			msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
			__skb_queue_tail(inputq, skb);
			m->state = MBR_JOINED;
			*usr_wakeup = true;
			m->usr_pending = false;
		}
		m->instance = instance;
		TIPC_SKB_CB(skb)->orig_member = m->instance;
		tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
		if (m->window < ADV_IDLE)
			tipc_group_update_member(m, 0);
		else
			list_del_init(&m->congested);
	} else if (event == TIPC_WITHDRAWN) {
		if (!m)
			goto drop;

		TIPC_SKB_CB(skb)->orig_member = m->instance;

		*usr_wakeup = true;
		m->usr_pending = false;
		node_up = tipc_node_is_up(net, node);

		/* Hold back event if more messages might be expected */
		if (m->state != MBR_LEAVING && node_up) {
			m->event_msg = skb;
			tipc_group_decr_active(grp, m);
			m->state = MBR_LEAVING;
		} else {
			/* Stamp event with the peer's syncpoint when it is
			 * alive, or our own next expected seqno when down
			 */
			if (node_up)
				msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
			else
				msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
			__skb_queue_tail(inputq, skb);
		}
		list_del_init(&m->congested);
	}
	*sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
	return;
drop:
	kfree_skb(skb);
}
diff --git a/net/tipc/group.h b/net/tipc/group.h
new file mode 100644
index 000000000000..d525e1cd7de5
--- /dev/null
+++ b/net/tipc/group.h
@@ -0,0 +1,73 @@
1/*
2 * net/tipc/group.h: Include file for TIPC group unicast/multicast functions
3 *
4 * Copyright (c) 2017, Ericsson AB
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#ifndef _TIPC_GROUP_H
37#define _TIPC_GROUP_H
38
39#include "core.h"
40
41struct tipc_group;
42struct tipc_member;
43struct tipc_msg;
44
45struct tipc_group *tipc_group_create(struct net *net, u32 portid,
46 struct tipc_group_req *mreq);
47void tipc_group_delete(struct net *net, struct tipc_group *grp);
48void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port);
49struct tipc_nlist *tipc_group_dests(struct tipc_group *grp);
50void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
51 int *scope);
52u32 tipc_group_exclude(struct tipc_group *grp);
53void tipc_group_filter_msg(struct tipc_group *grp,
54 struct sk_buff_head *inputq,
55 struct sk_buff_head *xmitq);
56void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup,
57 int *sk_rcvbuf, struct sk_buff *skb,
58 struct sk_buff_head *inputq,
59 struct sk_buff_head *xmitq);
60void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup,
61 struct tipc_msg *hdr,
62 struct sk_buff_head *inputq,
63 struct sk_buff_head *xmitq);
64void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack);
65bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
66 int len, struct tipc_member **m);
67bool tipc_group_bc_cong(struct tipc_group *grp, int len);
68void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
69 u32 port, struct sk_buff_head *xmitq);
70u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
71void tipc_group_update_member(struct tipc_member *m, int len);
72int tipc_group_size(struct tipc_group *grp);
73#endif
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ac0144f532aa..6bce0b1117bd 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -239,7 +239,8 @@ static int link_is_up(struct tipc_link *l)
239static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, 239static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
240 struct sk_buff_head *xmitq); 240 struct sk_buff_head *xmitq);
241static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, 241static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
242 u16 rcvgap, int tolerance, int priority, 242 bool probe_reply, u16 rcvgap,
243 int tolerance, int priority,
243 struct sk_buff_head *xmitq); 244 struct sk_buff_head *xmitq);
244static void link_print(struct tipc_link *l, const char *str); 245static void link_print(struct tipc_link *l, const char *str);
245static int tipc_link_build_nack_msg(struct tipc_link *l, 246static int tipc_link_build_nack_msg(struct tipc_link *l,
@@ -773,7 +774,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
773 } 774 }
774 775
775 if (state || probe || setup) 776 if (state || probe || setup)
776 tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq); 777 tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq);
777 778
778 return rc; 779 return rc;
779} 780}
@@ -1039,6 +1040,7 @@ int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker,
1039static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, 1040static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
1040 struct sk_buff_head *inputq) 1041 struct sk_buff_head *inputq)
1041{ 1042{
1043 struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq;
1042 struct tipc_msg *hdr = buf_msg(skb); 1044 struct tipc_msg *hdr = buf_msg(skb);
1043 1045
1044 switch (msg_user(hdr)) { 1046 switch (msg_user(hdr)) {
@@ -1046,13 +1048,16 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
1046 case TIPC_MEDIUM_IMPORTANCE: 1048 case TIPC_MEDIUM_IMPORTANCE:
1047 case TIPC_HIGH_IMPORTANCE: 1049 case TIPC_HIGH_IMPORTANCE:
1048 case TIPC_CRITICAL_IMPORTANCE: 1050 case TIPC_CRITICAL_IMPORTANCE:
1049 if (unlikely(msg_type(hdr) == TIPC_MCAST_MSG)) { 1051 if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) {
1050 skb_queue_tail(l->bc_rcvlink->inputq, skb); 1052 skb_queue_tail(mc_inputq, skb);
1051 return true; 1053 return true;
1052 } 1054 }
1053 case CONN_MANAGER: 1055 case CONN_MANAGER:
1054 skb_queue_tail(inputq, skb); 1056 skb_queue_tail(inputq, skb);
1055 return true; 1057 return true;
1058 case GROUP_PROTOCOL:
1059 skb_queue_tail(mc_inputq, skb);
1060 return true;
1056 case NAME_DISTRIBUTOR: 1061 case NAME_DISTRIBUTOR:
1057 l->bc_rcvlink->state = LINK_ESTABLISHED; 1062 l->bc_rcvlink->state = LINK_ESTABLISHED;
1058 skb_queue_tail(l->namedq, skb); 1063 skb_queue_tail(l->namedq, skb);
@@ -1170,7 +1175,7 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
1170 /* Unicast ACK */ 1175 /* Unicast ACK */
1171 l->rcv_unacked = 0; 1176 l->rcv_unacked = 0;
1172 l->stats.sent_acks++; 1177 l->stats.sent_acks++;
1173 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); 1178 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
1174 return 0; 1179 return 0;
1175} 1180}
1176 1181
@@ -1184,7 +1189,7 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
1184 if (l->state == LINK_ESTABLISHING) 1189 if (l->state == LINK_ESTABLISHING)
1185 mtyp = ACTIVATE_MSG; 1190 mtyp = ACTIVATE_MSG;
1186 1191
1187 tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); 1192 tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq);
1188 1193
1189 /* Inform peer that this endpoint is going down if applicable */ 1194 /* Inform peer that this endpoint is going down if applicable */
1190 skb = skb_peek_tail(xmitq); 1195 skb = skb_peek_tail(xmitq);
@@ -1211,7 +1216,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
1211 } 1216 }
1212 1217
1213 if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) 1218 if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
1214 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); 1219 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
1215 return 0; 1220 return 0;
1216} 1221}
1217 1222
@@ -1285,7 +1290,8 @@ drop:
1285} 1290}
1286 1291
1287static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, 1292static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1288 u16 rcvgap, int tolerance, int priority, 1293 bool probe_reply, u16 rcvgap,
1294 int tolerance, int priority,
1289 struct sk_buff_head *xmitq) 1295 struct sk_buff_head *xmitq)
1290{ 1296{
1291 struct tipc_link *bcl = l->bc_rcvlink; 1297 struct tipc_link *bcl = l->bc_rcvlink;
@@ -1333,6 +1339,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1333 msg_set_seq_gap(hdr, rcvgap); 1339 msg_set_seq_gap(hdr, rcvgap);
1334 msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); 1340 msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
1335 msg_set_probe(hdr, probe); 1341 msg_set_probe(hdr, probe);
1342 msg_set_is_keepalive(hdr, probe || probe_reply);
1336 tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); 1343 tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
1337 msg_set_size(hdr, INT_H_SIZE + dlen); 1344 msg_set_size(hdr, INT_H_SIZE + dlen);
1338 skb_trim(skb, INT_H_SIZE + dlen); 1345 skb_trim(skb, INT_H_SIZE + dlen);
@@ -1438,6 +1445,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1438 u16 rcv_nxt = l->rcv_nxt; 1445 u16 rcv_nxt = l->rcv_nxt;
1439 u16 dlen = msg_data_sz(hdr); 1446 u16 dlen = msg_data_sz(hdr);
1440 int mtyp = msg_type(hdr); 1447 int mtyp = msg_type(hdr);
1448 bool reply = msg_probe(hdr);
1441 void *data; 1449 void *data;
1442 char *if_name; 1450 char *if_name;
1443 int rc = 0; 1451 int rc = 0;
@@ -1524,9 +1532,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1524 /* Send NACK if peer has sent pkts we haven't received yet */ 1532 /* Send NACK if peer has sent pkts we haven't received yet */
1525 if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) 1533 if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
1526 rcvgap = peers_snd_nxt - l->rcv_nxt; 1534 rcvgap = peers_snd_nxt - l->rcv_nxt;
1527 if (rcvgap || (msg_probe(hdr))) 1535 if (rcvgap || reply)
1528 tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, 1536 tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
1529 0, 0, xmitq); 1537 rcvgap, 0, 0, xmitq);
1530 tipc_link_release_pkts(l, ack); 1538 tipc_link_release_pkts(l, ack);
1531 1539
1532 /* If NACK, retransmit will now start at right position */ 1540 /* If NACK, retransmit will now start at right position */
@@ -2118,14 +2126,14 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
2118 struct sk_buff_head *xmitq) 2126 struct sk_buff_head *xmitq)
2119{ 2127{
2120 l->tolerance = tol; 2128 l->tolerance = tol;
2121 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq); 2129 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
2122} 2130}
2123 2131
2124void tipc_link_set_prio(struct tipc_link *l, u32 prio, 2132void tipc_link_set_prio(struct tipc_link *l, u32 prio,
2125 struct sk_buff_head *xmitq) 2133 struct sk_buff_head *xmitq)
2126{ 2134{
2127 l->priority = prio; 2135 l->priority = prio;
2128 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq); 2136 tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq);
2129} 2137}
2130 2138
2131void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) 2139void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 9e109bb1a207..8e884ed06d4b 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -530,8 +530,11 @@ void tipc_mon_prep(struct net *net, void *data, int *dlen,
530 u16 gen = mon->dom_gen; 530 u16 gen = mon->dom_gen;
531 u16 len; 531 u16 len;
532 532
533 if (!tipc_mon_is_active(net, mon)) 533 /* Send invalid record if not active */
534 if (!tipc_mon_is_active(net, mon)) {
535 dom->len = 0;
534 return; 536 return;
537 }
535 538
536 /* Send only a dummy record with ack if peer has acked our last sent */ 539 /* Send only a dummy record with ack if peer has acked our last sent */
537 if (likely(state->acked_gen == gen)) { 540 if (likely(state->acked_gen == gen)) {
@@ -559,6 +562,12 @@ void tipc_mon_get_state(struct net *net, u32 addr,
559 struct tipc_monitor *mon = tipc_monitor(net, bearer_id); 562 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
560 struct tipc_peer *peer; 563 struct tipc_peer *peer;
561 564
565 if (!tipc_mon_is_active(net, mon)) {
566 state->probing = false;
567 state->monitoring = true;
568 return;
569 }
570
562 /* Used cached state if table has not changed */ 571 /* Used cached state if table has not changed */
563 if (!state->probing && 572 if (!state->probing &&
564 (state->list_gen == mon->list_gen) && 573 (state->list_gen == mon->list_gen) &&
@@ -578,9 +587,9 @@ void tipc_mon_get_state(struct net *net, u32 addr,
578 read_unlock_bh(&mon->lock); 587 read_unlock_bh(&mon->lock);
579} 588}
580 589
581static void mon_timeout(unsigned long m) 590static void mon_timeout(struct timer_list *t)
582{ 591{
583 struct tipc_monitor *mon = (void *)m; 592 struct tipc_monitor *mon = from_timer(mon, t, timer);
584 struct tipc_peer *self; 593 struct tipc_peer *self;
585 int best_member_cnt = dom_size(mon->peer_cnt) - 1; 594 int best_member_cnt = dom_size(mon->peer_cnt) - 1;
586 595
@@ -623,7 +632,7 @@ int tipc_mon_create(struct net *net, int bearer_id)
623 self->is_up = true; 632 self->is_up = true;
624 self->is_head = true; 633 self->is_head = true;
625 INIT_LIST_HEAD(&self->list); 634 INIT_LIST_HEAD(&self->list);
626 setup_timer(&mon->timer, mon_timeout, (unsigned long)mon); 635 timer_setup(&mon->timer, mon_timeout, 0);
627 mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff)); 636 mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
628 mod_timer(&mon->timer, jiffies + mon->timer_intv); 637 mod_timer(&mon->timer, jiffies + mon->timer_intv);
629 return 0; 638 return 0;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 17146c16ee2d..b0d07b35909d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -174,7 +174,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
174 174
175 if (fragid == LAST_FRAGMENT) { 175 if (fragid == LAST_FRAGMENT) {
176 TIPC_SKB_CB(head)->validated = false; 176 TIPC_SKB_CB(head)->validated = false;
177 if (unlikely(!tipc_msg_validate(head))) 177 if (unlikely(!tipc_msg_validate(&head)))
178 goto err; 178 goto err;
179 *buf = head; 179 *buf = head;
180 TIPC_SKB_CB(head)->tail = NULL; 180 TIPC_SKB_CB(head)->tail = NULL;
@@ -201,11 +201,21 @@ err:
201 * TIPC will ignore the excess, under the assumption that it is optional info 201 * TIPC will ignore the excess, under the assumption that it is optional info
202 * introduced by a later release of the protocol. 202 * introduced by a later release of the protocol.
203 */ 203 */
204bool tipc_msg_validate(struct sk_buff *skb) 204bool tipc_msg_validate(struct sk_buff **_skb)
205{ 205{
206 struct tipc_msg *msg; 206 struct sk_buff *skb = *_skb;
207 struct tipc_msg *hdr;
207 int msz, hsz; 208 int msz, hsz;
208 209
210 /* Ensure that flow control ratio condition is satisfied */
211 if (unlikely(skb->truesize / buf_roundup_len(skb) > 4)) {
212 skb = skb_copy(skb, GFP_ATOMIC);
213 if (!skb)
214 return false;
215 kfree_skb(*_skb);
216 *_skb = skb;
217 }
218
209 if (unlikely(TIPC_SKB_CB(skb)->validated)) 219 if (unlikely(TIPC_SKB_CB(skb)->validated))
210 return true; 220 return true;
211 if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE))) 221 if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE)))
@@ -217,11 +227,11 @@ bool tipc_msg_validate(struct sk_buff *skb)
217 if (unlikely(!pskb_may_pull(skb, hsz))) 227 if (unlikely(!pskb_may_pull(skb, hsz)))
218 return false; 228 return false;
219 229
220 msg = buf_msg(skb); 230 hdr = buf_msg(skb);
221 if (unlikely(msg_version(msg) != TIPC_VERSION)) 231 if (unlikely(msg_version(hdr) != TIPC_VERSION))
222 return false; 232 return false;
223 233
224 msz = msg_size(msg); 234 msz = msg_size(hdr);
225 if (unlikely(msz < hsz)) 235 if (unlikely(msz < hsz))
226 return false; 236 return false;
227 if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE)) 237 if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE))
@@ -411,7 +421,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
411 skb_pull(*iskb, offset); 421 skb_pull(*iskb, offset);
412 imsz = msg_size(buf_msg(*iskb)); 422 imsz = msg_size(buf_msg(*iskb));
413 skb_trim(*iskb, imsz); 423 skb_trim(*iskb, imsz);
414 if (unlikely(!tipc_msg_validate(*iskb))) 424 if (unlikely(!tipc_msg_validate(iskb)))
415 goto none; 425 goto none;
416 *pos += align(imsz); 426 *pos += align(imsz);
417 return true; 427 return true;
@@ -666,3 +676,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
666 } 676 }
667 kfree_skb(skb); 677 kfree_skb(skb);
668} 678}
679
680void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
681 struct sk_buff_head *xmitq)
682{
683 if (tipc_msg_reverse(tipc_own_addr(net), &skb, err))
684 __skb_queue_tail(xmitq, skb);
685}
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index c843fd2bc48d..3e4384c222f7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/tipc/msg.h: Include file for TIPC message header routines 2 * net/tipc/msg.h: Include file for TIPC message header routines
3 * 3 *
4 * Copyright (c) 2000-2007, 2014-2015 Ericsson AB 4 * Copyright (c) 2000-2007, 2014-2017 Ericsson AB
5 * Copyright (c) 2005-2008, 2010-2011, Wind River Systems 5 * Copyright (c) 2005-2008, 2010-2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
@@ -61,10 +61,14 @@ struct plist;
61/* 61/*
62 * Payload message types 62 * Payload message types
63 */ 63 */
64#define TIPC_CONN_MSG 0 64#define TIPC_CONN_MSG 0
65#define TIPC_MCAST_MSG 1 65#define TIPC_MCAST_MSG 1
66#define TIPC_NAMED_MSG 2 66#define TIPC_NAMED_MSG 2
67#define TIPC_DIRECT_MSG 3 67#define TIPC_DIRECT_MSG 3
68#define TIPC_GRP_MEMBER_EVT 4
69#define TIPC_GRP_BCAST_MSG 5
70#define TIPC_GRP_MCAST_MSG 6
71#define TIPC_GRP_UCAST_MSG 7
68 72
69/* 73/*
70 * Internal message users 74 * Internal message users
@@ -73,11 +77,13 @@ struct plist;
73#define MSG_BUNDLER 6 77#define MSG_BUNDLER 6
74#define LINK_PROTOCOL 7 78#define LINK_PROTOCOL 7
75#define CONN_MANAGER 8 79#define CONN_MANAGER 8
80#define GROUP_PROTOCOL 9
76#define TUNNEL_PROTOCOL 10 81#define TUNNEL_PROTOCOL 10
77#define NAME_DISTRIBUTOR 11 82#define NAME_DISTRIBUTOR 11
78#define MSG_FRAGMENTER 12 83#define MSG_FRAGMENTER 12
79#define LINK_CONFIG 13 84#define LINK_CONFIG 13
80#define SOCK_WAKEUP 14 /* pseudo user */ 85#define SOCK_WAKEUP 14 /* pseudo user */
86#define TOP_SRV 15 /* pseudo user */
81 87
82/* 88/*
83 * Message header sizes 89 * Message header sizes
@@ -86,6 +92,7 @@ struct plist;
86#define BASIC_H_SIZE 32 /* Basic payload message */ 92#define BASIC_H_SIZE 32 /* Basic payload message */
87#define NAMED_H_SIZE 40 /* Named payload message */ 93#define NAMED_H_SIZE 40 /* Named payload message */
88#define MCAST_H_SIZE 44 /* Multicast payload message */ 94#define MCAST_H_SIZE 44 /* Multicast payload message */
95#define GROUP_H_SIZE 44 /* Group payload message */
89#define INT_H_SIZE 40 /* Internal messages */ 96#define INT_H_SIZE 40 /* Internal messages */
90#define MIN_H_SIZE 24 /* Smallest legal TIPC header size */ 97#define MIN_H_SIZE 24 /* Smallest legal TIPC header size */
91#define MAX_H_SIZE 60 /* Largest possible TIPC header size */ 98#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
@@ -96,6 +103,7 @@ struct plist;
96 103
97struct tipc_skb_cb { 104struct tipc_skb_cb {
98 u32 bytes_read; 105 u32 bytes_read;
106 u32 orig_member;
99 struct sk_buff *tail; 107 struct sk_buff *tail;
100 bool validated; 108 bool validated;
101 u16 chain_imp; 109 u16 chain_imp;
@@ -188,6 +196,11 @@ static inline u32 msg_size(struct tipc_msg *m)
188 return msg_bits(m, 0, 0, 0x1ffff); 196 return msg_bits(m, 0, 0, 0x1ffff);
189} 197}
190 198
199static inline u32 msg_blocks(struct tipc_msg *m)
200{
201 return (msg_size(m) / 1024) + 1;
202}
203
191static inline u32 msg_data_sz(struct tipc_msg *m) 204static inline u32 msg_data_sz(struct tipc_msg *m)
192{ 205{
193 return msg_size(m) - msg_hdr_sz(m); 206 return msg_size(m) - msg_hdr_sz(m);
@@ -213,6 +226,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d)
213 msg_set_bits(m, 0, 19, 1, d); 226 msg_set_bits(m, 0, 19, 1, d);
214} 227}
215 228
229static inline int msg_is_keepalive(struct tipc_msg *m)
230{
231 return msg_bits(m, 0, 19, 1);
232}
233
234static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d)
235{
236 msg_set_bits(m, 0, 19, 1, d);
237}
238
216static inline int msg_src_droppable(struct tipc_msg *m) 239static inline int msg_src_droppable(struct tipc_msg *m)
217{ 240{
218 return msg_bits(m, 0, 18, 1); 241 return msg_bits(m, 0, 18, 1);
@@ -251,6 +274,18 @@ static inline void msg_set_type(struct tipc_msg *m, u32 n)
251 msg_set_bits(m, 1, 29, 0x7, n); 274 msg_set_bits(m, 1, 29, 0x7, n);
252} 275}
253 276
277static inline int msg_in_group(struct tipc_msg *m)
278{
279 int mtyp = msg_type(m);
280
281 return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG;
282}
283
284static inline bool msg_is_grp_evt(struct tipc_msg *m)
285{
286 return msg_type(m) == TIPC_GRP_MEMBER_EVT;
287}
288
254static inline u32 msg_named(struct tipc_msg *m) 289static inline u32 msg_named(struct tipc_msg *m)
255{ 290{
256 return msg_type(m) == TIPC_NAMED_MSG; 291 return msg_type(m) == TIPC_NAMED_MSG;
@@ -258,7 +293,10 @@ static inline u32 msg_named(struct tipc_msg *m)
258 293
259static inline u32 msg_mcast(struct tipc_msg *m) 294static inline u32 msg_mcast(struct tipc_msg *m)
260{ 295{
261 return msg_type(m) == TIPC_MCAST_MSG; 296 int mtyp = msg_type(m);
297
298 return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) ||
299 (mtyp == TIPC_GRP_MCAST_MSG));
262} 300}
263 301
264static inline u32 msg_connected(struct tipc_msg *m) 302static inline u32 msg_connected(struct tipc_msg *m)
@@ -514,6 +552,16 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
514#define DSC_RESP_MSG 1 552#define DSC_RESP_MSG 1
515 553
516/* 554/*
555 * Group protocol message types
556 */
557#define GRP_JOIN_MSG 0
558#define GRP_LEAVE_MSG 1
559#define GRP_ADV_MSG 2
560#define GRP_ACK_MSG 3
561#define GRP_RECLAIM_MSG 4
562#define GRP_REMIT_MSG 5
563
564/*
517 * Word 1 565 * Word 1
518 */ 566 */
519static inline u32 msg_seq_gap(struct tipc_msg *m) 567static inline u32 msg_seq_gap(struct tipc_msg *m)
@@ -764,12 +812,12 @@ static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n)
764 msg_set_bits(m, 9, 16, 0xffff, n); 812 msg_set_bits(m, 9, 16, 0xffff, n);
765} 813}
766 814
767static inline u32 msg_adv_win(struct tipc_msg *m) 815static inline u16 msg_adv_win(struct tipc_msg *m)
768{ 816{
769 return msg_bits(m, 9, 0, 0xffff); 817 return msg_bits(m, 9, 0, 0xffff);
770} 818}
771 819
772static inline void msg_set_adv_win(struct tipc_msg *m, u32 n) 820static inline void msg_set_adv_win(struct tipc_msg *m, u16 n)
773{ 821{
774 msg_set_bits(m, 9, 0, 0xffff, n); 822 msg_set_bits(m, 9, 0, 0xffff, n);
775} 823}
@@ -794,6 +842,68 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
794 msg_set_bits(m, 9, 0, 0xffff, n); 842 msg_set_bits(m, 9, 0, 0xffff, n);
795} 843}
796 844
845static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m)
846{
847 return msg_bits(m, 9, 16, 0xffff);
848}
849
850static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n)
851{
852 msg_set_bits(m, 9, 16, 0xffff, n);
853}
854
855static inline u16 msg_grp_bc_acked(struct tipc_msg *m)
856{
857 return msg_bits(m, 9, 16, 0xffff);
858}
859
860static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n)
861{
862 msg_set_bits(m, 9, 16, 0xffff, n);
863}
864
865static inline u16 msg_grp_remitted(struct tipc_msg *m)
866{
867 return msg_bits(m, 9, 16, 0xffff);
868}
869
870static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n)
871{
872 msg_set_bits(m, 9, 16, 0xffff, n);
873}
874
875/* Word 10
876 */
877static inline u16 msg_grp_evt(struct tipc_msg *m)
878{
879 return msg_bits(m, 10, 0, 0x3);
880}
881
882static inline void msg_set_grp_evt(struct tipc_msg *m, int n)
883{
884 msg_set_bits(m, 10, 0, 0x3, n);
885}
886
887static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m)
888{
889 return msg_bits(m, 10, 0, 0x1);
890}
891
892static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n)
893{
894 msg_set_bits(m, 10, 0, 0x1, n);
895}
896
897static inline u16 msg_grp_bc_seqno(struct tipc_msg *m)
898{
899 return msg_bits(m, 10, 16, 0xffff);
900}
901
902static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n)
903{
904 msg_set_bits(m, 10, 16, 0xffff, n);
905}
906
797static inline bool msg_peer_link_is_up(struct tipc_msg *m) 907static inline bool msg_peer_link_is_up(struct tipc_msg *m)
798{ 908{
799 if (likely(msg_user(m) != LINK_PROTOCOL)) 909 if (likely(msg_user(m) != LINK_PROTOCOL))
@@ -816,8 +926,10 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
816} 926}
817 927
818struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp); 928struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
819bool tipc_msg_validate(struct sk_buff *skb); 929bool tipc_msg_validate(struct sk_buff **_skb);
820bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); 930bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
931void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
932 struct sk_buff_head *xmitq);
821void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, 933void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type,
822 u32 hsize, u32 destnode); 934 u32 hsize, u32 destnode);
823struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, 935struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
@@ -842,6 +954,11 @@ static inline u16 buf_seqno(struct sk_buff *skb)
842 return msg_seqno(buf_msg(skb)); 954 return msg_seqno(buf_msg(skb));
843} 955}
844 956
957static inline int buf_roundup_len(struct sk_buff *skb)
958{
959 return (skb->len / 1024 + 1) * 1024;
960}
961
845/* tipc_skb_peek(): peek and reserve first buffer in list 962/* tipc_skb_peek(): peek and reserve first buffer in list
846 * @list: list to be peeked in 963 * @list: list to be peeked in
847 * Returns pointer to first buffer in list, if any 964 * Returns pointer to first buffer in list, if any
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index bd0aac87b41a..b3829bcf63c7 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -43,6 +43,7 @@
43#include "bcast.h" 43#include "bcast.h"
44#include "addr.h" 44#include "addr.h"
45#include "node.h" 45#include "node.h"
46#include "group.h"
46#include <net/genetlink.h> 47#include <net/genetlink.h>
47 48
48#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ 49#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
@@ -596,18 +597,47 @@ not_found:
596 return ref; 597 return ref;
597} 598}
598 599
599/** 600bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
600 * tipc_nametbl_mc_translate - find multicast destinations 601 struct list_head *dsts, int *dstcnt, u32 exclude,
601 * 602 bool all)
602 * Creates list of all local ports that overlap the given multicast address; 603{
603 * also determines if any off-node ports overlap. 604 u32 self = tipc_own_addr(net);
604 * 605 struct publication *publ;
605 * Note: Publications with a scope narrower than 'limit' are ignored. 606 struct name_info *info;
606 * (i.e. local node-scope publications mustn't receive messages arriving 607 struct name_seq *seq;
607 * from another node, even if the multcast link brought it here) 608 struct sub_seq *sseq;
608 * 609
609 * Returns non-zero if any off-node ports overlap 610 if (!tipc_in_scope(domain, self))
610 */ 611 return false;
612
613 *dstcnt = 0;
614 rcu_read_lock();
615 seq = nametbl_find_seq(net, type);
616 if (unlikely(!seq))
617 goto exit;
618 spin_lock_bh(&seq->lock);
619 sseq = nameseq_find_subseq(seq, instance);
620 if (likely(sseq)) {
621 info = sseq->info;
622 list_for_each_entry(publ, &info->zone_list, zone_list) {
623 if (!tipc_in_scope(domain, publ->node))
624 continue;
625 if (publ->ref == exclude && publ->node == self)
626 continue;
627 tipc_dest_push(dsts, publ->node, publ->ref);
628 (*dstcnt)++;
629 if (all)
630 continue;
631 list_move_tail(&publ->zone_list, &info->zone_list);
632 break;
633 }
634 }
635 spin_unlock_bh(&seq->lock);
636exit:
637 rcu_read_unlock();
638 return !list_empty(dsts);
639}
640
611int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, 641int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
612 u32 limit, struct list_head *dports) 642 u32 limit, struct list_head *dports)
613{ 643{
@@ -634,7 +664,7 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
634 info = sseq->info; 664 info = sseq->info;
635 list_for_each_entry(publ, &info->node_list, node_list) { 665 list_for_each_entry(publ, &info->node_list, node_list) {
636 if (publ->scope <= limit) 666 if (publ->scope <= limit)
637 u32_push(dports, publ->ref); 667 tipc_dest_push(dports, 0, publ->ref);
638 } 668 }
639 669
640 if (info->cluster_list_size != info->node_list_size) 670 if (info->cluster_list_size != info->node_list_size)
@@ -667,7 +697,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
667 spin_lock_bh(&seq->lock); 697 spin_lock_bh(&seq->lock);
668 sseq = seq->sseqs + nameseq_locate_subseq(seq, lower); 698 sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
669 stop = seq->sseqs + seq->first_free; 699 stop = seq->sseqs + seq->first_free;
670 for (; sseq->lower <= upper && sseq != stop; sseq++) { 700 for (; sseq != stop && sseq->lower <= upper; sseq++) {
671 info = sseq->info; 701 info = sseq->info;
672 list_for_each_entry(publ, &info->zone_list, zone_list) { 702 list_for_each_entry(publ, &info->zone_list, zone_list) {
673 if (tipc_in_scope(domain, publ->node)) 703 if (tipc_in_scope(domain, publ->node))
@@ -679,6 +709,37 @@ exit:
679 rcu_read_unlock(); 709 rcu_read_unlock();
680} 710}
681 711
712/* tipc_nametbl_build_group - build list of communication group members
713 */
714void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
715 u32 type, u32 domain)
716{
717 struct sub_seq *sseq, *stop;
718 struct name_info *info;
719 struct publication *p;
720 struct name_seq *seq;
721
722 rcu_read_lock();
723 seq = nametbl_find_seq(net, type);
724 if (!seq)
725 goto exit;
726
727 spin_lock_bh(&seq->lock);
728 sseq = seq->sseqs;
729 stop = seq->sseqs + seq->first_free;
730 for (; sseq != stop; sseq++) {
731 info = sseq->info;
732 list_for_each_entry(p, &info->zone_list, zone_list) {
733 if (!tipc_in_scope(domain, p->node))
734 continue;
735 tipc_group_add_member(grp, p->node, p->ref);
736 }
737 }
738 spin_unlock_bh(&seq->lock);
739exit:
740 rcu_read_unlock();
741}
742
682/* 743/*
683 * tipc_nametbl_publish - add name publication to network name tables 744 * tipc_nametbl_publish - add name publication to network name tables
684 */ 745 */
@@ -1057,78 +1118,79 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
1057 return skb->len; 1118 return skb->len;
1058} 1119}
1059 1120
1060bool u32_find(struct list_head *l, u32 value) 1121struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port)
1061{ 1122{
1062 struct u32_item *item; 1123 u64 value = (u64)node << 32 | port;
1124 struct tipc_dest *dst;
1063 1125
1064 list_for_each_entry(item, l, list) { 1126 list_for_each_entry(dst, l, list) {
1065 if (item->value == value) 1127 if (dst->value != value)
1066 return true; 1128 continue;
1129 return dst;
1067 } 1130 }
1068 return false; 1131 return NULL;
1069} 1132}
1070 1133
1071bool u32_push(struct list_head *l, u32 value) 1134bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
1072{ 1135{
1073 struct u32_item *item; 1136 u64 value = (u64)node << 32 | port;
1137 struct tipc_dest *dst;
1074 1138
1075 list_for_each_entry(item, l, list) { 1139 if (tipc_dest_find(l, node, port))
1076 if (item->value == value)
1077 return false;
1078 }
1079 item = kmalloc(sizeof(*item), GFP_ATOMIC);
1080 if (unlikely(!item))
1081 return false; 1140 return false;
1082 1141
1083 item->value = value; 1142 dst = kmalloc(sizeof(*dst), GFP_ATOMIC);
1084 list_add(&item->list, l); 1143 if (unlikely(!dst))
1144 return false;
1145 dst->value = value;
1146 list_add(&dst->list, l);
1085 return true; 1147 return true;
1086} 1148}
1087 1149
1088u32 u32_pop(struct list_head *l) 1150bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port)
1089{ 1151{
1090 struct u32_item *item; 1152 struct tipc_dest *dst;
1091 u32 value = 0;
1092 1153
1093 if (list_empty(l)) 1154 if (list_empty(l))
1094 return 0; 1155 return false;
1095 item = list_first_entry(l, typeof(*item), list); 1156 dst = list_first_entry(l, typeof(*dst), list);
1096 value = item->value; 1157 if (port)
1097 list_del(&item->list); 1158 *port = dst->port;
1098 kfree(item); 1159 if (node)
1099 return value; 1160 *node = dst->node;
1161 list_del(&dst->list);
1162 kfree(dst);
1163 return true;
1100} 1164}
1101 1165
1102bool u32_del(struct list_head *l, u32 value) 1166bool tipc_dest_del(struct list_head *l, u32 node, u32 port)
1103{ 1167{
1104 struct u32_item *item, *tmp; 1168 struct tipc_dest *dst;
1105 1169
1106 list_for_each_entry_safe(item, tmp, l, list) { 1170 dst = tipc_dest_find(l, node, port);
1107 if (item->value != value) 1171 if (!dst)
1108 continue; 1172 return false;
1109 list_del(&item->list); 1173 list_del(&dst->list);
1110 kfree(item); 1174 kfree(dst);
1111 return true; 1175 return true;
1112 }
1113 return false;
1114} 1176}
1115 1177
1116void u32_list_purge(struct list_head *l) 1178void tipc_dest_list_purge(struct list_head *l)
1117{ 1179{
1118 struct u32_item *item, *tmp; 1180 struct tipc_dest *dst, *tmp;
1119 1181
1120 list_for_each_entry_safe(item, tmp, l, list) { 1182 list_for_each_entry_safe(dst, tmp, l, list) {
1121 list_del(&item->list); 1183 list_del(&dst->list);
1122 kfree(item); 1184 kfree(dst);
1123 } 1185 }
1124} 1186}
1125 1187
1126int u32_list_len(struct list_head *l) 1188int tipc_dest_list_len(struct list_head *l)
1127{ 1189{
1128 struct u32_item *item; 1190 struct tipc_dest *dst;
1129 int i = 0; 1191 int i = 0;
1130 1192
1131 list_for_each_entry(item, l, list) { 1193 list_for_each_entry(dst, l, list) {
1132 i++; 1194 i++;
1133 } 1195 }
1134 return i; 1196 return i;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 6ebdeb1d84a5..71926e429446 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -40,6 +40,7 @@
40struct tipc_subscription; 40struct tipc_subscription;
41struct tipc_plist; 41struct tipc_plist;
42struct tipc_nlist; 42struct tipc_nlist;
43struct tipc_group;
43 44
44/* 45/*
45 * TIPC name types reserved for internal TIPC use (both current and planned) 46 * TIPC name types reserved for internal TIPC use (both current and planned)
@@ -101,9 +102,14 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
101u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); 102u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
102int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, 103int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
103 u32 limit, struct list_head *dports); 104 u32 limit, struct list_head *dports);
105void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
106 u32 type, u32 domain);
104void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, 107void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
105 u32 upper, u32 domain, 108 u32 upper, u32 domain,
106 struct tipc_nlist *nodes); 109 struct tipc_nlist *nodes);
110bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
111 struct list_head *dsts, int *dstcnt, u32 exclude,
112 bool all);
107struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, 113struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
108 u32 upper, u32 scope, u32 port_ref, 114 u32 upper, u32 scope, u32 port_ref,
109 u32 key); 115 u32 key);
@@ -120,16 +126,22 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
120int tipc_nametbl_init(struct net *net); 126int tipc_nametbl_init(struct net *net);
121void tipc_nametbl_stop(struct net *net); 127void tipc_nametbl_stop(struct net *net);
122 128
123struct u32_item { 129struct tipc_dest {
124 struct list_head list; 130 struct list_head list;
125 u32 value; 131 union {
132 struct {
133 u32 port;
134 u32 node;
135 };
136 u64 value;
137 };
126}; 138};
127 139
128bool u32_push(struct list_head *l, u32 value); 140struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port);
129u32 u32_pop(struct list_head *l); 141bool tipc_dest_push(struct list_head *l, u32 node, u32 port);
130bool u32_find(struct list_head *l, u32 value); 142bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
131bool u32_del(struct list_head *l, u32 value); 143bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
132void u32_list_purge(struct list_head *l); 144void tipc_dest_list_purge(struct list_head *l);
133int u32_list_len(struct list_head *l); 145int tipc_dest_list_len(struct list_head *l);
134 146
135#endif 147#endif
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 198dbc7adbe1..507017fe0f1b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -153,11 +153,11 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id,
153 bool delete); 153 bool delete);
154static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); 154static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq);
155static void tipc_node_delete(struct tipc_node *node); 155static void tipc_node_delete(struct tipc_node *node);
156static void tipc_node_timeout(unsigned long data); 156static void tipc_node_timeout(struct timer_list *t);
157static void tipc_node_fsm_evt(struct tipc_node *n, int evt); 157static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
158static struct tipc_node *tipc_node_find(struct net *net, u32 addr); 158static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
159static void tipc_node_put(struct tipc_node *node); 159static void tipc_node_put(struct tipc_node *node);
160static bool tipc_node_is_up(struct tipc_node *n); 160static bool node_is_up(struct tipc_node *n);
161 161
162struct tipc_sock_conn { 162struct tipc_sock_conn {
163 u32 port; 163 u32 port;
@@ -361,7 +361,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
361 goto exit; 361 goto exit;
362 } 362 }
363 tipc_node_get(n); 363 tipc_node_get(n);
364 setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n); 364 timer_setup(&n->timer, tipc_node_timeout, 0);
365 n->keepalive_intv = U32_MAX; 365 n->keepalive_intv = U32_MAX;
366 hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); 366 hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
367 list_for_each_entry_rcu(temp_node, &tn->node_list, list) { 367 list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
@@ -500,9 +500,9 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
500 500
501/* tipc_node_timeout - handle expiration of node timer 501/* tipc_node_timeout - handle expiration of node timer
502 */ 502 */
503static void tipc_node_timeout(unsigned long data) 503static void tipc_node_timeout(struct timer_list *t)
504{ 504{
505 struct tipc_node *n = (struct tipc_node *)data; 505 struct tipc_node *n = from_timer(n, t, timer);
506 struct tipc_link_entry *le; 506 struct tipc_link_entry *le;
507 struct sk_buff_head xmitq; 507 struct sk_buff_head xmitq;
508 int bearer_id; 508 int bearer_id;
@@ -657,7 +657,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
657 *slot1 = i; 657 *slot1 = i;
658 } 658 }
659 659
660 if (!tipc_node_is_up(n)) { 660 if (!node_is_up(n)) {
661 if (tipc_link_peer_is_down(l)) 661 if (tipc_link_peer_is_down(l))
662 tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); 662 tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
663 tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT); 663 tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
@@ -717,11 +717,27 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
717 tipc_sk_rcv(n->net, &le->inputq); 717 tipc_sk_rcv(n->net, &le->inputq);
718} 718}
719 719
720static bool tipc_node_is_up(struct tipc_node *n) 720static bool node_is_up(struct tipc_node *n)
721{ 721{
722 return n->active_links[0] != INVALID_BEARER_ID; 722 return n->active_links[0] != INVALID_BEARER_ID;
723} 723}
724 724
725bool tipc_node_is_up(struct net *net, u32 addr)
726{
727 struct tipc_node *n;
728 bool retval = false;
729
730 if (in_own_node(net, addr))
731 return true;
732
733 n = tipc_node_find(net, addr);
734 if (!n)
735 return false;
736 retval = node_is_up(n);
737 tipc_node_put(n);
738 return retval;
739}
740
725void tipc_node_check_dest(struct net *net, u32 onode, 741void tipc_node_check_dest(struct net *net, u32 onode,
726 struct tipc_bearer *b, 742 struct tipc_bearer *b,
727 u16 capabilities, u32 signature, 743 u16 capabilities, u32 signature,
@@ -1149,7 +1165,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node)
1149 1165
1150 if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr)) 1166 if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr))
1151 goto attr_msg_full; 1167 goto attr_msg_full;
1152 if (tipc_node_is_up(node)) 1168 if (node_is_up(node))
1153 if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP)) 1169 if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP))
1154 goto attr_msg_full; 1170 goto attr_msg_full;
1155 1171
@@ -1238,6 +1254,22 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
1238 return 0; 1254 return 0;
1239} 1255}
1240 1256
1257/* tipc_node_distr_xmit(): send single buffer msgs to individual destinations
1258 * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected
1259 */
1260int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq)
1261{
1262 struct sk_buff *skb;
1263 u32 selector, dnode;
1264
1265 while ((skb = __skb_dequeue(xmitq))) {
1266 selector = msg_origport(buf_msg(skb));
1267 dnode = msg_destnode(buf_msg(skb));
1268 tipc_node_xmit_skb(net, skb, dnode, selector);
1269 }
1270 return 0;
1271}
1272
1241void tipc_node_broadcast(struct net *net, struct sk_buff *skb) 1273void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
1242{ 1274{
1243 struct sk_buff *txskb; 1275 struct sk_buff *txskb;
@@ -1249,7 +1281,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
1249 dst = n->addr; 1281 dst = n->addr;
1250 if (in_own_node(net, dst)) 1282 if (in_own_node(net, dst))
1251 continue; 1283 continue;
1252 if (!tipc_node_is_up(n)) 1284 if (!node_is_up(n))
1253 continue; 1285 continue;
1254 txskb = pskb_copy(skb, GFP_ATOMIC); 1286 txskb = pskb_copy(skb, GFP_ATOMIC);
1255 if (!txskb) 1287 if (!txskb)
@@ -1507,7 +1539,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
1507 __skb_queue_head_init(&xmitq); 1539 __skb_queue_head_init(&xmitq);
1508 1540
1509 /* Ensure message is well-formed before touching the header */ 1541 /* Ensure message is well-formed before touching the header */
1510 if (unlikely(!tipc_msg_validate(skb))) 1542 if (unlikely(!tipc_msg_validate(&skb)))
1511 goto discard; 1543 goto discard;
1512 hdr = buf_msg(skb); 1544 hdr = buf_msg(skb);
1513 usr = msg_user(hdr); 1545 usr = msg_user(hdr);
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 898c22916984..acd58d23a70e 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -48,7 +48,8 @@ enum {
48 TIPC_BCAST_SYNCH = (1 << 1), 48 TIPC_BCAST_SYNCH = (1 << 1),
49 TIPC_BCAST_STATE_NACK = (1 << 2), 49 TIPC_BCAST_STATE_NACK = (1 << 2),
50 TIPC_BLOCK_FLOWCTL = (1 << 3), 50 TIPC_BLOCK_FLOWCTL = (1 << 3),
51 TIPC_BCAST_RCAST = (1 << 4) 51 TIPC_BCAST_RCAST = (1 << 4),
52 TIPC_MCAST_GROUPS = (1 << 5)
52}; 53};
53 54
54#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \ 55#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
@@ -68,6 +69,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
68 char *linkname, size_t len); 69 char *linkname, size_t len);
69int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, 70int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
70 int selector); 71 int selector);
72int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *list);
71int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, 73int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
72 u32 selector); 74 u32 selector);
73void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); 75void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr);
@@ -76,6 +78,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
76int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); 78int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
77void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); 79void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
78int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); 80int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
81bool tipc_node_is_up(struct net *net, u32 addr);
79u16 tipc_node_get_capabilities(struct net *net, u32 addr); 82u16 tipc_node_get_capabilities(struct net *net, u32 addr);
80int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); 83int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
81int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); 84int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 3cd6402e812c..acaef80fb88c 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -36,6 +36,8 @@
36#include "server.h" 36#include "server.h"
37#include "core.h" 37#include "core.h"
38#include "socket.h" 38#include "socket.h"
39#include "addr.h"
40#include "msg.h"
39#include <net/sock.h> 41#include <net/sock.h>
40#include <linux/module.h> 42#include <linux/module.h>
41 43
@@ -105,13 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref)
105 kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); 107 kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
106 sock_release(sock); 108 sock_release(sock);
107 con->sock = NULL; 109 con->sock = NULL;
108
109 spin_lock_bh(&s->idr_lock);
110 idr_remove(&s->conn_idr, con->conid);
111 s->idr_in_use--;
112 spin_unlock_bh(&s->idr_lock);
113 } 110 }
114 111 spin_lock_bh(&s->idr_lock);
112 idr_remove(&s->conn_idr, con->conid);
113 s->idr_in_use--;
114 spin_unlock_bh(&s->idr_lock);
115 tipc_clean_outqueues(con); 115 tipc_clean_outqueues(con);
116 kfree(con); 116 kfree(con);
117} 117}
@@ -197,7 +197,8 @@ static void tipc_close_conn(struct tipc_conn *con)
197 struct tipc_server *s = con->server; 197 struct tipc_server *s = con->server;
198 198
199 if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { 199 if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
200 tipc_unregister_callbacks(con); 200 if (con->sock)
201 tipc_unregister_callbacks(con);
201 202
202 if (con->conid) 203 if (con->conid)
203 s->tipc_conn_release(con->conid, con->usr_data); 204 s->tipc_conn_release(con->conid, con->usr_data);
@@ -207,8 +208,8 @@ static void tipc_close_conn(struct tipc_conn *con)
207 * are harmless for us here as we have already deleted this 208 * are harmless for us here as we have already deleted this
208 * connection from server connection list. 209 * connection from server connection list.
209 */ 210 */
210 kernel_sock_shutdown(con->sock, SHUT_RDWR); 211 if (con->sock)
211 212 kernel_sock_shutdown(con->sock, SHUT_RDWR);
212 conn_put(con); 213 conn_put(con);
213 } 214 }
214} 215}
@@ -487,38 +488,104 @@ void tipc_conn_terminate(struct tipc_server *s, int conid)
487 } 488 }
488} 489}
489 490
491bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
492 u32 lower, u32 upper, int *conid)
493{
494 struct tipc_subscriber *scbr;
495 struct tipc_subscr sub;
496 struct tipc_server *s;
497 struct tipc_conn *con;
498
499 sub.seq.type = type;
500 sub.seq.lower = lower;
501 sub.seq.upper = upper;
502 sub.timeout = TIPC_WAIT_FOREVER;
503 sub.filter = TIPC_SUB_PORTS;
504 *(u32 *)&sub.usr_handle = port;
505
506 con = tipc_alloc_conn(tipc_topsrv(net));
507 if (IS_ERR(con))
508 return false;
509
510 *conid = con->conid;
511 s = con->server;
512 scbr = s->tipc_conn_new(*conid);
513 if (!scbr) {
514 tipc_close_conn(con);
515 return false;
516 }
517
518 con->usr_data = scbr;
519 con->sock = NULL;
520 s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
521 return true;
522}
523
524void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
525{
526 struct tipc_conn *con;
527
528 con = tipc_conn_lookup(tipc_topsrv(net), conid);
529 if (!con)
530 return;
531 tipc_close_conn(con);
532 conn_put(con);
533}
534
535static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
536{
537 u32 port = *(u32 *)&evt->s.usr_handle;
538 u32 self = tipc_own_addr(net);
539 struct sk_buff_head evtq;
540 struct sk_buff *skb;
541
542 skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
543 self, self, port, port, 0);
544 if (!skb)
545 return;
546 msg_set_dest_droppable(buf_msg(skb), true);
547 memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
548 skb_queue_head_init(&evtq);
549 __skb_queue_tail(&evtq, skb);
550 tipc_sk_rcv(net, &evtq);
551}
552
490static void tipc_send_to_sock(struct tipc_conn *con) 553static void tipc_send_to_sock(struct tipc_conn *con)
491{ 554{
492 int count = 0;
493 struct tipc_server *s = con->server; 555 struct tipc_server *s = con->server;
494 struct outqueue_entry *e; 556 struct outqueue_entry *e;
557 struct tipc_event *evt;
495 struct msghdr msg; 558 struct msghdr msg;
559 int count = 0;
496 int ret; 560 int ret;
497 561
498 spin_lock_bh(&con->outqueue_lock); 562 spin_lock_bh(&con->outqueue_lock);
499 while (test_bit(CF_CONNECTED, &con->flags)) { 563 while (test_bit(CF_CONNECTED, &con->flags)) {
500 e = list_entry(con->outqueue.next, struct outqueue_entry, 564 e = list_entry(con->outqueue.next, struct outqueue_entry, list);
501 list);
502 if ((struct list_head *) e == &con->outqueue) 565 if ((struct list_head *) e == &con->outqueue)
503 break; 566 break;
504 spin_unlock_bh(&con->outqueue_lock);
505 567
506 memset(&msg, 0, sizeof(msg)); 568 spin_unlock_bh(&con->outqueue_lock);
507 msg.msg_flags = MSG_DONTWAIT;
508 569
509 if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { 570 if (con->sock) {
510 msg.msg_name = &e->dest; 571 memset(&msg, 0, sizeof(msg));
511 msg.msg_namelen = sizeof(struct sockaddr_tipc); 572 msg.msg_flags = MSG_DONTWAIT;
512 } 573 if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
513 ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, 574 msg.msg_name = &e->dest;
514 e->iov.iov_len); 575 msg.msg_namelen = sizeof(struct sockaddr_tipc);
515 if (ret == -EWOULDBLOCK || ret == 0) { 576 }
516 cond_resched(); 577 ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
517 goto out; 578 e->iov.iov_len);
518 } else if (ret < 0) { 579 if (ret == -EWOULDBLOCK || ret == 0) {
519 goto send_err; 580 cond_resched();
581 goto out;
582 } else if (ret < 0) {
583 goto send_err;
584 }
585 } else {
586 evt = e->iov.iov_base;
587 tipc_send_kern_top_evt(s->net, evt);
520 } 588 }
521
522 /* Don't starve users filling buffers */ 589 /* Don't starve users filling buffers */
523 if (++count >= MAX_SEND_MSG_COUNT) { 590 if (++count >= MAX_SEND_MSG_COUNT) {
524 cond_resched(); 591 cond_resched();
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 34f8055afa3b..2113c9192633 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -83,13 +83,16 @@ struct tipc_server {
83int tipc_conn_sendmsg(struct tipc_server *s, int conid, 83int tipc_conn_sendmsg(struct tipc_server *s, int conid,
84 struct sockaddr_tipc *addr, void *data, size_t len); 84 struct sockaddr_tipc *addr, void *data, size_t len);
85 85
86bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
87 u32 lower, u32 upper, int *conid);
88void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
89
86/** 90/**
87 * tipc_conn_terminate - terminate connection with server 91 * tipc_conn_terminate - terminate connection with server
88 * 92 *
89 * Note: Must call it in process context since it might sleep 93 * Note: Must call it in process context since it might sleep
90 */ 94 */
91void tipc_conn_terminate(struct tipc_server *s, int conid); 95void tipc_conn_terminate(struct tipc_server *s, int conid);
92
93int tipc_server_start(struct tipc_server *s); 96int tipc_server_start(struct tipc_server *s);
94 97
95void tipc_server_stop(struct tipc_server *s); 98void tipc_server_stop(struct tipc_server *s);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index d50edd6e0019..5d18c0caa92b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/tipc/socket.c: TIPC socket API 2 * net/tipc/socket.c: TIPC socket API
3 * 3 *
4 * Copyright (c) 2001-2007, 2012-2016, Ericsson AB 4 * Copyright (c) 2001-2007, 2012-2017, Ericsson AB
5 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems 5 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
@@ -45,9 +45,10 @@
45#include "socket.h" 45#include "socket.h"
46#include "bcast.h" 46#include "bcast.h"
47#include "netlink.h" 47#include "netlink.h"
48#include "group.h"
48 49
49#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ 50#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
50#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ 51#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
51#define TIPC_FWD_MSG 1 52#define TIPC_FWD_MSG 1
52#define TIPC_MAX_PORT 0xffffffff 53#define TIPC_MAX_PORT 0xffffffff
53#define TIPC_MIN_PORT 1 54#define TIPC_MIN_PORT 1
@@ -61,6 +62,11 @@ enum {
61 TIPC_CONNECTING = TCP_SYN_SENT, 62 TIPC_CONNECTING = TCP_SYN_SENT,
62}; 63};
63 64
65struct sockaddr_pair {
66 struct sockaddr_tipc sock;
67 struct sockaddr_tipc member;
68};
69
64/** 70/**
65 * struct tipc_sock - TIPC socket structure 71 * struct tipc_sock - TIPC socket structure
66 * @sk: socket - interacts with 'port' and with user via the socket API 72 * @sk: socket - interacts with 'port' and with user via the socket API
@@ -78,7 +84,7 @@ enum {
78 * @conn_timeout: the time we can wait for an unresponded setup request 84 * @conn_timeout: the time we can wait for an unresponded setup request
79 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue 85 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
80 * @cong_link_cnt: number of congested links 86 * @cong_link_cnt: number of congested links
81 * @sent_unacked: # messages sent by socket, and not yet acked by peer 87 * @snt_unacked: # messages sent by socket, and not yet acked by peer
82 * @rcv_unacked: # messages read by user, but not yet acked back to peer 88 * @rcv_unacked: # messages read by user, but not yet acked back to peer
83 * @peer: 'connected' peer for dgram/rdm 89 * @peer: 'connected' peer for dgram/rdm
84 * @node: hash table node 90 * @node: hash table node
@@ -109,20 +115,22 @@ struct tipc_sock {
109 struct rhash_head node; 115 struct rhash_head node;
110 struct tipc_mc_method mc_method; 116 struct tipc_mc_method mc_method;
111 struct rcu_head rcu; 117 struct rcu_head rcu;
118 struct tipc_group *group;
112}; 119};
113 120
114static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); 121static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
115static void tipc_data_ready(struct sock *sk); 122static void tipc_data_ready(struct sock *sk);
116static void tipc_write_space(struct sock *sk); 123static void tipc_write_space(struct sock *sk);
117static void tipc_sock_destruct(struct sock *sk); 124static void tipc_sock_destruct(struct sock *sk);
118static int tipc_release(struct socket *sock); 125static int tipc_release(struct socket *sock);
119static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, 126static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
120 bool kern); 127 bool kern);
121static void tipc_sk_timeout(unsigned long data); 128static void tipc_sk_timeout(struct timer_list *t);
122static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, 129static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
123 struct tipc_name_seq const *seq); 130 struct tipc_name_seq const *seq);
124static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, 131static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
125 struct tipc_name_seq const *seq); 132 struct tipc_name_seq const *seq);
133static int tipc_sk_leave(struct tipc_sock *tsk);
126static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); 134static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
127static int tipc_sk_insert(struct tipc_sock *tsk); 135static int tipc_sk_insert(struct tipc_sock *tsk);
128static void tipc_sk_remove(struct tipc_sock *tsk); 136static void tipc_sk_remove(struct tipc_sock *tsk);
@@ -193,6 +201,11 @@ static bool tsk_conn_cong(struct tipc_sock *tsk)
193 return tsk->snt_unacked > tsk->snd_win; 201 return tsk->snt_unacked > tsk->snd_win;
194} 202}
195 203
204static u16 tsk_blocks(int len)
205{
206 return ((len / FLOWCTL_BLK_SZ) + 1);
207}
208
196/* tsk_blocks(): translate a buffer size in bytes to number of 209/* tsk_blocks(): translate a buffer size in bytes to number of
197 * advertisable blocks, taking into account the ratio truesize(len)/len 210 * advertisable blocks, taking into account the ratio truesize(len)/len
198 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ 211 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
@@ -451,9 +464,9 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
451 NAMED_H_SIZE, 0); 464 NAMED_H_SIZE, 0);
452 465
453 msg_set_origport(msg, tsk->portid); 466 msg_set_origport(msg, tsk->portid);
454 setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); 467 timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
455 sk->sk_shutdown = 0; 468 sk->sk_shutdown = 0;
456 sk->sk_backlog_rcv = tipc_backlog_rcv; 469 sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
457 sk->sk_rcvbuf = sysctl_tipc_rmem[1]; 470 sk->sk_rcvbuf = sysctl_tipc_rmem[1];
458 sk->sk_data_ready = tipc_data_ready; 471 sk->sk_data_ready = tipc_data_ready;
459 sk->sk_write_space = tipc_write_space; 472 sk->sk_write_space = tipc_write_space;
@@ -559,13 +572,14 @@ static int tipc_release(struct socket *sock)
559 572
560 __tipc_shutdown(sock, TIPC_ERR_NO_PORT); 573 __tipc_shutdown(sock, TIPC_ERR_NO_PORT);
561 sk->sk_shutdown = SHUTDOWN_MASK; 574 sk->sk_shutdown = SHUTDOWN_MASK;
575 tipc_sk_leave(tsk);
562 tipc_sk_withdraw(tsk, 0, NULL); 576 tipc_sk_withdraw(tsk, 0, NULL);
563 sk_stop_timer(sk, &sk->sk_timer); 577 sk_stop_timer(sk, &sk->sk_timer);
564 tipc_sk_remove(tsk); 578 tipc_sk_remove(tsk);
565 579
566 /* Reject any messages that accumulated in backlog queue */ 580 /* Reject any messages that accumulated in backlog queue */
567 release_sock(sk); 581 release_sock(sk);
568 u32_list_purge(&tsk->cong_links); 582 tipc_dest_list_purge(&tsk->cong_links);
569 tsk->cong_link_cnt = 0; 583 tsk->cong_link_cnt = 0;
570 call_rcu(&tsk->rcu, tipc_sk_callback); 584 call_rcu(&tsk->rcu, tipc_sk_callback);
571 sock->sk = NULL; 585 sock->sk = NULL;
@@ -601,7 +615,10 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
601 res = tipc_sk_withdraw(tsk, 0, NULL); 615 res = tipc_sk_withdraw(tsk, 0, NULL);
602 goto exit; 616 goto exit;
603 } 617 }
604 618 if (tsk->group) {
619 res = -EACCES;
620 goto exit;
621 }
605 if (uaddr_len < sizeof(struct sockaddr_tipc)) { 622 if (uaddr_len < sizeof(struct sockaddr_tipc)) {
606 res = -EINVAL; 623 res = -EINVAL;
607 goto exit; 624 goto exit;
@@ -698,38 +715,41 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
698{ 715{
699 struct sock *sk = sock->sk; 716 struct sock *sk = sock->sk;
700 struct tipc_sock *tsk = tipc_sk(sk); 717 struct tipc_sock *tsk = tipc_sk(sk);
701 u32 mask = 0; 718 struct tipc_group *grp = tsk->group;
719 u32 revents = 0;
702 720
703 sock_poll_wait(file, sk_sleep(sk), wait); 721 sock_poll_wait(file, sk_sleep(sk), wait);
704 722
705 if (sk->sk_shutdown & RCV_SHUTDOWN) 723 if (sk->sk_shutdown & RCV_SHUTDOWN)
706 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 724 revents |= POLLRDHUP | POLLIN | POLLRDNORM;
707 if (sk->sk_shutdown == SHUTDOWN_MASK) 725 if (sk->sk_shutdown == SHUTDOWN_MASK)
708 mask |= POLLHUP; 726 revents |= POLLHUP;
709 727
710 switch (sk->sk_state) { 728 switch (sk->sk_state) {
711 case TIPC_ESTABLISHED: 729 case TIPC_ESTABLISHED:
712 if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) 730 if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
713 mask |= POLLOUT; 731 revents |= POLLOUT;
714 /* fall thru' */ 732 /* fall thru' */
715 case TIPC_LISTEN: 733 case TIPC_LISTEN:
716 case TIPC_CONNECTING: 734 case TIPC_CONNECTING:
717 if (!skb_queue_empty(&sk->sk_receive_queue)) 735 if (!skb_queue_empty(&sk->sk_receive_queue))
718 mask |= (POLLIN | POLLRDNORM); 736 revents |= POLLIN | POLLRDNORM;
719 break; 737 break;
720 case TIPC_OPEN: 738 case TIPC_OPEN:
721 if (!tsk->cong_link_cnt) 739 if (!grp || tipc_group_size(grp))
722 mask |= POLLOUT; 740 if (!tsk->cong_link_cnt)
723 if (tipc_sk_type_connectionless(sk) && 741 revents |= POLLOUT;
724 (!skb_queue_empty(&sk->sk_receive_queue))) 742 if (!tipc_sk_type_connectionless(sk))
725 mask |= (POLLIN | POLLRDNORM); 743 break;
744 if (skb_queue_empty(&sk->sk_receive_queue))
745 break;
746 revents |= POLLIN | POLLRDNORM;
726 break; 747 break;
727 case TIPC_DISCONNECTING: 748 case TIPC_DISCONNECTING:
728 mask = (POLLIN | POLLRDNORM | POLLHUP); 749 revents = POLLIN | POLLRDNORM | POLLHUP;
729 break; 750 break;
730 } 751 }
731 752 return revents;
732 return mask;
733} 753}
734 754
735/** 755/**
@@ -757,6 +777,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
757 struct tipc_nlist dsts; 777 struct tipc_nlist dsts;
758 int rc; 778 int rc;
759 779
780 if (tsk->group)
781 return -EACCES;
782
760 /* Block or return if any destination link is congested */ 783 /* Block or return if any destination link is congested */
761 rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt); 784 rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
762 if (unlikely(rc)) 785 if (unlikely(rc))
@@ -794,6 +817,296 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
794} 817}
795 818
796/** 819/**
820 * tipc_send_group_msg - send a message to a member in the group
821 * @net: network namespace
822 * @m: message to send
823 * @mb: group member
824 * @dnode: destination node
825 * @dport: destination port
826 * @dlen: total length of message data
827 */
828static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
829 struct msghdr *m, struct tipc_member *mb,
830 u32 dnode, u32 dport, int dlen)
831{
832 u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
833 struct tipc_mc_method *method = &tsk->mc_method;
834 int blks = tsk_blocks(GROUP_H_SIZE + dlen);
835 struct tipc_msg *hdr = &tsk->phdr;
836 struct sk_buff_head pkts;
837 int mtu, rc;
838
839 /* Complete message header */
840 msg_set_type(hdr, TIPC_GRP_UCAST_MSG);
841 msg_set_hdr_sz(hdr, GROUP_H_SIZE);
842 msg_set_destport(hdr, dport);
843 msg_set_destnode(hdr, dnode);
844 msg_set_grp_bc_seqno(hdr, bc_snd_nxt);
845
846 /* Build message as chain of buffers */
847 skb_queue_head_init(&pkts);
848 mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
849 rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
850 if (unlikely(rc != dlen))
851 return rc;
852
853 /* Send message */
854 rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
855 if (unlikely(rc == -ELINKCONG)) {
856 tipc_dest_push(&tsk->cong_links, dnode, 0);
857 tsk->cong_link_cnt++;
858 }
859
860 /* Update send window */
861 tipc_group_update_member(mb, blks);
862
863 /* A broadcast sent within next EXPIRE period must follow same path */
864 method->rcast = true;
865 method->mandatory = true;
866 return dlen;
867}
868
869/**
870 * tipc_send_group_unicast - send message to a member in the group
871 * @sock: socket structure
872 * @m: message to send
873 * @dlen: total length of message data
874 * @timeout: timeout to wait for wakeup
875 *
876 * Called from function tipc_sendmsg(), which has done all sanity checks
877 * Returns the number of bytes sent on success, or errno
878 */
879static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
880 int dlen, long timeout)
881{
882 struct sock *sk = sock->sk;
883 DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
884 int blks = tsk_blocks(GROUP_H_SIZE + dlen);
885 struct tipc_sock *tsk = tipc_sk(sk);
886 struct tipc_group *grp = tsk->group;
887 struct net *net = sock_net(sk);
888 struct tipc_member *mb = NULL;
889 u32 node, port;
890 int rc;
891
892 node = dest->addr.id.node;
893 port = dest->addr.id.ref;
894 if (!port && !node)
895 return -EHOSTUNREACH;
896
897 /* Block or return if destination link or member is congested */
898 rc = tipc_wait_for_cond(sock, &timeout,
899 !tipc_dest_find(&tsk->cong_links, node, 0) &&
900 !tipc_group_cong(grp, node, port, blks, &mb));
901 if (unlikely(rc))
902 return rc;
903
904 if (unlikely(!mb))
905 return -EHOSTUNREACH;
906
907 rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen);
908
909 return rc ? rc : dlen;
910}
911
912/**
913 * tipc_send_group_anycast - send message to any member with given identity
914 * @sock: socket structure
915 * @m: message to send
916 * @dlen: total length of message data
917 * @timeout: timeout to wait for wakeup
918 *
919 * Called from function tipc_sendmsg(), which has done all sanity checks
920 * Returns the number of bytes sent on success, or errno
921 */
922static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
923 int dlen, long timeout)
924{
925 DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
926 struct sock *sk = sock->sk;
927 struct tipc_sock *tsk = tipc_sk(sk);
928 struct list_head *cong_links = &tsk->cong_links;
929 int blks = tsk_blocks(GROUP_H_SIZE + dlen);
930 struct tipc_group *grp = tsk->group;
931 struct tipc_member *first = NULL;
932 struct tipc_member *mbr = NULL;
933 struct net *net = sock_net(sk);
934 u32 node, port, exclude;
935 u32 type, inst, domain;
936 struct list_head dsts;
937 int lookups = 0;
938 int dstcnt, rc;
939 bool cong;
940
941 INIT_LIST_HEAD(&dsts);
942
943 type = dest->addr.name.name.type;
944 inst = dest->addr.name.name.instance;
945 domain = addr_domain(net, dest->scope);
946 exclude = tipc_group_exclude(grp);
947
948 while (++lookups < 4) {
949 first = NULL;
950
951 /* Look for a non-congested destination member, if any */
952 while (1) {
953 if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts,
954 &dstcnt, exclude, false))
955 return -EHOSTUNREACH;
956 tipc_dest_pop(&dsts, &node, &port);
957 cong = tipc_group_cong(grp, node, port, blks, &mbr);
958 if (!cong)
959 break;
960 if (mbr == first)
961 break;
962 if (!first)
963 first = mbr;
964 }
965
966 /* Start over if destination was not in member list */
967 if (unlikely(!mbr))
968 continue;
969
970 if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
971 break;
972
973 /* Block or return if destination link or member is congested */
974 rc = tipc_wait_for_cond(sock, &timeout,
975 !tipc_dest_find(cong_links, node, 0) &&
976 !tipc_group_cong(grp, node, port,
977 blks, &mbr));
978 if (unlikely(rc))
979 return rc;
980
981 /* Send, unless destination disappeared while waiting */
982 if (likely(mbr))
983 break;
984 }
985
986 if (unlikely(lookups >= 4))
987 return -EHOSTUNREACH;
988
989 rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);
990
991 return rc ? rc : dlen;
992}
993
994/**
995 * tipc_send_group_bcast - send message to all members in communication group
996 * @sk: socket structure
997 * @m: message to send
998 * @dlen: total length of message data
999 * @timeout: timeout to wait for wakeup
1000 *
1001 * Called from function tipc_sendmsg(), which has done all sanity checks
1002 * Returns the number of bytes sent on success, or errno
1003 */
1004static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
1005 int dlen, long timeout)
1006{
1007 DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
1008 struct sock *sk = sock->sk;
1009 struct net *net = sock_net(sk);
1010 struct tipc_sock *tsk = tipc_sk(sk);
1011 struct tipc_group *grp = tsk->group;
1012 struct tipc_nlist *dsts = tipc_group_dests(grp);
1013 struct tipc_mc_method *method = &tsk->mc_method;
1014 bool ack = method->mandatory && method->rcast;
1015 int blks = tsk_blocks(MCAST_H_SIZE + dlen);
1016 struct tipc_msg *hdr = &tsk->phdr;
1017 int mtu = tipc_bcast_get_mtu(net);
1018 struct sk_buff_head pkts;
1019 int rc = -EHOSTUNREACH;
1020
1021 if (!dsts->local && !dsts->remote)
1022 return -EHOSTUNREACH;
1023
1024 /* Block or return if any destination link or member is congested */
1025 rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt &&
1026 !tipc_group_bc_cong(grp, blks));
1027 if (unlikely(rc))
1028 return rc;
1029
1030 /* Complete message header */
1031 if (dest) {
1032 msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
1033 msg_set_nameinst(hdr, dest->addr.name.name.instance);
1034 } else {
1035 msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
1036 msg_set_nameinst(hdr, 0);
1037 }
1038 msg_set_hdr_sz(hdr, GROUP_H_SIZE);
1039 msg_set_destport(hdr, 0);
1040 msg_set_destnode(hdr, 0);
1041 msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));
1042
1043 /* Avoid getting stuck with repeated forced replicasts */
1044 msg_set_grp_bc_ack_req(hdr, ack);
1045
1046 /* Build message as chain of buffers */
1047 skb_queue_head_init(&pkts);
1048 rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
1049 if (unlikely(rc != dlen))
1050 return rc;
1051
1052 /* Send message */
1053 rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
1054 if (unlikely(rc))
1055 return rc;
1056
1057 /* Update broadcast sequence number and send windows */
1058 tipc_group_update_bc_members(tsk->group, blks, ack);
1059
1060 /* Broadcast link is now free to choose method for next broadcast */
1061 method->mandatory = false;
1062 method->expires = jiffies;
1063
1064 return dlen;
1065}
1066
1067/**
1068 * tipc_send_group_mcast - send message to all members with given identity
1069 * @sock: socket structure
1070 * @m: message to send
1071 * @dlen: total length of message data
1072 * @timeout: timeout to wait for wakeup
1073 *
1074 * Called from function tipc_sendmsg(), which has done all sanity checks
1075 * Returns the number of bytes sent on success, or errno
1076 */
1077static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
1078 int dlen, long timeout)
1079{
1080 struct sock *sk = sock->sk;
1081 DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
1082 struct tipc_name_seq *seq = &dest->addr.nameseq;
1083 struct tipc_sock *tsk = tipc_sk(sk);
1084 struct tipc_group *grp = tsk->group;
1085 struct net *net = sock_net(sk);
1086 u32 domain, exclude, dstcnt;
1087 struct list_head dsts;
1088
1089 INIT_LIST_HEAD(&dsts);
1090
1091 if (seq->lower != seq->upper)
1092 return -ENOTSUPP;
1093
1094 domain = addr_domain(net, dest->scope);
1095 exclude = tipc_group_exclude(grp);
1096 if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain,
1097 &dsts, &dstcnt, exclude, true))
1098 return -EHOSTUNREACH;
1099
1100 if (dstcnt == 1) {
1101 tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
1102 return tipc_send_group_unicast(sock, m, dlen, timeout);
1103 }
1104
1105 tipc_dest_list_purge(&dsts);
1106 return tipc_send_group_bcast(sock, m, dlen, timeout);
1107}
1108
1109/**
797 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets 1110 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
798 * @arrvq: queue with arriving messages, to be cloned after destination lookup 1111 * @arrvq: queue with arriving messages, to be cloned after destination lookup
799 * @inputq: queue with cloned messages, delivered to socket after dest lookup 1112 * @inputq: queue with cloned messages, delivered to socket after dest lookup
@@ -803,13 +1116,15 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
803void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, 1116void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
804 struct sk_buff_head *inputq) 1117 struct sk_buff_head *inputq)
805{ 1118{
806 struct tipc_msg *msg;
807 struct list_head dports;
808 u32 portid;
809 u32 scope = TIPC_CLUSTER_SCOPE; 1119 u32 scope = TIPC_CLUSTER_SCOPE;
810 struct sk_buff_head tmpq; 1120 u32 self = tipc_own_addr(net);
811 uint hsz;
812 struct sk_buff *skb, *_skb; 1121 struct sk_buff *skb, *_skb;
1122 u32 lower = 0, upper = ~0;
1123 struct sk_buff_head tmpq;
1124 u32 portid, oport, onode;
1125 struct list_head dports;
1126 struct tipc_msg *msg;
1127 int user, mtyp, hsz;
813 1128
814 __skb_queue_head_init(&tmpq); 1129 __skb_queue_head_init(&tmpq);
815 INIT_LIST_HEAD(&dports); 1130 INIT_LIST_HEAD(&dports);
@@ -817,17 +1132,32 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
817 skb = tipc_skb_peek(arrvq, &inputq->lock); 1132 skb = tipc_skb_peek(arrvq, &inputq->lock);
818 for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { 1133 for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
819 msg = buf_msg(skb); 1134 msg = buf_msg(skb);
1135 user = msg_user(msg);
1136 mtyp = msg_type(msg);
1137 if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
1138 spin_lock_bh(&inputq->lock);
1139 if (skb_peek(arrvq) == skb) {
1140 __skb_dequeue(arrvq);
1141 __skb_queue_tail(inputq, skb);
1142 }
1143 refcount_dec(&skb->users);
1144 spin_unlock_bh(&inputq->lock);
1145 continue;
1146 }
820 hsz = skb_headroom(skb) + msg_hdr_sz(msg); 1147 hsz = skb_headroom(skb) + msg_hdr_sz(msg);
821 1148 oport = msg_origport(msg);
822 if (in_own_node(net, msg_orignode(msg))) 1149 onode = msg_orignode(msg);
1150 if (onode == self)
823 scope = TIPC_NODE_SCOPE; 1151 scope = TIPC_NODE_SCOPE;
824 1152
825 /* Create destination port list and message clones: */ 1153 /* Create destination port list and message clones: */
826 tipc_nametbl_mc_translate(net, 1154 if (!msg_in_group(msg)) {
827 msg_nametype(msg), msg_namelower(msg), 1155 lower = msg_namelower(msg);
828 msg_nameupper(msg), scope, &dports); 1156 upper = msg_nameupper(msg);
829 portid = u32_pop(&dports); 1157 }
830 for (; portid; portid = u32_pop(&dports)) { 1158 tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper,
1159 scope, &dports);
1160 while (tipc_dest_pop(&dports, NULL, &portid)) {
831 _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); 1161 _skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
832 if (_skb) { 1162 if (_skb) {
833 msg_set_destport(buf_msg(_skb), portid); 1163 msg_set_destport(buf_msg(_skb), portid);
@@ -850,16 +1180,16 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
850} 1180}
851 1181
852/** 1182/**
853 * tipc_sk_proto_rcv - receive a connection mng protocol message 1183 * tipc_sk_conn_proto_rcv - receive a connection mng protocol message
854 * @tsk: receiving socket 1184 * @tsk: receiving socket
855 * @skb: pointer to message buffer. 1185 * @skb: pointer to message buffer.
856 */ 1186 */
857static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, 1187static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
858 struct sk_buff_head *xmitq) 1188 struct sk_buff_head *xmitq)
859{ 1189{
860 struct sock *sk = &tsk->sk;
861 u32 onode = tsk_own_node(tsk);
862 struct tipc_msg *hdr = buf_msg(skb); 1190 struct tipc_msg *hdr = buf_msg(skb);
1191 u32 onode = tsk_own_node(tsk);
1192 struct sock *sk = &tsk->sk;
863 int mtyp = msg_type(hdr); 1193 int mtyp = msg_type(hdr);
864 bool conn_cong; 1194 bool conn_cong;
865 1195
@@ -931,6 +1261,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
931 long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); 1261 long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
932 struct list_head *clinks = &tsk->cong_links; 1262 struct list_head *clinks = &tsk->cong_links;
933 bool syn = !tipc_sk_type_connectionless(sk); 1263 bool syn = !tipc_sk_type_connectionless(sk);
1264 struct tipc_group *grp = tsk->group;
934 struct tipc_msg *hdr = &tsk->phdr; 1265 struct tipc_msg *hdr = &tsk->phdr;
935 struct tipc_name_seq *seq; 1266 struct tipc_name_seq *seq;
936 struct sk_buff_head pkts; 1267 struct sk_buff_head pkts;
@@ -941,18 +1272,31 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
941 if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) 1272 if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
942 return -EMSGSIZE; 1273 return -EMSGSIZE;
943 1274
1275 if (likely(dest)) {
1276 if (unlikely(m->msg_namelen < sizeof(*dest)))
1277 return -EINVAL;
1278 if (unlikely(dest->family != AF_TIPC))
1279 return -EINVAL;
1280 }
1281
1282 if (grp) {
1283 if (!dest)
1284 return tipc_send_group_bcast(sock, m, dlen, timeout);
1285 if (dest->addrtype == TIPC_ADDR_NAME)
1286 return tipc_send_group_anycast(sock, m, dlen, timeout);
1287 if (dest->addrtype == TIPC_ADDR_ID)
1288 return tipc_send_group_unicast(sock, m, dlen, timeout);
1289 if (dest->addrtype == TIPC_ADDR_MCAST)
1290 return tipc_send_group_mcast(sock, m, dlen, timeout);
1291 return -EINVAL;
1292 }
1293
944 if (unlikely(!dest)) { 1294 if (unlikely(!dest)) {
945 dest = &tsk->peer; 1295 dest = &tsk->peer;
946 if (!syn || dest->family != AF_TIPC) 1296 if (!syn || dest->family != AF_TIPC)
947 return -EDESTADDRREQ; 1297 return -EDESTADDRREQ;
948 } 1298 }
949 1299
950 if (unlikely(m->msg_namelen < sizeof(*dest)))
951 return -EINVAL;
952
953 if (unlikely(dest->family != AF_TIPC))
954 return -EINVAL;
955
956 if (unlikely(syn)) { 1300 if (unlikely(syn)) {
957 if (sk->sk_state == TIPC_LISTEN) 1301 if (sk->sk_state == TIPC_LISTEN)
958 return -EPIPE; 1302 return -EPIPE;
@@ -985,7 +1329,6 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
985 msg_set_destport(hdr, dport); 1329 msg_set_destport(hdr, dport);
986 if (unlikely(!dport && !dnode)) 1330 if (unlikely(!dport && !dnode))
987 return -EHOSTUNREACH; 1331 return -EHOSTUNREACH;
988
989 } else if (dest->addrtype == TIPC_ADDR_ID) { 1332 } else if (dest->addrtype == TIPC_ADDR_ID) {
990 dnode = dest->addr.id.node; 1333 dnode = dest->addr.id.node;
991 msg_set_type(hdr, TIPC_DIRECT_MSG); 1334 msg_set_type(hdr, TIPC_DIRECT_MSG);
@@ -996,7 +1339,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
996 } 1339 }
997 1340
998 /* Block or return if destination link is congested */ 1341 /* Block or return if destination link is congested */
999 rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode)); 1342 rc = tipc_wait_for_cond(sock, &timeout,
1343 !tipc_dest_find(clinks, dnode, 0));
1000 if (unlikely(rc)) 1344 if (unlikely(rc))
1001 return rc; 1345 return rc;
1002 1346
@@ -1008,7 +1352,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
1008 1352
1009 rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); 1353 rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
1010 if (unlikely(rc == -ELINKCONG)) { 1354 if (unlikely(rc == -ELINKCONG)) {
1011 u32_push(clinks, dnode); 1355 tipc_dest_push(clinks, dnode, 0);
1012 tsk->cong_link_cnt++; 1356 tsk->cong_link_cnt++;
1013 rc = 0; 1357 rc = 0;
1014 } 1358 }
@@ -1128,7 +1472,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1128 msg_set_lookup_scope(msg, 0); 1472 msg_set_lookup_scope(msg, 0);
1129 msg_set_hdr_sz(msg, SHORT_H_SIZE); 1473 msg_set_hdr_sz(msg, SHORT_H_SIZE);
1130 1474
1131 sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); 1475 sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
1132 tipc_set_sk_state(sk, TIPC_ESTABLISHED); 1476 tipc_set_sk_state(sk, TIPC_ESTABLISHED);
1133 tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); 1477 tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
1134 tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); 1478 tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
@@ -1142,26 +1486,38 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1142} 1486}
1143 1487
1144/** 1488/**
1145 * set_orig_addr - capture sender's address for received message 1489 * tipc_sk_set_orig_addr - capture sender's address for received message
1146 * @m: descriptor for message info 1490 * @m: descriptor for message info
1147 * @msg: received message header 1491 * @hdr: received message header
1148 * 1492 *
1149 * Note: Address is not captured if not requested by receiver. 1493 * Note: Address is not captured if not requested by receiver.
1150 */ 1494 */
1151static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) 1495static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
1152{ 1496{
1153 DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name); 1497 DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name);
1498 struct tipc_msg *hdr = buf_msg(skb);
1154 1499
1155 if (addr) { 1500 if (!srcaddr)
1156 addr->family = AF_TIPC; 1501 return;
1157 addr->addrtype = TIPC_ADDR_ID; 1502
1158 memset(&addr->addr, 0, sizeof(addr->addr)); 1503 srcaddr->sock.family = AF_TIPC;
1159 addr->addr.id.ref = msg_origport(msg); 1504 srcaddr->sock.addrtype = TIPC_ADDR_ID;
1160 addr->addr.id.node = msg_orignode(msg); 1505 srcaddr->sock.addr.id.ref = msg_origport(hdr);
1161 addr->addr.name.domain = 0; /* could leave uninitialized */ 1506 srcaddr->sock.addr.id.node = msg_orignode(hdr);
1162 addr->scope = 0; /* could leave uninitialized */ 1507 srcaddr->sock.addr.name.domain = 0;
1163 m->msg_namelen = sizeof(struct sockaddr_tipc); 1508 srcaddr->sock.scope = 0;
1164 } 1509 m->msg_namelen = sizeof(struct sockaddr_tipc);
1510
1511 if (!msg_in_group(hdr))
1512 return;
1513
1514 /* Group message users may also want to know sending member's id */
1515 srcaddr->member.family = AF_TIPC;
1516 srcaddr->member.addrtype = TIPC_ADDR_NAME;
1517 srcaddr->member.addr.name.name.type = msg_nametype(hdr);
1518 srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
1519 srcaddr->member.addr.name.domain = 0;
1520 m->msg_namelen = sizeof(*srcaddr);
1165} 1521}
1166 1522
1167/** 1523/**
@@ -1318,11 +1674,13 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
1318 size_t buflen, int flags) 1674 size_t buflen, int flags)
1319{ 1675{
1320 struct sock *sk = sock->sk; 1676 struct sock *sk = sock->sk;
1321 struct tipc_sock *tsk = tipc_sk(sk);
1322 struct sk_buff *skb;
1323 struct tipc_msg *hdr;
1324 bool connected = !tipc_sk_type_connectionless(sk); 1677 bool connected = !tipc_sk_type_connectionless(sk);
1678 struct tipc_sock *tsk = tipc_sk(sk);
1325 int rc, err, hlen, dlen, copy; 1679 int rc, err, hlen, dlen, copy;
1680 struct sk_buff_head xmitq;
1681 struct tipc_msg *hdr;
1682 struct sk_buff *skb;
1683 bool grp_evt;
1326 long timeout; 1684 long timeout;
1327 1685
1328 /* Catch invalid receive requests */ 1686 /* Catch invalid receive requests */
@@ -1336,8 +1694,8 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
1336 } 1694 }
1337 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1695 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1338 1696
1697 /* Step rcv queue to first msg with data or error; wait if necessary */
1339 do { 1698 do {
1340 /* Look at first msg in receive queue; wait if necessary */
1341 rc = tipc_wait_for_rcvmsg(sock, &timeout); 1699 rc = tipc_wait_for_rcvmsg(sock, &timeout);
1342 if (unlikely(rc)) 1700 if (unlikely(rc))
1343 goto exit; 1701 goto exit;
@@ -1346,13 +1704,14 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
1346 dlen = msg_data_sz(hdr); 1704 dlen = msg_data_sz(hdr);
1347 hlen = msg_hdr_sz(hdr); 1705 hlen = msg_hdr_sz(hdr);
1348 err = msg_errcode(hdr); 1706 err = msg_errcode(hdr);
1707 grp_evt = msg_is_grp_evt(hdr);
1349 if (likely(dlen || err)) 1708 if (likely(dlen || err))
1350 break; 1709 break;
1351 tsk_advance_rx_queue(sk); 1710 tsk_advance_rx_queue(sk);
1352 } while (1); 1711 } while (1);
1353 1712
1354 /* Collect msg meta data, including error code and rejected data */ 1713 /* Collect msg meta data, including error code and rejected data */
1355 set_orig_addr(m, hdr); 1714 tipc_sk_set_orig_addr(m, skb);
1356 rc = tipc_sk_anc_data_recv(m, hdr, tsk); 1715 rc = tipc_sk_anc_data_recv(m, hdr, tsk);
1357 if (unlikely(rc)) 1716 if (unlikely(rc))
1358 goto exit; 1717 goto exit;
@@ -1372,15 +1731,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
1372 if (unlikely(rc)) 1731 if (unlikely(rc))
1373 goto exit; 1732 goto exit;
1374 1733
1734 /* Mark message as group event if applicable */
1735 if (unlikely(grp_evt)) {
1736 if (msg_grp_evt(hdr) == TIPC_WITHDRAWN)
1737 m->msg_flags |= MSG_EOR;
1738 m->msg_flags |= MSG_OOB;
1739 copy = 0;
1740 }
1741
1375 /* Caption of data or error code/rejected data was successful */ 1742 /* Caption of data or error code/rejected data was successful */
1376 if (unlikely(flags & MSG_PEEK)) 1743 if (unlikely(flags & MSG_PEEK))
1377 goto exit; 1744 goto exit;
1378 1745
1746 /* Send group flow control advertisement when applicable */
1747 if (tsk->group && msg_in_group(hdr) && !grp_evt) {
1748 skb_queue_head_init(&xmitq);
1749 tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
1750 msg_orignode(hdr), msg_origport(hdr),
1751 &xmitq);
1752 tipc_node_distr_xmit(sock_net(sk), &xmitq);
1753 }
1754
1379 tsk_advance_rx_queue(sk); 1755 tsk_advance_rx_queue(sk);
1756
1380 if (likely(!connected)) 1757 if (likely(!connected))
1381 goto exit; 1758 goto exit;
1382 1759
1383 /* Send connection flow control ack when applicable */ 1760 /* Send connection flow control advertisement when applicable */
1384 tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); 1761 tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
1385 if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) 1762 if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
1386 tipc_sk_send_ack(tsk); 1763 tipc_sk_send_ack(tsk);
@@ -1446,7 +1823,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
1446 1823
1447 /* Collect msg meta data, incl. error code and rejected data */ 1824 /* Collect msg meta data, incl. error code and rejected data */
1448 if (!copied) { 1825 if (!copied) {
1449 set_orig_addr(m, hdr); 1826 tipc_sk_set_orig_addr(m, skb);
1450 rc = tipc_sk_anc_data_recv(m, hdr, tsk); 1827 rc = tipc_sk_anc_data_recv(m, hdr, tsk);
1451 if (rc) 1828 if (rc)
1452 break; 1829 break;
@@ -1532,14 +1909,51 @@ static void tipc_sock_destruct(struct sock *sk)
1532 __skb_queue_purge(&sk->sk_receive_queue); 1909 __skb_queue_purge(&sk->sk_receive_queue);
1533} 1910}
1534 1911
1912static void tipc_sk_proto_rcv(struct sock *sk,
1913 struct sk_buff_head *inputq,
1914 struct sk_buff_head *xmitq)
1915{
1916 struct sk_buff *skb = __skb_dequeue(inputq);
1917 struct tipc_sock *tsk = tipc_sk(sk);
1918 struct tipc_msg *hdr = buf_msg(skb);
1919 struct tipc_group *grp = tsk->group;
1920 bool wakeup = false;
1921
1922 switch (msg_user(hdr)) {
1923 case CONN_MANAGER:
1924 tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
1925 return;
1926 case SOCK_WAKEUP:
1927 tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
1928 tsk->cong_link_cnt--;
1929 wakeup = true;
1930 break;
1931 case GROUP_PROTOCOL:
1932 tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
1933 break;
1934 case TOP_SRV:
1935 tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
1936 skb, inputq, xmitq);
1937 skb = NULL;
1938 break;
1939 default:
1940 break;
1941 }
1942
1943 if (wakeup)
1944 sk->sk_write_space(sk);
1945
1946 kfree_skb(skb);
1947}
1948
1535/** 1949/**
1536 * filter_connect - Handle all incoming messages for a connection-based socket 1950 * tipc_filter_connect - Handle incoming message for a connection-based socket
1537 * @tsk: TIPC socket 1951 * @tsk: TIPC socket
1538 * @skb: pointer to message buffer. Set to NULL if buffer is consumed 1952 * @skb: pointer to message buffer. Set to NULL if buffer is consumed
1539 * 1953 *
1540 * Returns true if everything ok, false otherwise 1954 * Returns true if everything ok, false otherwise
1541 */ 1955 */
1542static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) 1956static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
1543{ 1957{
1544 struct sock *sk = &tsk->sk; 1958 struct sock *sk = &tsk->sk;
1545 struct net *net = sock_net(sk); 1959 struct net *net = sock_net(sk);
@@ -1643,6 +2057,9 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1643 struct tipc_sock *tsk = tipc_sk(sk); 2057 struct tipc_sock *tsk = tipc_sk(sk);
1644 struct tipc_msg *hdr = buf_msg(skb); 2058 struct tipc_msg *hdr = buf_msg(skb);
1645 2059
2060 if (unlikely(msg_in_group(hdr)))
2061 return sk->sk_rcvbuf;
2062
1646 if (unlikely(!msg_connected(hdr))) 2063 if (unlikely(!msg_connected(hdr)))
1647 return sk->sk_rcvbuf << msg_importance(hdr); 2064 return sk->sk_rcvbuf << msg_importance(hdr);
1648 2065
@@ -1653,7 +2070,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1653} 2070}
1654 2071
1655/** 2072/**
1656 * filter_rcv - validate incoming message 2073 * tipc_sk_filter_rcv - validate incoming message
1657 * @sk: socket 2074 * @sk: socket
1658 * @skb: pointer to message. 2075 * @skb: pointer to message.
1659 * 2076 *
@@ -1662,99 +2079,71 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1662 * 2079 *
1663 * Called with socket lock already taken 2080 * Called with socket lock already taken
1664 * 2081 *
1665 * Returns true if message was added to socket receive queue, otherwise false
1666 */ 2082 */
1667static bool filter_rcv(struct sock *sk, struct sk_buff *skb, 2083static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
1668 struct sk_buff_head *xmitq) 2084 struct sk_buff_head *xmitq)
1669{ 2085{
2086 bool sk_conn = !tipc_sk_type_connectionless(sk);
1670 struct tipc_sock *tsk = tipc_sk(sk); 2087 struct tipc_sock *tsk = tipc_sk(sk);
2088 struct tipc_group *grp = tsk->group;
1671 struct tipc_msg *hdr = buf_msg(skb); 2089 struct tipc_msg *hdr = buf_msg(skb);
1672 unsigned int limit = rcvbuf_limit(sk, skb); 2090 struct net *net = sock_net(sk);
1673 int err = TIPC_OK; 2091 struct sk_buff_head inputq;
1674 int usr = msg_user(hdr); 2092 int limit, err = TIPC_OK;
1675 u32 onode;
1676 2093
1677 if (unlikely(msg_user(hdr) == CONN_MANAGER)) { 2094 TIPC_SKB_CB(skb)->bytes_read = 0;
1678 tipc_sk_proto_rcv(tsk, skb, xmitq); 2095 __skb_queue_head_init(&inputq);
1679 return false; 2096 __skb_queue_tail(&inputq, skb);
1680 }
1681 2097
1682 if (unlikely(usr == SOCK_WAKEUP)) { 2098 if (unlikely(!msg_isdata(hdr)))
1683 onode = msg_orignode(hdr); 2099 tipc_sk_proto_rcv(sk, &inputq, xmitq);
1684 kfree_skb(skb);
1685 u32_del(&tsk->cong_links, onode);
1686 tsk->cong_link_cnt--;
1687 sk->sk_write_space(sk);
1688 return false;
1689 }
1690 2100
1691 /* Drop if illegal message type */ 2101 if (unlikely(grp))
1692 if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) { 2102 tipc_group_filter_msg(grp, &inputq, xmitq);
1693 kfree_skb(skb);
1694 return false;
1695 }
1696 2103
1697 /* Reject if wrong message type for current socket state */ 2104 /* Validate and add to receive buffer if there is space */
1698 if (tipc_sk_type_connectionless(sk)) { 2105 while ((skb = __skb_dequeue(&inputq))) {
1699 if (msg_connected(hdr)) { 2106 hdr = buf_msg(skb);
2107 limit = rcvbuf_limit(sk, skb);
2108 if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
2109 (!sk_conn && msg_connected(hdr)) ||
2110 (!grp && msg_in_group(hdr)))
1700 err = TIPC_ERR_NO_PORT; 2111 err = TIPC_ERR_NO_PORT;
1701 goto reject; 2112 else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
1702 } 2113 err = TIPC_ERR_OVERLOAD;
1703 } else if (unlikely(!filter_connect(tsk, skb))) {
1704 err = TIPC_ERR_NO_PORT;
1705 goto reject;
1706 }
1707 2114
1708 /* Reject message if there isn't room to queue it */ 2115 if (unlikely(err)) {
1709 if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) { 2116 tipc_skb_reject(net, err, skb, xmitq);
1710 err = TIPC_ERR_OVERLOAD; 2117 err = TIPC_OK;
1711 goto reject; 2118 continue;
2119 }
2120 __skb_queue_tail(&sk->sk_receive_queue, skb);
2121 skb_set_owner_r(skb, sk);
2122 sk->sk_data_ready(sk);
1712 } 2123 }
1713
1714 /* Enqueue message */
1715 TIPC_SKB_CB(skb)->bytes_read = 0;
1716 __skb_queue_tail(&sk->sk_receive_queue, skb);
1717 skb_set_owner_r(skb, sk);
1718
1719 sk->sk_data_ready(sk);
1720 return true;
1721
1722reject:
1723 if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
1724 __skb_queue_tail(xmitq, skb);
1725 return false;
1726} 2124}
1727 2125
1728/** 2126/**
1729 * tipc_backlog_rcv - handle incoming message from backlog queue 2127 * tipc_sk_backlog_rcv - handle incoming message from backlog queue
1730 * @sk: socket 2128 * @sk: socket
1731 * @skb: message 2129 * @skb: message
1732 * 2130 *
1733 * Caller must hold socket lock 2131 * Caller must hold socket lock
1734 *
1735 * Returns 0
1736 */ 2132 */
1737static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) 2133static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
1738{ 2134{
1739 unsigned int truesize = skb->truesize; 2135 unsigned int before = sk_rmem_alloc_get(sk);
1740 struct sk_buff_head xmitq; 2136 struct sk_buff_head xmitq;
1741 u32 dnode, selector; 2137 unsigned int added;
1742 2138
1743 __skb_queue_head_init(&xmitq); 2139 __skb_queue_head_init(&xmitq);
1744 2140
1745 if (likely(filter_rcv(sk, skb, &xmitq))) { 2141 tipc_sk_filter_rcv(sk, skb, &xmitq);
1746 atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt); 2142 added = sk_rmem_alloc_get(sk) - before;
1747 return 0; 2143 atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt);
1748 }
1749 2144
1750 if (skb_queue_empty(&xmitq)) 2145 /* Send pending response/rejected messages, if any */
1751 return 0; 2146 tipc_node_distr_xmit(sock_net(sk), &xmitq);
1752
1753 /* Send response/rejected message */
1754 skb = __skb_dequeue(&xmitq);
1755 dnode = msg_destnode(buf_msg(skb));
1756 selector = msg_origport(buf_msg(skb));
1757 tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
1758 return 0; 2147 return 0;
1759} 2148}
1760 2149
@@ -1786,7 +2175,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
1786 2175
1787 /* Add message directly to receive queue if possible */ 2176 /* Add message directly to receive queue if possible */
1788 if (!sock_owned_by_user(sk)) { 2177 if (!sock_owned_by_user(sk)) {
1789 filter_rcv(sk, skb, xmitq); 2178 tipc_sk_filter_rcv(sk, skb, xmitq);
1790 continue; 2179 continue;
1791 } 2180 }
1792 2181
@@ -1833,14 +2222,10 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
1833 spin_unlock_bh(&sk->sk_lock.slock); 2222 spin_unlock_bh(&sk->sk_lock.slock);
1834 } 2223 }
1835 /* Send pending response/rejected messages, if any */ 2224 /* Send pending response/rejected messages, if any */
1836 while ((skb = __skb_dequeue(&xmitq))) { 2225 tipc_node_distr_xmit(sock_net(sk), &xmitq);
1837 dnode = msg_destnode(buf_msg(skb));
1838 tipc_node_xmit_skb(net, skb, dnode, dport);
1839 }
1840 sock_put(sk); 2226 sock_put(sk);
1841 continue; 2227 continue;
1842 } 2228 }
1843
1844 /* No destination socket => dequeue skb if still there */ 2229 /* No destination socket => dequeue skb if still there */
1845 skb = tipc_skb_dequeue(inputq, dport); 2230 skb = tipc_skb_dequeue(inputq, dport);
1846 if (!skb) 2231 if (!skb)
@@ -1903,28 +2288,32 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
1903 int previous; 2288 int previous;
1904 int res = 0; 2289 int res = 0;
1905 2290
2291 if (destlen != sizeof(struct sockaddr_tipc))
2292 return -EINVAL;
2293
1906 lock_sock(sk); 2294 lock_sock(sk);
1907 2295
1908 /* DGRAM/RDM connect(), just save the destaddr */ 2296 if (tsk->group) {
1909 if (tipc_sk_type_connectionless(sk)) { 2297 res = -EINVAL;
1910 if (dst->family == AF_UNSPEC) {
1911 memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
1912 } else if (destlen != sizeof(struct sockaddr_tipc)) {
1913 res = -EINVAL;
1914 } else {
1915 memcpy(&tsk->peer, dest, destlen);
1916 }
1917 goto exit; 2298 goto exit;
1918 } 2299 }
1919 2300
1920 /* 2301 if (dst->family == AF_UNSPEC) {
1921 * Reject connection attempt using multicast address 2302 memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
1922 * 2303 if (!tipc_sk_type_connectionless(sk))
1923 * Note: send_msg() validates the rest of the address fields, 2304 res = -EINVAL;
1924 * so there's no need to do it here 2305 goto exit;
1925 */ 2306 } else if (dst->family != AF_TIPC) {
1926 if (dst->addrtype == TIPC_ADDR_MCAST) {
1927 res = -EINVAL; 2307 res = -EINVAL;
2308 }
2309 if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
2310 res = -EINVAL;
2311 if (res)
2312 goto exit;
2313
2314 /* DGRAM/RDM connect(), just save the destaddr */
2315 if (tipc_sk_type_connectionless(sk)) {
2316 memcpy(&tsk->peer, dest, destlen);
1928 goto exit; 2317 goto exit;
1929 } 2318 }
1930 2319
@@ -2141,46 +2530,43 @@ static int tipc_shutdown(struct socket *sock, int how)
2141 return res; 2530 return res;
2142} 2531}
2143 2532
2144static void tipc_sk_timeout(unsigned long data) 2533static void tipc_sk_timeout(struct timer_list *t)
2145{ 2534{
2146 struct tipc_sock *tsk = (struct tipc_sock *)data; 2535 struct sock *sk = from_timer(sk, t, sk_timer);
2147 struct sock *sk = &tsk->sk; 2536 struct tipc_sock *tsk = tipc_sk(sk);
2148 struct sk_buff *skb = NULL; 2537 u32 peer_port = tsk_peer_port(tsk);
2149 u32 peer_port, peer_node; 2538 u32 peer_node = tsk_peer_node(tsk);
2150 u32 own_node = tsk_own_node(tsk); 2539 u32 own_node = tsk_own_node(tsk);
2540 u32 own_port = tsk->portid;
2541 struct net *net = sock_net(sk);
2542 struct sk_buff *skb = NULL;
2151 2543
2152 bh_lock_sock(sk); 2544 bh_lock_sock(sk);
2153 if (!tipc_sk_connected(sk)) { 2545 if (!tipc_sk_connected(sk))
2154 bh_unlock_sock(sk); 2546 goto exit;
2547
2548 /* Try again later if socket is busy */
2549 if (sock_owned_by_user(sk)) {
2550 sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
2155 goto exit; 2551 goto exit;
2156 } 2552 }
2157 peer_port = tsk_peer_port(tsk);
2158 peer_node = tsk_peer_node(tsk);
2159 2553
2160 if (tsk->probe_unacked) { 2554 if (tsk->probe_unacked) {
2161 if (!sock_owned_by_user(sk)) { 2555 tipc_set_sk_state(sk, TIPC_DISCONNECTING);
2162 tipc_set_sk_state(sk, TIPC_DISCONNECTING); 2556 tipc_node_remove_conn(net, peer_node, peer_port);
2163 tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), 2557 sk->sk_state_change(sk);
2164 tsk_peer_port(tsk));
2165 sk->sk_state_change(sk);
2166 } else {
2167 /* Try again later */
2168 sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
2169 }
2170
2171 bh_unlock_sock(sk);
2172 goto exit; 2558 goto exit;
2173 } 2559 }
2174 2560 /* Send new probe */
2175 skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, 2561 skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
2176 INT_H_SIZE, 0, peer_node, own_node, 2562 peer_node, own_node, peer_port, own_port,
2177 peer_port, tsk->portid, TIPC_OK); 2563 TIPC_OK);
2178 tsk->probe_unacked = true; 2564 tsk->probe_unacked = true;
2179 sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); 2565 sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
2566exit:
2180 bh_unlock_sock(sk); 2567 bh_unlock_sock(sk);
2181 if (skb) 2568 if (skb)
2182 tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); 2569 tipc_node_xmit_skb(net, skb, peer_node, own_port);
2183exit:
2184 sock_put(sk); 2570 sock_put(sk);
2185} 2571}
2186 2572
@@ -2345,6 +2731,58 @@ void tipc_sk_rht_destroy(struct net *net)
2345 rhashtable_destroy(&tn->sk_rht); 2731 rhashtable_destroy(&tn->sk_rht);
2346} 2732}
2347 2733
2734static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
2735{
2736 struct net *net = sock_net(&tsk->sk);
2737 u32 domain = addr_domain(net, mreq->scope);
2738 struct tipc_group *grp = tsk->group;
2739 struct tipc_msg *hdr = &tsk->phdr;
2740 struct tipc_name_seq seq;
2741 int rc;
2742
2743 if (mreq->type < TIPC_RESERVED_TYPES)
2744 return -EACCES;
2745 if (grp)
2746 return -EACCES;
2747 grp = tipc_group_create(net, tsk->portid, mreq);
2748 if (!grp)
2749 return -ENOMEM;
2750 tsk->group = grp;
2751 msg_set_lookup_scope(hdr, mreq->scope);
2752 msg_set_nametype(hdr, mreq->type);
2753 msg_set_dest_droppable(hdr, true);
2754 seq.type = mreq->type;
2755 seq.lower = mreq->instance;
2756 seq.upper = seq.lower;
2757 tipc_nametbl_build_group(net, grp, mreq->type, domain);
2758 rc = tipc_sk_publish(tsk, mreq->scope, &seq);
2759 if (rc) {
2760 tipc_group_delete(net, grp);
2761 tsk->group = NULL;
2762 }
2763
2764 /* Eliminate any risk that a broadcast overtakes the sent JOIN */
2765 tsk->mc_method.rcast = true;
2766 tsk->mc_method.mandatory = true;
2767 return rc;
2768}
2769
2770static int tipc_sk_leave(struct tipc_sock *tsk)
2771{
2772 struct net *net = sock_net(&tsk->sk);
2773 struct tipc_group *grp = tsk->group;
2774 struct tipc_name_seq seq;
2775 int scope;
2776
2777 if (!grp)
2778 return -EINVAL;
2779 tipc_group_self(grp, &seq, &scope);
2780 tipc_group_delete(net, grp);
2781 tsk->group = NULL;
2782 tipc_sk_withdraw(tsk, scope, &seq);
2783 return 0;
2784}
2785
2348/** 2786/**
2349 * tipc_setsockopt - set socket option 2787 * tipc_setsockopt - set socket option
2350 * @sock: socket structure 2788 * @sock: socket structure
@@ -2363,6 +2801,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
2363{ 2801{
2364 struct sock *sk = sock->sk; 2802 struct sock *sk = sock->sk;
2365 struct tipc_sock *tsk = tipc_sk(sk); 2803 struct tipc_sock *tsk = tipc_sk(sk);
2804 struct tipc_group_req mreq;
2366 u32 value = 0; 2805 u32 value = 0;
2367 int res = 0; 2806 int res = 0;
2368 2807
@@ -2378,9 +2817,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
2378 case TIPC_CONN_TIMEOUT: 2817 case TIPC_CONN_TIMEOUT:
2379 if (ol < sizeof(value)) 2818 if (ol < sizeof(value))
2380 return -EINVAL; 2819 return -EINVAL;
2381 res = get_user(value, (u32 __user *)ov); 2820 if (get_user(value, (u32 __user *)ov))
2382 if (res) 2821 return -EFAULT;
2383 return res; 2822 break;
2823 case TIPC_GROUP_JOIN:
2824 if (ol < sizeof(mreq))
2825 return -EINVAL;
2826 if (copy_from_user(&mreq, ov, sizeof(mreq)))
2827 return -EFAULT;
2384 break; 2828 break;
2385 default: 2829 default:
2386 if (ov || ol) 2830 if (ov || ol)
@@ -2413,6 +2857,12 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
2413 tsk->mc_method.rcast = true; 2857 tsk->mc_method.rcast = true;
2414 tsk->mc_method.mandatory = true; 2858 tsk->mc_method.mandatory = true;
2415 break; 2859 break;
2860 case TIPC_GROUP_JOIN:
2861 res = tipc_sk_join(tsk, &mreq);
2862 break;
2863 case TIPC_GROUP_LEAVE:
2864 res = tipc_sk_leave(tsk);
2865 break;
2416 default: 2866 default:
2417 res = -EINVAL; 2867 res = -EINVAL;
2418 } 2868 }
@@ -2440,7 +2890,8 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
2440{ 2890{
2441 struct sock *sk = sock->sk; 2891 struct sock *sk = sock->sk;
2442 struct tipc_sock *tsk = tipc_sk(sk); 2892 struct tipc_sock *tsk = tipc_sk(sk);
2443 int len; 2893 struct tipc_name_seq seq;
2894 int len, scope;
2444 u32 value; 2895 u32 value;
2445 int res; 2896 int res;
2446 2897
@@ -2474,6 +2925,12 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
2474 case TIPC_SOCK_RECVQ_DEPTH: 2925 case TIPC_SOCK_RECVQ_DEPTH:
2475 value = skb_queue_len(&sk->sk_receive_queue); 2926 value = skb_queue_len(&sk->sk_receive_queue);
2476 break; 2927 break;
2928 case TIPC_GROUP_JOIN:
2929 seq.type = 0;
2930 if (tsk->group)
2931 tipc_group_self(tsk->group, &seq, &scope);
2932 value = seq.type;
2933 break;
2477 default: 2934 default:
2478 res = -EINVAL; 2935 res = -EINVAL;
2479 } 2936 }
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index be3d9e3183dc..251065dfd8df 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -133,9 +133,9 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
133 node); 133 node);
134} 134}
135 135
136static void tipc_subscrp_timeout(unsigned long data) 136static void tipc_subscrp_timeout(struct timer_list *t)
137{ 137{
138 struct tipc_subscription *sub = (struct tipc_subscription *)data; 138 struct tipc_subscription *sub = from_timer(sub, t, timer);
139 struct tipc_subscriber *subscriber = sub->subscriber; 139 struct tipc_subscriber *subscriber = sub->subscriber;
140 140
141 spin_lock_bh(&subscriber->lock); 141 spin_lock_bh(&subscriber->lock);
@@ -303,7 +303,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
303 tipc_subscrb_get(subscriber); 303 tipc_subscrb_get(subscriber);
304 spin_unlock_bh(&subscriber->lock); 304 spin_unlock_bh(&subscriber->lock);
305 305
306 setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); 306 timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
307 timeout = htohl(sub->evt.s.timeout, swap); 307 timeout = htohl(sub->evt.s.timeout, swap);
308 308
309 if (timeout != TIPC_WAIT_FOREVER) 309 if (timeout != TIPC_WAIT_FOREVER)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 60aff60e30ad..e07ee3ae0023 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -45,8 +45,18 @@ MODULE_AUTHOR("Mellanox Technologies");
45MODULE_DESCRIPTION("Transport Layer Security Support"); 45MODULE_DESCRIPTION("Transport Layer Security Support");
46MODULE_LICENSE("Dual BSD/GPL"); 46MODULE_LICENSE("Dual BSD/GPL");
47 47
48static struct proto tls_base_prot; 48enum {
49static struct proto tls_sw_prot; 49 TLS_BASE_TX,
50 TLS_SW_TX,
51 TLS_NUM_CONFIG,
52};
53
54static struct proto tls_prots[TLS_NUM_CONFIG];
55
56static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
57{
58 sk->sk_prot = &tls_prots[ctx->tx_conf];
59}
50 60
51int wait_on_pending_writer(struct sock *sk, long *timeo) 61int wait_on_pending_writer(struct sock *sk, long *timeo)
52{ 62{
@@ -216,6 +226,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
216 void (*sk_proto_close)(struct sock *sk, long timeout); 226 void (*sk_proto_close)(struct sock *sk, long timeout);
217 227
218 lock_sock(sk); 228 lock_sock(sk);
229 sk_proto_close = ctx->sk_proto_close;
230
231 if (ctx->tx_conf == TLS_BASE_TX) {
232 kfree(ctx);
233 goto skip_tx_cleanup;
234 }
219 235
220 if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) 236 if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
221 tls_handle_open_record(sk, 0); 237 tls_handle_open_record(sk, 0);
@@ -232,13 +248,14 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
232 sg++; 248 sg++;
233 } 249 }
234 } 250 }
235 ctx->free_resources(sk); 251
236 kfree(ctx->rec_seq); 252 kfree(ctx->rec_seq);
237 kfree(ctx->iv); 253 kfree(ctx->iv);
238 254
239 sk_proto_close = ctx->sk_proto_close; 255 if (ctx->tx_conf == TLS_SW_TX)
240 kfree(ctx); 256 tls_sw_free_tx_resources(sk);
241 257
258skip_tx_cleanup:
242 release_sock(sk); 259 release_sock(sk);
243 sk_proto_close(sk, timeout); 260 sk_proto_close(sk, timeout);
244} 261}
@@ -338,46 +355,41 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
338static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, 355static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
339 unsigned int optlen) 356 unsigned int optlen)
340{ 357{
341 struct tls_crypto_info *crypto_info, tmp_crypto_info; 358 struct tls_crypto_info *crypto_info;
342 struct tls_context *ctx = tls_get_ctx(sk); 359 struct tls_context *ctx = tls_get_ctx(sk);
343 struct proto *prot = NULL;
344 int rc = 0; 360 int rc = 0;
361 int tx_conf;
345 362
346 if (!optval || (optlen < sizeof(*crypto_info))) { 363 if (!optval || (optlen < sizeof(*crypto_info))) {
347 rc = -EINVAL; 364 rc = -EINVAL;
348 goto out; 365 goto out;
349 } 366 }
350 367
351 rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info)); 368 crypto_info = &ctx->crypto_send;
369 /* Currently we don't support set crypto info more than one time */
370 if (TLS_CRYPTO_INFO_READY(crypto_info))
371 goto out;
372
373 rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
352 if (rc) { 374 if (rc) {
353 rc = -EFAULT; 375 rc = -EFAULT;
354 goto out; 376 goto out;
355 } 377 }
356 378
357 /* check version */ 379 /* check version */
358 if (tmp_crypto_info.version != TLS_1_2_VERSION) { 380 if (crypto_info->version != TLS_1_2_VERSION) {
359 rc = -ENOTSUPP; 381 rc = -ENOTSUPP;
360 goto out; 382 goto err_crypto_info;
361 } 383 }
362 384
363 /* get user crypto info */ 385 switch (crypto_info->cipher_type) {
364 crypto_info = &ctx->crypto_send;
365
366 /* Currently we don't support set crypto info more than one time */
367 if (TLS_CRYPTO_INFO_READY(crypto_info))
368 goto out;
369
370 switch (tmp_crypto_info.cipher_type) {
371 case TLS_CIPHER_AES_GCM_128: { 386 case TLS_CIPHER_AES_GCM_128: {
372 if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { 387 if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
373 rc = -EINVAL; 388 rc = -EINVAL;
374 goto out; 389 goto out;
375 } 390 }
376 rc = copy_from_user( 391 rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
377 crypto_info, 392 optlen - sizeof(*crypto_info));
378 optval,
379 sizeof(struct tls12_crypto_info_aes_gcm_128));
380
381 if (rc) { 393 if (rc) {
382 rc = -EFAULT; 394 rc = -EFAULT;
383 goto err_crypto_info; 395 goto err_crypto_info;
@@ -389,18 +401,16 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
389 goto out; 401 goto out;
390 } 402 }
391 403
392 ctx->sk_write_space = sk->sk_write_space;
393 sk->sk_write_space = tls_write_space;
394
395 ctx->sk_proto_close = sk->sk_prot->close;
396
397 /* currently SW is default, we will have ethtool in future */ 404 /* currently SW is default, we will have ethtool in future */
398 rc = tls_set_sw_offload(sk, ctx); 405 rc = tls_set_sw_offload(sk, ctx);
399 prot = &tls_sw_prot; 406 tx_conf = TLS_SW_TX;
400 if (rc) 407 if (rc)
401 goto err_crypto_info; 408 goto err_crypto_info;
402 409
403 sk->sk_prot = prot; 410 ctx->tx_conf = tx_conf;
411 update_sk_prot(sk, ctx);
412 ctx->sk_write_space = sk->sk_write_space;
413 sk->sk_write_space = tls_write_space;
404 goto out; 414 goto out;
405 415
406err_crypto_info: 416err_crypto_info:
@@ -453,7 +463,10 @@ static int tls_init(struct sock *sk)
453 icsk->icsk_ulp_data = ctx; 463 icsk->icsk_ulp_data = ctx;
454 ctx->setsockopt = sk->sk_prot->setsockopt; 464 ctx->setsockopt = sk->sk_prot->setsockopt;
455 ctx->getsockopt = sk->sk_prot->getsockopt; 465 ctx->getsockopt = sk->sk_prot->getsockopt;
456 sk->sk_prot = &tls_base_prot; 466 ctx->sk_proto_close = sk->sk_prot->close;
467
468 ctx->tx_conf = TLS_BASE_TX;
469 update_sk_prot(sk, ctx);
457out: 470out:
458 return rc; 471 return rc;
459} 472}
@@ -464,16 +477,21 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
464 .init = tls_init, 477 .init = tls_init,
465}; 478};
466 479
480static void build_protos(struct proto *prot, struct proto *base)
481{
482 prot[TLS_BASE_TX] = *base;
483 prot[TLS_BASE_TX].setsockopt = tls_setsockopt;
484 prot[TLS_BASE_TX].getsockopt = tls_getsockopt;
485 prot[TLS_BASE_TX].close = tls_sk_proto_close;
486
487 prot[TLS_SW_TX] = prot[TLS_BASE_TX];
488 prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg;
489 prot[TLS_SW_TX].sendpage = tls_sw_sendpage;
490}
491
467static int __init tls_register(void) 492static int __init tls_register(void)
468{ 493{
469 tls_base_prot = tcp_prot; 494 build_protos(tls_prots, &tcp_prot);
470 tls_base_prot.setsockopt = tls_setsockopt;
471 tls_base_prot.getsockopt = tls_getsockopt;
472
473 tls_sw_prot = tls_base_prot;
474 tls_sw_prot.sendmsg = tls_sw_sendmsg;
475 tls_sw_prot.sendpage = tls_sw_sendpage;
476 tls_sw_prot.close = tls_sk_proto_close;
477 495
478 tcp_register_ulp(&tcp_tls_ulp_ops); 496 tcp_register_ulp(&tcp_tls_ulp_ops);
479 497
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 7d80040a37b6..73d19210dd49 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -39,22 +39,6 @@
39 39
40#include <net/tls.h> 40#include <net/tls.h>
41 41
42static inline void tls_make_aad(int recv,
43 char *buf,
44 size_t size,
45 char *record_sequence,
46 int record_sequence_size,
47 unsigned char record_type)
48{
49 memcpy(buf, record_sequence, record_sequence_size);
50
51 buf[8] = record_type;
52 buf[9] = TLS_1_2_VERSION_MAJOR;
53 buf[10] = TLS_1_2_VERSION_MINOR;
54 buf[11] = size >> 8;
55 buf[12] = size & 0xFF;
56}
57
58static void trim_sg(struct sock *sk, struct scatterlist *sg, 42static void trim_sg(struct sock *sk, struct scatterlist *sg,
59 int *sg_num_elem, unsigned int *sg_size, int target_size) 43 int *sg_num_elem, unsigned int *sg_size, int target_size)
60{ 44{
@@ -219,7 +203,7 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
219 struct aead_request *aead_req; 203 struct aead_request *aead_req;
220 int rc; 204 int rc;
221 205
222 aead_req = kmalloc(req_size, flags); 206 aead_req = kzalloc(req_size, flags);
223 if (!aead_req) 207 if (!aead_req)
224 return -ENOMEM; 208 return -ENOMEM;
225 209
@@ -249,7 +233,7 @@ static int tls_push_record(struct sock *sk, int flags,
249 sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1); 233 sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1);
250 sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1); 234 sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
251 235
252 tls_make_aad(0, ctx->aad_space, ctx->sg_plaintext_size, 236 tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size,
253 tls_ctx->rec_seq, tls_ctx->rec_seq_size, 237 tls_ctx->rec_seq, tls_ctx->rec_seq_size,
254 record_type); 238 record_type);
255 239
@@ -639,7 +623,7 @@ sendpage_end:
639 return ret; 623 return ret;
640} 624}
641 625
642static void tls_sw_free_resources(struct sock *sk) 626void tls_sw_free_tx_resources(struct sock *sk)
643{ 627{
644 struct tls_context *tls_ctx = tls_get_ctx(sk); 628 struct tls_context *tls_ctx = tls_get_ctx(sk);
645 struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); 629 struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
@@ -650,6 +634,7 @@ static void tls_sw_free_resources(struct sock *sk)
650 tls_free_both_sg(sk); 634 tls_free_both_sg(sk);
651 635
652 kfree(ctx); 636 kfree(ctx);
637 kfree(tls_ctx);
653} 638}
654 639
655int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) 640int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
@@ -679,7 +664,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
679 } 664 }
680 665
681 ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; 666 ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
682 ctx->free_resources = tls_sw_free_resources;
683 667
684 crypto_info = &ctx->crypto_send; 668 crypto_info = &ctx->crypto_send;
685 switch (crypto_info->cipher_type) { 669 switch (crypto_info->cipher_type) {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7f46bab4ce5c..a9ee634f3c42 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -814,6 +814,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
814 */ 814 */
815 case SOCK_RAW: 815 case SOCK_RAW:
816 sock->type = SOCK_DGRAM; 816 sock->type = SOCK_DGRAM;
817 /* fall through */
817 case SOCK_DGRAM: 818 case SOCK_DGRAM:
818 sock->ops = &unix_dgram_ops; 819 sock->ops = &unix_dgram_ops;
819 break; 820 break;
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index a24369d175fd..970f96489fe7 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -15,6 +15,16 @@ config VSOCKETS
15 To compile this driver as a module, choose M here: the module 15 To compile this driver as a module, choose M here: the module
16 will be called vsock. If unsure, say N. 16 will be called vsock. If unsure, say N.
17 17
18config VSOCKETS_DIAG
19 tristate "Virtual Sockets monitoring interface"
20 depends on VSOCKETS
21 default y
22 help
23 Support for PF_VSOCK sockets monitoring interface used by the ss tool.
24 If unsure, say Y.
25
26 Enable this module so userspace applications can query open sockets.
27
18config VMWARE_VMCI_VSOCKETS 28config VMWARE_VMCI_VSOCKETS
19 tristate "VMware VMCI transport for Virtual Sockets" 29 tristate "VMware VMCI transport for Virtual Sockets"
20 depends on VSOCKETS && VMWARE_VMCI 30 depends on VSOCKETS && VMWARE_VMCI
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index 30a263320e4f..7c6f9a0b67b0 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -1,5 +1,6 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2obj-$(CONFIG_VSOCKETS) += vsock.o 2obj-$(CONFIG_VSOCKETS) += vsock.o
3obj-$(CONFIG_VSOCKETS_DIAG) += vsock_diag.o
3obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o 4obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
4obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o 5obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
5obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o 6obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
@@ -7,6 +8,8 @@ obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o
7 8
8vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o 9vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
9 10
11vsock_diag-y += diag.o
12
10vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ 13vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
11 vmci_transport_notify_qstate.o 14 vmci_transport_notify_qstate.o
12 15
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index dfc8c51e4d74..5d28abf87fbf 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -36,7 +36,7 @@
36 * not support simultaneous connects (two "client" sockets connecting). 36 * not support simultaneous connects (two "client" sockets connecting).
37 * 37 *
38 * - "Server" sockets are referred to as listener sockets throughout this 38 * - "Server" sockets are referred to as listener sockets throughout this
39 * implementation because they are in the VSOCK_SS_LISTEN state. When a 39 * implementation because they are in the TCP_LISTEN state. When a
40 * connection request is received (the second kind of socket mentioned above), 40 * connection request is received (the second kind of socket mentioned above),
41 * we create a new socket and refer to it as a pending socket. These pending 41 * we create a new socket and refer to it as a pending socket. These pending
42 * sockets are placed on the pending connection list of the listener socket. 42 * sockets are placed on the pending connection list of the listener socket.
@@ -82,6 +82,15 @@
82 * argument, we must ensure the reference count is increased to ensure the 82 * argument, we must ensure the reference count is increased to ensure the
83 * socket isn't freed before the function is run; the deferred function will 83 * socket isn't freed before the function is run; the deferred function will
84 * then drop the reference. 84 * then drop the reference.
85 *
86 * - sk->sk_state uses the TCP state constants because they are widely used by
87 * other address families and exposed to userspace tools like ss(8):
88 *
89 * TCP_CLOSE - unconnected
90 * TCP_SYN_SENT - connecting
91 * TCP_ESTABLISHED - connected
92 * TCP_CLOSING - disconnecting
93 * TCP_LISTEN - listening
85 */ 94 */
86 95
87#include <linux/types.h> 96#include <linux/types.h>
@@ -153,7 +162,6 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
153 * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function 162 * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function
154 * mods with VSOCK_HASH_SIZE to ensure this. 163 * mods with VSOCK_HASH_SIZE to ensure this.
155 */ 164 */
156#define VSOCK_HASH_SIZE 251
157#define MAX_PORT_RETRIES 24 165#define MAX_PORT_RETRIES 24
158 166
159#define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE) 167#define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE)
@@ -168,9 +176,12 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
168#define vsock_connected_sockets_vsk(vsk) \ 176#define vsock_connected_sockets_vsk(vsk) \
169 vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr) 177 vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)
170 178
171static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; 179struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
172static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; 180EXPORT_SYMBOL_GPL(vsock_bind_table);
173static DEFINE_SPINLOCK(vsock_table_lock); 181struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
182EXPORT_SYMBOL_GPL(vsock_connected_table);
183DEFINE_SPINLOCK(vsock_table_lock);
184EXPORT_SYMBOL_GPL(vsock_table_lock);
174 185
175/* Autobind this socket to the local address if necessary. */ 186/* Autobind this socket to the local address if necessary. */
176static int vsock_auto_bind(struct vsock_sock *vsk) 187static int vsock_auto_bind(struct vsock_sock *vsk)
@@ -184,7 +195,7 @@ static int vsock_auto_bind(struct vsock_sock *vsk)
184 return __vsock_bind(sk, &local_addr); 195 return __vsock_bind(sk, &local_addr);
185} 196}
186 197
187static void vsock_init_tables(void) 198static int __init vsock_init_tables(void)
188{ 199{
189 int i; 200 int i;
190 201
@@ -193,6 +204,7 @@ static void vsock_init_tables(void)
193 204
194 for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) 205 for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
195 INIT_LIST_HEAD(&vsock_connected_table[i]); 206 INIT_LIST_HEAD(&vsock_connected_table[i]);
207 return 0;
196} 208}
197 209
198static void __vsock_insert_bound(struct list_head *list, 210static void __vsock_insert_bound(struct list_head *list,
@@ -248,16 +260,6 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
248 return NULL; 260 return NULL;
249} 261}
250 262
251static bool __vsock_in_bound_table(struct vsock_sock *vsk)
252{
253 return !list_empty(&vsk->bound_table);
254}
255
256static bool __vsock_in_connected_table(struct vsock_sock *vsk)
257{
258 return !list_empty(&vsk->connected_table);
259}
260
261static void vsock_insert_unbound(struct vsock_sock *vsk) 263static void vsock_insert_unbound(struct vsock_sock *vsk)
262{ 264{
263 spin_lock_bh(&vsock_table_lock); 265 spin_lock_bh(&vsock_table_lock);
@@ -485,7 +487,7 @@ void vsock_pending_work(struct work_struct *work)
485 if (vsock_in_connected_table(vsk)) 487 if (vsock_in_connected_table(vsk))
486 vsock_remove_connected(vsk); 488 vsock_remove_connected(vsk);
487 489
488 sk->sk_state = SS_FREE; 490 sk->sk_state = TCP_CLOSE;
489 491
490out: 492out:
491 release_sock(sk); 493 release_sock(sk);
@@ -625,7 +627,6 @@ struct sock *__vsock_create(struct net *net,
625 627
626 sk->sk_destruct = vsock_sk_destruct; 628 sk->sk_destruct = vsock_sk_destruct;
627 sk->sk_backlog_rcv = vsock_queue_rcv_skb; 629 sk->sk_backlog_rcv = vsock_queue_rcv_skb;
628 sk->sk_state = 0;
629 sock_reset_flag(sk, SOCK_DONE); 630 sock_reset_flag(sk, SOCK_DONE);
630 631
631 INIT_LIST_HEAD(&vsk->bound_table); 632 INIT_LIST_HEAD(&vsk->bound_table);
@@ -899,7 +900,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
899 /* Listening sockets that have connections in their accept 900 /* Listening sockets that have connections in their accept
900 * queue can be read. 901 * queue can be read.
901 */ 902 */
902 if (sk->sk_state == VSOCK_SS_LISTEN 903 if (sk->sk_state == TCP_LISTEN
903 && !vsock_is_accept_queue_empty(sk)) 904 && !vsock_is_accept_queue_empty(sk))
904 mask |= POLLIN | POLLRDNORM; 905 mask |= POLLIN | POLLRDNORM;
905 906
@@ -928,7 +929,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
928 } 929 }
929 930
930 /* Connected sockets that can produce data can be written. */ 931 /* Connected sockets that can produce data can be written. */
931 if (sk->sk_state == SS_CONNECTED) { 932 if (sk->sk_state == TCP_ESTABLISHED) {
932 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { 933 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
933 bool space_avail_now = false; 934 bool space_avail_now = false;
934 int ret = transport->notify_poll_out( 935 int ret = transport->notify_poll_out(
@@ -950,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
950 * POLLOUT|POLLWRNORM when peer is closed and nothing to read, 951 * POLLOUT|POLLWRNORM when peer is closed and nothing to read,
951 * but local send is not shutdown. 952 * but local send is not shutdown.
952 */ 953 */
953 if (sk->sk_state == SS_UNCONNECTED) { 954 if (sk->sk_state == TCP_CLOSE) {
954 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) 955 if (!(sk->sk_shutdown & SEND_SHUTDOWN))
955 mask |= POLLOUT | POLLWRNORM; 956 mask |= POLLOUT | POLLWRNORM;
956 957
@@ -1120,9 +1121,9 @@ static void vsock_connect_timeout(struct work_struct *work)
1120 sk = sk_vsock(vsk); 1121 sk = sk_vsock(vsk);
1121 1122
1122 lock_sock(sk); 1123 lock_sock(sk);
1123 if (sk->sk_state == SS_CONNECTING && 1124 if (sk->sk_state == TCP_SYN_SENT &&
1124 (sk->sk_shutdown != SHUTDOWN_MASK)) { 1125 (sk->sk_shutdown != SHUTDOWN_MASK)) {
1125 sk->sk_state = SS_UNCONNECTED; 1126 sk->sk_state = TCP_CLOSE;
1126 sk->sk_err = ETIMEDOUT; 1127 sk->sk_err = ETIMEDOUT;
1127 sk->sk_error_report(sk); 1128 sk->sk_error_report(sk);
1128 cancel = 1; 1129 cancel = 1;
@@ -1168,7 +1169,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1168 err = -EALREADY; 1169 err = -EALREADY;
1169 break; 1170 break;
1170 default: 1171 default:
1171 if ((sk->sk_state == VSOCK_SS_LISTEN) || 1172 if ((sk->sk_state == TCP_LISTEN) ||
1172 vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { 1173 vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
1173 err = -EINVAL; 1174 err = -EINVAL;
1174 goto out; 1175 goto out;
@@ -1191,7 +1192,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1191 if (err) 1192 if (err)
1192 goto out; 1193 goto out;
1193 1194
1194 sk->sk_state = SS_CONNECTING; 1195 sk->sk_state = TCP_SYN_SENT;
1195 1196
1196 err = transport->connect(vsk); 1197 err = transport->connect(vsk);
1197 if (err < 0) 1198 if (err < 0)
@@ -1211,7 +1212,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1211 timeout = vsk->connect_timeout; 1212 timeout = vsk->connect_timeout;
1212 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1213 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1213 1214
1214 while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { 1215 while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
1215 if (flags & O_NONBLOCK) { 1216 if (flags & O_NONBLOCK) {
1216 /* If we're not going to block, we schedule a timeout 1217 /* If we're not going to block, we schedule a timeout
1217 * function to generate a timeout on the connection 1218 * function to generate a timeout on the connection
@@ -1234,13 +1235,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1234 1235
1235 if (signal_pending(current)) { 1236 if (signal_pending(current)) {
1236 err = sock_intr_errno(timeout); 1237 err = sock_intr_errno(timeout);
1237 sk->sk_state = SS_UNCONNECTED; 1238 sk->sk_state = TCP_CLOSE;
1238 sock->state = SS_UNCONNECTED; 1239 sock->state = SS_UNCONNECTED;
1239 vsock_transport_cancel_pkt(vsk); 1240 vsock_transport_cancel_pkt(vsk);
1240 goto out_wait; 1241 goto out_wait;
1241 } else if (timeout == 0) { 1242 } else if (timeout == 0) {
1242 err = -ETIMEDOUT; 1243 err = -ETIMEDOUT;
1243 sk->sk_state = SS_UNCONNECTED; 1244 sk->sk_state = TCP_CLOSE;
1244 sock->state = SS_UNCONNECTED; 1245 sock->state = SS_UNCONNECTED;
1245 vsock_transport_cancel_pkt(vsk); 1246 vsock_transport_cancel_pkt(vsk);
1246 goto out_wait; 1247 goto out_wait;
@@ -1251,7 +1252,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1251 1252
1252 if (sk->sk_err) { 1253 if (sk->sk_err) {
1253 err = -sk->sk_err; 1254 err = -sk->sk_err;
1254 sk->sk_state = SS_UNCONNECTED; 1255 sk->sk_state = TCP_CLOSE;
1255 sock->state = SS_UNCONNECTED; 1256 sock->state = SS_UNCONNECTED;
1256 } else { 1257 } else {
1257 err = 0; 1258 err = 0;
@@ -1284,7 +1285,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
1284 goto out; 1285 goto out;
1285 } 1286 }
1286 1287
1287 if (listener->sk_state != VSOCK_SS_LISTEN) { 1288 if (listener->sk_state != TCP_LISTEN) {
1288 err = -EINVAL; 1289 err = -EINVAL;
1289 goto out; 1290 goto out;
1290 } 1291 }
@@ -1374,7 +1375,7 @@ static int vsock_listen(struct socket *sock, int backlog)
1374 } 1375 }
1375 1376
1376 sk->sk_max_ack_backlog = backlog; 1377 sk->sk_max_ack_backlog = backlog;
1377 sk->sk_state = VSOCK_SS_LISTEN; 1378 sk->sk_state = TCP_LISTEN;
1378 1379
1379 err = 0; 1380 err = 0;
1380 1381
@@ -1554,7 +1555,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1554 1555
1555 /* Callers should not provide a destination with stream sockets. */ 1556 /* Callers should not provide a destination with stream sockets. */
1556 if (msg->msg_namelen) { 1557 if (msg->msg_namelen) {
1557 err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP; 1558 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1558 goto out; 1559 goto out;
1559 } 1560 }
1560 1561
@@ -1565,7 +1566,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1565 goto out; 1566 goto out;
1566 } 1567 }
1567 1568
1568 if (sk->sk_state != SS_CONNECTED || 1569 if (sk->sk_state != TCP_ESTABLISHED ||
1569 !vsock_addr_bound(&vsk->local_addr)) { 1570 !vsock_addr_bound(&vsk->local_addr)) {
1570 err = -ENOTCONN; 1571 err = -ENOTCONN;
1571 goto out; 1572 goto out;
@@ -1689,7 +1690,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1689 1690
1690 lock_sock(sk); 1691 lock_sock(sk);
1691 1692
1692 if (sk->sk_state != SS_CONNECTED) { 1693 if (sk->sk_state != TCP_ESTABLISHED) {
1693 /* Recvmsg is supposed to return 0 if a peer performs an 1694 /* Recvmsg is supposed to return 0 if a peer performs an
1694 * orderly shutdown. Differentiate between that case and when a 1695 * orderly shutdown. Differentiate between that case and when a
1695 * peer has not connected or a local shutdown occured with the 1696 * peer has not connected or a local shutdown occured with the
@@ -1957,8 +1958,6 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
1957 vsock_proto.owner = owner; 1958 vsock_proto.owner = owner;
1958 transport = t; 1959 transport = t;
1959 1960
1960 vsock_init_tables();
1961
1962 vsock_device.minor = MISC_DYNAMIC_MINOR; 1961 vsock_device.minor = MISC_DYNAMIC_MINOR;
1963 err = misc_register(&vsock_device); 1962 err = misc_register(&vsock_device);
1964 if (err) { 1963 if (err) {
@@ -2019,6 +2018,8 @@ const struct vsock_transport *vsock_core_get_transport(void)
2019} 2018}
2020EXPORT_SYMBOL_GPL(vsock_core_get_transport); 2019EXPORT_SYMBOL_GPL(vsock_core_get_transport);
2021 2020
2021module_init(vsock_init_tables);
2022
2022MODULE_AUTHOR("VMware, Inc."); 2023MODULE_AUTHOR("VMware, Inc.");
2023MODULE_DESCRIPTION("VMware Virtual Socket Family"); 2024MODULE_DESCRIPTION("VMware Virtual Socket Family");
2024MODULE_VERSION("1.0.2.0-k"); 2025MODULE_VERSION("1.0.2.0-k");
diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c
new file mode 100644
index 000000000000..31b567652250
--- /dev/null
+++ b/net/vmw_vsock/diag.c
@@ -0,0 +1,186 @@
1/*
2 * vsock sock_diag(7) module
3 *
4 * Copyright (C) 2017 Red Hat, Inc.
5 * Author: Stefan Hajnoczi <stefanha@redhat.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the Free
9 * Software Foundation version 2 and no later version.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 */
16
17#include <linux/module.h>
18#include <linux/sock_diag.h>
19#include <linux/vm_sockets_diag.h>
20#include <net/af_vsock.h>
21
22static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
23 u32 portid, u32 seq, u32 flags)
24{
25 struct vsock_sock *vsk = vsock_sk(sk);
26 struct vsock_diag_msg *rep;
27 struct nlmsghdr *nlh;
28
29 nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
30 flags);
31 if (!nlh)
32 return -EMSGSIZE;
33
34 rep = nlmsg_data(nlh);
35 rep->vdiag_family = AF_VSOCK;
36
37 /* Lock order dictates that sk_lock is acquired before
38 * vsock_table_lock, so we cannot lock here. Simply don't take
39 * sk_lock; sk is guaranteed to stay alive since vsock_table_lock is
40 * held.
41 */
42 rep->vdiag_type = sk->sk_type;
43 rep->vdiag_state = sk->sk_state;
44 rep->vdiag_shutdown = sk->sk_shutdown;
45 rep->vdiag_src_cid = vsk->local_addr.svm_cid;
46 rep->vdiag_src_port = vsk->local_addr.svm_port;
47 rep->vdiag_dst_cid = vsk->remote_addr.svm_cid;
48 rep->vdiag_dst_port = vsk->remote_addr.svm_port;
49 rep->vdiag_ino = sock_i_ino(sk);
50
51 sock_diag_save_cookie(sk, rep->vdiag_cookie);
52
53 return 0;
54}
55
56static int vsock_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
57{
58 struct vsock_diag_req *req;
59 struct vsock_sock *vsk;
60 unsigned int bucket;
61 unsigned int last_i;
62 unsigned int table;
63 struct net *net;
64 unsigned int i;
65
66 req = nlmsg_data(cb->nlh);
67 net = sock_net(skb->sk);
68
69 /* State saved between calls: */
70 table = cb->args[0];
71 bucket = cb->args[1];
72 i = last_i = cb->args[2];
73
74 /* TODO VMCI pending sockets? */
75
76 spin_lock_bh(&vsock_table_lock);
77
78 /* Bind table (locally created sockets) */
79 if (table == 0) {
80 while (bucket < ARRAY_SIZE(vsock_bind_table)) {
81 struct list_head *head = &vsock_bind_table[bucket];
82
83 i = 0;
84 list_for_each_entry(vsk, head, bound_table) {
85 struct sock *sk = sk_vsock(vsk);
86
87 if (!net_eq(sock_net(sk), net))
88 continue;
89 if (i < last_i)
90 goto next_bind;
91 if (!(req->vdiag_states & (1 << sk->sk_state)))
92 goto next_bind;
93 if (sk_diag_fill(sk, skb,
94 NETLINK_CB(cb->skb).portid,
95 cb->nlh->nlmsg_seq,
96 NLM_F_MULTI) < 0)
97 goto done;
98next_bind:
99 i++;
100 }
101 last_i = 0;
102 bucket++;
103 }
104
105 table++;
106 bucket = 0;
107 }
108
109 /* Connected table (accepted connections) */
110 while (bucket < ARRAY_SIZE(vsock_connected_table)) {
111 struct list_head *head = &vsock_connected_table[bucket];
112
113 i = 0;
114 list_for_each_entry(vsk, head, connected_table) {
115 struct sock *sk = sk_vsock(vsk);
116
117 /* Skip sockets we've already seen above */
118 if (__vsock_in_bound_table(vsk))
119 continue;
120
121 if (!net_eq(sock_net(sk), net))
122 continue;
123 if (i < last_i)
124 goto next_connected;
125 if (!(req->vdiag_states & (1 << sk->sk_state)))
126 goto next_connected;
127 if (sk_diag_fill(sk, skb,
128 NETLINK_CB(cb->skb).portid,
129 cb->nlh->nlmsg_seq,
130 NLM_F_MULTI) < 0)
131 goto done;
132next_connected:
133 i++;
134 }
135 last_i = 0;
136 bucket++;
137 }
138
139done:
140 spin_unlock_bh(&vsock_table_lock);
141
142 cb->args[0] = table;
143 cb->args[1] = bucket;
144 cb->args[2] = i;
145
146 return skb->len;
147}
148
149static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
150{
151 int hdrlen = sizeof(struct vsock_diag_req);
152 struct net *net = sock_net(skb->sk);
153
154 if (nlmsg_len(h) < hdrlen)
155 return -EINVAL;
156
157 if (h->nlmsg_flags & NLM_F_DUMP) {
158 struct netlink_dump_control c = {
159 .dump = vsock_diag_dump,
160 };
161 return netlink_dump_start(net->diag_nlsk, skb, h, &c);
162 }
163
164 return -EOPNOTSUPP;
165}
166
167static const struct sock_diag_handler vsock_diag_handler = {
168 .family = AF_VSOCK,
169 .dump = vsock_diag_handler_dump,
170};
171
172static int __init vsock_diag_init(void)
173{
174 return sock_diag_register(&vsock_diag_handler);
175}
176
177static void __exit vsock_diag_exit(void)
178{
179 sock_diag_unregister(&vsock_diag_handler);
180}
181
182module_init(vsock_diag_init);
183module_exit(vsock_diag_exit);
184MODULE_LICENSE("GPL");
185MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG,
186 40 /* AF_VSOCK */);
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index e21991fe883a..5583df708b8c 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -312,7 +312,7 @@ static void hvs_close_connection(struct vmbus_channel *chan)
312 312
313 lock_sock(sk); 313 lock_sock(sk);
314 314
315 sk->sk_state = SS_UNCONNECTED; 315 sk->sk_state = TCP_CLOSE;
316 sock_set_flag(sk, SOCK_DONE); 316 sock_set_flag(sk, SOCK_DONE);
317 vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; 317 vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
318 318
@@ -349,9 +349,8 @@ static void hvs_open_connection(struct vmbus_channel *chan)
349 return; 349 return;
350 350
351 lock_sock(sk); 351 lock_sock(sk);
352 352 if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
353 if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) || 353 (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
354 (!conn_from_host && sk->sk_state != SS_CONNECTING))
355 goto out; 354 goto out;
356 355
357 if (conn_from_host) { 356 if (conn_from_host) {
@@ -363,7 +362,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
363 if (!new) 362 if (!new)
364 goto out; 363 goto out;
365 364
366 new->sk_state = SS_CONNECTING; 365 new->sk_state = TCP_SYN_SENT;
367 vnew = vsock_sk(new); 366 vnew = vsock_sk(new);
368 hvs_new = vnew->trans; 367 hvs_new = vnew->trans;
369 hvs_new->chan = chan; 368 hvs_new->chan = chan;
@@ -390,7 +389,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
390 vmbus_set_chn_rescind_callback(chan, hvs_close_connection); 389 vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
391 390
392 if (conn_from_host) { 391 if (conn_from_host) {
393 new->sk_state = SS_CONNECTED; 392 new->sk_state = TCP_ESTABLISHED;
394 sk->sk_ack_backlog++; 393 sk->sk_ack_backlog++;
395 394
396 hvs_addr_init(&vnew->local_addr, if_type); 395 hvs_addr_init(&vnew->local_addr, if_type);
@@ -403,7 +402,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
403 402
404 vsock_enqueue_accept(sk, new); 403 vsock_enqueue_accept(sk, new);
405 } else { 404 } else {
406 sk->sk_state = SS_CONNECTED; 405 sk->sk_state = TCP_ESTABLISHED;
407 sk->sk_socket->state = SS_CONNECTED; 406 sk->sk_socket->state = SS_CONNECTED;
408 407
409 vsock_insert_connected(vsock_sk(sk)); 408 vsock_insert_connected(vsock_sk(sk));
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 403d86e80162..8e03bd3f3668 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -414,7 +414,7 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
414static void virtio_vsock_reset_sock(struct sock *sk) 414static void virtio_vsock_reset_sock(struct sock *sk)
415{ 415{
416 lock_sock(sk); 416 lock_sock(sk);
417 sk->sk_state = SS_UNCONNECTED; 417 sk->sk_state = TCP_CLOSE;
418 sk->sk_err = ECONNRESET; 418 sk->sk_err = ECONNRESET;
419 sk->sk_error_report(sk); 419 sk->sk_error_report(sk);
420 release_sock(sk); 420 release_sock(sk);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index edba7ab97563..3ae3a33da70b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -708,7 +708,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
708 sock_set_flag(sk, SOCK_DONE); 708 sock_set_flag(sk, SOCK_DONE);
709 vsk->peer_shutdown = SHUTDOWN_MASK; 709 vsk->peer_shutdown = SHUTDOWN_MASK;
710 if (vsock_stream_has_data(vsk) <= 0) 710 if (vsock_stream_has_data(vsk) <= 0)
711 sk->sk_state = SS_DISCONNECTING; 711 sk->sk_state = TCP_CLOSING;
712 sk->sk_state_change(sk); 712 sk->sk_state_change(sk);
713 713
714 if (vsk->close_work_scheduled && 714 if (vsk->close_work_scheduled &&
@@ -748,8 +748,8 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
748{ 748{
749 struct sock *sk = &vsk->sk; 749 struct sock *sk = &vsk->sk;
750 750
751 if (!(sk->sk_state == SS_CONNECTED || 751 if (!(sk->sk_state == TCP_ESTABLISHED ||
752 sk->sk_state == SS_DISCONNECTING)) 752 sk->sk_state == TCP_CLOSING))
753 return true; 753 return true;
754 754
755 /* Already received SHUTDOWN from peer, reply with RST */ 755 /* Already received SHUTDOWN from peer, reply with RST */
@@ -801,7 +801,7 @@ virtio_transport_recv_connecting(struct sock *sk,
801 801
802 switch (le16_to_cpu(pkt->hdr.op)) { 802 switch (le16_to_cpu(pkt->hdr.op)) {
803 case VIRTIO_VSOCK_OP_RESPONSE: 803 case VIRTIO_VSOCK_OP_RESPONSE:
804 sk->sk_state = SS_CONNECTED; 804 sk->sk_state = TCP_ESTABLISHED;
805 sk->sk_socket->state = SS_CONNECTED; 805 sk->sk_socket->state = SS_CONNECTED;
806 vsock_insert_connected(vsk); 806 vsock_insert_connected(vsk);
807 sk->sk_state_change(sk); 807 sk->sk_state_change(sk);
@@ -821,7 +821,7 @@ virtio_transport_recv_connecting(struct sock *sk,
821 821
822destroy: 822destroy:
823 virtio_transport_reset(vsk, pkt); 823 virtio_transport_reset(vsk, pkt);
824 sk->sk_state = SS_UNCONNECTED; 824 sk->sk_state = TCP_CLOSE;
825 sk->sk_err = skerr; 825 sk->sk_err = skerr;
826 sk->sk_error_report(sk); 826 sk->sk_error_report(sk);
827 return err; 827 return err;
@@ -857,7 +857,7 @@ virtio_transport_recv_connected(struct sock *sk,
857 vsk->peer_shutdown |= SEND_SHUTDOWN; 857 vsk->peer_shutdown |= SEND_SHUTDOWN;
858 if (vsk->peer_shutdown == SHUTDOWN_MASK && 858 if (vsk->peer_shutdown == SHUTDOWN_MASK &&
859 vsock_stream_has_data(vsk) <= 0) 859 vsock_stream_has_data(vsk) <= 0)
860 sk->sk_state = SS_DISCONNECTING; 860 sk->sk_state = TCP_CLOSING;
861 if (le32_to_cpu(pkt->hdr.flags)) 861 if (le32_to_cpu(pkt->hdr.flags))
862 sk->sk_state_change(sk); 862 sk->sk_state_change(sk);
863 break; 863 break;
@@ -928,7 +928,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
928 928
929 lock_sock_nested(child, SINGLE_DEPTH_NESTING); 929 lock_sock_nested(child, SINGLE_DEPTH_NESTING);
930 930
931 child->sk_state = SS_CONNECTED; 931 child->sk_state = TCP_ESTABLISHED;
932 932
933 vchild = vsock_sk(child); 933 vchild = vsock_sk(child);
934 vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid), 934 vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
@@ -1016,18 +1016,18 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
1016 sk->sk_write_space(sk); 1016 sk->sk_write_space(sk);
1017 1017
1018 switch (sk->sk_state) { 1018 switch (sk->sk_state) {
1019 case VSOCK_SS_LISTEN: 1019 case TCP_LISTEN:
1020 virtio_transport_recv_listen(sk, pkt); 1020 virtio_transport_recv_listen(sk, pkt);
1021 virtio_transport_free_pkt(pkt); 1021 virtio_transport_free_pkt(pkt);
1022 break; 1022 break;
1023 case SS_CONNECTING: 1023 case TCP_SYN_SENT:
1024 virtio_transport_recv_connecting(sk, pkt); 1024 virtio_transport_recv_connecting(sk, pkt);
1025 virtio_transport_free_pkt(pkt); 1025 virtio_transport_free_pkt(pkt);
1026 break; 1026 break;
1027 case SS_CONNECTED: 1027 case TCP_ESTABLISHED:
1028 virtio_transport_recv_connected(sk, pkt); 1028 virtio_transport_recv_connected(sk, pkt);
1029 break; 1029 break;
1030 case SS_DISCONNECTING: 1030 case TCP_CLOSING:
1031 virtio_transport_recv_disconnecting(sk, pkt); 1031 virtio_transport_recv_disconnecting(sk, pkt);
1032 virtio_transport_free_pkt(pkt); 1032 virtio_transport_free_pkt(pkt);
1033 break; 1033 break;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 10ae7823a19d..391775e3575c 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -21,7 +21,6 @@
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/kmod.h> 22#include <linux/kmod.h>
23#include <linux/list.h> 23#include <linux/list.h>
24#include <linux/miscdevice.h>
25#include <linux/module.h> 24#include <linux/module.h>
26#include <linux/mutex.h> 25#include <linux/mutex.h>
27#include <linux/net.h> 26#include <linux/net.h>
@@ -743,7 +742,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
743 /* The local context ID may be out of date, update it. */ 742 /* The local context ID may be out of date, update it. */
744 vsk->local_addr.svm_cid = dst.svm_cid; 743 vsk->local_addr.svm_cid = dst.svm_cid;
745 744
746 if (sk->sk_state == SS_CONNECTED) 745 if (sk->sk_state == TCP_ESTABLISHED)
747 vmci_trans(vsk)->notify_ops->handle_notify_pkt( 746 vmci_trans(vsk)->notify_ops->handle_notify_pkt(
748 sk, pkt, true, &dst, &src, 747 sk, pkt, true, &dst, &src,
749 &bh_process_pkt); 748 &bh_process_pkt);
@@ -801,7 +800,9 @@ static void vmci_transport_handle_detach(struct sock *sk)
801 * left in our consume queue. 800 * left in our consume queue.
802 */ 801 */
803 if (vsock_stream_has_data(vsk) <= 0) { 802 if (vsock_stream_has_data(vsk) <= 0) {
804 if (sk->sk_state == SS_CONNECTING) { 803 sk->sk_state = TCP_CLOSE;
804
805 if (sk->sk_state == TCP_SYN_SENT) {
805 /* The peer may detach from a queue pair while 806 /* The peer may detach from a queue pair while
806 * we are still in the connecting state, i.e., 807 * we are still in the connecting state, i.e.,
807 * if the peer VM is killed after attaching to 808 * if the peer VM is killed after attaching to
@@ -810,12 +811,10 @@ static void vmci_transport_handle_detach(struct sock *sk)
810 * event like a reset. 811 * event like a reset.
811 */ 812 */
812 813
813 sk->sk_state = SS_UNCONNECTED;
814 sk->sk_err = ECONNRESET; 814 sk->sk_err = ECONNRESET;
815 sk->sk_error_report(sk); 815 sk->sk_error_report(sk);
816 return; 816 return;
817 } 817 }
818 sk->sk_state = SS_UNCONNECTED;
819 } 818 }
820 sk->sk_state_change(sk); 819 sk->sk_state_change(sk);
821 } 820 }
@@ -883,17 +882,17 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work)
883 vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; 882 vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
884 883
885 switch (sk->sk_state) { 884 switch (sk->sk_state) {
886 case VSOCK_SS_LISTEN: 885 case TCP_LISTEN:
887 vmci_transport_recv_listen(sk, pkt); 886 vmci_transport_recv_listen(sk, pkt);
888 break; 887 break;
889 case SS_CONNECTING: 888 case TCP_SYN_SENT:
890 /* Processing of pending connections for servers goes through 889 /* Processing of pending connections for servers goes through
891 * the listening socket, so see vmci_transport_recv_listen() 890 * the listening socket, so see vmci_transport_recv_listen()
892 * for that path. 891 * for that path.
893 */ 892 */
894 vmci_transport_recv_connecting_client(sk, pkt); 893 vmci_transport_recv_connecting_client(sk, pkt);
895 break; 894 break;
896 case SS_CONNECTED: 895 case TCP_ESTABLISHED:
897 vmci_transport_recv_connected(sk, pkt); 896 vmci_transport_recv_connected(sk, pkt);
898 break; 897 break;
899 default: 898 default:
@@ -942,7 +941,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
942 vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context; 941 vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;
943 942
944 switch (pending->sk_state) { 943 switch (pending->sk_state) {
945 case SS_CONNECTING: 944 case TCP_SYN_SENT:
946 err = vmci_transport_recv_connecting_server(sk, 945 err = vmci_transport_recv_connecting_server(sk,
947 pending, 946 pending,
948 pkt); 947 pkt);
@@ -1072,7 +1071,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
1072 vsock_add_pending(sk, pending); 1071 vsock_add_pending(sk, pending);
1073 sk->sk_ack_backlog++; 1072 sk->sk_ack_backlog++;
1074 1073
1075 pending->sk_state = SS_CONNECTING; 1074 pending->sk_state = TCP_SYN_SENT;
1076 vmci_trans(vpending)->produce_size = 1075 vmci_trans(vpending)->produce_size =
1077 vmci_trans(vpending)->consume_size = qp_size; 1076 vmci_trans(vpending)->consume_size = qp_size;
1078 vmci_trans(vpending)->queue_pair_size = qp_size; 1077 vmci_trans(vpending)->queue_pair_size = qp_size;
@@ -1197,11 +1196,11 @@ vmci_transport_recv_connecting_server(struct sock *listener,
1197 * the socket will be valid until it is removed from the queue. 1196 * the socket will be valid until it is removed from the queue.
1198 * 1197 *
1199 * If we fail sending the attach below, we remove the socket from the 1198 * If we fail sending the attach below, we remove the socket from the
1200 * connected list and move the socket to SS_UNCONNECTED before 1199 * connected list and move the socket to TCP_CLOSE before
1201 * releasing the lock, so a pending slow path processing of an incoming 1200 * releasing the lock, so a pending slow path processing of an incoming
1202 * packet will not see the socket in the connected state in that case. 1201 * packet will not see the socket in the connected state in that case.
1203 */ 1202 */
1204 pending->sk_state = SS_CONNECTED; 1203 pending->sk_state = TCP_ESTABLISHED;
1205 1204
1206 vsock_insert_connected(vpending); 1205 vsock_insert_connected(vpending);
1207 1206
@@ -1232,7 +1231,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
1232 1231
1233destroy: 1232destroy:
1234 pending->sk_err = skerr; 1233 pending->sk_err = skerr;
1235 pending->sk_state = SS_UNCONNECTED; 1234 pending->sk_state = TCP_CLOSE;
1236 /* As long as we drop our reference, all necessary cleanup will handle 1235 /* As long as we drop our reference, all necessary cleanup will handle
1237 * when the cleanup function drops its reference and our destruct 1236 * when the cleanup function drops its reference and our destruct
1238 * implementation is called. Note that since the listen handler will 1237 * implementation is called. Note that since the listen handler will
@@ -1270,7 +1269,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
1270 * accounting (it can already be found since it's in the bound 1269 * accounting (it can already be found since it's in the bound
1271 * table). 1270 * table).
1272 */ 1271 */
1273 sk->sk_state = SS_CONNECTED; 1272 sk->sk_state = TCP_ESTABLISHED;
1274 sk->sk_socket->state = SS_CONNECTED; 1273 sk->sk_socket->state = SS_CONNECTED;
1275 vsock_insert_connected(vsk); 1274 vsock_insert_connected(vsk);
1276 sk->sk_state_change(sk); 1275 sk->sk_state_change(sk);
@@ -1338,7 +1337,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
1338destroy: 1337destroy:
1339 vmci_transport_send_reset(sk, pkt); 1338 vmci_transport_send_reset(sk, pkt);
1340 1339
1341 sk->sk_state = SS_UNCONNECTED; 1340 sk->sk_state = TCP_CLOSE;
1342 sk->sk_err = skerr; 1341 sk->sk_err = skerr;
1343 sk->sk_error_report(sk); 1342 sk->sk_error_report(sk);
1344 return err; 1343 return err;
@@ -1526,7 +1525,7 @@ static int vmci_transport_recv_connected(struct sock *sk,
1526 sock_set_flag(sk, SOCK_DONE); 1525 sock_set_flag(sk, SOCK_DONE);
1527 vsk->peer_shutdown = SHUTDOWN_MASK; 1526 vsk->peer_shutdown = SHUTDOWN_MASK;
1528 if (vsock_stream_has_data(vsk) <= 0) 1527 if (vsock_stream_has_data(vsk) <= 0)
1529 sk->sk_state = SS_DISCONNECTING; 1528 sk->sk_state = TCP_CLOSING;
1530 1529
1531 sk->sk_state_change(sk); 1530 sk->sk_state_change(sk);
1532 break; 1531 break;
@@ -1790,7 +1789,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
1790 err = vmci_transport_send_conn_request( 1789 err = vmci_transport_send_conn_request(
1791 sk, vmci_trans(vsk)->queue_pair_size); 1790 sk, vmci_trans(vsk)->queue_pair_size);
1792 if (err < 0) { 1791 if (err < 0) {
1793 sk->sk_state = SS_UNCONNECTED; 1792 sk->sk_state = TCP_CLOSE;
1794 return err; 1793 return err;
1795 } 1794 }
1796 } else { 1795 } else {
@@ -1800,7 +1799,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
1800 sk, vmci_trans(vsk)->queue_pair_size, 1799 sk, vmci_trans(vsk)->queue_pair_size,
1801 supported_proto_versions); 1800 supported_proto_versions);
1802 if (err < 0) { 1801 if (err < 0) {
1803 sk->sk_state = SS_UNCONNECTED; 1802 sk->sk_state = TCP_CLOSE;
1804 return err; 1803 return err;
1805 } 1804 }
1806 1805
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 1406db4d97d1..41fb427f150a 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -355,7 +355,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
355 * queue. Ask for notifications when there is something to 355 * queue. Ask for notifications when there is something to
356 * read. 356 * read.
357 */ 357 */
358 if (sk->sk_state == SS_CONNECTED) { 358 if (sk->sk_state == TCP_ESTABLISHED) {
359 if (!send_waiting_read(sk, 1)) 359 if (!send_waiting_read(sk, 1))
360 return -1; 360 return -1;
361 361
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index f3a0afc46208..0cc84f2bb05e 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -176,7 +176,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
176 * queue. Ask for notifications when there is something to 176 * queue. Ask for notifications when there is something to
177 * read. 177 * read.
178 */ 178 */
179 if (sk->sk_state == SS_CONNECTED) 179 if (sk->sk_state == TCP_ESTABLISHED)
180 vsock_block_update_write_window(sk); 180 vsock_block_update_write_window(sk);
181 *data_ready_now = false; 181 *data_ready_now = false;
182 } 182 }
diff --git a/net/wireless/.gitignore b/net/wireless/.gitignore
index c33451b896d9..61cbc304a3d3 100644
--- a/net/wireless/.gitignore
+++ b/net/wireless/.gitignore
@@ -1 +1,2 @@
1regdb.c 1shipped-certs.c
2extra-certs.c
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 6c606120abfe..da91bb547db3 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -19,6 +19,7 @@ config WEXT_PRIV
19config CFG80211 19config CFG80211
20 tristate "cfg80211 - wireless configuration API" 20 tristate "cfg80211 - wireless configuration API"
21 depends on RFKILL || !RFKILL 21 depends on RFKILL || !RFKILL
22 select FW_LOADER
22 ---help--- 23 ---help---
23 cfg80211 is the Linux wireless LAN (802.11) configuration API. 24 cfg80211 is the Linux wireless LAN (802.11) configuration API.
24 Enable this if you have a wireless device. 25 Enable this if you have a wireless device.
@@ -82,6 +83,36 @@ config CFG80211_CERTIFICATION_ONUS
82 you are a wireless researcher and are working in a controlled 83 you are a wireless researcher and are working in a controlled
83 and approved environment by your local regulatory agency. 84 and approved environment by your local regulatory agency.
84 85
86config CFG80211_REQUIRE_SIGNED_REGDB
87 bool "require regdb signature" if CFG80211_CERTIFICATION_ONUS
88 default y
89 select SYSTEM_DATA_VERIFICATION
90 help
91 Require that in addition to the "regulatory.db" file a
92 "regulatory.db.p7s" can be loaded with a valid PKCS#7
93 signature for the regulatory.db file made by one of the
94 keys in the certs/ directory.
95
96config CFG80211_USE_KERNEL_REGDB_KEYS
97 bool "allow regdb keys shipped with the kernel" if CFG80211_CERTIFICATION_ONUS
98 default y
99 depends on CFG80211_REQUIRE_SIGNED_REGDB
100 help
101 Allow the regulatory database to be signed by one of the keys for
102 which certificates are part of the kernel sources
103 (in net/wireless/certs/).
104
105 This is currently only Seth Forshee's key, who is the regulatory
106 database maintainer.
107
108config CFG80211_EXTRA_REGDB_KEYDIR
109 string "additional regdb key directory" if CFG80211_CERTIFICATION_ONUS
110 depends on CFG80211_REQUIRE_SIGNED_REGDB
111 help
112 If selected, point to a directory with DER-encoded X.509
113 certificates like in the kernel sources (net/wireless/certs/)
114 that shall be accepted for a signed regulatory database.
115
85config CFG80211_REG_CELLULAR_HINTS 116config CFG80211_REG_CELLULAR_HINTS
86 bool "cfg80211 regulatory support for cellular base station hints" 117 bool "cfg80211 regulatory support for cellular base station hints"
87 depends on CFG80211_CERTIFICATION_ONUS 118 depends on CFG80211_CERTIFICATION_ONUS
@@ -139,35 +170,14 @@ config CFG80211_DEBUGFS
139 170
140 If unsure, say N. 171 If unsure, say N.
141 172
142config CFG80211_INTERNAL_REGDB
143 bool "use statically compiled regulatory rules database" if EXPERT
144 default n
145 depends on CFG80211
146 ---help---
147 This option generates an internal data structure representing
148 the wireless regulatory rules described in net/wireless/db.txt
149 and includes code to query that database. This is an alternative
150 to using CRDA for defining regulatory rules for the kernel.
151
152 Using this option requires some parsing of the db.txt at build time,
153 the parser will be upkept with the latest wireless-regdb updates but
154 older wireless-regdb formats will be ignored. The parser may later
155 be replaced to avoid issues with conflicts on versions of
156 wireless-regdb.
157
158 For details see:
159
160 http://wireless.kernel.org/en/developers/Regulatory
161
162 Most distributions have a CRDA package. So if unsure, say N.
163
164config CFG80211_CRDA_SUPPORT 173config CFG80211_CRDA_SUPPORT
165 bool "support CRDA" if CFG80211_INTERNAL_REGDB 174 bool "support CRDA" if EXPERT
166 default y 175 default y
167 depends on CFG80211 176 depends on CFG80211
168 help 177 help
169 You should enable this option unless you know for sure you have no 178 You should enable this option unless you know for sure you have no
170 need for it, for example when using internal regdb (above.) 179 need for it, for example when using internal regdb (above) or the
180 database loaded as a firmware file.
171 181
172 If unsure, say Y. 182 If unsure, say Y.
173 183
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index b7c0300e0b08..278d979c211a 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -15,11 +15,27 @@ cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o
15cfg80211-$(CONFIG_OF) += of.o 15cfg80211-$(CONFIG_OF) += of.o
16cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o 16cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o
17cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o 17cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o
18cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o
19 18
20CFLAGS_trace.o := -I$(src) 19CFLAGS_trace.o := -I$(src)
21 20
22$(obj)/regdb.c: $(src)/db.txt $(src)/genregdb.awk 21cfg80211-$(CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS) += shipped-certs.o
23 @$(AWK) -f $(srctree)/$(src)/genregdb.awk < $< > $@ 22ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
23cfg80211-y += extra-certs.o
24endif
24 25
25clean-files := regdb.c 26$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
27 @$(kecho) " GEN $@"
28 @echo '#include "reg.h"' > $@
29 @echo 'const u8 shipped_regdb_certs[] = {' >> $@
30 @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done
31 @echo '};' >> $@
32 @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@
33
34$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
35 $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
36 @$(kecho) " GEN $@"
37 @echo '#include "reg.h"' > $@
38 @echo 'const u8 extra_regdb_certs[] = {' >> $@
39 @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done
40 @echo '};' >> $@
41 @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
new file mode 100644
index 000000000000..c6f8f9d6b988
--- /dev/null
+++ b/net/wireless/certs/sforshee.x509
Binary files differ
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index fad1b5baf8ff..a48859982a32 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -465,7 +465,7 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
465 struct ieee80211_channel *chan) 465 struct ieee80211_channel *chan)
466{ 466{
467 int width; 467 int width;
468 u32 cf_offset, freq; 468 u32 freq;
469 469
470 if (chandef->chan->center_freq == chan->center_freq) 470 if (chandef->chan->center_freq == chan->center_freq)
471 return true; 471 return true;
@@ -474,8 +474,6 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
474 if (width <= 20) 474 if (width <= 20)
475 return false; 475 return false;
476 476
477 cf_offset = width / 2 - 10;
478
479 for (freq = chandef->center_freq1 - width / 2 + 10; 477 for (freq = chandef->center_freq1 - width / 2 + 10;
480 freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) { 478 freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) {
481 if (chan->center_freq == freq) 479 if (chan->center_freq == freq)
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7b33e8c366bc..fdde0d98fde1 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1384,7 +1384,7 @@ out_fail_sysfs:
1384out_fail_pernet: 1384out_fail_pernet:
1385 return err; 1385 return err;
1386} 1386}
1387subsys_initcall(cfg80211_init); 1387fs_initcall(cfg80211_init);
1388 1388
1389static void __exit cfg80211_exit(void) 1389static void __exit cfg80211_exit(void)
1390{ 1390{
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 705835047f98..d2f7e8b8a097 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -217,6 +217,7 @@ enum cfg80211_event_type {
217 EVENT_DISCONNECTED, 217 EVENT_DISCONNECTED,
218 EVENT_IBSS_JOINED, 218 EVENT_IBSS_JOINED,
219 EVENT_STOPPED, 219 EVENT_STOPPED,
220 EVENT_PORT_AUTHORIZED,
220}; 221};
221 222
222struct cfg80211_event { 223struct cfg80211_event {
@@ -236,6 +237,9 @@ struct cfg80211_event {
236 u8 bssid[ETH_ALEN]; 237 u8 bssid[ETH_ALEN];
237 struct ieee80211_channel *channel; 238 struct ieee80211_channel *channel;
238 } ij; 239 } ij;
240 struct {
241 u8 bssid[ETH_ALEN];
242 } pa;
239 }; 243 };
240}; 244};
241 245
@@ -386,6 +390,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
386 bool wextev); 390 bool wextev);
387void __cfg80211_roamed(struct wireless_dev *wdev, 391void __cfg80211_roamed(struct wireless_dev *wdev,
388 struct cfg80211_roam_info *info); 392 struct cfg80211_roam_info *info);
393void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid);
389int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, 394int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
390 struct wireless_dev *wdev); 395 struct wireless_dev *wdev);
391void cfg80211_autodisconnect_wk(struct work_struct *work); 396void cfg80211_autodisconnect_wk(struct work_struct *work);
diff --git a/net/wireless/db.txt b/net/wireless/db.txt
deleted file mode 100644
index a2fc3a09ccdc..000000000000
--- a/net/wireless/db.txt
+++ /dev/null
@@ -1,17 +0,0 @@
1#
2# This file is a placeholder to prevent accidental build breakage if someone
3# enables CONFIG_CFG80211_INTERNAL_REGDB. Almost no one actually needs to
4# enable that build option.
5#
6# You should be using CRDA instead. It is even better if you use the CRDA
7# package provided by your distribution, since they will probably keep it
8# up-to-date on your behalf.
9#
10# If you _really_ intend to use CONFIG_CFG80211_INTERNAL_REGDB then you will
11# need to replace this file with one containing appropriately formatted
12# regulatory rules that cover the regulatory domains you will be using. Your
13# best option is to extract the db.txt file from the wireless-regdb git
14# repository:
15#
16# git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-regdb.git
17#
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
deleted file mode 100644
index baf2426b555a..000000000000
--- a/net/wireless/genregdb.awk
+++ /dev/null
@@ -1,158 +0,0 @@
1#!/usr/bin/awk -f
2#
3# genregdb.awk -- generate regdb.c from db.txt
4#
5# Actually, it reads from stdin (presumed to be db.txt) and writes
6# to stdout (presumed to be regdb.c), but close enough...
7#
8# Copyright 2009 John W. Linville <linville@tuxdriver.com>
9#
10# Permission to use, copy, modify, and/or distribute this software for any
11# purpose with or without fee is hereby granted, provided that the above
12# copyright notice and this permission notice appear in all copies.
13#
14# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
15# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
16# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
17# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
19# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
20# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21
22BEGIN {
23 active = 0
24 rules = 0;
25 print "/*"
26 print " * DO NOT EDIT -- file generated from data in db.txt"
27 print " */"
28 print ""
29 print "#include <linux/nl80211.h>"
30 print "#include <net/cfg80211.h>"
31 print "#include \"regdb.h\""
32 print ""
33 regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n"
34}
35
36function parse_country_head() {
37 country=$2
38 sub(/:/, "", country)
39 printf "static const struct ieee80211_regdomain regdom_%s = {\n", country
40 printf "\t.alpha2 = \"%s\",\n", country
41 if ($NF ~ /DFS-ETSI/)
42 printf "\t.dfs_region = NL80211_DFS_ETSI,\n"
43 else if ($NF ~ /DFS-FCC/)
44 printf "\t.dfs_region = NL80211_DFS_FCC,\n"
45 else if ($NF ~ /DFS-JP/)
46 printf "\t.dfs_region = NL80211_DFS_JP,\n"
47 printf "\t.reg_rules = {\n"
48 active = 1
49 regdb = regdb "\t&regdom_" country ",\n"
50}
51
52function parse_reg_rule()
53{
54 flag_starts_at = 7
55
56 start = $1
57 sub(/\(/, "", start)
58 end = $3
59 bw = $5
60 sub(/\),/, "", bw)
61 gain = 0
62 power = $6
63 # power might be in mW...
64 units = $7
65 dfs_cac = 0
66
67 sub(/\(/, "", power)
68 sub(/\),/, "", power)
69 sub(/\),/, "", units)
70 sub(/\)/, "", units)
71
72 if (units == "mW") {
73 flag_starts_at = 8
74 power = 10 * log(power)/log(10)
75 if ($8 ~ /[[:digit:]]/) {
76 flag_starts_at = 9
77 dfs_cac = $8
78 }
79 } else {
80 if ($7 ~ /[[:digit:]]/) {
81 flag_starts_at = 8
82 dfs_cac = $7
83 }
84 }
85 sub(/\(/, "", dfs_cac)
86 sub(/\),/, "", dfs_cac)
87 flagstr = ""
88 for (i=flag_starts_at; i<=NF; i++)
89 flagstr = flagstr $i
90 split(flagstr, flagarray, ",")
91 flags = ""
92 for (arg in flagarray) {
93 if (flagarray[arg] == "NO-OFDM") {
94 flags = flags "\n\t\t\tNL80211_RRF_NO_OFDM | "
95 } else if (flagarray[arg] == "NO-CCK") {
96 flags = flags "\n\t\t\tNL80211_RRF_NO_CCK | "
97 } else if (flagarray[arg] == "NO-INDOOR") {
98 flags = flags "\n\t\t\tNL80211_RRF_NO_INDOOR | "
99 } else if (flagarray[arg] == "NO-OUTDOOR") {
100 flags = flags "\n\t\t\tNL80211_RRF_NO_OUTDOOR | "
101 } else if (flagarray[arg] == "DFS") {
102 flags = flags "\n\t\t\tNL80211_RRF_DFS | "
103 } else if (flagarray[arg] == "PTP-ONLY") {
104 flags = flags "\n\t\t\tNL80211_RRF_PTP_ONLY | "
105 } else if (flagarray[arg] == "PTMP-ONLY") {
106 flags = flags "\n\t\t\tNL80211_RRF_PTMP_ONLY | "
107 } else if (flagarray[arg] == "PASSIVE-SCAN") {
108 flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
109 } else if (flagarray[arg] == "NO-IBSS") {
110 flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
111 } else if (flagarray[arg] == "NO-IR") {
112 flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
113 } else if (flagarray[arg] == "AUTO-BW") {
114 flags = flags "\n\t\t\tNL80211_RRF_AUTO_BW | "
115 }
116
117 }
118 flags = flags "0"
119 printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags
120 rules++
121}
122
123function print_tail_country()
124{
125 active = 0
126 printf "\t},\n"
127 printf "\t.n_reg_rules = %d\n", rules
128 printf "};\n\n"
129 rules = 0;
130}
131
132/^[ \t]*#/ {
133 # Ignore
134}
135
136!active && /^[ \t]*$/ {
137 # Ignore
138}
139
140!active && /country/ {
141 parse_country_head()
142}
143
144active && /^[ \t]*\(/ {
145 parse_reg_rule()
146}
147
148active && /^[ \t]*$/ {
149 print_tail_country()
150}
151
152END {
153 if (active)
154 print_tail_country()
155 print regdb "};"
156 print ""
157 print "int reg_regdb_size = ARRAY_SIZE(reg_regdb);"
158}
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
index 459611577d3d..801d4781a73b 100644
--- a/net/wireless/lib80211.c
+++ b/net/wireless/lib80211.c
@@ -44,7 +44,7 @@ static DEFINE_SPINLOCK(lib80211_crypto_lock);
44static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, 44static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info,
45 int force); 45 int force);
46static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info); 46static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info);
47static void lib80211_crypt_deinit_handler(unsigned long data); 47static void lib80211_crypt_deinit_handler(struct timer_list *t);
48 48
49int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, 49int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name,
50 spinlock_t *lock) 50 spinlock_t *lock)
@@ -55,8 +55,8 @@ int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name,
55 info->lock = lock; 55 info->lock = lock;
56 56
57 INIT_LIST_HEAD(&info->crypt_deinit_list); 57 INIT_LIST_HEAD(&info->crypt_deinit_list);
58 setup_timer(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler, 58 timer_setup(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler,
59 (unsigned long)info); 59 0);
60 60
61 return 0; 61 return 0;
62} 62}
@@ -116,9 +116,10 @@ static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info)
116 spin_unlock_irqrestore(info->lock, flags); 116 spin_unlock_irqrestore(info->lock, flags);
117} 117}
118 118
119static void lib80211_crypt_deinit_handler(unsigned long data) 119static void lib80211_crypt_deinit_handler(struct timer_list *t)
120{ 120{
121 struct lib80211_crypt_info *info = (struct lib80211_crypt_info *)data; 121 struct lib80211_crypt_info *info = from_timer(info, t,
122 crypt_deinit_timer);
122 unsigned long flags; 123 unsigned long flags;
123 124
124 lib80211_crypt_deinit_entries(info, 0); 125 lib80211_crypt_deinit_entries(info, 0);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index eb866647a27a..b1ac23ca20c8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2130,6 +2130,15 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
2130 case NL80211_CHAN_HT40MINUS: 2130 case NL80211_CHAN_HT40MINUS:
2131 cfg80211_chandef_create(chandef, chandef->chan, 2131 cfg80211_chandef_create(chandef, chandef->chan,
2132 chantype); 2132 chantype);
2133 /* user input for center_freq is incorrect */
2134 if (info->attrs[NL80211_ATTR_CENTER_FREQ1] &&
2135 chandef->center_freq1 != nla_get_u32(
2136 info->attrs[NL80211_ATTR_CENTER_FREQ1]))
2137 return -EINVAL;
2138 /* center_freq2 must be zero */
2139 if (info->attrs[NL80211_ATTR_CENTER_FREQ2] &&
2140 nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2]))
2141 return -EINVAL;
2133 break; 2142 break;
2134 default: 2143 default:
2135 return -EINVAL; 2144 return -EINVAL;
@@ -2596,10 +2605,32 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2596 goto nla_put_failure; 2605 goto nla_put_failure;
2597 } 2606 }
2598 2607
2599 if (wdev->ssid_len) { 2608 wdev_lock(wdev);
2600 if (nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) 2609 switch (wdev->iftype) {
2610 case NL80211_IFTYPE_AP:
2611 if (wdev->ssid_len &&
2612 nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
2601 goto nla_put_failure; 2613 goto nla_put_failure;
2614 break;
2615 case NL80211_IFTYPE_STATION:
2616 case NL80211_IFTYPE_P2P_CLIENT:
2617 case NL80211_IFTYPE_ADHOC: {
2618 const u8 *ssid_ie;
2619 if (!wdev->current_bss)
2620 break;
2621 ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub,
2622 WLAN_EID_SSID);
2623 if (!ssid_ie)
2624 break;
2625 if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
2626 goto nla_put_failure;
2627 break;
2628 }
2629 default:
2630 /* nothing */
2631 break;
2602 } 2632 }
2633 wdev_unlock(wdev);
2603 2634
2604 genlmsg_end(msg, hdr); 2635 genlmsg_end(msg, hdr);
2605 return 0; 2636 return 0;
@@ -5677,6 +5708,11 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
5677 } 5708 }
5678} 5709}
5679 5710
5711static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info)
5712{
5713 return reg_reload_regdb();
5714}
5715
5680static int nl80211_get_mesh_config(struct sk_buff *skb, 5716static int nl80211_get_mesh_config(struct sk_buff *skb,
5681 struct genl_info *info) 5717 struct genl_info *info)
5682{ 5718{
@@ -6277,7 +6313,7 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb,
6277 if (!hdr) 6313 if (!hdr)
6278 return -1; 6314 return -1;
6279 6315
6280 genl_dump_check_consistent(cb, hdr, &nl80211_fam); 6316 genl_dump_check_consistent(cb, hdr);
6281 6317
6282 if (nl80211_put_regdom(regdom, msg)) 6318 if (nl80211_put_regdom(regdom, msg))
6283 goto nla_put_failure; 6319 goto nla_put_failure;
@@ -6618,6 +6654,77 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev)
6618 return regulatory_pre_cac_allowed(wdev->wiphy); 6654 return regulatory_pre_cac_allowed(wdev->wiphy);
6619} 6655}
6620 6656
6657static int
6658nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
6659 void *request, struct nlattr **attrs,
6660 bool is_sched_scan)
6661{
6662 u8 *mac_addr, *mac_addr_mask;
6663 u32 *flags;
6664 enum nl80211_feature_flags randomness_flag;
6665
6666 if (!attrs[NL80211_ATTR_SCAN_FLAGS])
6667 return 0;
6668
6669 if (is_sched_scan) {
6670 struct cfg80211_sched_scan_request *req = request;
6671
6672 randomness_flag = wdev ?
6673 NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR :
6674 NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
6675 flags = &req->flags;
6676 mac_addr = req->mac_addr;
6677 mac_addr_mask = req->mac_addr_mask;
6678 } else {
6679 struct cfg80211_scan_request *req = request;
6680
6681 randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
6682 flags = &req->flags;
6683 mac_addr = req->mac_addr;
6684 mac_addr_mask = req->mac_addr_mask;
6685 }
6686
6687 *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
6688
6689 if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
6690 !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
6691 return -EOPNOTSUPP;
6692
6693 if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
6694 int err;
6695
6696 if (!(wiphy->features & randomness_flag) ||
6697 (wdev && wdev->current_bss))
6698 return -EOPNOTSUPP;
6699
6700 err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask);
6701 if (err)
6702 return err;
6703 }
6704
6705 if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) &&
6706 !wiphy_ext_feature_isset(wiphy,
6707 NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME))
6708 return -EOPNOTSUPP;
6709
6710 if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) &&
6711 !wiphy_ext_feature_isset(wiphy,
6712 NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP))
6713 return -EOPNOTSUPP;
6714
6715 if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) &&
6716 !wiphy_ext_feature_isset(wiphy,
6717 NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION))
6718 return -EOPNOTSUPP;
6719
6720 if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) &&
6721 !wiphy_ext_feature_isset(wiphy,
6722 NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE))
6723 return -EOPNOTSUPP;
6724
6725 return 0;
6726}
6727
6621static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) 6728static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
6622{ 6729{
6623 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 6730 struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -6823,34 +6930,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
6823 nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]); 6930 nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]);
6824 } 6931 }
6825 6932
6826 if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { 6933 err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs,
6827 request->flags = nla_get_u32( 6934 false);
6828 info->attrs[NL80211_ATTR_SCAN_FLAGS]); 6935 if (err)
6829 if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && 6936 goto out_free;
6830 !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
6831 err = -EOPNOTSUPP;
6832 goto out_free;
6833 }
6834
6835 if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
6836 if (!(wiphy->features &
6837 NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) {
6838 err = -EOPNOTSUPP;
6839 goto out_free;
6840 }
6841
6842 if (wdev->current_bss) {
6843 err = -EOPNOTSUPP;
6844 goto out_free;
6845 }
6846
6847 err = nl80211_parse_random_mac(info->attrs,
6848 request->mac_addr,
6849 request->mac_addr_mask);
6850 if (err)
6851 goto out_free;
6852 }
6853 }
6854 6937
6855 request->no_cck = 6938 request->no_cck =
6856 nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); 6939 nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
@@ -7298,37 +7381,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
7298 request->ie_len); 7381 request->ie_len);
7299 } 7382 }
7300 7383
7301 if (attrs[NL80211_ATTR_SCAN_FLAGS]) { 7384 err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true);
7302 request->flags = nla_get_u32( 7385 if (err)
7303 attrs[NL80211_ATTR_SCAN_FLAGS]); 7386 goto out_free;
7304 if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
7305 !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
7306 err = -EOPNOTSUPP;
7307 goto out_free;
7308 }
7309
7310 if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
7311 u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR;
7312
7313 if (!wdev) /* must be net-detect */
7314 flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
7315
7316 if (!(wiphy->features & flg)) {
7317 err = -EOPNOTSUPP;
7318 goto out_free;
7319 }
7320
7321 if (wdev && wdev->current_bss) {
7322 err = -EOPNOTSUPP;
7323 goto out_free;
7324 }
7325
7326 err = nl80211_parse_random_mac(attrs, request->mac_addr,
7327 request->mac_addr_mask);
7328 if (err)
7329 goto out_free;
7330 }
7331 }
7332 7387
7333 if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY]) 7388 if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY])
7334 request->delay = 7389 request->delay =
@@ -7689,7 +7744,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
7689 if (!hdr) 7744 if (!hdr)
7690 return -1; 7745 return -1;
7691 7746
7692 genl_dump_check_consistent(cb, hdr, &nl80211_fam); 7747 genl_dump_check_consistent(cb, hdr);
7693 7748
7694 if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation)) 7749 if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation))
7695 goto nla_put_failure; 7750 goto nla_put_failure;
@@ -8932,8 +8987,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
8932 8987
8933 if (info->attrs[NL80211_ATTR_USE_MFP]) { 8988 if (info->attrs[NL80211_ATTR_USE_MFP]) {
8934 connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); 8989 connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
8990 if (connect.mfp == NL80211_MFP_OPTIONAL &&
8991 !wiphy_ext_feature_isset(&rdev->wiphy,
8992 NL80211_EXT_FEATURE_MFP_OPTIONAL))
8993 return -EOPNOTSUPP;
8994
8935 if (connect.mfp != NL80211_MFP_REQUIRED && 8995 if (connect.mfp != NL80211_MFP_REQUIRED &&
8936 connect.mfp != NL80211_MFP_NO) 8996 connect.mfp != NL80211_MFP_NO &&
8997 connect.mfp != NL80211_MFP_OPTIONAL)
8937 return -EINVAL; 8998 return -EINVAL;
8938 } else { 8999 } else {
8939 connect.mfp = NL80211_MFP_NO; 9000 connect.mfp = NL80211_MFP_NO;
@@ -12685,6 +12746,12 @@ static const struct genl_ops nl80211_ops[] = {
12685 .flags = GENL_ADMIN_PERM, 12746 .flags = GENL_ADMIN_PERM,
12686 }, 12747 },
12687 { 12748 {
12749 .cmd = NL80211_CMD_RELOAD_REGDB,
12750 .doit = nl80211_reload_regdb,
12751 .policy = nl80211_policy,
12752 .flags = GENL_ADMIN_PERM,
12753 },
12754 {
12688 .cmd = NL80211_CMD_GET_MESH_CONFIG, 12755 .cmd = NL80211_CMD_GET_MESH_CONFIG,
12689 .doit = nl80211_get_mesh_config, 12756 .doit = nl80211_get_mesh_config,
12690 .policy = nl80211_policy, 12757 .policy = nl80211_policy,
@@ -13812,9 +13879,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
13812 info->req_ie)) || 13879 info->req_ie)) ||
13813 (info->resp_ie && 13880 (info->resp_ie &&
13814 nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len, 13881 nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
13815 info->resp_ie)) || 13882 info->resp_ie)))
13816 (info->authorized &&
13817 nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED)))
13818 goto nla_put_failure; 13883 goto nla_put_failure;
13819 13884
13820 genlmsg_end(msg, hdr); 13885 genlmsg_end(msg, hdr);
@@ -13828,6 +13893,36 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
13828 nlmsg_free(msg); 13893 nlmsg_free(msg);
13829} 13894}
13830 13895
13896void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
13897 struct net_device *netdev, const u8 *bssid)
13898{
13899 struct sk_buff *msg;
13900 void *hdr;
13901
13902 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
13903 if (!msg)
13904 return;
13905
13906 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED);
13907 if (!hdr) {
13908 nlmsg_free(msg);
13909 return;
13910 }
13911
13912 if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
13913 goto nla_put_failure;
13914
13915 genlmsg_end(msg, hdr);
13916
13917 genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
13918 NL80211_MCGRP_MLME, GFP_KERNEL);
13919 return;
13920
13921 nla_put_failure:
13922 genlmsg_cancel(msg, hdr);
13923 nlmsg_free(msg);
13924}
13925
13831void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, 13926void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
13832 struct net_device *netdev, u16 reason, 13927 struct net_device *netdev, u16 reason,
13833 const u8 *ie, size_t ie_len, bool from_ap) 13928 const u8 *ie, size_t ie_len, bool from_ap)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index fc415c8f7aac..79e47fe60c35 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -59,6 +59,8 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
59void nl80211_send_roamed(struct cfg80211_registered_device *rdev, 59void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
60 struct net_device *netdev, 60 struct net_device *netdev,
61 struct cfg80211_roam_info *info, gfp_t gfp); 61 struct cfg80211_roam_info *info, gfp_t gfp);
62void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
63 struct net_device *netdev, const u8 *bssid);
62void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, 64void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
63 struct net_device *netdev, u16 reason, 65 struct net_device *netdev, u16 reason,
64 const u8 *ie, size_t ie_len, bool from_ap); 66 const u8 *ie, size_t ie_len, bool from_ap);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 6e94f6934a0e..78e71b0390be 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -53,12 +53,13 @@
53#include <linux/ctype.h> 53#include <linux/ctype.h>
54#include <linux/nl80211.h> 54#include <linux/nl80211.h>
55#include <linux/platform_device.h> 55#include <linux/platform_device.h>
56#include <linux/verification.h>
56#include <linux/moduleparam.h> 57#include <linux/moduleparam.h>
58#include <linux/firmware.h>
57#include <net/cfg80211.h> 59#include <net/cfg80211.h>
58#include "core.h" 60#include "core.h"
59#include "reg.h" 61#include "reg.h"
60#include "rdev-ops.h" 62#include "rdev-ops.h"
61#include "regdb.h"
62#include "nl80211.h" 63#include "nl80211.h"
63 64
64/* 65/*
@@ -100,7 +101,7 @@ static struct regulatory_request core_request_world = {
100static struct regulatory_request __rcu *last_request = 101static struct regulatory_request __rcu *last_request =
101 (void __force __rcu *)&core_request_world; 102 (void __force __rcu *)&core_request_world;
102 103
103/* To trigger userspace events */ 104/* To trigger userspace events and load firmware */
104static struct platform_device *reg_pdev; 105static struct platform_device *reg_pdev;
105 106
106/* 107/*
@@ -443,7 +444,6 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd)
443 return regd; 444 return regd;
444} 445}
445 446
446#ifdef CONFIG_CFG80211_INTERNAL_REGDB
447struct reg_regdb_apply_request { 447struct reg_regdb_apply_request {
448 struct list_head list; 448 struct list_head list;
449 const struct ieee80211_regdomain *regdom; 449 const struct ieee80211_regdomain *regdom;
@@ -475,55 +475,26 @@ static void reg_regdb_apply(struct work_struct *work)
475 475
476static DECLARE_WORK(reg_regdb_work, reg_regdb_apply); 476static DECLARE_WORK(reg_regdb_work, reg_regdb_apply);
477 477
478static int reg_query_builtin(const char *alpha2) 478static int reg_schedule_apply(const struct ieee80211_regdomain *regdom)
479{ 479{
480 const struct ieee80211_regdomain *regdom = NULL;
481 struct reg_regdb_apply_request *request; 480 struct reg_regdb_apply_request *request;
482 unsigned int i;
483
484 for (i = 0; i < reg_regdb_size; i++) {
485 if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) {
486 regdom = reg_regdb[i];
487 break;
488 }
489 }
490
491 if (!regdom)
492 return -ENODATA;
493 481
494 request = kzalloc(sizeof(struct reg_regdb_apply_request), GFP_KERNEL); 482 request = kzalloc(sizeof(struct reg_regdb_apply_request), GFP_KERNEL);
495 if (!request) 483 if (!request) {
496 return -ENOMEM; 484 kfree(regdom);
497
498 request->regdom = reg_copy_regd(regdom);
499 if (IS_ERR_OR_NULL(request->regdom)) {
500 kfree(request);
501 return -ENOMEM; 485 return -ENOMEM;
502 } 486 }
503 487
488 request->regdom = regdom;
489
504 mutex_lock(&reg_regdb_apply_mutex); 490 mutex_lock(&reg_regdb_apply_mutex);
505 list_add_tail(&request->list, &reg_regdb_apply_list); 491 list_add_tail(&request->list, &reg_regdb_apply_list);
506 mutex_unlock(&reg_regdb_apply_mutex); 492 mutex_unlock(&reg_regdb_apply_mutex);
507 493
508 schedule_work(&reg_regdb_work); 494 schedule_work(&reg_regdb_work);
509
510 return 0; 495 return 0;
511} 496}
512 497
513/* Feel free to add any other sanity checks here */
514static void reg_regdb_size_check(void)
515{
516 /* We should ideally BUILD_BUG_ON() but then random builds would fail */
517 WARN_ONCE(!reg_regdb_size, "db.txt is empty, you should update it...");
518}
519#else
520static inline void reg_regdb_size_check(void) {}
521static inline int reg_query_builtin(const char *alpha2)
522{
523 return -ENODATA;
524}
525#endif /* CONFIG_CFG80211_INTERNAL_REGDB */
526
527#ifdef CONFIG_CFG80211_CRDA_SUPPORT 498#ifdef CONFIG_CFG80211_CRDA_SUPPORT
528/* Max number of consecutive attempts to communicate with CRDA */ 499/* Max number of consecutive attempts to communicate with CRDA */
529#define REG_MAX_CRDA_TIMEOUTS 10 500#define REG_MAX_CRDA_TIMEOUTS 10
@@ -599,10 +570,402 @@ static inline int call_crda(const char *alpha2)
599} 570}
600#endif /* CONFIG_CFG80211_CRDA_SUPPORT */ 571#endif /* CONFIG_CFG80211_CRDA_SUPPORT */
601 572
/* code to directly load a firmware database through request_firmware */
/* current database; NULL until loaded, ERR_PTR() on load failure */
static const struct fwdb_header *regdb;

/* one country entry in the header's lookup table */
struct fwdb_country {
	u8 alpha2[2];
	/* offset of the rule collection, in 4-byte units */
	__be16 coll_ptr;
	/* this struct cannot be extended */
} __packed __aligned(4);

/* a collection of rules shared by one or more countries */
struct fwdb_collection {
	u8 len;
	u8 n_rules;
	u8 dfs_region;
	/* no optional data yet */
	/* aligned to 2, then followed by __be16 array of rule pointers */
} __packed __aligned(4);

/* per-rule flag bits, mapped to NL80211_RRF_* when parsed */
enum fwdb_flags {
	FWDB_FLAG_NO_OFDM = BIT(0),
	FWDB_FLAG_NO_OUTDOOR = BIT(1),
	FWDB_FLAG_DFS = BIT(2),
	FWDB_FLAG_NO_IR = BIT(3),
	FWDB_FLAG_AUTO_BW = BIT(4),
};

/* one regulatory rule; fields after max_bw are optional (gated by len) */
struct fwdb_rule {
	u8 len;
	u8 flags;
	__be16 max_eirp;
	__be32 start, end, max_bw;
	/* start of optional data */
	__be16 cac_timeout;
} __packed __aligned(4);

/* 0x52474442 == ASCII "RGDB" */
#define FWDB_MAGIC 0x52474442
#define FWDB_VERSION 20

/* file header: magic + version followed by the country table */
struct fwdb_header {
	__be32 magic;
	__be32 version;
	struct fwdb_country country[];
} __packed __aligned(4);
615
616static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr)
617{
618 struct fwdb_rule *rule = (void *)(data + (rule_ptr << 2));
619
620 if ((u8 *)rule + sizeof(rule->len) > data + size)
621 return false;
622
623 /* mandatory fields */
624 if (rule->len < offsetofend(struct fwdb_rule, max_bw))
625 return false;
626
627 return true;
628}
629
/*
 * Validate one country entry: its rule collection must lie entirely
 * inside the firmware blob and every rule pointer it carries must
 * reference a valid rule.
 */
static bool valid_country(const u8 *data, unsigned int size,
			  const struct fwdb_country *country)
{
	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
	struct fwdb_collection *coll = (void *)(data + ptr);
	__be16 *rules_ptr;
	unsigned int i;

	/* make sure we can read len/n_rules */
	if ((u8 *)coll + offsetofend(typeof(*coll), n_rules) > data + size)
		return false;

	/* make sure base struct and all rules fit */
	if ((u8 *)coll + ALIGN(coll->len, 2) +
	    (coll->n_rules * 2) > data + size)
		return false;

	/* mandatory fields must exist */
	if (coll->len < offsetofend(struct fwdb_collection, dfs_region))
		return false;

	/* rule-pointer array follows the collection, 2-byte aligned */
	rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));

	for (i = 0; i < coll->n_rules; i++) {
		u16 rule_ptr = be16_to_cpu(rules_ptr[i]);

		if (!valid_rule(data, size, rule_ptr))
			return false;
	}

	return true;
}
662
663#ifdef CONFIG_CFG80211_REQUIRE_SIGNED_REGDB
664static struct key *builtin_regdb_keys;
665
666static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen)
667{
668 const u8 *end = p + buflen;
669 size_t plen;
670 key_ref_t key;
671
672 while (p < end) {
673 /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
674 * than 256 bytes in size.
675 */
676 if (end - p < 4)
677 goto dodgy_cert;
678 if (p[0] != 0x30 &&
679 p[1] != 0x82)
680 goto dodgy_cert;
681 plen = (p[2] << 8) | p[3];
682 plen += 4;
683 if (plen > end - p)
684 goto dodgy_cert;
685
686 key = key_create_or_update(make_key_ref(builtin_regdb_keys, 1),
687 "asymmetric", NULL, p, plen,
688 ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
689 KEY_USR_VIEW | KEY_USR_READ),
690 KEY_ALLOC_NOT_IN_QUOTA |
691 KEY_ALLOC_BUILT_IN |
692 KEY_ALLOC_BYPASS_RESTRICTION);
693 if (IS_ERR(key)) {
694 pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
695 PTR_ERR(key));
696 } else {
697 pr_notice("Loaded X.509 cert '%s'\n",
698 key_ref_to_ptr(key)->description);
699 key_ref_put(key);
700 }
701 p += plen;
702 }
703
704 return;
705
706dodgy_cert:
707 pr_err("Problem parsing in-kernel X.509 certificate list\n");
708}
709
710static int __init load_builtin_regdb_keys(void)
711{
712 builtin_regdb_keys =
713 keyring_alloc(".builtin_regdb_keys",
714 KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
715 ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
716 KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
717 KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
718 if (IS_ERR(builtin_regdb_keys))
719 return PTR_ERR(builtin_regdb_keys);
720
721 pr_notice("Loading compiled-in X.509 certificates for regulatory database\n");
722
723#ifdef CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS
724 load_keys_from_buffer(shipped_regdb_certs, shipped_regdb_certs_len);
725#endif
726#ifdef CONFIG_CFG80211_EXTRA_REGDB_KEYDIR
727 if (CONFIG_CFG80211_EXTRA_REGDB_KEYDIR[0] != '\0')
728 load_keys_from_buffer(extra_regdb_certs, extra_regdb_certs_len);
729#endif
730
731 return 0;
732}
733
734static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
735{
736 const struct firmware *sig;
737 bool result;
738
739 if (request_firmware(&sig, "regulatory.db.p7s", &reg_pdev->dev))
740 return false;
741
742 result = verify_pkcs7_signature(data, size, sig->data, sig->size,
743 builtin_regdb_keys,
744 VERIFYING_UNSPECIFIED_SIGNATURE,
745 NULL, NULL) == 0;
746
747 release_firmware(sig);
748
749 return result;
750}
751
752static void free_regdb_keyring(void)
753{
754 key_put(builtin_regdb_keys);
755}
756#else
757static int load_builtin_regdb_keys(void)
758{
759 return 0;
760}
761
762static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
763{
764 return true;
765}
766
767static void free_regdb_keyring(void)
768{
769}
770#endif /* CONFIG_CFG80211_REQUIRE_SIGNED_REGDB */
771
772static bool valid_regdb(const u8 *data, unsigned int size)
773{
774 const struct fwdb_header *hdr = (void *)data;
775 const struct fwdb_country *country;
776
777 if (size < sizeof(*hdr))
778 return false;
779
780 if (hdr->magic != cpu_to_be32(FWDB_MAGIC))
781 return false;
782
783 if (hdr->version != cpu_to_be32(FWDB_VERSION))
784 return false;
785
786 if (!regdb_has_valid_signature(data, size))
787 return false;
788
789 country = &hdr->country[0];
790 while ((u8 *)(country + 1) <= data + size) {
791 if (!country->coll_ptr)
792 break;
793 if (!valid_country(data, size, country))
794 return false;
795 country++;
796 }
797
798 return true;
799}
800
801static int regdb_query_country(const struct fwdb_header *db,
802 const struct fwdb_country *country)
803{
804 unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
805 struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
806 struct ieee80211_regdomain *regdom;
807 unsigned int size_of_regd;
808 unsigned int i;
809
810 size_of_regd =
811 sizeof(struct ieee80211_regdomain) +
812 coll->n_rules * sizeof(struct ieee80211_reg_rule);
813
814 regdom = kzalloc(size_of_regd, GFP_KERNEL);
815 if (!regdom)
816 return -ENOMEM;
817
818 regdom->n_reg_rules = coll->n_rules;
819 regdom->alpha2[0] = country->alpha2[0];
820 regdom->alpha2[1] = country->alpha2[1];
821 regdom->dfs_region = coll->dfs_region;
822
823 for (i = 0; i < regdom->n_reg_rules; i++) {
824 __be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
825 unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2;
826 struct fwdb_rule *rule = (void *)((u8 *)db + rule_ptr);
827 struct ieee80211_reg_rule *rrule = &regdom->reg_rules[i];
828
829 rrule->freq_range.start_freq_khz = be32_to_cpu(rule->start);
830 rrule->freq_range.end_freq_khz = be32_to_cpu(rule->end);
831 rrule->freq_range.max_bandwidth_khz = be32_to_cpu(rule->max_bw);
832
833 rrule->power_rule.max_antenna_gain = 0;
834 rrule->power_rule.max_eirp = be16_to_cpu(rule->max_eirp);
835
836 rrule->flags = 0;
837 if (rule->flags & FWDB_FLAG_NO_OFDM)
838 rrule->flags |= NL80211_RRF_NO_OFDM;
839 if (rule->flags & FWDB_FLAG_NO_OUTDOOR)
840 rrule->flags |= NL80211_RRF_NO_OUTDOOR;
841 if (rule->flags & FWDB_FLAG_DFS)
842 rrule->flags |= NL80211_RRF_DFS;
843 if (rule->flags & FWDB_FLAG_NO_IR)
844 rrule->flags |= NL80211_RRF_NO_IR;
845 if (rule->flags & FWDB_FLAG_AUTO_BW)
846 rrule->flags |= NL80211_RRF_AUTO_BW;
847
848 rrule->dfs_cac_ms = 0;
849
850 /* handle optional data */
851 if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout))
852 rrule->dfs_cac_ms =
853 1000 * be16_to_cpu(rule->cac_timeout);
854 }
855
856 return reg_schedule_apply(regdom);
857}
858
859static int query_regdb(const char *alpha2)
860{
861 const struct fwdb_header *hdr = regdb;
862 const struct fwdb_country *country;
863
864 ASSERT_RTNL();
865
866 if (IS_ERR(regdb))
867 return PTR_ERR(regdb);
868
869 country = &hdr->country[0];
870 while (country->coll_ptr) {
871 if (alpha2_equal(alpha2, country->alpha2))
872 return regdb_query_country(regdb, country);
873 country++;
874 }
875
876 return -ENODATA;
877}
878
/*
 * Completion callback for the asynchronous "regulatory.db" request.
 * @context is a kmemdup'd two-byte alpha2 (or NULL) that we own and
 * must free.  On success the new database is installed and queried for
 * @context; on any failure the default regulatory settings are
 * restored.
 */
static void regdb_fw_cb(const struct firmware *fw, void *context)
{
	int set_error = 0;
	bool restore = true;
	void *db;

	if (!fw) {
		pr_info("failed to load regulatory.db\n");
		set_error = -ENODATA;
	} else if (!valid_regdb(fw->data, fw->size)) {
		pr_info("loaded regulatory.db is malformed or signature is missing/invalid\n");
		set_error = -EINVAL;
	}

	/* regdb is protected by the RTNL */
	rtnl_lock();
	if (WARN_ON(regdb && !IS_ERR(regdb))) {
		/* just restore and free new db */
	} else if (set_error) {
		/* record the error so later queries fail fast */
		regdb = ERR_PTR(set_error);
	} else if (fw) {
		db = kmemdup(fw->data, fw->size, GFP_KERNEL);
		if (db) {
			regdb = db;
			/* keep current settings only if the lookup succeeded */
			restore = context && query_regdb(context);
		} else {
			restore = true;
		}
	}

	if (restore)
		restore_regulatory_settings(true);

	rtnl_unlock();

	/* we own the alpha2 copy passed as context */
	kfree(context);

	release_firmware(fw);
}
917
918static int query_regdb_file(const char *alpha2)
919{
920 ASSERT_RTNL();
921
922 if (regdb)
923 return query_regdb(alpha2);
924
925 alpha2 = kmemdup(alpha2, 2, GFP_KERNEL);
926 if (!alpha2)
927 return -ENOMEM;
928
929 return request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
930 &reg_pdev->dev, GFP_KERNEL,
931 (void *)alpha2, regdb_fw_cb);
932}
933
/*
 * reg_reload_regdb - synchronously reload the "regulatory.db" firmware
 * file and swap it in for the current in-memory database.
 *
 * Returns 0 on success or a negative error code (firmware missing,
 * malformed/unverifiable database, or out of memory).
 */
int reg_reload_regdb(void)
{
	const struct firmware *fw;
	void *db;
	int err;

	err = request_firmware(&fw, "regulatory.db", &reg_pdev->dev);
	if (err)
		return err;

	/* reject a blob that fails format or signature checks */
	if (!valid_regdb(fw->data, fw->size)) {
		err = -ENODATA;
		goto out;
	}

	db = kmemdup(fw->data, fw->size, GFP_KERNEL);
	if (!db) {
		err = -ENOMEM;
		goto out;
	}

	/* regdb is protected by the RTNL */
	rtnl_lock();
	if (!IS_ERR_OR_NULL(regdb))
		kfree(regdb);
	regdb = db;
	rtnl_unlock();

 out:
	release_firmware(fw);
	return err;
}
965
602static bool reg_query_database(struct regulatory_request *request) 966static bool reg_query_database(struct regulatory_request *request)
603{ 967{
604 /* query internal regulatory database (if it exists) */ 968 if (query_regdb_file(request->alpha2) == 0)
605 if (reg_query_builtin(request->alpha2) == 0)
606 return true; 969 return true;
607 970
608 if (call_crda(request->alpha2) == 0) 971 if (call_crda(request->alpha2) == 0)
@@ -3281,24 +3644,13 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
3281 } 3644 }
3282} 3645}
3283 3646
3284int __init regulatory_init(void) 3647static int __init regulatory_init_db(void)
3285{ 3648{
3286 int err = 0; 3649 int err;
3287
3288 reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
3289 if (IS_ERR(reg_pdev))
3290 return PTR_ERR(reg_pdev);
3291 3650
3292 spin_lock_init(&reg_requests_lock); 3651 err = load_builtin_regdb_keys();
3293 spin_lock_init(&reg_pending_beacons_lock); 3652 if (err)
3294 spin_lock_init(&reg_indoor_lock); 3653 return err;
3295
3296 reg_regdb_size_check();
3297
3298 rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom);
3299
3300 user_alpha2[0] = '9';
3301 user_alpha2[1] = '7';
3302 3654
3303 /* We always try to get an update for the static regdomain */ 3655 /* We always try to get an update for the static regdomain */
3304 err = regulatory_hint_core(cfg80211_world_regdom->alpha2); 3656 err = regulatory_hint_core(cfg80211_world_regdom->alpha2);
@@ -3327,6 +3679,31 @@ int __init regulatory_init(void)
3327 3679
3328 return 0; 3680 return 0;
3329} 3681}
3682#ifndef MODULE
3683late_initcall(regulatory_init_db);
3684#endif
3685
/*
 * Core regulatory setup: register the "regulatory" platform device
 * (used as the anchor for firmware requests), init locks, install the
 * world regdomain and reset the user hint.
 */
int __init regulatory_init(void)
{
	reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
	if (IS_ERR(reg_pdev))
		return PTR_ERR(reg_pdev);

	spin_lock_init(&reg_requests_lock);
	spin_lock_init(&reg_pending_beacons_lock);
	spin_lock_init(&reg_indoor_lock);

	rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom);

	/* "97" — presumably a deliberately invalid alpha2 acting as the
	 * "no user hint yet" sentinel; confirm against is_user_regdom_saved()
	 */
	user_alpha2[0] = '9';
	user_alpha2[1] = '7';

#ifdef MODULE
	/* as a module, filesystems are up: load the database right away */
	return regulatory_init_db();
#else
	/* built-in: regulatory_init_db() runs later via late_initcall */
	return 0;
#endif
}
3330 3707
3331void regulatory_exit(void) 3708void regulatory_exit(void)
3332{ 3709{
@@ -3360,4 +3737,9 @@ void regulatory_exit(void)
3360 list_del(&reg_request->list); 3737 list_del(&reg_request->list);
3361 kfree(reg_request); 3738 kfree(reg_request);
3362 } 3739 }
3740
3741 if (!IS_ERR_OR_NULL(regdb))
3742 kfree(regdb);
3743
3744 free_regdb_keyring();
3363} 3745}
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index ca7fedf2e7a1..9ceeb5f3a7cb 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -1,5 +1,8 @@
1#ifndef __NET_WIRELESS_REG_H 1#ifndef __NET_WIRELESS_REG_H
2#define __NET_WIRELESS_REG_H 2#define __NET_WIRELESS_REG_H
3
4#include <net/cfg80211.h>
5
3/* 6/*
4 * Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com> 7 * Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
5 * 8 *
@@ -179,4 +182,15 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
179 * @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1 182 * @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1
180 */ 183 */
181bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2); 184bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
185
186/**
187 * reg_reload_regdb - reload the regulatory.db firmware file
188 */
189int reg_reload_regdb(void);
190
191extern const u8 shipped_regdb_certs[];
192extern unsigned int shipped_regdb_certs_len;
193extern const u8 extra_regdb_certs[];
194extern unsigned int extra_regdb_certs_len;
195
182#endif /* __NET_WIRELESS_REG_H */ 196#endif /* __NET_WIRELESS_REG_H */
diff --git a/net/wireless/regdb.h b/net/wireless/regdb.h
deleted file mode 100644
index 3279cfcefb0c..000000000000
--- a/net/wireless/regdb.h
+++ /dev/null
@@ -1,23 +0,0 @@
1#ifndef __REGDB_H__
2#define __REGDB_H__
3
4/*
5 * Copyright 2009 John W. Linville <linville@tuxdriver.com>
6 *
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20extern const struct ieee80211_regdomain *reg_regdb[];
21extern int reg_regdb_size;
22
23#endif /* __REGDB_H__ */
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 3dd05a08c60a..fdb3646274a5 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -956,7 +956,6 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
956 ev->rm.resp_ie_len = info->resp_ie_len; 956 ev->rm.resp_ie_len = info->resp_ie_len;
957 memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len); 957 memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len);
958 ev->rm.bss = info->bss; 958 ev->rm.bss = info->bss;
959 ev->rm.authorized = info->authorized;
960 959
961 spin_lock_irqsave(&wdev->event_lock, flags); 960 spin_lock_irqsave(&wdev->event_lock, flags);
962 list_add_tail(&ev->list, &wdev->event_list); 961 list_add_tail(&ev->list, &wdev->event_list);
@@ -965,6 +964,50 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
965} 964}
966EXPORT_SYMBOL(cfg80211_roamed); 965EXPORT_SYMBOL(cfg80211_roamed);
967 966
967void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid)
968{
969 ASSERT_WDEV_LOCK(wdev);
970
971 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
972 return;
973
974 if (WARN_ON(!wdev->current_bss) ||
975 WARN_ON(!ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
976 return;
977
978 nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev,
979 bssid);
980}
981
982void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
983 gfp_t gfp)
984{
985 struct wireless_dev *wdev = dev->ieee80211_ptr;
986 struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
987 struct cfg80211_event *ev;
988 unsigned long flags;
989
990 if (WARN_ON(!bssid))
991 return;
992
993 ev = kzalloc(sizeof(*ev), gfp);
994 if (!ev)
995 return;
996
997 ev->type = EVENT_PORT_AUTHORIZED;
998 memcpy(ev->pa.bssid, bssid, ETH_ALEN);
999
1000 /*
1001 * Use the wdev event list so that if there are pending
1002 * connected/roamed events, they will be reported first.
1003 */
1004 spin_lock_irqsave(&wdev->event_lock, flags);
1005 list_add_tail(&ev->list, &wdev->event_list);
1006 spin_unlock_irqrestore(&wdev->event_lock, flags);
1007 queue_work(cfg80211_wq, &rdev->event_work);
1008}
1009EXPORT_SYMBOL(cfg80211_port_authorized);
1010
968void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, 1011void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
969 size_t ie_len, u16 reason, bool from_ap) 1012 size_t ie_len, u16 reason, bool from_ap)
970{ 1013{
diff --git a/net/wireless/util.c b/net/wireless/util.c
index c1238d582fd1..c69160694b6c 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -158,32 +158,30 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband)
158 case NL80211_BAND_2GHZ: 158 case NL80211_BAND_2GHZ:
159 want = 7; 159 want = 7;
160 for (i = 0; i < sband->n_bitrates; i++) { 160 for (i = 0; i < sband->n_bitrates; i++) {
161 if (sband->bitrates[i].bitrate == 10) { 161 switch (sband->bitrates[i].bitrate) {
162 case 10:
163 case 20:
164 case 55:
165 case 110:
162 sband->bitrates[i].flags |= 166 sband->bitrates[i].flags |=
163 IEEE80211_RATE_MANDATORY_B | 167 IEEE80211_RATE_MANDATORY_B |
164 IEEE80211_RATE_MANDATORY_G; 168 IEEE80211_RATE_MANDATORY_G;
165 want--; 169 want--;
166 } 170 break;
167 171 case 60:
168 if (sband->bitrates[i].bitrate == 20 || 172 case 120:
169 sband->bitrates[i].bitrate == 55 || 173 case 240:
170 sband->bitrates[i].bitrate == 110 ||
171 sband->bitrates[i].bitrate == 60 ||
172 sband->bitrates[i].bitrate == 120 ||
173 sband->bitrates[i].bitrate == 240) {
174 sband->bitrates[i].flags |= 174 sband->bitrates[i].flags |=
175 IEEE80211_RATE_MANDATORY_G; 175 IEEE80211_RATE_MANDATORY_G;
176 want--; 176 want--;
177 } 177 /* fall through */
178 178 default:
179 if (sband->bitrates[i].bitrate != 10 &&
180 sband->bitrates[i].bitrate != 20 &&
181 sband->bitrates[i].bitrate != 55 &&
182 sband->bitrates[i].bitrate != 110)
183 sband->bitrates[i].flags |= 179 sband->bitrates[i].flags |=
184 IEEE80211_RATE_ERP_G; 180 IEEE80211_RATE_ERP_G;
181 break;
182 }
185 } 183 }
186 WARN_ON(want != 0 && want != 3 && want != 6); 184 WARN_ON(want != 0 && want != 3);
187 break; 185 break;
188 case NL80211_BAND_60GHZ: 186 case NL80211_BAND_60GHZ:
189 /* check for mandatory HT MCS 1..4 */ 187 /* check for mandatory HT MCS 1..4 */
@@ -530,121 +528,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
530} 528}
531EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr); 529EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr);
532 530
/*
 * Convert an 802.3 (Ethernet) frame in @skb into an 802.11 data frame
 * in place, choosing the address layout from @iftype.  Returns 0 on
 * success or a negative error code.
 */
int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
			     enum nl80211_iftype iftype,
			     const u8 *bssid, bool qos)
{
	struct ieee80211_hdr hdr;
	u16 hdrlen, ethertype;
	__le16 fc;
	const u8 *encaps_data;
	int encaps_len, skip_header_bytes;
	int nh_pos, h_pos;
	int head_need;

	if (unlikely(skb->len < ETH_HLEN))
		return -EINVAL;

	/* remember header offsets so they can be fixed up at the end */
	nh_pos = skb_network_header(skb) - skb->data;
	h_pos = skb_transport_header(skb) - skb->data;

	/* convert Ethernet header to proper 802.11 header (based on
	 * operation mode) */
	ethertype = (skb->data[12] << 8) | skb->data[13];
	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);

	switch (iftype) {
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_AP_VLAN:
	case NL80211_IFTYPE_P2P_GO:
		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
		/* DA BSSID SA */
		memcpy(hdr.addr1, skb->data, ETH_ALEN);
		memcpy(hdr.addr2, addr, ETH_ALEN);
		memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN);
		hdrlen = 24;
		break;
	case NL80211_IFTYPE_STATION:
	case NL80211_IFTYPE_P2P_CLIENT:
		fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
		/* BSSID SA DA */
		memcpy(hdr.addr1, bssid, ETH_ALEN);
		memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
		memcpy(hdr.addr3, skb->data, ETH_ALEN);
		hdrlen = 24;
		break;
	case NL80211_IFTYPE_OCB:
	case NL80211_IFTYPE_ADHOC:
		/* DA SA BSSID */
		memcpy(hdr.addr1, skb->data, ETH_ALEN);
		memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
		memcpy(hdr.addr3, bssid, ETH_ALEN);
		hdrlen = 24;
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* QoS data frames carry a 2-byte QoS control field */
	if (qos) {
		fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
		hdrlen += 2;
	}

	hdr.frame_control = fc;
	hdr.duration_id = 0;
	hdr.seq_ctrl = 0;

	skip_header_bytes = ETH_HLEN;
	if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) {
		/* AARP/IPX use bridge-tunnel encapsulation */
		encaps_data = bridge_tunnel_header;
		encaps_len = sizeof(bridge_tunnel_header);
		skip_header_bytes -= 2;
	} else if (ethertype >= ETH_P_802_3_MIN) {
		/* other EtherTypes get RFC 1042 SNAP encapsulation */
		encaps_data = rfc1042_header;
		encaps_len = sizeof(rfc1042_header);
		skip_header_bytes -= 2;
	} else {
		encaps_data = NULL;
		encaps_len = 0;
	}

	skb_pull(skb, skip_header_bytes);
	nh_pos -= skip_header_bytes;
	h_pos -= skip_header_bytes;

	/* grow headroom if the 802.11 header + encapsulation won't fit */
	head_need = hdrlen + encaps_len - skb_headroom(skb);

	if (head_need > 0 || skb_cloned(skb)) {
		head_need = max(head_need, 0);
		if (head_need)
			skb_orphan(skb);

		if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC))
			return -ENOMEM;
	}

	if (encaps_data) {
		memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
		nh_pos += encaps_len;
		h_pos += encaps_len;
	}

	memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);

	nh_pos += hdrlen;
	h_pos += hdrlen;

	/* Update skb pointers to various headers since this modified frame
	 * is going to go through Linux networking code that may potentially
	 * need things like pointer to IP header. */
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, nh_pos);
	skb_set_transport_header(skb, h_pos);

	return 0;
}
EXPORT_SYMBOL(ieee80211_data_from_8023);
647
648static void 531static void
649__frame_add_frag(struct sk_buff *skb, struct page *page, 532__frame_add_frag(struct sk_buff *skb, struct page *page,
650 void *ptr, int len, int size) 533 void *ptr, int len, int size)
@@ -964,6 +847,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
964 case EVENT_STOPPED: 847 case EVENT_STOPPED:
965 __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); 848 __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev);
966 break; 849 break;
850 case EVENT_PORT_AUTHORIZED:
851 __cfg80211_port_authorized(wdev, ev->pa.bssid);
852 break;
967 } 853 }
968 wdev_unlock(wdev); 854 wdev_unlock(wdev);
969 855
@@ -1368,13 +1254,29 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
1368} 1254}
1369EXPORT_SYMBOL(cfg80211_get_p2p_attr); 1255EXPORT_SYMBOL(cfg80211_get_p2p_attr);
1370 1256
1371static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) 1257static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id, bool id_ext)
1372{ 1258{
1373 int i; 1259 int i;
1374 1260
1375 for (i = 0; i < n_ids; i++) 1261 /* Make sure array values are legal */
1376 if (ids[i] == id) 1262 if (WARN_ON(ids[n_ids - 1] == WLAN_EID_EXTENSION))
1263 return false;
1264
1265 i = 0;
1266 while (i < n_ids) {
1267 if (ids[i] == WLAN_EID_EXTENSION) {
1268 if (id_ext && (ids[i + 1] == id))
1269 return true;
1270
1271 i += 2;
1272 continue;
1273 }
1274
1275 if (ids[i] == id && !id_ext)
1377 return true; 1276 return true;
1277
1278 i++;
1279 }
1378 return false; 1280 return false;
1379} 1281}
1380 1282
@@ -1404,14 +1306,36 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
1404{ 1306{
1405 size_t pos = offset; 1307 size_t pos = offset;
1406 1308
1407 while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) { 1309 while (pos < ielen) {
1310 u8 ext = 0;
1311
1312 if (ies[pos] == WLAN_EID_EXTENSION)
1313 ext = 2;
1314 if ((pos + ext) >= ielen)
1315 break;
1316
1317 if (!ieee80211_id_in_list(ids, n_ids, ies[pos + ext],
1318 ies[pos] == WLAN_EID_EXTENSION))
1319 break;
1320
1408 if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) { 1321 if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) {
1409 pos = skip_ie(ies, ielen, pos); 1322 pos = skip_ie(ies, ielen, pos);
1410 1323
1411 while (pos < ielen && 1324 while (pos < ielen) {
1412 !ieee80211_id_in_list(after_ric, n_after_ric, 1325 if (ies[pos] == WLAN_EID_EXTENSION)
1413 ies[pos])) 1326 ext = 2;
1414 pos = skip_ie(ies, ielen, pos); 1327 else
1328 ext = 0;
1329
1330 if ((pos + ext) >= ielen)
1331 break;
1332
1333 if (!ieee80211_id_in_list(after_ric,
1334 n_after_ric,
1335 ies[pos + ext],
1336 ext == 2))
1337 pos = skip_ie(ies, ielen, pos);
1338 }
1415 } else { 1339 } else {
1416 pos = skip_ie(ies, ielen, pos); 1340 pos = skip_ie(ies, ielen, pos);
1417 } 1341 }
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index ac095936552d..562cc11131f6 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -374,9 +374,11 @@ static void __x25_destroy_socket(struct sock *);
374/* 374/*
375 * handler for deferred kills. 375 * handler for deferred kills.
376 */ 376 */
377static void x25_destroy_timer(unsigned long data) 377static void x25_destroy_timer(struct timer_list *t)
378{ 378{
379 x25_destroy_socket_from_timer((struct sock *)data); 379 struct sock *sk = from_timer(sk, t, sk_timer);
380
381 x25_destroy_socket_from_timer(sk);
380} 382}
381 383
382/* 384/*
@@ -414,7 +416,6 @@ static void __x25_destroy_socket(struct sock *sk)
414 /* Defer: outstanding buffers */ 416 /* Defer: outstanding buffers */
415 sk->sk_timer.expires = jiffies + 10 * HZ; 417 sk->sk_timer.expires = jiffies + 10 * HZ;
416 sk->sk_timer.function = x25_destroy_timer; 418 sk->sk_timer.function = x25_destroy_timer;
417 sk->sk_timer.data = (unsigned long)sk;
418 add_timer(&sk->sk_timer); 419 add_timer(&sk->sk_timer);
419 } else { 420 } else {
420 /* drop last reference so sock_put will free */ 421 /* drop last reference so sock_put will free */
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 997ff7b2509b..ad1734d36ed7 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -103,7 +103,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
103 *vc_fac_mask |= X25_MASK_REVERSE; 103 *vc_fac_mask |= X25_MASK_REVERSE;
104 break; 104 break;
105 } 105 }
106 106 /*fall through */
107 case X25_FAC_THROUGHPUT: 107 case X25_FAC_THROUGHPUT:
108 facilities->throughput = p[1]; 108 facilities->throughput = p[1];
109 *vc_fac_mask |= X25_MASK_THROUGHPUT; 109 *vc_fac_mask |= X25_MASK_THROUGHPUT;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 7ac50098a375..3c12cae32001 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -345,6 +345,7 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp
345 345
346 case X25_RESET_REQUEST: 346 case X25_RESET_REQUEST:
347 x25_write_internal(sk, X25_RESET_CONFIRMATION); 347 x25_write_internal(sk, X25_RESET_CONFIRMATION);
348 /* fall through */
348 case X25_RESET_CONFIRMATION: { 349 case X25_RESET_CONFIRMATION: {
349 x25_stop_timer(sk); 350 x25_stop_timer(sk);
350 x25->condition = 0x00; 351 x25->condition = 0x00;
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index e0cd04d28352..a6a8ab09b914 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -36,7 +36,7 @@
36LIST_HEAD(x25_neigh_list); 36LIST_HEAD(x25_neigh_list);
37DEFINE_RWLOCK(x25_neigh_list_lock); 37DEFINE_RWLOCK(x25_neigh_list_lock);
38 38
39static void x25_t20timer_expiry(unsigned long); 39static void x25_t20timer_expiry(struct timer_list *);
40 40
41static void x25_transmit_restart_confirmation(struct x25_neigh *nb); 41static void x25_transmit_restart_confirmation(struct x25_neigh *nb);
42static void x25_transmit_restart_request(struct x25_neigh *nb); 42static void x25_transmit_restart_request(struct x25_neigh *nb);
@@ -49,9 +49,9 @@ static inline void x25_start_t20timer(struct x25_neigh *nb)
49 mod_timer(&nb->t20timer, jiffies + nb->t20); 49 mod_timer(&nb->t20timer, jiffies + nb->t20);
50} 50}
51 51
52static void x25_t20timer_expiry(unsigned long param) 52static void x25_t20timer_expiry(struct timer_list *t)
53{ 53{
54 struct x25_neigh *nb = (struct x25_neigh *)param; 54 struct x25_neigh *nb = from_timer(nb, t, t20timer);
55 55
56 x25_transmit_restart_request(nb); 56 x25_transmit_restart_request(nb);
57 57
@@ -252,7 +252,7 @@ void x25_link_device_up(struct net_device *dev)
252 return; 252 return;
253 253
254 skb_queue_head_init(&nb->queue); 254 skb_queue_head_init(&nb->queue);
255 setup_timer(&nb->t20timer, x25_t20timer_expiry, (unsigned long)nb); 255 timer_setup(&nb->t20timer, x25_t20timer_expiry, 0);
256 256
257 dev_hold(dev); 257 dev_hold(dev);
258 nb->dev = dev; 258 nb->dev = dev;
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index 5c5db1a36399..fa3461002b3e 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -26,18 +26,17 @@
26#include <net/tcp_states.h> 26#include <net/tcp_states.h>
27#include <net/x25.h> 27#include <net/x25.h>
28 28
29static void x25_heartbeat_expiry(unsigned long); 29static void x25_heartbeat_expiry(struct timer_list *t);
30static void x25_timer_expiry(unsigned long); 30static void x25_timer_expiry(struct timer_list *t);
31 31
32void x25_init_timers(struct sock *sk) 32void x25_init_timers(struct sock *sk)
33{ 33{
34 struct x25_sock *x25 = x25_sk(sk); 34 struct x25_sock *x25 = x25_sk(sk);
35 35
36 setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk); 36 timer_setup(&x25->timer, x25_timer_expiry, 0);
37 37
38 /* initialized by sock_init_data */ 38 /* initialized by sock_init_data */
39 sk->sk_timer.data = (unsigned long)sk; 39 sk->sk_timer.function = x25_heartbeat_expiry;
40 sk->sk_timer.function = &x25_heartbeat_expiry;
41} 40}
42 41
43void x25_start_heartbeat(struct sock *sk) 42void x25_start_heartbeat(struct sock *sk)
@@ -93,9 +92,9 @@ unsigned long x25_display_timer(struct sock *sk)
93 return x25->timer.expires - jiffies; 92 return x25->timer.expires - jiffies;
94} 93}
95 94
96static void x25_heartbeat_expiry(unsigned long param) 95static void x25_heartbeat_expiry(struct timer_list *t)
97{ 96{
98 struct sock *sk = (struct sock *)param; 97 struct sock *sk = from_timer(sk, t, sk_timer);
99 98
100 bh_lock_sock(sk); 99 bh_lock_sock(sk);
101 if (sock_owned_by_user(sk)) /* can currently only occur in state 3 */ 100 if (sock_owned_by_user(sk)) /* can currently only occur in state 3 */
@@ -160,9 +159,10 @@ static inline void x25_do_timer_expiry(struct sock * sk)
160 } 159 }
161} 160}
162 161
163static void x25_timer_expiry(unsigned long param) 162static void x25_timer_expiry(struct timer_list *t)
164{ 163{
165 struct sock *sk = (struct sock *)param; 164 struct x25_sock *x25 = from_timer(x25, t, timer);
165 struct sock *sk = &x25->sk;
166 166
167 bh_lock_sock(sk); 167 bh_lock_sock(sk);
168 if (sock_owned_by_user(sk)) { /* can currently only occur in state 3 */ 168 if (sock_owned_by_user(sk)) { /* can currently only occur in state 3 */
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6eb228a70131..9542975eb2f9 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -57,7 +57,7 @@ static __read_mostly seqcount_t xfrm_policy_hash_generation;
57static void xfrm_init_pmtu(struct dst_entry *dst); 57static void xfrm_init_pmtu(struct dst_entry *dst);
58static int stale_bundle(struct dst_entry *dst); 58static int stale_bundle(struct dst_entry *dst);
59static int xfrm_bundle_ok(struct xfrm_dst *xdst); 59static int xfrm_bundle_ok(struct xfrm_dst *xdst);
60static void xfrm_policy_queue_process(unsigned long arg); 60static void xfrm_policy_queue_process(struct timer_list *t);
61 61
62static void __xfrm_policy_link(struct xfrm_policy *pol, int dir); 62static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
63static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, 63static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
@@ -179,9 +179,9 @@ static inline unsigned long make_jiffies(long secs)
179 return secs*HZ; 179 return secs*HZ;
180} 180}
181 181
182static void xfrm_policy_timer(unsigned long data) 182static void xfrm_policy_timer(struct timer_list *t)
183{ 183{
184 struct xfrm_policy *xp = (struct xfrm_policy *)data; 184 struct xfrm_policy *xp = from_timer(xp, t, timer);
185 unsigned long now = get_seconds(); 185 unsigned long now = get_seconds();
186 long next = LONG_MAX; 186 long next = LONG_MAX;
187 int warn = 0; 187 int warn = 0;
@@ -267,10 +267,9 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
267 rwlock_init(&policy->lock); 267 rwlock_init(&policy->lock);
268 refcount_set(&policy->refcnt, 1); 268 refcount_set(&policy->refcnt, 1);
269 skb_queue_head_init(&policy->polq.hold_queue); 269 skb_queue_head_init(&policy->polq.hold_queue);
270 setup_timer(&policy->timer, xfrm_policy_timer, 270 timer_setup(&policy->timer, xfrm_policy_timer, 0);
271 (unsigned long)policy); 271 timer_setup(&policy->polq.hold_timer,
272 setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, 272 xfrm_policy_queue_process, 0);
273 (unsigned long)policy);
274 } 273 }
275 return policy; 274 return policy;
276} 275}
@@ -1306,6 +1305,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
1306 newp->xfrm_nr = old->xfrm_nr; 1305 newp->xfrm_nr = old->xfrm_nr;
1307 newp->index = old->index; 1306 newp->index = old->index;
1308 newp->type = old->type; 1307 newp->type = old->type;
1308 newp->family = old->family;
1309 memcpy(newp->xfrm_vec, old->xfrm_vec, 1309 memcpy(newp->xfrm_vec, old->xfrm_vec,
1310 newp->xfrm_nr*sizeof(struct xfrm_tmpl)); 1310 newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1311 spin_lock_bh(&net->xfrm.xfrm_policy_lock); 1311 spin_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -1361,29 +1361,36 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
1361 struct net *net = xp_net(policy); 1361 struct net *net = xp_net(policy);
1362 int nx; 1362 int nx;
1363 int i, error; 1363 int i, error;
1364 xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1365 xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1364 xfrm_address_t tmp; 1366 xfrm_address_t tmp;
1365 1367
1366 for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { 1368 for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
1367 struct xfrm_state *x; 1369 struct xfrm_state *x;
1368 xfrm_address_t *local; 1370 xfrm_address_t *remote = daddr;
1369 xfrm_address_t *remote; 1371 xfrm_address_t *local = saddr;
1370 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; 1372 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1371 1373
1372 remote = &tmpl->id.daddr; 1374 if (tmpl->mode == XFRM_MODE_TUNNEL ||
1373 local = &tmpl->saddr; 1375 tmpl->mode == XFRM_MODE_BEET) {
1374 if (xfrm_addr_any(local, tmpl->encap_family)) { 1376 remote = &tmpl->id.daddr;
1375 error = xfrm_get_saddr(net, fl->flowi_oif, 1377 local = &tmpl->saddr;
1376 &tmp, remote, 1378 if (xfrm_addr_any(local, tmpl->encap_family)) {
1377 tmpl->encap_family, 0); 1379 error = xfrm_get_saddr(net, fl->flowi_oif,
1378 if (error) 1380 &tmp, remote,
1379 goto fail; 1381 tmpl->encap_family, 0);
1380 local = &tmp; 1382 if (error)
1383 goto fail;
1384 local = &tmp;
1385 }
1381 } 1386 }
1382 1387
1383 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); 1388 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1384 1389
1385 if (x && x->km.state == XFRM_STATE_VALID) { 1390 if (x && x->km.state == XFRM_STATE_VALID) {
1386 xfrm[nx++] = x; 1391 xfrm[nx++] = x;
1392 daddr = remote;
1393 saddr = local;
1387 continue; 1394 continue;
1388 } 1395 }
1389 if (x) { 1396 if (x) {
@@ -1851,12 +1858,12 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1851 return xdst; 1858 return xdst;
1852} 1859}
1853 1860
1854static void xfrm_policy_queue_process(unsigned long arg) 1861static void xfrm_policy_queue_process(struct timer_list *t)
1855{ 1862{
1856 struct sk_buff *skb; 1863 struct sk_buff *skb;
1857 struct sock *sk; 1864 struct sock *sk;
1858 struct dst_entry *dst; 1865 struct dst_entry *dst;
1859 struct xfrm_policy *pol = (struct xfrm_policy *)arg; 1866 struct xfrm_policy *pol = from_timer(pol, t, polq.hold_timer);
1860 struct net *net = xp_net(pol); 1867 struct net *net = xp_net(pol);
1861 struct xfrm_policy_queue *pq = &pol->polq; 1868 struct xfrm_policy_queue *pq = &pol->polq;
1862 struct flowi fl; 1869 struct flowi fl;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 1f5cee2269af..065d89606888 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -556,7 +556,7 @@ out:
556 return HRTIMER_NORESTART; 556 return HRTIMER_NORESTART;
557} 557}
558 558
559static void xfrm_replay_timer_handler(unsigned long data); 559static void xfrm_replay_timer_handler(struct timer_list *t);
560 560
561struct xfrm_state *xfrm_state_alloc(struct net *net) 561struct xfrm_state *xfrm_state_alloc(struct net *net)
562{ 562{
@@ -574,8 +574,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
574 INIT_HLIST_NODE(&x->byspi); 574 INIT_HLIST_NODE(&x->byspi);
575 tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, 575 tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
576 CLOCK_BOOTTIME, HRTIMER_MODE_ABS); 576 CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
577 setup_timer(&x->rtimer, xfrm_replay_timer_handler, 577 timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
578 (unsigned long)x);
579 x->curlft.add_time = get_seconds(); 578 x->curlft.add_time = get_seconds();
580 x->lft.soft_byte_limit = XFRM_INF; 579 x->lft.soft_byte_limit = XFRM_INF;
581 x->lft.soft_packet_limit = XFRM_INF; 580 x->lft.soft_packet_limit = XFRM_INF;
@@ -1879,9 +1878,9 @@ void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
1879} 1878}
1880EXPORT_SYMBOL(xfrm_state_walk_done); 1879EXPORT_SYMBOL(xfrm_state_walk_done);
1881 1880
1882static void xfrm_replay_timer_handler(unsigned long data) 1881static void xfrm_replay_timer_handler(struct timer_list *t)
1883{ 1882{
1884 struct xfrm_state *x = (struct xfrm_state *)data; 1883 struct xfrm_state *x = from_timer(x, t, rtimer);
1885 1884
1886 spin_lock(&x->lock); 1885 spin_lock(&x->lock);
1887 1886
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e44a0fed48dd..983b0233767b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -42,7 +42,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
42 return 0; 42 return 0;
43 43
44 algp = nla_data(rt); 44 algp = nla_data(rt);
45 if (nla_len(rt) < xfrm_alg_len(algp)) 45 if (nla_len(rt) < (int)xfrm_alg_len(algp))
46 return -EINVAL; 46 return -EINVAL;
47 47
48 switch (type) { 48 switch (type) {
@@ -68,7 +68,7 @@ static int verify_auth_trunc(struct nlattr **attrs)
68 return 0; 68 return 0;
69 69
70 algp = nla_data(rt); 70 algp = nla_data(rt);
71 if (nla_len(rt) < xfrm_alg_auth_len(algp)) 71 if (nla_len(rt) < (int)xfrm_alg_auth_len(algp))
72 return -EINVAL; 72 return -EINVAL;
73 73
74 algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; 74 algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
@@ -84,7 +84,7 @@ static int verify_aead(struct nlattr **attrs)
84 return 0; 84 return 0;
85 85
86 algp = nla_data(rt); 86 algp = nla_data(rt);
87 if (nla_len(rt) < aead_len(algp)) 87 if (nla_len(rt) < (int)aead_len(algp))
88 return -EINVAL; 88 return -EINVAL;
89 89
90 algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; 90 algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
@@ -130,7 +130,7 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
130 if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) 130 if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
131 return -EINVAL; 131 return -EINVAL;
132 132
133 if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && 133 if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
134 nla_len(rt) != sizeof(*rs)) 134 nla_len(rt) != sizeof(*rs))
135 return -EINVAL; 135 return -EINVAL;
136 } 136 }
@@ -404,7 +404,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
404 struct nlattr *rp) 404 struct nlattr *rp)
405{ 405{
406 struct xfrm_replay_state_esn *up; 406 struct xfrm_replay_state_esn *up;
407 int ulen; 407 unsigned int ulen;
408 408
409 if (!replay_esn || !rp) 409 if (!replay_esn || !rp)
410 return 0; 410 return 0;
@@ -414,7 +414,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
414 414
415 /* Check the overall length and the internal bitmap length to avoid 415 /* Check the overall length and the internal bitmap length to avoid
416 * potential overflow. */ 416 * potential overflow. */
417 if (nla_len(rp) < ulen || 417 if (nla_len(rp) < (int)ulen ||
418 xfrm_replay_state_esn_len(replay_esn) != ulen || 418 xfrm_replay_state_esn_len(replay_esn) != ulen ||
419 replay_esn->bmp_len != up->bmp_len) 419 replay_esn->bmp_len != up->bmp_len)
420 return -EINVAL; 420 return -EINVAL;
@@ -430,14 +430,14 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn
430 struct nlattr *rta) 430 struct nlattr *rta)
431{ 431{
432 struct xfrm_replay_state_esn *p, *pp, *up; 432 struct xfrm_replay_state_esn *p, *pp, *up;
433 int klen, ulen; 433 unsigned int klen, ulen;
434 434
435 if (!rta) 435 if (!rta)
436 return 0; 436 return 0;
437 437
438 up = nla_data(rta); 438 up = nla_data(rta);
439 klen = xfrm_replay_state_esn_len(up); 439 klen = xfrm_replay_state_esn_len(up);
440 ulen = nla_len(rta) >= klen ? klen : sizeof(*up); 440 ulen = nla_len(rta) >= (int)klen ? klen : sizeof(*up);
441 441
442 p = kzalloc(klen, GFP_KERNEL); 442 p = kzalloc(klen, GFP_KERNEL);
443 if (!p) 443 if (!p)
@@ -458,9 +458,9 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn
458 return 0; 458 return 0;
459} 459}
460 460
461static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) 461static inline unsigned int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
462{ 462{
463 int len = 0; 463 unsigned int len = 0;
464 464
465 if (xfrm_ctx) { 465 if (xfrm_ctx) {
466 len += sizeof(struct xfrm_user_sec_ctx); 466 len += sizeof(struct xfrm_user_sec_ctx);
@@ -1032,7 +1032,7 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
1032 return -1; 1032 return -1;
1033} 1033}
1034 1034
1035static inline size_t xfrm_spdinfo_msgsize(void) 1035static inline unsigned int xfrm_spdinfo_msgsize(void)
1036{ 1036{
1037 return NLMSG_ALIGN(4) 1037 return NLMSG_ALIGN(4)
1038 + nla_total_size(sizeof(struct xfrmu_spdinfo)) 1038 + nla_total_size(sizeof(struct xfrmu_spdinfo))
@@ -1147,18 +1147,19 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
1147 u32 *flags = nlmsg_data(nlh); 1147 u32 *flags = nlmsg_data(nlh);
1148 u32 sportid = NETLINK_CB(skb).portid; 1148 u32 sportid = NETLINK_CB(skb).portid;
1149 u32 seq = nlh->nlmsg_seq; 1149 u32 seq = nlh->nlmsg_seq;
1150 int err;
1150 1151
1151 r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); 1152 r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC);
1152 if (r_skb == NULL) 1153 if (r_skb == NULL)
1153 return -ENOMEM; 1154 return -ENOMEM;
1154 1155
1155 if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0) 1156 err = build_spdinfo(r_skb, net, sportid, seq, *flags);
1156 BUG(); 1157 BUG_ON(err < 0);
1157 1158
1158 return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); 1159 return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
1159} 1160}
1160 1161
1161static inline size_t xfrm_sadinfo_msgsize(void) 1162static inline unsigned int xfrm_sadinfo_msgsize(void)
1162{ 1163{
1163 return NLMSG_ALIGN(4) 1164 return NLMSG_ALIGN(4)
1164 + nla_total_size(sizeof(struct xfrmu_sadhinfo)) 1165 + nla_total_size(sizeof(struct xfrmu_sadhinfo))
@@ -1205,13 +1206,14 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
1205 u32 *flags = nlmsg_data(nlh); 1206 u32 *flags = nlmsg_data(nlh);
1206 u32 sportid = NETLINK_CB(skb).portid; 1207 u32 sportid = NETLINK_CB(skb).portid;
1207 u32 seq = nlh->nlmsg_seq; 1208 u32 seq = nlh->nlmsg_seq;
1209 int err;
1208 1210
1209 r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); 1211 r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC);
1210 if (r_skb == NULL) 1212 if (r_skb == NULL)
1211 return -ENOMEM; 1213 return -ENOMEM;
1212 1214
1213 if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0) 1215 err = build_sadinfo(r_skb, net, sportid, seq, *flags);
1214 BUG(); 1216 BUG_ON(err < 0);
1215 1217
1216 return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); 1218 return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
1217} 1219}
@@ -1634,7 +1636,7 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s
1634 return copy_sec_ctx(xp->security, skb); 1636 return copy_sec_ctx(xp->security, skb);
1635 return 0; 1637 return 0;
1636} 1638}
1637static inline size_t userpolicy_type_attrsize(void) 1639static inline unsigned int userpolicy_type_attrsize(void)
1638{ 1640{
1639#ifdef CONFIG_XFRM_SUB_POLICY 1641#ifdef CONFIG_XFRM_SUB_POLICY
1640 return nla_total_size(sizeof(struct xfrm_userpolicy_type)); 1642 return nla_total_size(sizeof(struct xfrm_userpolicy_type));
@@ -1853,9 +1855,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
1853 return 0; 1855 return 0;
1854} 1856}
1855 1857
1856static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x) 1858static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x)
1857{ 1859{
1858 size_t replay_size = x->replay_esn ? 1860 unsigned int replay_size = x->replay_esn ?
1859 xfrm_replay_state_esn_len(x->replay_esn) : 1861 xfrm_replay_state_esn_len(x->replay_esn) :
1860 sizeof(struct xfrm_replay_state); 1862 sizeof(struct xfrm_replay_state);
1861 1863
@@ -1960,8 +1962,9 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
1960 c.seq = nlh->nlmsg_seq; 1962 c.seq = nlh->nlmsg_seq;
1961 c.portid = nlh->nlmsg_pid; 1963 c.portid = nlh->nlmsg_pid;
1962 1964
1963 if (build_aevent(r_skb, x, &c) < 0) 1965 err = build_aevent(r_skb, x, &c);
1964 BUG(); 1966 BUG_ON(err < 0);
1967
1965 err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); 1968 err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid);
1966 spin_unlock_bh(&x->lock); 1969 spin_unlock_bh(&x->lock);
1967 xfrm_state_put(x); 1970 xfrm_state_put(x);
@@ -2324,8 +2327,8 @@ static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff
2324 return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); 2327 return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk);
2325} 2328}
2326 2329
2327static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma, 2330static inline unsigned int xfrm_migrate_msgsize(int num_migrate, int with_kma,
2328 int with_encp) 2331 int with_encp)
2329{ 2332{
2330 return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) 2333 return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id))
2331 + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) 2334 + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0)
@@ -2388,6 +2391,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
2388{ 2391{
2389 struct net *net = &init_net; 2392 struct net *net = &init_net;
2390 struct sk_buff *skb; 2393 struct sk_buff *skb;
2394 int err;
2391 2395
2392 skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k, !!encap), 2396 skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k, !!encap),
2393 GFP_ATOMIC); 2397 GFP_ATOMIC);
@@ -2395,8 +2399,8 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
2395 return -ENOMEM; 2399 return -ENOMEM;
2396 2400
2397 /* build migrate */ 2401 /* build migrate */
2398 if (build_migrate(skb, m, num_migrate, k, sel, encap, dir, type) < 0) 2402 err = build_migrate(skb, m, num_migrate, k, sel, encap, dir, type);
2399 BUG(); 2403 BUG_ON(err < 0);
2400 2404
2401 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE); 2405 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE);
2402} 2406}
@@ -2572,7 +2576,7 @@ static void xfrm_netlink_rcv(struct sk_buff *skb)
2572 mutex_unlock(&net->xfrm.xfrm_cfg_mutex); 2576 mutex_unlock(&net->xfrm.xfrm_cfg_mutex);
2573} 2577}
2574 2578
2575static inline size_t xfrm_expire_msgsize(void) 2579static inline unsigned int xfrm_expire_msgsize(void)
2576{ 2580{
2577 return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) 2581 return NLMSG_ALIGN(sizeof(struct xfrm_user_expire))
2578 + nla_total_size(sizeof(struct xfrm_mark)); 2582 + nla_total_size(sizeof(struct xfrm_mark));
@@ -2623,13 +2627,14 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event
2623{ 2627{
2624 struct net *net = xs_net(x); 2628 struct net *net = xs_net(x);
2625 struct sk_buff *skb; 2629 struct sk_buff *skb;
2630 int err;
2626 2631
2627 skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); 2632 skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC);
2628 if (skb == NULL) 2633 if (skb == NULL)
2629 return -ENOMEM; 2634 return -ENOMEM;
2630 2635
2631 if (build_aevent(skb, x, c) < 0) 2636 err = build_aevent(skb, x, c);
2632 BUG(); 2637 BUG_ON(err < 0);
2633 2638
2634 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS); 2639 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS);
2635} 2640}
@@ -2660,9 +2665,9 @@ static int xfrm_notify_sa_flush(const struct km_event *c)
2660 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); 2665 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
2661} 2666}
2662 2667
2663static inline size_t xfrm_sa_len(struct xfrm_state *x) 2668static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
2664{ 2669{
2665 size_t l = 0; 2670 unsigned int l = 0;
2666 if (x->aead) 2671 if (x->aead)
2667 l += nla_total_size(aead_len(x->aead)); 2672 l += nla_total_size(aead_len(x->aead));
2668 if (x->aalg) { 2673 if (x->aalg) {
@@ -2707,8 +2712,9 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
2707 struct xfrm_usersa_id *id; 2712 struct xfrm_usersa_id *id;
2708 struct nlmsghdr *nlh; 2713 struct nlmsghdr *nlh;
2709 struct sk_buff *skb; 2714 struct sk_buff *skb;
2710 int len = xfrm_sa_len(x); 2715 unsigned int len = xfrm_sa_len(x);
2711 int headlen, err; 2716 unsigned int headlen;
2717 int err;
2712 2718
2713 headlen = sizeof(*p); 2719 headlen = sizeof(*p);
2714 if (c->event == XFRM_MSG_DELSA) { 2720 if (c->event == XFRM_MSG_DELSA) {
@@ -2782,8 +2788,8 @@ static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c
2782 2788
2783} 2789}
2784 2790
2785static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, 2791static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x,
2786 struct xfrm_policy *xp) 2792 struct xfrm_policy *xp)
2787{ 2793{
2788 return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)) 2794 return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire))
2789 + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) 2795 + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
@@ -2835,13 +2841,14 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
2835{ 2841{
2836 struct net *net = xs_net(x); 2842 struct net *net = xs_net(x);
2837 struct sk_buff *skb; 2843 struct sk_buff *skb;
2844 int err;
2838 2845
2839 skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC); 2846 skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC);
2840 if (skb == NULL) 2847 if (skb == NULL)
2841 return -ENOMEM; 2848 return -ENOMEM;
2842 2849
2843 if (build_acquire(skb, x, xt, xp) < 0) 2850 err = build_acquire(skb, x, xt, xp);
2844 BUG(); 2851 BUG_ON(err < 0);
2845 2852
2846 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE); 2853 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE);
2847} 2854}
@@ -2906,7 +2913,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
2906 return xp; 2913 return xp;
2907} 2914}
2908 2915
2909static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp) 2916static inline unsigned int xfrm_polexpire_msgsize(struct xfrm_policy *xp)
2910{ 2917{
2911 return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)) 2918 return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire))
2912 + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) 2919 + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
@@ -2950,26 +2957,28 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct
2950{ 2957{
2951 struct net *net = xp_net(xp); 2958 struct net *net = xp_net(xp);
2952 struct sk_buff *skb; 2959 struct sk_buff *skb;
2960 int err;
2953 2961
2954 skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC); 2962 skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC);
2955 if (skb == NULL) 2963 if (skb == NULL)
2956 return -ENOMEM; 2964 return -ENOMEM;
2957 2965
2958 if (build_polexpire(skb, xp, dir, c) < 0) 2966 err = build_polexpire(skb, xp, dir, c);
2959 BUG(); 2967 BUG_ON(err < 0);
2960 2968
2961 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); 2969 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
2962} 2970}
2963 2971
2964static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) 2972static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
2965{ 2973{
2966 int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); 2974 unsigned int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
2967 struct net *net = xp_net(xp); 2975 struct net *net = xp_net(xp);
2968 struct xfrm_userpolicy_info *p; 2976 struct xfrm_userpolicy_info *p;
2969 struct xfrm_userpolicy_id *id; 2977 struct xfrm_userpolicy_id *id;
2970 struct nlmsghdr *nlh; 2978 struct nlmsghdr *nlh;
2971 struct sk_buff *skb; 2979 struct sk_buff *skb;
2972 int headlen, err; 2980 unsigned int headlen;
2981 int err;
2973 2982
2974 headlen = sizeof(*p); 2983 headlen = sizeof(*p);
2975 if (c->event == XFRM_MSG_DELPOLICY) { 2984 if (c->event == XFRM_MSG_DELPOLICY) {
@@ -3076,7 +3085,7 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct
3076 3085
3077} 3086}
3078 3087
3079static inline size_t xfrm_report_msgsize(void) 3088static inline unsigned int xfrm_report_msgsize(void)
3080{ 3089{
3081 return NLMSG_ALIGN(sizeof(struct xfrm_user_report)); 3090 return NLMSG_ALIGN(sizeof(struct xfrm_user_report));
3082} 3091}
@@ -3110,18 +3119,19 @@ static int xfrm_send_report(struct net *net, u8 proto,
3110 struct xfrm_selector *sel, xfrm_address_t *addr) 3119 struct xfrm_selector *sel, xfrm_address_t *addr)
3111{ 3120{
3112 struct sk_buff *skb; 3121 struct sk_buff *skb;
3122 int err;
3113 3123
3114 skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC); 3124 skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC);
3115 if (skb == NULL) 3125 if (skb == NULL)
3116 return -ENOMEM; 3126 return -ENOMEM;
3117 3127
3118 if (build_report(skb, proto, sel, addr) < 0) 3128 err = build_report(skb, proto, sel, addr);
3119 BUG(); 3129 BUG_ON(err < 0);
3120 3130
3121 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT); 3131 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT);
3122} 3132}
3123 3133
3124static inline size_t xfrm_mapping_msgsize(void) 3134static inline unsigned int xfrm_mapping_msgsize(void)
3125{ 3135{
3126 return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping)); 3136 return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping));
3127} 3137}
@@ -3157,6 +3167,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
3157{ 3167{
3158 struct net *net = xs_net(x); 3168 struct net *net = xs_net(x);
3159 struct sk_buff *skb; 3169 struct sk_buff *skb;
3170 int err;
3160 3171
3161 if (x->id.proto != IPPROTO_ESP) 3172 if (x->id.proto != IPPROTO_ESP)
3162 return -EINVAL; 3173 return -EINVAL;
@@ -3168,8 +3179,8 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
3168 if (skb == NULL) 3179 if (skb == NULL)
3169 return -ENOMEM; 3180 return -ENOMEM;
3170 3181
3171 if (build_mapping(skb, x, ipaddr, sport) < 0) 3182 err = build_mapping(skb, x, ipaddr, sport);
3172 BUG(); 3183 BUG_ON(err < 0);
3173 3184
3174 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING); 3185 return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING);
3175} 3186}