aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorMark Brown <broonie@kernel.org>2017-01-10 11:39:52 -0500
committerMark Brown <broonie@kernel.org>2017-01-10 11:39:52 -0500
commit9c1852b459f04f6309e40d1d167512b0a5598529 (patch)
treea74526f1fe3f9826b81327f0b9ba9b98a543f87c /net
parent9b41da80e09128574f09bed8dc5a5fc6f72a8239 (diff)
parent7ce7d89f48834cefece7804d38fc5d85382edf77 (diff)
Merge tag 'v4.10-rc1' into asoc-samsung
Linux 4.10-rc1
Diffstat (limited to 'net')
-rw-r--r--net/802/fc.c2
-rw-r--r--net/802/fddi.c11
-rw-r--r--net/802/hippi.c16
-rw-r--r--net/8021q/vlan.c11
-rw-r--r--net/8021q/vlan.h2
-rw-r--r--net/8021q/vlan_dev.c3
-rw-r--r--net/Kconfig8
-rw-r--r--net/atm/br2684.c6
-rw-r--r--net/atm/common.c2
-rw-r--r--net/atm/lec.c17
-rw-r--r--net/atm/mpoa_caches.c43
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/ax25/ax25_addr.c2
-rw-r--r--net/ax25/ax25_dev.c2
-rw-r--r--net/ax25/ax25_ds_in.c2
-rw-r--r--net/ax25/ax25_ds_subr.c2
-rw-r--r--net/ax25/ax25_ds_timer.c2
-rw-r--r--net/ax25/ax25_iface.c2
-rw-r--r--net/ax25/ax25_in.c2
-rw-r--r--net/ax25/ax25_ip.c2
-rw-r--r--net/ax25/ax25_out.c2
-rw-r--r--net/ax25/ax25_route.c2
-rw-r--r--net/ax25/ax25_std_in.c2
-rw-r--r--net/ax25/ax25_std_subr.c2
-rw-r--r--net/ax25/ax25_std_timer.c2
-rw-r--r--net/ax25/ax25_subr.c2
-rw-r--r--net/ax25/ax25_timer.c2
-rw-r--r--net/ax25/ax25_uid.c2
-rw-r--r--net/batman-adv/Kconfig2
-rw-r--r--net/batman-adv/bat_iv_ogm.c58
-rw-r--r--net/batman-adv/bat_v.c8
-rw-r--r--net/batman-adv/bat_v_elp.c71
-rw-r--r--net/batman-adv/bat_v_ogm.c75
-rw-r--r--net/batman-adv/debugfs.c26
-rw-r--r--net/batman-adv/distributed-arp-table.c84
-rw-r--r--net/batman-adv/fragmentation.c82
-rw-r--r--net/batman-adv/fragmentation.h2
-rw-r--r--net/batman-adv/gateway_client.c9
-rw-r--r--net/batman-adv/hard-interface.c225
-rw-r--r--net/batman-adv/hard-interface.h21
-rw-r--r--net/batman-adv/hash.h30
-rw-r--r--net/batman-adv/icmp_socket.c5
-rw-r--r--net/batman-adv/log.c4
-rw-r--r--net/batman-adv/log.h12
-rw-r--r--net/batman-adv/main.c16
-rw-r--r--net/batman-adv/main.h28
-rw-r--r--net/batman-adv/multicast.c70
-rw-r--r--net/batman-adv/multicast.h6
-rw-r--r--net/batman-adv/netlink.c31
-rw-r--r--net/batman-adv/network-coding.c43
-rw-r--r--net/batman-adv/originator.c23
-rw-r--r--net/batman-adv/packet.h12
-rw-r--r--net/batman-adv/routing.c180
-rw-r--r--net/batman-adv/send.c419
-rw-r--r--net/batman-adv/send.h11
-rw-r--r--net/batman-adv/soft-interface.c27
-rw-r--r--net/batman-adv/sysfs.c53
-rw-r--r--net/batman-adv/tp_meter.c6
-rw-r--r--net/batman-adv/translation-table.c39
-rw-r--r--net/batman-adv/tvlv.c5
-rw-r--r--net/batman-adv/types.h45
-rw-r--r--net/bluetooth/Makefile2
-rw-r--r--net/bluetooth/bnep/netdev.c3
-rw-r--r--net/bluetooth/l2cap_core.c6
-rw-r--r--net/bluetooth/smp.c85
-rw-r--r--net/bluetooth/smp.h1
-rw-r--r--net/bridge/br_device.c7
-rw-r--r--net/bridge/br_fdb.c10
-rw-r--r--net/bridge/br_ioctl.c2
-rw-r--r--net/bridge/br_multicast.c188
-rw-r--r--net/bridge/br_netfilter_hooks.c28
-rw-r--r--net/bridge/br_netfilter_ipv6.c2
-rw-r--r--net/bridge/br_netlink.c34
-rw-r--r--net/bridge/br_private.h11
-rw-r--r--net/bridge/br_private_stp.h1
-rw-r--r--net/bridge/br_stp.c65
-rw-r--r--net/bridge/br_stp_if.c14
-rw-r--r--net/bridge/br_stp_timer.c2
-rw-r--r--net/bridge/br_sysfs_br.c40
-rw-r--r--net/bridge/netfilter/Kconfig1
-rw-r--r--net/bridge/netfilter/ebt_arpreply.c3
-rw-r--r--net/bridge/netfilter/ebt_log.c11
-rw-r--r--net/bridge/netfilter/ebt_nflog.c6
-rw-r--r--net/bridge/netfilter/ebt_redirect.c6
-rw-r--r--net/bridge/netfilter/ebtable_broute.c2
-rw-r--r--net/bridge/netfilter/ebtables.c8
-rw-r--r--net/bridge/netfilter/nf_log_bridge.c17
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c2
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c30
-rw-r--r--net/caif/caif_dev.c2
-rw-r--r--net/caif/cfcnfg.c9
-rw-r--r--net/can/bcm.c32
-rw-r--r--net/can/gw.c2
-rw-r--r--net/ceph/auth.c4
-rw-r--r--net/ceph/auth_x.c197
-rw-r--r--net/ceph/auth_x.h3
-rw-r--r--net/ceph/crush/mapper.c2
-rw-r--r--net/ceph/crypto.c461
-rw-r--r--net/ceph/crypto.h26
-rw-r--r--net/ceph/messenger.c19
-rw-r--r--net/ceph/mon_client.c12
-rw-r--r--net/ceph/osd_client.c39
-rw-r--r--net/compat.c2
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/datagram.c74
-rw-r--r--net/core/dev.c892
-rw-r--r--net/core/devlink.c100
-rw-r--r--net/core/drop_monitor.c21
-rw-r--r--net/core/ethtool.c97
-rw-r--r--net/core/fib_rules.c78
-rw-r--r--net/core/filter.c344
-rw-r--r--net/core/flow.c60
-rw-r--r--net/core/flow_dissector.c33
-rw-r--r--net/core/gen_estimator.c296
-rw-r--r--net/core/gen_stats.c20
-rw-r--r--net/core/lwt_bpf.c396
-rw-r--r--net/core/lwtunnel.c17
-rw-r--r--net/core/neighbour.c16
-rw-r--r--net/core/net-sysfs.c65
-rw-r--r--net/core/net_namespace.c77
-rw-r--r--net/core/netpoll.c6
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/rtnetlink.c22
-rw-r--r--net/core/scm.c2
-rw-r--r--net/core/secure_seq.c11
-rw-r--r--net/core/skbuff.c74
-rw-r--r--net/core/sock.c86
-rw-r--r--net/core/stream.c28
-rw-r--r--net/core/sysctl_net_core.c5
-rw-r--r--net/core/utils.c2
-rw-r--r--net/dccp/ipv4.c8
-rw-r--r--net/dccp/ipv6.c2
-rw-r--r--net/decnet/af_decnet.c16
-rw-r--r--net/decnet/dn_dev.c4
-rw-r--r--net/decnet/dn_fib.c2
-rw-r--r--net/decnet/dn_table.c2
-rw-r--r--net/decnet/sysctl_net_decnet.c2
-rw-r--r--net/dsa/slave.c16
-rw-r--r--net/ethernet/eth.c8
-rw-r--r--net/hsr/hsr_device.c1
-rw-r--r--net/hsr/hsr_netlink.c23
-rw-r--r--net/ieee802154/6lowpan/6lowpan_i.h2
-rw-r--r--net/ieee802154/Makefile2
-rw-r--r--net/ieee802154/netlink.c24
-rw-r--r--net/ieee802154/nl-phy.c6
-rw-r--r--net/ieee802154/nl802154.c44
-rw-r--r--net/ipv4/Kconfig8
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c14
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/fib_frontend.c5
-rw-r--r--net/ipv4/fib_semantics.c3
-rw-r--r--net/ipv4/fib_trie.c157
-rw-r--r--net/ipv4/fou.c23
-rw-r--r--net/ipv4/icmp.c8
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/inet_connection_sock.c20
-rw-r--r--net/ipv4/inet_diag.c73
-rw-r--r--net/ipv4/ip_gre.c6
-rw-r--r--net/ipv4/ip_options.c2
-rw-r--r--net/ipv4/ip_output.c39
-rw-r--r--net/ipv4/ip_sockglue.c36
-rw-r--r--net/ipv4/ip_tunnel.c10
-rw-r--r--net/ipv4/ip_vti.c2
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/ipip.c4
-rw-r--r--net/ipv4/ipmr.c17
-rw-r--r--net/ipv4/netfilter/Kconfig14
-rw-r--r--net/ipv4/netfilter/Makefile3
-rw-r--r--net/ipv4/netfilter/arp_tables.c44
-rw-r--r--net/ipv4/netfilter/ip_tables.c44
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c6
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c11
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c4
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c8
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c10
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c145
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c41
-rw-r--r--net/ipv4/netfilter/nf_socket_ipv4.c163
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c241
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c15
-rw-r--r--net/ipv4/netfilter/nft_redir_ipv4.c14
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c4
-rw-r--r--net/ipv4/ping.c11
-rw-r--r--net/ipv4/raw.c35
-rw-r--r--net/ipv4/raw_diag.c266
-rw-r--r--net/ipv4/route.c119
-rw-r--r--net/ipv4/syncookies.c3
-rw-r--r--net/ipv4/tcp.c118
-rw-r--r--net/ipv4/tcp_bbr.c32
-rw-r--r--net/ipv4/tcp_cong.c14
-rw-r--r--net/ipv4/tcp_dctcp.c1
-rw-r--r--net/ipv4/tcp_highspeed.c11
-rw-r--r--net/ipv4/tcp_hybla.c1
-rw-r--r--net/ipv4/tcp_illinois.c10
-rw-r--r--net/ipv4/tcp_input.c33
-rw-r--r--net/ipv4/tcp_ipv4.c26
-rw-r--r--net/ipv4/tcp_lp.c1
-rw-r--r--net/ipv4/tcp_metrics.c23
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c190
-rw-r--r--net/ipv4/tcp_scalable.c15
-rw-r--r--net/ipv4/tcp_timer.c4
-rw-r--r--net/ipv4/tcp_vegas.c1
-rw-r--r--net/ipv4/tcp_veno.c10
-rw-r--r--net/ipv4/tcp_westwood.c1
-rw-r--r--net/ipv4/tcp_yeah.c10
-rw-r--r--net/ipv4/udp.c256
-rw-r--r--net/ipv4/udplite.c3
-rw-r--r--net/ipv6/Kconfig35
-rw-r--r--net/ipv6/Makefile4
-rw-r--r--net/ipv6/addrconf.c50
-rw-r--r--net/ipv6/af_inet6.c20
-rw-r--r--net/ipv6/ah6.c5
-rw-r--r--net/ipv6/datagram.c10
-rw-r--r--net/ipv6/esp6.c5
-rw-r--r--net/ipv6/exthdrs.c270
-rw-r--r--net/ipv6/icmp.c9
-rw-r--r--net/ipv6/ila/ila_lwt.c92
-rw-r--r--net/ipv6/ila/ila_xlat.c43
-rw-r--r--net/ipv6/inet6_connection_sock.c9
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_gre.c6
-rw-r--r--net/ipv6/ip6_output.c16
-rw-r--r--net/ipv6/ip6_tunnel.c14
-rw-r--r--net/ipv6/ip6_vti.c28
-rw-r--r--net/ipv6/ip6mr.c4
-rw-r--r--net/ipv6/ipcomp6.c5
-rw-r--r--net/ipv6/ipv6_sockglue.c20
-rw-r--r--net/ipv6/mip6.c2
-rw-r--r--net/ipv6/ndisc.c29
-rw-r--r--net/ipv6/netfilter.c1
-rw-r--r--net/ipv6/netfilter/Kconfig14
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/ip6_tables.c45
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c2
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c23
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c8
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c3
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c146
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c42
-rw-r--r--net/ipv6/netfilter/nf_socket_ipv6.c151
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c275
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c14
-rw-r--r--net/ipv6/netfilter/nft_redir_ipv6.c14
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c6
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/raw.c14
-rw-r--r--net/ipv6/reassembly.c7
-rw-r--r--net/ipv6/route.c31
-rw-r--r--net/ipv6/seg6.c495
-rw-r--r--net/ipv6/seg6_hmac.c484
-rw-r--r--net/ipv6/seg6_iptunnel.c431
-rw-r--r--net/ipv6/sit.c18
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/tcp_ipv6.c14
-rw-r--r--net/ipv6/udp.c50
-rw-r--r--net/ipv6/udplite.c3
-rw-r--r--net/ipv6/xfrm6_tunnel.c2
-rw-r--r--net/ipx/af_ipx.c4
-rw-r--r--net/irda/af_irda.c2
-rw-r--r--net/irda/ircomm/ircomm_tty.c2
-rw-r--r--net/irda/ircomm/ircomm_tty_ioctl.c2
-rw-r--r--net/irda/irda_device.c2
-rw-r--r--net/irda/irlan/irlan_eth.c4
-rw-r--r--net/irda/irnet/irnet.h3
-rw-r--r--net/irda/irnet/irnet_ppp.h11
-rw-r--r--net/irda/irnetlink.c22
-rw-r--r--net/irda/irproc.c1
-rw-r--r--net/iucv/af_iucv.c34
-rw-r--r--net/iucv/iucv.c124
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l2tp/l2tp_core.c2
-rw-r--r--net/l2tp/l2tp_core.h10
-rw-r--r--net/l2tp/l2tp_eth.c2
-rw-r--r--net/l2tp/l2tp_ip6.c1
-rw-r--r--net/l2tp/l2tp_netlink.c59
-rw-r--r--net/l2tp/l2tp_ppp.c60
-rw-r--r--net/lapb/lapb_iface.c2
-rw-r--r--net/lapb/lapb_in.c2
-rw-r--r--net/lapb/lapb_out.c2
-rw-r--r--net/lapb/lapb_subr.c2
-rw-r--r--net/lapb/lapb_timer.c2
-rw-r--r--net/llc/af_llc.c24
-rw-r--r--net/mac80211/Makefile3
-rw-r--r--net/mac80211/aes_cmac.c8
-rw-r--r--net/mac80211/aes_cmac.h4
-rw-r--r--net/mac80211/agg-rx.c8
-rw-r--r--net/mac80211/cfg.c35
-rw-r--r--net/mac80211/debugfs.c1
-rw-r--r--net/mac80211/debugfs_netdev.c11
-rw-r--r--net/mac80211/debugfs_sta.c9
-rw-r--r--net/mac80211/fils_aead.c342
-rw-r--r--net/mac80211/fils_aead.h19
-rw-r--r--net/mac80211/ieee80211_i.h30
-rw-r--r--net/mac80211/iface.c31
-rw-r--r--net/mac80211/key.c3
-rw-r--r--net/mac80211/main.c5
-rw-r--r--net/mac80211/mlme.c81
-rw-r--r--net/mac80211/rx.c13
-rw-r--r--net/mac80211/sta_info.c37
-rw-r--r--net/mac80211/sta_info.h4
-rw-r--r--net/mac80211/tx.c55
-rw-r--r--net/mac80211/util.c61
-rw-r--r--net/mac80211/wme.c23
-rw-r--r--net/mac80211/wpa.c2
-rw-r--r--net/mac802154/Makefile2
-rw-r--r--net/mac802154/util.c4
-rw-r--r--net/mpls/mpls_iptunnel.c5
-rw-r--r--net/netfilter/Kconfig56
-rw-r--r--net/netfilter/Makefile24
-rw-r--r--net/netfilter/core.c100
-rw-r--r--net/netfilter/ipset/Kconfig9
-rw-r--r--net/netfilter/ipset/Makefile1
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h31
-rw-r--r--net/netfilter/ipset/ip_set_core.c22
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h254
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmac.c315
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c6
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c6
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c8
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c8
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c10
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c37
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c27
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c54
-rw-r--r--net/netfilter/nf_conntrack_core.c21
-rw-r--r--net/netfilter/nf_conntrack_proto.c158
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c101
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c13
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c100
-rw-r--r--net/netfilter/nf_conntrack_proto_udplite.c103
-rw-r--r--net/netfilter/nf_conntrack_standalone.c10
-rw-r--r--net/netfilter/nf_dup_netdev.c35
-rw-r--r--net/netfilter/nf_internals.h5
-rw-r--r--net/netfilter/nf_log_common.c28
-rw-r--r--net/netfilter/nf_log_netdev.c81
-rw-r--r--net/netfilter/nf_nat_core.c12
-rw-r--r--net/netfilter/nf_nat_proto_dccp.c36
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c40
-rw-r--r--net/netfilter/nf_nat_proto_udplite.c35
-rw-r--r--net/netfilter/nf_queue.c36
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c718
-rw-r--r--net/netfilter/nf_tables_core.c91
-rw-r--r--net/netfilter/nf_tables_trace.c8
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netfilter/nfnetlink_log.c7
-rw-r--r--net/netfilter/nfnetlink_queue.c10
-rw-r--r--net/netfilter/nft_bitwise.c13
-rw-r--r--net/netfilter/nft_byteorder.c13
-rw-r--r--net/netfilter/nft_cmp.c16
-rw-r--r--net/netfilter/nft_counter.c223
-rw-r--r--net/netfilter/nft_ct.c76
-rw-r--r--net/netfilter/nft_dynset.c13
-rw-r--r--net/netfilter/nft_fib.c159
-rw-r--r--net/netfilter/nft_fib_inet.c82
-rw-r--r--net/netfilter/nft_fwd_netdev.c4
-rw-r--r--net/netfilter/nft_hash.c6
-rw-r--r--net/netfilter/nft_immediate.c16
-rw-r--r--net/netfilter/nft_log.c5
-rw-r--r--net/netfilter/nft_lookup.c18
-rw-r--r--net/netfilter/nft_masq.c6
-rw-r--r--net/netfilter/nft_meta.c11
-rw-r--r--net/netfilter/nft_nat.c11
-rw-r--r--net/netfilter/nft_numgen.c2
-rw-r--r--net/netfilter/nft_objref.c226
-rw-r--r--net/netfilter/nft_payload.c120
-rw-r--r--net/netfilter/nft_queue.c2
-rw-r--r--net/netfilter/nft_quota.c158
-rw-r--r--net/netfilter/nft_range.c13
-rw-r--r--net/netfilter/nft_redir.c6
-rw-r--r--net/netfilter/nft_reject_inet.c18
-rw-r--r--net/netfilter/nft_rt.c153
-rw-r--r--net/netfilter/nft_set_hash.c25
-rw-r--r--net/netfilter/nft_set_rbtree.c12
-rw-r--r--net/netfilter/x_tables.c60
-rw-r--r--net/netfilter/xt_AUDIT.c10
-rw-r--r--net/netfilter/xt_CONNSECMARK.c4
-rw-r--r--net/netfilter/xt_CT.c6
-rw-r--r--net/netfilter/xt_LOG.c6
-rw-r--r--net/netfilter/xt_NETMAP.c31
-rw-r--r--net/netfilter/xt_NFLOG.c6
-rw-r--r--net/netfilter/xt_NFQUEUE.c4
-rw-r--r--net/netfilter/xt_RATEEST.c4
-rw-r--r--net/netfilter/xt_REDIRECT.c16
-rw-r--r--net/netfilter/xt_TCPMSS.c4
-rw-r--r--net/netfilter/xt_TEE.c4
-rw-r--r--net/netfilter/xt_TPROXY.c31
-rw-r--r--net/netfilter/xt_addrtype.c10
-rw-r--r--net/netfilter/xt_bpf.c96
-rw-r--r--net/netfilter/xt_cluster.c2
-rw-r--r--net/netfilter/xt_connbytes.c4
-rw-r--r--net/netfilter/xt_connlabel.c6
-rw-r--r--net/netfilter/xt_connlimit.c14
-rw-r--r--net/netfilter/xt_connmark.c8
-rw-r--r--net/netfilter/xt_conntrack.c12
-rw-r--r--net/netfilter/xt_devgroup.c4
-rw-r--r--net/netfilter/xt_dscp.c2
-rw-r--r--net/netfilter/xt_hashlimit.c2
-rw-r--r--net/netfilter/xt_helper.c4
-rw-r--r--net/netfilter/xt_ipvs.c4
-rw-r--r--net/netfilter/xt_multiport.c52
-rw-r--r--net/netfilter/xt_nat.c18
-rw-r--r--net/netfilter/xt_nfacct.c2
-rw-r--r--net/netfilter/xt_osf.c10
-rw-r--r--net/netfilter/xt_owner.c2
-rw-r--r--net/netfilter/xt_pkttype.c4
-rw-r--r--net/netfilter/xt_policy.c4
-rw-r--r--net/netfilter/xt_rateest.c28
-rw-r--r--net/netfilter/xt_recent.c12
-rw-r--r--net/netfilter/xt_set.c38
-rw-r--r--net/netfilter/xt_socket.c336
-rw-r--r--net/netfilter/xt_state.c4
-rw-r--r--net/netfilter/xt_time.c2
-rw-r--r--net/netlabel/netlabel_calipso.c21
-rw-r--r--net/netlabel/netlabel_cipso_v4.c22
-rw-r--r--net/netlabel/netlabel_mgmt.c21
-rw-r--r--net/netlabel/netlabel_unlabeled.c21
-rw-r--r--net/netlink/af_netlink.c10
-rw-r--r--net/netlink/genetlink.c323
-rw-r--r--net/nfc/netlink.c34
-rw-r--r--net/openvswitch/actions.c129
-rw-r--r--net/openvswitch/conntrack.c8
-rw-r--r--net/openvswitch/datapath.c35
-rw-r--r--net/openvswitch/datapath.h2
-rw-r--r--net/openvswitch/flow.c105
-rw-r--r--net/openvswitch/flow.h22
-rw-r--r--net/openvswitch/flow_netlink.c178
-rw-r--r--net/openvswitch/vport-internal_dev.c10
-rw-r--r--net/openvswitch/vport-netdev.c10
-rw-r--r--net/openvswitch/vport.c48
-rw-r--r--net/openvswitch/vport.h3
-rw-r--r--net/packet/af_packet.c76
-rw-r--r--net/phonet/pep-gprs.c12
-rw-r--r--net/phonet/pep.c9
-rw-r--r--net/phonet/pn_dev.c2
-rw-r--r--net/rds/af_rds.c4
-rw-r--r--net/rds/connection.c18
-rw-r--r--net/rds/message.c1
-rw-r--r--net/rds/rdma.c2
-rw-r--r--net/rds/rdma_transport.c5
-rw-r--r--net/rds/rds.h12
-rw-r--r--net/rds/recv.c36
-rw-r--r--net/rds/send.c9
-rw-r--r--net/rds/tcp.c22
-rw-r--r--net/rds/tcp_connect.c14
-rw-r--r--net/rds/tcp_listen.c31
-rw-r--r--net/rds/tcp_send.c3
-rw-r--r--net/rds/threads.c3
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rose/rose_route.c2
-rw-r--r--net/rxrpc/af_rxrpc.c11
-rw-r--r--net/rxrpc/conn_client.c4
-rw-r--r--net/rxrpc/input.c7
-rw-r--r--net/sched/act_api.c9
-rw-r--r--net/sched/act_bpf.c20
-rw-r--r--net/sched/act_connmark.c2
-rw-r--r--net/sched/act_csum.c2
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_ife.c2
-rw-r--r--net/sched/act_ipt.c16
-rw-r--r--net/sched/act_mirred.c85
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c2
-rw-r--r--net/sched/act_police.c23
-rw-r--r--net/sched/act_simple.c2
-rw-r--r--net/sched/act_skbedit.c23
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/act_tunnel_key.c19
-rw-r--r--net/sched/act_vlan.c2
-rw-r--r--net/sched/cls_api.c24
-rw-r--r--net/sched/cls_bpf.c49
-rw-r--r--net/sched/cls_flower.c303
-rw-r--r--net/sched/em_ipset.c17
-rw-r--r--net/sched/em_meta.c9
-rw-r--r--net/sched/sch_api.c13
-rw-r--r--net/sched/sch_cbq.c8
-rw-r--r--net/sched/sch_drr.c6
-rw-r--r--net/sched/sch_fq.c18
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sched/sch_hfsc.c6
-rw-r--r--net/sched/sch_htb.c6
-rw-r--r--net/sched/sch_netem.c4
-rw-r--r--net/sched/sch_qfq.c8
-rw-r--r--net/sched/sch_teql.c5
-rw-r--r--net/sctp/associola.c12
-rw-r--r--net/sctp/bind_addr.c3
-rw-r--r--net/sctp/chunk.c32
-rw-r--r--net/sctp/endpointola.c5
-rw-r--r--net/sctp/input.c93
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/output.c441
-rw-r--r--net/sctp/outqueue.c4
-rw-r--r--net/sctp/protocol.c40
-rw-r--r--net/sctp/socket.c14
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/socket.c40
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c9
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c12
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c3
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/cache.c2
-rw-r--r--net/sunrpc/clnt.c2
-rw-r--r--net/sunrpc/netns.h2
-rw-r--r--net/sunrpc/stats.c10
-rw-r--r--net/sunrpc/sunrpc_syms.c2
-rw-r--r--net/sunrpc/svc.c14
-rw-r--r--net/sunrpc/svcauth.c3
-rw-r--r--net/sunrpc/svcsock.c26
-rw-r--r--net/sunrpc/sysctl.c2
-rw-r--r--net/sunrpc/xprt.c3
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c94
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c36
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c5
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c21
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c116
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c94
-rw-r--r--net/sunrpc/xprtrdma/transport.c34
-rw-r--r--net/sunrpc/xprtrdma/verbs.c37
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h31
-rw-r--r--net/sunrpc/xprtsock.c4
-rw-r--r--net/switchdev/switchdev.c5
-rw-r--r--net/tipc/core.c2
-rw-r--r--net/tipc/core.h2
-rw-r--r--net/tipc/msg.c4
-rw-r--r--net/tipc/msg.h2
-rw-r--r--net/tipc/netlink.c27
-rw-r--r--net/tipc/netlink_compat.c25
-rw-r--r--net/tipc/socket.c498
-rw-r--r--net/unix/af_unix.c12
-rw-r--r--net/vmw_vsock/virtio_transport.c58
-rw-r--r--net/vmw_vsock/virtio_transport_common.c27
-rw-r--r--net/vmw_vsock/vmci_transport_notify.c30
-rw-r--r--net/vmw_vsock/vmci_transport_notify_qstate.c30
-rw-r--r--net/wimax/stack.c22
-rw-r--r--net/wireless/Makefile2
-rw-r--r--net/wireless/core.c33
-rw-r--r--net/wireless/core.h5
-rw-r--r--net/wireless/lib80211_crypt_tkip.c2
-rw-r--r--net/wireless/mesh.c2
-rw-r--r--net/wireless/mlme.c18
-rw-r--r--net/wireless/nl80211.c551
-rw-r--r--net/wireless/rdev-ops.h24
-rw-r--r--net/wireless/sme.c16
-rw-r--r--net/wireless/trace.h37
-rw-r--r--net/wireless/util.c125
-rw-r--r--net/x25/af_x25.c2
-rw-r--r--net/x25/sysctl_net_x25.c2
-rw-r--r--net/x25/x25_link.c2
-rw-r--r--net/xfrm/xfrm_policy.c1
-rw-r--r--net/xfrm/xfrm_state.c12
-rw-r--r--net/xfrm/xfrm_user.c2
562 files changed, 15596 insertions, 6987 deletions
diff --git a/net/802/fc.c b/net/802/fc.c
index 7b9219022418..1bb496ea997e 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -10,7 +10,7 @@
10 * v 1.0 03/22/99 10 * v 1.0 03/22/99
11 */ 11 */
12 12
13#include <asm/uaccess.h> 13#include <linux/uaccess.h>
14#include <linux/types.h> 14#include <linux/types.h>
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/string.h> 16#include <linux/string.h>
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 7d3a0af954e8..6356623fc238 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -141,15 +141,6 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
141 141
142EXPORT_SYMBOL(fddi_type_trans); 142EXPORT_SYMBOL(fddi_type_trans);
143 143
144int fddi_change_mtu(struct net_device *dev, int new_mtu)
145{
146 if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
147 return -EINVAL;
148 dev->mtu = new_mtu;
149 return 0;
150}
151EXPORT_SYMBOL(fddi_change_mtu);
152
153static const struct header_ops fddi_header_ops = { 144static const struct header_ops fddi_header_ops = {
154 .create = fddi_header, 145 .create = fddi_header,
155}; 146};
@@ -161,6 +152,8 @@ static void fddi_setup(struct net_device *dev)
161 dev->type = ARPHRD_FDDI; 152 dev->type = ARPHRD_FDDI;
162 dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */ 153 dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */
163 dev->mtu = FDDI_K_SNAP_DLEN; /* Assume max payload of 802.2 SNAP frame */ 154 dev->mtu = FDDI_K_SNAP_DLEN; /* Assume max payload of 802.2 SNAP frame */
155 dev->min_mtu = FDDI_K_SNAP_HLEN;
156 dev->max_mtu = FDDI_K_SNAP_DLEN;
164 dev->addr_len = FDDI_K_ALEN; 157 dev->addr_len = FDDI_K_ALEN;
165 dev->tx_queue_len = 100; /* Long queues on FDDI */ 158 dev->tx_queue_len = 100; /* Long queues on FDDI */
166 dev->flags = IFF_BROADCAST | IFF_MULTICAST; 159 dev->flags = IFF_BROADCAST | IFF_MULTICAST;
diff --git a/net/802/hippi.c b/net/802/hippi.c
index ade1a52cdcff..4460606e9c36 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -34,7 +34,7 @@
34#include <linux/errno.h> 34#include <linux/errno.h>
35#include <net/arp.h> 35#include <net/arp.h>
36#include <net/sock.h> 36#include <net/sock.h>
37#include <asm/uaccess.h> 37#include <linux/uaccess.h>
38 38
39/* 39/*
40 * Create the HIPPI MAC header for an arbitrary protocol layer 40 * Create the HIPPI MAC header for an arbitrary protocol layer
@@ -116,18 +116,6 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
116 116
117EXPORT_SYMBOL(hippi_type_trans); 117EXPORT_SYMBOL(hippi_type_trans);
118 118
119int hippi_change_mtu(struct net_device *dev, int new_mtu)
120{
121 /*
122 * HIPPI's got these nice large MTUs.
123 */
124 if ((new_mtu < 68) || (new_mtu > 65280))
125 return -EINVAL;
126 dev->mtu = new_mtu;
127 return 0;
128}
129EXPORT_SYMBOL(hippi_change_mtu);
130
131/* 119/*
132 * For HIPPI we will actually use the lower 4 bytes of the hardware 120 * For HIPPI we will actually use the lower 4 bytes of the hardware
133 * address as the I-FIELD rather than the actual hardware address. 121 * address as the I-FIELD rather than the actual hardware address.
@@ -174,6 +162,8 @@ static void hippi_setup(struct net_device *dev)
174 dev->type = ARPHRD_HIPPI; 162 dev->type = ARPHRD_HIPPI;
175 dev->hard_header_len = HIPPI_HLEN; 163 dev->hard_header_len = HIPPI_HLEN;
176 dev->mtu = 65280; 164 dev->mtu = 65280;
165 dev->min_mtu = 68;
166 dev->max_mtu = 65280;
177 dev->addr_len = HIPPI_ALEN; 167 dev->addr_len = HIPPI_ALEN;
178 dev->tx_queue_len = 25 /* 5 */; 168 dev->tx_queue_len = 25 /* 5 */;
179 memset(dev->broadcast, 0xFF, HIPPI_ALEN); 169 memset(dev->broadcast, 0xFF, HIPPI_ALEN);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index f2531ad66b68..467069b73ce1 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -34,7 +34,7 @@
34#include <net/rtnetlink.h> 34#include <net/rtnetlink.h>
35#include <net/net_namespace.h> 35#include <net/net_namespace.h>
36#include <net/netns/generic.h> 36#include <net/netns/generic.h>
37#include <asm/uaccess.h> 37#include <linux/uaccess.h>
38 38
39#include <linux/if_vlan.h> 39#include <linux/if_vlan.h>
40#include "vlan.h" 40#include "vlan.h"
@@ -44,7 +44,7 @@
44 44
45/* Global VLAN variables */ 45/* Global VLAN variables */
46 46
47int vlan_net_id __read_mostly; 47unsigned int vlan_net_id __read_mostly;
48 48
49const char vlan_fullname[] = "802.1Q VLAN Support"; 49const char vlan_fullname[] = "802.1Q VLAN Support";
50const char vlan_version[] = DRV_VERSION; 50const char vlan_version[] = DRV_VERSION;
@@ -515,8 +515,8 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
515 return -EFAULT; 515 return -EFAULT;
516 516
517 /* Null terminate this sucker, just in case. */ 517 /* Null terminate this sucker, just in case. */
518 args.device1[23] = 0; 518 args.device1[sizeof(args.device1) - 1] = 0;
519 args.u.device2[23] = 0; 519 args.u.device2[sizeof(args.u.device2) - 1] = 0;
520 520
521 rtnl_lock(); 521 rtnl_lock();
522 522
@@ -571,8 +571,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
571 err = -EPERM; 571 err = -EPERM;
572 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 572 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
573 break; 573 break;
574 if ((args.u.name_type >= 0) && 574 if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) {
575 (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) {
576 struct vlan_net *vn; 575 struct vlan_net *vn;
577 576
578 vn = net_generic(net, vlan_net_id); 577 vn = net_generic(net, vlan_net_id);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index cc1557978066..df8bd65dd370 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -159,7 +159,7 @@ void vlan_netlink_fini(void);
159 159
160extern struct rtnl_link_ops vlan_link_ops; 160extern struct rtnl_link_ops vlan_link_ops;
161 161
162extern int vlan_net_id; 162extern unsigned int vlan_net_id;
163 163
164struct proc_dir_entry; 164struct proc_dir_entry;
165 165
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index fbfacd51aa34..10da6c588bf8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -826,5 +826,8 @@ void vlan_setup(struct net_device *dev)
826 dev->destructor = vlan_dev_free; 826 dev->destructor = vlan_dev_free;
827 dev->ethtool_ops = &vlan_ethtool_ops; 827 dev->ethtool_ops = &vlan_ethtool_ops;
828 828
829 dev->min_mtu = 0;
830 dev->max_mtu = ETH_MAX_MTU;
831
829 eth_zero_addr(dev->broadcast); 832 eth_zero_addr(dev->broadcast);
830} 833}
diff --git a/net/Kconfig b/net/Kconfig
index 7b6cd340b72b..a1005007224c 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -402,6 +402,14 @@ config LWTUNNEL
402 weight tunnel endpoint. Tunnel encapsulation parameters are stored 402 weight tunnel endpoint. Tunnel encapsulation parameters are stored
403 with light weight tunnel state associated with fib routes. 403 with light weight tunnel state associated with fib routes.
404 404
405config LWTUNNEL_BPF
406 bool "Execute BPF program as route nexthop action"
407 depends on LWTUNNEL
408 default y if LWTUNNEL=y
409 ---help---
410 Allows to run BPF programs as a nexthop action following a route
411 lookup for incoming and outgoing packets.
412
405config DST_CACHE 413config DST_CACHE
406 bool 414 bool
407 default n 415 default n
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index aa0047c5c467..fca84e111c89 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -620,14 +620,12 @@ error:
620static const struct net_device_ops br2684_netdev_ops = { 620static const struct net_device_ops br2684_netdev_ops = {
621 .ndo_start_xmit = br2684_start_xmit, 621 .ndo_start_xmit = br2684_start_xmit,
622 .ndo_set_mac_address = br2684_mac_addr, 622 .ndo_set_mac_address = br2684_mac_addr,
623 .ndo_change_mtu = eth_change_mtu,
624 .ndo_validate_addr = eth_validate_addr, 623 .ndo_validate_addr = eth_validate_addr,
625}; 624};
626 625
627static const struct net_device_ops br2684_netdev_ops_routed = { 626static const struct net_device_ops br2684_netdev_ops_routed = {
628 .ndo_start_xmit = br2684_start_xmit, 627 .ndo_start_xmit = br2684_start_xmit,
629 .ndo_set_mac_address = br2684_mac_addr, 628 .ndo_set_mac_address = br2684_mac_addr,
630 .ndo_change_mtu = eth_change_mtu
631}; 629};
632 630
633static void br2684_setup(struct net_device *netdev) 631static void br2684_setup(struct net_device *netdev)
@@ -651,7 +649,9 @@ static void br2684_setup_routed(struct net_device *netdev)
651 netdev->hard_header_len = sizeof(llc_oui_ipv4); /* worst case */ 649 netdev->hard_header_len = sizeof(llc_oui_ipv4); /* worst case */
652 netdev->netdev_ops = &br2684_netdev_ops_routed; 650 netdev->netdev_ops = &br2684_netdev_ops_routed;
653 netdev->addr_len = 0; 651 netdev->addr_len = 0;
654 netdev->mtu = 1500; 652 netdev->mtu = ETH_DATA_LEN;
653 netdev->min_mtu = 0;
654 netdev->max_mtu = ETH_MAX_MTU;
655 netdev->type = ARPHRD_PPP; 655 netdev->type = ARPHRD_PPP;
656 netdev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; 656 netdev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
657 netdev->tx_queue_len = 100; 657 netdev->tx_queue_len = 100;
diff --git a/net/atm/common.c b/net/atm/common.c
index 6dc12305799e..a3ca922d307b 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -630,7 +630,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
630 goto out; 630 goto out;
631 skb->dev = NULL; /* for paths shared with net_device interfaces */ 631 skb->dev = NULL; /* for paths shared with net_device interfaces */
632 ATM_SKB(skb)->atm_options = vcc->atm_options; 632 ATM_SKB(skb)->atm_options = vcc->atm_options;
633 if (copy_from_iter(skb_put(skb, size), size, &m->msg_iter) != size) { 633 if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
634 kfree_skb(skb); 634 kfree_skb(skb);
635 error = -EFAULT; 635 error = -EFAULT;
636 goto out; 636 goto out;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 5d2693826afb..019557d0a11d 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -111,9 +111,9 @@ static inline void lec_arp_put(struct lec_arp_table *entry)
111} 111}
112 112
113static struct lane2_ops lane2_ops = { 113static struct lane2_ops lane2_ops = {
114 lane2_resolve, /* resolve, spec 3.1.3 */ 114 .resolve = lane2_resolve, /* spec 3.1.3 */
115 lane2_associate_req, /* associate_req, spec 3.1.4 */ 115 .associate_req = lane2_associate_req, /* spec 3.1.4 */
116 NULL /* associate indicator, spec 3.1.5 */ 116 .associate_indicator = NULL /* spec 3.1.5 */
117}; 117};
118 118
119static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 119static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -544,15 +544,6 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type,
544 return 0; 544 return 0;
545} 545}
546 546
547/* shamelessly stolen from drivers/net/net_init.c */
548static int lec_change_mtu(struct net_device *dev, int new_mtu)
549{
550 if ((new_mtu < 68) || (new_mtu > 18190))
551 return -EINVAL;
552 dev->mtu = new_mtu;
553 return 0;
554}
555
556static void lec_set_multicast_list(struct net_device *dev) 547static void lec_set_multicast_list(struct net_device *dev)
557{ 548{
558 /* 549 /*
@@ -565,7 +556,6 @@ static const struct net_device_ops lec_netdev_ops = {
565 .ndo_open = lec_open, 556 .ndo_open = lec_open,
566 .ndo_stop = lec_close, 557 .ndo_stop = lec_close,
567 .ndo_start_xmit = lec_start_xmit, 558 .ndo_start_xmit = lec_start_xmit,
568 .ndo_change_mtu = lec_change_mtu,
569 .ndo_tx_timeout = lec_tx_timeout, 559 .ndo_tx_timeout = lec_tx_timeout,
570 .ndo_set_rx_mode = lec_set_multicast_list, 560 .ndo_set_rx_mode = lec_set_multicast_list,
571}; 561};
@@ -742,6 +732,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
742 if (!dev_lec[i]) 732 if (!dev_lec[i])
743 return -ENOMEM; 733 return -ENOMEM;
744 dev_lec[i]->netdev_ops = &lec_netdev_ops; 734 dev_lec[i]->netdev_ops = &lec_netdev_ops;
735 dev_lec[i]->max_mtu = 18190;
745 snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); 736 snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i);
746 if (register_netdev(dev_lec[i])) { 737 if (register_netdev(dev_lec[i])) {
747 free_netdev(dev_lec[i]); 738 free_netdev(dev_lec[i]);
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index 9e60e74c807d..a89fdebeffda 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -535,33 +535,32 @@ static void eg_destroy_cache(struct mpoa_client *mpc)
535 535
536 536
537static const struct in_cache_ops ingress_ops = { 537static const struct in_cache_ops ingress_ops = {
538 in_cache_add_entry, /* add_entry */ 538 .add_entry = in_cache_add_entry,
539 in_cache_get, /* get */ 539 .get = in_cache_get,
540 in_cache_get_with_mask, /* get_with_mask */ 540 .get_with_mask = in_cache_get_with_mask,
541 in_cache_get_by_vcc, /* get_by_vcc */ 541 .get_by_vcc = in_cache_get_by_vcc,
542 in_cache_put, /* put */ 542 .put = in_cache_put,
543 in_cache_remove_entry, /* remove_entry */ 543 .remove_entry = in_cache_remove_entry,
544 cache_hit, /* cache_hit */ 544 .cache_hit = cache_hit,
545 clear_count_and_expired, /* clear_count */ 545 .clear_count = clear_count_and_expired,
546 check_resolving_entries, /* check_resolving */ 546 .check_resolving = check_resolving_entries,
547 refresh_entries, /* refresh */ 547 .refresh = refresh_entries,
548 in_destroy_cache /* destroy_cache */ 548 .destroy_cache = in_destroy_cache
549}; 549};
550 550
551static const struct eg_cache_ops egress_ops = { 551static const struct eg_cache_ops egress_ops = {
552 eg_cache_add_entry, /* add_entry */ 552 .add_entry = eg_cache_add_entry,
553 eg_cache_get_by_cache_id, /* get_by_cache_id */ 553 .get_by_cache_id = eg_cache_get_by_cache_id,
554 eg_cache_get_by_tag, /* get_by_tag */ 554 .get_by_tag = eg_cache_get_by_tag,
555 eg_cache_get_by_vcc, /* get_by_vcc */ 555 .get_by_vcc = eg_cache_get_by_vcc,
556 eg_cache_get_by_src_ip, /* get_by_src_ip */ 556 .get_by_src_ip = eg_cache_get_by_src_ip,
557 eg_cache_put, /* put */ 557 .put = eg_cache_put,
558 eg_cache_remove_entry, /* remove_entry */ 558 .remove_entry = eg_cache_remove_entry,
559 update_eg_cache_entry, /* update */ 559 .update = update_eg_cache_entry,
560 clear_expired, /* clear_expired */ 560 .clear_expired = clear_expired,
561 eg_destroy_cache /* destroy_cache */ 561 .destroy_cache = eg_destroy_cache
562}; 562};
563 563
564
565void atm_mpoa_init_cache(struct mpoa_client *mpc) 564void atm_mpoa_init_cache(struct mpoa_client *mpc)
566{ 565{
567 mpc->in_ops = &ingress_ops; 566 mpc->in_ops = &ingress_ops;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 2fdebabbfacd..90fcf5fc2e0a 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -32,7 +32,7 @@
32#include <linux/if_arp.h> 32#include <linux/if_arp.h>
33#include <linux/skbuff.h> 33#include <linux/skbuff.h>
34#include <net/sock.h> 34#include <net/sock.h>
35#include <asm/uaccess.h> 35#include <linux/uaccess.h>
36#include <linux/fcntl.h> 36#include <linux/fcntl.h>
37#include <linux/termios.h> /* For TIOCINQ/OUTQ */ 37#include <linux/termios.h> /* For TIOCINQ/OUTQ */
38#include <linux/mm.h> 38#include <linux/mm.h>
diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c
index e7c9b0ea17a1..ac2542b7be88 100644
--- a/net/ax25/ax25_addr.c
+++ b/net/ax25/ax25_addr.c
@@ -21,7 +21,7 @@
21#include <linux/netdevice.h> 21#include <linux/netdevice.h>
22#include <linux/skbuff.h> 22#include <linux/skbuff.h>
23#include <net/sock.h> 23#include <net/sock.h>
24#include <asm/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/fcntl.h> 25#include <linux/fcntl.h>
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/interrupt.h> 27#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 3d106767b272..9a3a301e1e2f 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -23,7 +23,7 @@
23#include <linux/if_arp.h> 23#include <linux/if_arp.h>
24#include <linux/skbuff.h> 24#include <linux/skbuff.h>
25#include <net/sock.h> 25#include <net/sock.h>
26#include <asm/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/fcntl.h> 27#include <linux/fcntl.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/interrupt.h> 29#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c
index 9bd31e88aeca..891596e74278 100644
--- a/net/ax25/ax25_ds_in.c
+++ b/net/ax25/ax25_ds_in.c
@@ -22,7 +22,7 @@
22#include <linux/skbuff.h> 22#include <linux/skbuff.h>
23#include <net/sock.h> 23#include <net/sock.h>
24#include <net/tcp_states.h> 24#include <net/tcp_states.h>
25#include <asm/uaccess.h> 25#include <linux/uaccess.h>
26#include <linux/fcntl.h> 26#include <linux/fcntl.h>
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/interrupt.h> 28#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index e05bd57b5afd..28827e81ba2b 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -23,7 +23,7 @@
23#include <linux/netdevice.h> 23#include <linux/netdevice.h>
24#include <linux/skbuff.h> 24#include <linux/skbuff.h>
25#include <net/sock.h> 25#include <net/sock.h>
26#include <asm/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/fcntl.h> 27#include <linux/fcntl.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/interrupt.h> 29#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 5237dff6941d..5fb2104b7304 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -24,7 +24,7 @@
24#include <linux/netdevice.h> 24#include <linux/netdevice.h>
25#include <linux/skbuff.h> 25#include <linux/skbuff.h>
26#include <net/sock.h> 26#include <net/sock.h>
27#include <asm/uaccess.h> 27#include <linux/uaccess.h>
28#include <linux/fcntl.h> 28#include <linux/fcntl.h>
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/interrupt.h> 30#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
index 7f16e8a931b2..8c07c28569e4 100644
--- a/net/ax25/ax25_iface.c
+++ b/net/ax25/ax25_iface.c
@@ -23,7 +23,7 @@
23#include <linux/netdevice.h> 23#include <linux/netdevice.h>
24#include <linux/skbuff.h> 24#include <linux/skbuff.h>
25#include <net/sock.h> 25#include <net/sock.h>
26#include <asm/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/fcntl.h> 27#include <linux/fcntl.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/interrupt.h> 29#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index bb5a0e4e98d9..860752639b1a 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -25,7 +25,7 @@
25#include <linux/skbuff.h> 25#include <linux/skbuff.h>
26#include <net/sock.h> 26#include <net/sock.h>
27#include <net/tcp_states.h> 27#include <net/tcp_states.h>
28#include <asm/uaccess.h> 28#include <linux/uaccess.h>
29#include <linux/fcntl.h> 29#include <linux/fcntl.h>
30#include <linux/mm.h> 30#include <linux/mm.h>
31#include <linux/interrupt.h> 31#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 2fa3be965101..183b1c583d56 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -23,7 +23,7 @@
23#include <linux/if_arp.h> 23#include <linux/if_arp.h>
24#include <linux/skbuff.h> 24#include <linux/skbuff.h>
25#include <net/sock.h> 25#include <net/sock.h>
26#include <asm/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/fcntl.h> 27#include <linux/fcntl.h>
28#include <linux/termios.h> /* For TIOCINQ/OUTQ */ 28#include <linux/termios.h> /* For TIOCINQ/OUTQ */
29#include <linux/mm.h> 29#include <linux/mm.h>
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 8ddd41baa81c..b11a5f466fcc 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -25,7 +25,7 @@
25#include <linux/netdevice.h> 25#include <linux/netdevice.h>
26#include <linux/skbuff.h> 26#include <linux/skbuff.h>
27#include <net/sock.h> 27#include <net/sock.h>
28#include <asm/uaccess.h> 28#include <linux/uaccess.h>
29#include <linux/fcntl.h> 29#include <linux/fcntl.h>
30#include <linux/mm.h> 30#include <linux/mm.h>
31#include <linux/interrupt.h> 31#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index d39097737e38..e1fda27cb27c 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -31,7 +31,7 @@
31#include <linux/skbuff.h> 31#include <linux/skbuff.h>
32#include <linux/spinlock.h> 32#include <linux/spinlock.h>
33#include <net/sock.h> 33#include <net/sock.h>
34#include <asm/uaccess.h> 34#include <linux/uaccess.h>
35#include <linux/fcntl.h> 35#include <linux/fcntl.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37#include <linux/interrupt.h> 37#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c
index 3fbf8f7b2cf4..8632b86e843e 100644
--- a/net/ax25/ax25_std_in.c
+++ b/net/ax25/ax25_std_in.c
@@ -29,7 +29,7 @@
29#include <linux/skbuff.h> 29#include <linux/skbuff.h>
30#include <net/sock.h> 30#include <net/sock.h>
31#include <net/tcp_states.h> 31#include <net/tcp_states.h>
32#include <asm/uaccess.h> 32#include <linux/uaccess.h>
33#include <linux/fcntl.h> 33#include <linux/fcntl.h>
34#include <linux/mm.h> 34#include <linux/mm.h>
35#include <linux/interrupt.h> 35#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_std_subr.c b/net/ax25/ax25_std_subr.c
index 8b66a41e538f..94bd06396a43 100644
--- a/net/ax25/ax25_std_subr.c
+++ b/net/ax25/ax25_std_subr.c
@@ -20,7 +20,7 @@
20#include <linux/netdevice.h> 20#include <linux/netdevice.h>
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <net/sock.h> 22#include <net/sock.h>
23#include <asm/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/fcntl.h> 24#include <linux/fcntl.h>
25#include <linux/mm.h> 25#include <linux/mm.h>
26#include <linux/interrupt.h> 26#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index 2c0d6ef66f9d..30bbc675261d 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -24,7 +24,7 @@
24#include <linux/skbuff.h> 24#include <linux/skbuff.h>
25#include <net/sock.h> 25#include <net/sock.h>
26#include <net/tcp_states.h> 26#include <net/tcp_states.h>
27#include <asm/uaccess.h> 27#include <linux/uaccess.h>
28#include <linux/fcntl.h> 28#include <linux/fcntl.h>
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/interrupt.h> 30#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 655a7d4c96e1..4855d18a8511 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -25,7 +25,7 @@
25#include <linux/skbuff.h> 25#include <linux/skbuff.h>
26#include <net/sock.h> 26#include <net/sock.h>
27#include <net/tcp_states.h> 27#include <net/tcp_states.h>
28#include <asm/uaccess.h> 28#include <linux/uaccess.h>
29#include <linux/fcntl.h> 29#include <linux/fcntl.h>
30#include <linux/mm.h> 30#include <linux/mm.h>
31#include <linux/interrupt.h> 31#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index c3cffa79bafb..23a6f38a80bf 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -28,7 +28,7 @@
28#include <linux/netdevice.h> 28#include <linux/netdevice.h>
29#include <linux/skbuff.h> 29#include <linux/skbuff.h>
30#include <net/sock.h> 30#include <net/sock.h>
31#include <asm/uaccess.h> 31#include <linux/uaccess.h>
32#include <linux/fcntl.h> 32#include <linux/fcntl.h>
33#include <linux/mm.h> 33#include <linux/mm.h>
34#include <linux/interrupt.h> 34#include <linux/interrupt.h>
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 4ad2fb7bcd35..0403b0def7e6 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -25,7 +25,7 @@
25#include <linux/if_arp.h> 25#include <linux/if_arp.h>
26#include <linux/skbuff.h> 26#include <linux/skbuff.h>
27#include <net/sock.h> 27#include <net/sock.h>
28#include <asm/uaccess.h> 28#include <linux/uaccess.h>
29#include <linux/fcntl.h> 29#include <linux/fcntl.h>
30#include <linux/mm.h> 30#include <linux/mm.h>
31#include <linux/interrupt.h> 31#include <linux/interrupt.h>
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index f20742cbae6d..b73b96a2854b 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -17,7 +17,7 @@ config BATMAN_ADV
17 17
18config BATMAN_ADV_BATMAN_V 18config BATMAN_ADV_BATMAN_V
19 bool "B.A.T.M.A.N. V protocol (experimental)" 19 bool "B.A.T.M.A.N. V protocol (experimental)"
20 depends on BATMAN_ADV && CFG80211=y || (CFG80211=m && BATMAN_ADV=m) 20 depends on BATMAN_ADV && !(CFG80211=m && BATMAN_ADV=y)
21 default n 21 default n
22 help 22 help
23 This option enables the B.A.T.M.A.N. V protocol, the successor 23 This option enables the B.A.T.M.A.N. V protocol, the successor
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index e2d18d0b1f06..f00f666e2ccd 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -698,7 +698,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
698 698
699 forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); 699 forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
700 if (!forw_packet_aggr->skb) { 700 if (!forw_packet_aggr->skb) {
701 batadv_forw_packet_free(forw_packet_aggr); 701 batadv_forw_packet_free(forw_packet_aggr, true);
702 return; 702 return;
703 } 703 }
704 704
@@ -717,17 +717,10 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
717 if (direct_link) 717 if (direct_link)
718 forw_packet_aggr->direct_link_flags |= 1; 718 forw_packet_aggr->direct_link_flags |= 1;
719 719
720 /* add new packet to packet list */
721 spin_lock_bh(&bat_priv->forw_bat_list_lock);
722 hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list);
723 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
724
725 /* start timer for this packet */
726 INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work, 720 INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
727 batadv_iv_send_outstanding_bat_ogm_packet); 721 batadv_iv_send_outstanding_bat_ogm_packet);
728 queue_delayed_work(batadv_event_workqueue, 722
729 &forw_packet_aggr->delayed_work, 723 batadv_forw_packet_ogmv1_queue(bat_priv, forw_packet_aggr, send_time);
730 send_time - jiffies);
731} 724}
732 725
733/* aggregate a new packet into the existing ogm packet */ 726/* aggregate a new packet into the existing ogm packet */
@@ -1272,7 +1265,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1272 */ 1265 */
1273 tq_iface_penalty = BATADV_TQ_MAX_VALUE; 1266 tq_iface_penalty = BATADV_TQ_MAX_VALUE;
1274 if (if_outgoing && (if_incoming == if_outgoing) && 1267 if (if_outgoing && (if_incoming == if_outgoing) &&
1275 batadv_is_wifi_netdev(if_outgoing->net_dev)) 1268 batadv_is_wifi_hardif(if_outgoing))
1276 tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE, 1269 tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
1277 bat_priv); 1270 bat_priv);
1278 1271
@@ -1611,7 +1604,7 @@ out:
1611 if (hardif_neigh) 1604 if (hardif_neigh)
1612 batadv_hardif_neigh_put(hardif_neigh); 1605 batadv_hardif_neigh_put(hardif_neigh);
1613 1606
1614 kfree_skb(skb_priv); 1607 consume_skb(skb_priv);
1615} 1608}
1616 1609
1617/** 1610/**
@@ -1783,17 +1776,17 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work)
1783 struct delayed_work *delayed_work; 1776 struct delayed_work *delayed_work;
1784 struct batadv_forw_packet *forw_packet; 1777 struct batadv_forw_packet *forw_packet;
1785 struct batadv_priv *bat_priv; 1778 struct batadv_priv *bat_priv;
1779 bool dropped = false;
1786 1780
1787 delayed_work = to_delayed_work(work); 1781 delayed_work = to_delayed_work(work);
1788 forw_packet = container_of(delayed_work, struct batadv_forw_packet, 1782 forw_packet = container_of(delayed_work, struct batadv_forw_packet,
1789 delayed_work); 1783 delayed_work);
1790 bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); 1784 bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
1791 spin_lock_bh(&bat_priv->forw_bat_list_lock);
1792 hlist_del(&forw_packet->list);
1793 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
1794 1785
1795 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) 1786 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) {
1787 dropped = true;
1796 goto out; 1788 goto out;
1789 }
1797 1790
1798 batadv_iv_ogm_emit(forw_packet); 1791 batadv_iv_ogm_emit(forw_packet);
1799 1792
@@ -1810,7 +1803,10 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work)
1810 batadv_iv_ogm_schedule(forw_packet->if_incoming); 1803 batadv_iv_ogm_schedule(forw_packet->if_incoming);
1811 1804
1812out: 1805out:
1813 batadv_forw_packet_free(forw_packet); 1806 /* do we get something for free()? */
1807 if (batadv_forw_packet_steal(forw_packet,
1808 &bat_priv->forw_bat_list_lock))
1809 batadv_forw_packet_free(forw_packet, dropped);
1814} 1810}
1815 1811
1816static int batadv_iv_ogm_receive(struct sk_buff *skb, 1812static int batadv_iv_ogm_receive(struct sk_buff *skb,
@@ -1820,17 +1816,18 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
1820 struct batadv_ogm_packet *ogm_packet; 1816 struct batadv_ogm_packet *ogm_packet;
1821 u8 *packet_pos; 1817 u8 *packet_pos;
1822 int ogm_offset; 1818 int ogm_offset;
1823 bool ret; 1819 bool res;
1820 int ret = NET_RX_DROP;
1824 1821
1825 ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN); 1822 res = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN);
1826 if (!ret) 1823 if (!res)
1827 return NET_RX_DROP; 1824 goto free_skb;
1828 1825
1829 /* did we receive a B.A.T.M.A.N. IV OGM packet on an interface 1826 /* did we receive a B.A.T.M.A.N. IV OGM packet on an interface
1830 * that does not have B.A.T.M.A.N. IV enabled ? 1827 * that does not have B.A.T.M.A.N. IV enabled ?
1831 */ 1828 */
1832 if (bat_priv->algo_ops->iface.enable != batadv_iv_ogm_iface_enable) 1829 if (bat_priv->algo_ops->iface.enable != batadv_iv_ogm_iface_enable)
1833 return NET_RX_DROP; 1830 goto free_skb;
1834 1831
1835 batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX); 1832 batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX);
1836 batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES, 1833 batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES,
@@ -1851,8 +1848,15 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
1851 ogm_packet = (struct batadv_ogm_packet *)packet_pos; 1848 ogm_packet = (struct batadv_ogm_packet *)packet_pos;
1852 } 1849 }
1853 1850
1854 kfree_skb(skb); 1851 ret = NET_RX_SUCCESS;
1855 return NET_RX_SUCCESS; 1852
1853free_skb:
1854 if (ret == NET_RX_SUCCESS)
1855 consume_skb(skb);
1856 else
1857 kfree_skb(skb);
1858
1859 return ret;
1856} 1860}
1857 1861
1858#ifdef CONFIG_BATMAN_ADV_DEBUGFS 1862#ifdef CONFIG_BATMAN_ADV_DEBUGFS
@@ -2486,7 +2490,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
2486 struct batadv_orig_node *orig_node; 2490 struct batadv_orig_node *orig_node;
2487 2491
2488 rcu_read_lock(); 2492 rcu_read_lock();
2489 hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { 2493 hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
2490 orig_node = gw_node->orig_node; 2494 orig_node = gw_node->orig_node;
2491 router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); 2495 router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
2492 if (!router) 2496 if (!router)
@@ -2674,7 +2678,7 @@ static void batadv_iv_gw_print(struct batadv_priv *bat_priv,
2674 " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); 2678 " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth\n");
2675 2679
2676 rcu_read_lock(); 2680 rcu_read_lock();
2677 hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { 2681 hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
2678 /* fails if orig_node has no router */ 2682 /* fails if orig_node has no router */
2679 if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) 2683 if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
2680 continue; 2684 continue;
@@ -2774,7 +2778,7 @@ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
2774 int idx = 0; 2778 int idx = 0;
2775 2779
2776 rcu_read_lock(); 2780 rcu_read_lock();
2777 hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { 2781 hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
2778 if (idx++ < idx_skip) 2782 if (idx++ < idx_skip)
2779 continue; 2783 continue;
2780 2784
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index e79f6f01182e..2ac612d7bab4 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -750,7 +750,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
750 u32 max_bw = 0, bw; 750 u32 max_bw = 0, bw;
751 751
752 rcu_read_lock(); 752 rcu_read_lock();
753 hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { 753 hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
754 if (!kref_get_unless_zero(&gw_node->refcount)) 754 if (!kref_get_unless_zero(&gw_node->refcount))
755 continue; 755 continue;
756 756
@@ -787,7 +787,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
787 struct batadv_orig_node *curr_gw_orig, 787 struct batadv_orig_node *curr_gw_orig,
788 struct batadv_orig_node *orig_node) 788 struct batadv_orig_node *orig_node)
789{ 789{
790 struct batadv_gw_node *curr_gw = NULL, *orig_gw = NULL; 790 struct batadv_gw_node *curr_gw, *orig_gw = NULL;
791 u32 gw_throughput, orig_throughput, threshold; 791 u32 gw_throughput, orig_throughput, threshold;
792 bool ret = false; 792 bool ret = false;
793 793
@@ -889,7 +889,7 @@ static void batadv_v_gw_print(struct batadv_priv *bat_priv,
889 " Gateway ( throughput) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); 889 " Gateway ( throughput) Nexthop [outgoingIF]: advertised uplink bandwidth\n");
890 890
891 rcu_read_lock(); 891 rcu_read_lock();
892 hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { 892 hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
893 /* fails if orig_node has no router */ 893 /* fails if orig_node has no router */
894 if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) 894 if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
895 continue; 895 continue;
@@ -1009,7 +1009,7 @@ static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
1009 int idx = 0; 1009 int idx = 0;
1010 1010
1011 rcu_read_lock(); 1011 rcu_read_lock();
1012 hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { 1012 hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
1013 if (idx++ < idx_skip) 1013 if (idx++ < idx_skip)
1014 continue; 1014 continue;
1015 1015
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index ee08540ce503..f2fb2f05b6bf 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -75,6 +75,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
75{ 75{
76 struct batadv_hard_iface *hard_iface = neigh->if_incoming; 76 struct batadv_hard_iface *hard_iface = neigh->if_incoming;
77 struct ethtool_link_ksettings link_settings; 77 struct ethtool_link_ksettings link_settings;
78 struct net_device *real_netdev;
78 struct station_info sinfo; 79 struct station_info sinfo;
79 u32 throughput; 80 u32 throughput;
80 int ret; 81 int ret;
@@ -89,23 +90,27 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
89 /* if this is a wireless device, then ask its throughput through 90 /* if this is a wireless device, then ask its throughput through
90 * cfg80211 API 91 * cfg80211 API
91 */ 92 */
92 if (batadv_is_wifi_netdev(hard_iface->net_dev)) { 93 if (batadv_is_wifi_hardif(hard_iface)) {
93 if (hard_iface->net_dev->ieee80211_ptr) { 94 if (!batadv_is_cfg80211_hardif(hard_iface))
94 ret = cfg80211_get_station(hard_iface->net_dev, 95 /* unsupported WiFi driver version */
95 neigh->addr, &sinfo); 96 goto default_throughput;
96 if (ret == -ENOENT) { 97
97 /* Node is not associated anymore! It would be 98 real_netdev = batadv_get_real_netdev(hard_iface->net_dev);
98 * possible to delete this neighbor. For now set 99 if (!real_netdev)
99 * the throughput metric to 0. 100 goto default_throughput;
100 */ 101
101 return 0; 102 ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
102 } 103
103 if (!ret) 104 dev_put(real_netdev);
104 return sinfo.expected_throughput / 100; 105 if (ret == -ENOENT) {
106 /* Node is not associated anymore! It would be
107 * possible to delete this neighbor. For now set
108 * the throughput metric to 0.
109 */
110 return 0;
105 } 111 }
106 112 if (!ret)
107 /* unsupported WiFi driver version */ 113 return sinfo.expected_throughput / 100;
108 goto default_throughput;
109 } 114 }
110 115
111 /* if not a wifi interface, check if this device provides data via 116 /* if not a wifi interface, check if this device provides data via
@@ -187,7 +192,7 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
187 int elp_skb_len; 192 int elp_skb_len;
188 193
189 /* this probing routine is for Wifi neighbours only */ 194 /* this probing routine is for Wifi neighbours only */
190 if (!batadv_is_wifi_netdev(hard_iface->net_dev)) 195 if (!batadv_is_wifi_hardif(hard_iface))
191 return true; 196 return true;
192 197
193 /* probe the neighbor only if no unicast packets have been sent 198 /* probe the neighbor only if no unicast packets have been sent
@@ -352,7 +357,7 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
352 /* warn the user (again) if there is no throughput data is available */ 357 /* warn the user (again) if there is no throughput data is available */
353 hard_iface->bat_v.flags &= ~BATADV_WARNING_DEFAULT; 358 hard_iface->bat_v.flags &= ~BATADV_WARNING_DEFAULT;
354 359
355 if (batadv_is_wifi_netdev(hard_iface->net_dev)) 360 if (batadv_is_wifi_hardif(hard_iface))
356 hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX; 361 hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
357 362
358 INIT_DELAYED_WORK(&hard_iface->bat_v.elp_wq, 363 INIT_DELAYED_WORK(&hard_iface->bat_v.elp_wq,
@@ -492,20 +497,21 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
492 struct batadv_elp_packet *elp_packet; 497 struct batadv_elp_packet *elp_packet;
493 struct batadv_hard_iface *primary_if; 498 struct batadv_hard_iface *primary_if;
494 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); 499 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
495 bool ret; 500 bool res;
501 int ret = NET_RX_DROP;
496 502
497 ret = batadv_check_management_packet(skb, if_incoming, BATADV_ELP_HLEN); 503 res = batadv_check_management_packet(skb, if_incoming, BATADV_ELP_HLEN);
498 if (!ret) 504 if (!res)
499 return NET_RX_DROP; 505 goto free_skb;
500 506
501 if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) 507 if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
502 return NET_RX_DROP; 508 goto free_skb;
503 509
504 /* did we receive a B.A.T.M.A.N. V ELP packet on an interface 510 /* did we receive a B.A.T.M.A.N. V ELP packet on an interface
505 * that does not have B.A.T.M.A.N. V ELP enabled ? 511 * that does not have B.A.T.M.A.N. V ELP enabled ?
506 */ 512 */
507 if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0) 513 if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0)
508 return NET_RX_DROP; 514 goto free_skb;
509 515
510 elp_packet = (struct batadv_elp_packet *)skb->data; 516 elp_packet = (struct batadv_elp_packet *)skb->data;
511 517
@@ -516,14 +522,19 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
516 522
517 primary_if = batadv_primary_if_get_selected(bat_priv); 523 primary_if = batadv_primary_if_get_selected(bat_priv);
518 if (!primary_if) 524 if (!primary_if)
519 goto out; 525 goto free_skb;
520 526
521 batadv_v_elp_neigh_update(bat_priv, ethhdr->h_source, if_incoming, 527 batadv_v_elp_neigh_update(bat_priv, ethhdr->h_source, if_incoming,
522 elp_packet); 528 elp_packet);
523 529
524out: 530 ret = NET_RX_SUCCESS;
525 if (primary_if) 531 batadv_hardif_put(primary_if);
526 batadv_hardif_put(primary_if); 532
527 consume_skb(skb); 533free_skb:
528 return NET_RX_SUCCESS; 534 if (ret == NET_RX_SUCCESS)
535 consume_skb(skb);
536 else
537 kfree_skb(skb);
538
539 return ret;
529} 540}
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 1aeeadca620c..38b9aab83fc0 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -140,6 +140,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
140 unsigned char *ogm_buff, *pkt_buff; 140 unsigned char *ogm_buff, *pkt_buff;
141 int ogm_buff_len; 141 int ogm_buff_len;
142 u16 tvlv_len = 0; 142 u16 tvlv_len = 0;
143 int ret;
143 144
144 bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); 145 bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work);
145 bat_priv = container_of(bat_v, struct batadv_priv, bat_v); 146 bat_priv = container_of(bat_v, struct batadv_priv, bat_v);
@@ -182,6 +183,31 @@ static void batadv_v_ogm_send(struct work_struct *work)
182 if (!kref_get_unless_zero(&hard_iface->refcount)) 183 if (!kref_get_unless_zero(&hard_iface->refcount))
183 continue; 184 continue;
184 185
186 ret = batadv_hardif_no_broadcast(hard_iface, NULL, NULL);
187 if (ret) {
188 char *type;
189
190 switch (ret) {
191 case BATADV_HARDIF_BCAST_NORECIPIENT:
192 type = "no neighbor";
193 break;
194 case BATADV_HARDIF_BCAST_DUPFWD:
195 type = "single neighbor is source";
196 break;
197 case BATADV_HARDIF_BCAST_DUPORIG:
198 type = "single neighbor is originator";
199 break;
200 default:
201 type = "unknown";
202 }
203
204 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselve on %s surpressed: %s\n",
205 hard_iface->net_dev->name, type);
206
207 batadv_hardif_put(hard_iface);
208 continue;
209 }
210
185 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 211 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
186 "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n", 212 "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n",
187 ogm_packet->orig, ntohl(ogm_packet->seqno), 213 ogm_packet->orig, ntohl(ogm_packet->seqno),
@@ -401,7 +427,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
401 struct batadv_hard_iface *if_incoming, 427 struct batadv_hard_iface *if_incoming,
402 struct batadv_hard_iface *if_outgoing) 428 struct batadv_hard_iface *if_outgoing)
403{ 429{
404 struct batadv_orig_ifinfo *orig_ifinfo = NULL; 430 struct batadv_orig_ifinfo *orig_ifinfo;
405 struct batadv_neigh_ifinfo *neigh_ifinfo = NULL; 431 struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
406 bool protection_started = false; 432 bool protection_started = false;
407 int ret = -EINVAL; 433 int ret = -EINVAL;
@@ -486,7 +512,7 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
486 struct batadv_hard_iface *if_outgoing) 512 struct batadv_hard_iface *if_outgoing)
487{ 513{
488 struct batadv_neigh_node *router = NULL; 514 struct batadv_neigh_node *router = NULL;
489 struct batadv_orig_node *orig_neigh_node = NULL; 515 struct batadv_orig_node *orig_neigh_node;
490 struct batadv_neigh_node *orig_neigh_router = NULL; 516 struct batadv_neigh_node *orig_neigh_router = NULL;
491 struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL; 517 struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL;
492 u32 router_throughput, neigh_throughput; 518 u32 router_throughput, neigh_throughput;
@@ -651,6 +677,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
651 struct batadv_hard_iface *hard_iface; 677 struct batadv_hard_iface *hard_iface;
652 struct batadv_ogm2_packet *ogm_packet; 678 struct batadv_ogm2_packet *ogm_packet;
653 u32 ogm_throughput, link_throughput, path_throughput; 679 u32 ogm_throughput, link_throughput, path_throughput;
680 int ret;
654 681
655 ethhdr = eth_hdr(skb); 682 ethhdr = eth_hdr(skb);
656 ogm_packet = (struct batadv_ogm2_packet *)(skb->data + ogm_offset); 683 ogm_packet = (struct batadv_ogm2_packet *)(skb->data + ogm_offset);
@@ -716,6 +743,35 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
716 if (!kref_get_unless_zero(&hard_iface->refcount)) 743 if (!kref_get_unless_zero(&hard_iface->refcount))
717 continue; 744 continue;
718 745
746 ret = batadv_hardif_no_broadcast(hard_iface,
747 ogm_packet->orig,
748 hardif_neigh->orig);
749
750 if (ret) {
751 char *type;
752
753 switch (ret) {
754 case BATADV_HARDIF_BCAST_NORECIPIENT:
755 type = "no neighbor";
756 break;
757 case BATADV_HARDIF_BCAST_DUPFWD:
758 type = "single neighbor is source";
759 break;
760 case BATADV_HARDIF_BCAST_DUPORIG:
761 type = "single neighbor is originator";
762 break;
763 default:
764 type = "unknown";
765 }
766
767 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s surpressed: %s\n",
768 ogm_packet->orig, hard_iface->net_dev->name,
769 type);
770
771 batadv_hardif_put(hard_iface);
772 continue;
773 }
774
719 batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet, 775 batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet,
720 orig_node, neigh_node, 776 orig_node, neigh_node,
721 if_incoming, hard_iface); 777 if_incoming, hard_iface);
@@ -754,18 +810,18 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
754 * B.A.T.M.A.N. V enabled ? 810 * B.A.T.M.A.N. V enabled ?
755 */ 811 */
756 if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0) 812 if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0)
757 return NET_RX_DROP; 813 goto free_skb;
758 814
759 if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN)) 815 if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
760 return NET_RX_DROP; 816 goto free_skb;
761 817
762 if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) 818 if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
763 return NET_RX_DROP; 819 goto free_skb;
764 820
765 ogm_packet = (struct batadv_ogm2_packet *)skb->data; 821 ogm_packet = (struct batadv_ogm2_packet *)skb->data;
766 822
767 if (batadv_is_my_mac(bat_priv, ogm_packet->orig)) 823 if (batadv_is_my_mac(bat_priv, ogm_packet->orig))
768 return NET_RX_DROP; 824 goto free_skb;
769 825
770 batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX); 826 batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX);
771 batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES, 827 batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES,
@@ -786,7 +842,12 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
786 } 842 }
787 843
788 ret = NET_RX_SUCCESS; 844 ret = NET_RX_SUCCESS;
789 consume_skb(skb); 845
846free_skb:
847 if (ret == NET_RX_SUCCESS)
848 consume_skb(skb);
849 else
850 kfree_skb(skb);
790 851
791 return ret; 852 return ret;
792} 853}
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index b4ffba7dd583..77925504379d 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -186,7 +186,7 @@ struct batadv_debuginfo batadv_debuginfo_##_name = { \
186/* the following attributes are general and therefore they will be directly 186/* the following attributes are general and therefore they will be directly
187 * placed in the BATADV_DEBUGFS_SUBDIR subdirectory of debugfs 187 * placed in the BATADV_DEBUGFS_SUBDIR subdirectory of debugfs
188 */ 188 */
189static BATADV_DEBUGINFO(routing_algos, S_IRUGO, batadv_algorithms_open); 189static BATADV_DEBUGINFO(routing_algos, 0444, batadv_algorithms_open);
190 190
191static struct batadv_debuginfo *batadv_general_debuginfos[] = { 191static struct batadv_debuginfo *batadv_general_debuginfos[] = {
192 &batadv_debuginfo_routing_algos, 192 &batadv_debuginfo_routing_algos,
@@ -194,26 +194,24 @@ static struct batadv_debuginfo *batadv_general_debuginfos[] = {
194}; 194};
195 195
196/* The following attributes are per soft interface */ 196/* The following attributes are per soft interface */
197static BATADV_DEBUGINFO(neighbors, S_IRUGO, neighbors_open); 197static BATADV_DEBUGINFO(neighbors, 0444, neighbors_open);
198static BATADV_DEBUGINFO(originators, S_IRUGO, batadv_originators_open); 198static BATADV_DEBUGINFO(originators, 0444, batadv_originators_open);
199static BATADV_DEBUGINFO(gateways, S_IRUGO, batadv_gateways_open); 199static BATADV_DEBUGINFO(gateways, 0444, batadv_gateways_open);
200static BATADV_DEBUGINFO(transtable_global, S_IRUGO, 200static BATADV_DEBUGINFO(transtable_global, 0444, batadv_transtable_global_open);
201 batadv_transtable_global_open);
202#ifdef CONFIG_BATMAN_ADV_BLA 201#ifdef CONFIG_BATMAN_ADV_BLA
203static BATADV_DEBUGINFO(bla_claim_table, S_IRUGO, batadv_bla_claim_table_open); 202static BATADV_DEBUGINFO(bla_claim_table, 0444, batadv_bla_claim_table_open);
204static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO, 203static BATADV_DEBUGINFO(bla_backbone_table, 0444,
205 batadv_bla_backbone_table_open); 204 batadv_bla_backbone_table_open);
206#endif 205#endif
207#ifdef CONFIG_BATMAN_ADV_DAT 206#ifdef CONFIG_BATMAN_ADV_DAT
208static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open); 207static BATADV_DEBUGINFO(dat_cache, 0444, batadv_dat_cache_open);
209#endif 208#endif
210static BATADV_DEBUGINFO(transtable_local, S_IRUGO, 209static BATADV_DEBUGINFO(transtable_local, 0444, batadv_transtable_local_open);
211 batadv_transtable_local_open);
212#ifdef CONFIG_BATMAN_ADV_NC 210#ifdef CONFIG_BATMAN_ADV_NC
213static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open); 211static BATADV_DEBUGINFO(nc_nodes, 0444, batadv_nc_nodes_open);
214#endif 212#endif
215#ifdef CONFIG_BATMAN_ADV_MCAST 213#ifdef CONFIG_BATMAN_ADV_MCAST
216static BATADV_DEBUGINFO(mcast_flags, S_IRUGO, batadv_mcast_flags_open); 214static BATADV_DEBUGINFO(mcast_flags, 0444, batadv_mcast_flags_open);
217#endif 215#endif
218 216
219static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { 217static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
@@ -253,7 +251,7 @@ struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \
253 }, \ 251 }, \
254} 252}
255 253
256static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO, 254static BATADV_HARDIF_DEBUGINFO(originators, 0444,
257 batadv_originators_hardif_open); 255 batadv_originators_hardif_open);
258 256
259static struct batadv_debuginfo *batadv_hardif_debuginfos[] = { 257static struct batadv_debuginfo *batadv_hardif_debuginfos[] = {
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index e257efdc5d03..49576c5a3fe3 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -369,12 +369,11 @@ out:
369 * batadv_dbg_arp - print a debug message containing all the ARP packet details 369 * batadv_dbg_arp - print a debug message containing all the ARP packet details
370 * @bat_priv: the bat priv with all the soft interface information 370 * @bat_priv: the bat priv with all the soft interface information
371 * @skb: ARP packet 371 * @skb: ARP packet
372 * @type: ARP type
373 * @hdr_size: size of the possible header before the ARP packet 372 * @hdr_size: size of the possible header before the ARP packet
374 * @msg: message to print together with the debugging information 373 * @msg: message to print together with the debugging information
375 */ 374 */
376static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, 375static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
377 u16 type, int hdr_size, char *msg) 376 int hdr_size, char *msg)
378{ 377{
379 struct batadv_unicast_4addr_packet *unicast_4addr_packet; 378 struct batadv_unicast_4addr_packet *unicast_4addr_packet;
380 struct batadv_bcast_packet *bcast_pkt; 379 struct batadv_bcast_packet *bcast_pkt;
@@ -441,7 +440,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
441#else 440#else
442 441
443static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, 442static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
444 u16 type, int hdr_size, char *msg) 443 int hdr_size, char *msg)
445{ 444{
446} 445}
447 446
@@ -950,6 +949,41 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
950} 949}
951 950
952/** 951/**
952 * batadv_dat_arp_create_reply - create an ARP Reply
953 * @bat_priv: the bat priv with all the soft interface information
954 * @ip_src: ARP sender IP
955 * @ip_dst: ARP target IP
956 * @hw_src: Ethernet source and ARP sender MAC
957 * @hw_dst: Ethernet destination and ARP target MAC
958 * @vid: VLAN identifier (optional, set to zero otherwise)
959 *
960 * Creates an ARP Reply from the given values, optionally encapsulated in a
961 * VLAN header.
962 *
963 * Return: An skb containing an ARP Reply.
964 */
965static struct sk_buff *
966batadv_dat_arp_create_reply(struct batadv_priv *bat_priv, __be32 ip_src,
967 __be32 ip_dst, u8 *hw_src, u8 *hw_dst,
968 unsigned short vid)
969{
970 struct sk_buff *skb;
971
972 skb = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_dst, bat_priv->soft_iface,
973 ip_src, hw_dst, hw_src, hw_dst);
974 if (!skb)
975 return NULL;
976
977 skb_reset_mac_header(skb);
978
979 if (vid & BATADV_VLAN_HAS_TAG)
980 skb = vlan_insert_tag(skb, htons(ETH_P_8021Q),
981 vid & VLAN_VID_MASK);
982
983 return skb;
984}
985
986/**
953 * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to 987 * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to
954 * answer using DAT 988 * answer using DAT
955 * @bat_priv: the bat priv with all the soft interface information 989 * @bat_priv: the bat priv with all the soft interface information
@@ -983,8 +1017,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
983 if (type != ARPOP_REQUEST) 1017 if (type != ARPOP_REQUEST)
984 goto out; 1018 goto out;
985 1019
986 batadv_dbg_arp(bat_priv, skb, type, hdr_size, 1020 batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing outgoing ARP REQUEST");
987 "Parsing outgoing ARP REQUEST");
988 1021
989 ip_src = batadv_arp_ip_src(skb, hdr_size); 1022 ip_src = batadv_arp_ip_src(skb, hdr_size);
990 hw_src = batadv_arp_hw_src(skb, hdr_size); 1023 hw_src = batadv_arp_hw_src(skb, hdr_size);
@@ -1007,20 +1040,12 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
1007 goto out; 1040 goto out;
1008 } 1041 }
1009 1042
1010 skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, 1043 skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src,
1011 bat_priv->soft_iface, ip_dst, hw_src, 1044 dat_entry->mac_addr,
1012 dat_entry->mac_addr, hw_src); 1045 hw_src, vid);
1013 if (!skb_new) 1046 if (!skb_new)
1014 goto out; 1047 goto out;
1015 1048
1016 if (vid & BATADV_VLAN_HAS_TAG) {
1017 skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
1018 vid & VLAN_VID_MASK);
1019 if (!skb_new)
1020 goto out;
1021 }
1022
1023 skb_reset_mac_header(skb_new);
1024 skb_new->protocol = eth_type_trans(skb_new, 1049 skb_new->protocol = eth_type_trans(skb_new,
1025 bat_priv->soft_iface); 1050 bat_priv->soft_iface);
1026 bat_priv->stats.rx_packets++; 1051 bat_priv->stats.rx_packets++;
@@ -1075,8 +1100,7 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
1075 ip_src = batadv_arp_ip_src(skb, hdr_size); 1100 ip_src = batadv_arp_ip_src(skb, hdr_size);
1076 ip_dst = batadv_arp_ip_dst(skb, hdr_size); 1101 ip_dst = batadv_arp_ip_dst(skb, hdr_size);
1077 1102
1078 batadv_dbg_arp(bat_priv, skb, type, hdr_size, 1103 batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing incoming ARP REQUEST");
1079 "Parsing incoming ARP REQUEST");
1080 1104
1081 batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid); 1105 batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
1082 1106
@@ -1084,25 +1108,11 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
1084 if (!dat_entry) 1108 if (!dat_entry)
1085 goto out; 1109 goto out;
1086 1110
1087 skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, 1111 skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src,
1088 bat_priv->soft_iface, ip_dst, hw_src, 1112 dat_entry->mac_addr, hw_src, vid);
1089 dat_entry->mac_addr, hw_src);
1090
1091 if (!skb_new) 1113 if (!skb_new)
1092 goto out; 1114 goto out;
1093 1115
1094 /* the rest of the TX path assumes that the mac_header offset pointing
1095 * to the inner Ethernet header has been set, therefore reset it now.
1096 */
1097 skb_reset_mac_header(skb_new);
1098
1099 if (vid & BATADV_VLAN_HAS_TAG) {
1100 skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
1101 vid & VLAN_VID_MASK);
1102 if (!skb_new)
1103 goto out;
1104 }
1105
1106 /* To preserve backwards compatibility, the node has choose the outgoing 1116 /* To preserve backwards compatibility, the node has choose the outgoing
1107 * format based on the incoming request packet type. The assumption is 1117 * format based on the incoming request packet type. The assumption is
1108 * that a node not using the 4addr packet format doesn't support it. 1118 * that a node not using the 4addr packet format doesn't support it.
@@ -1149,8 +1159,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
1149 if (type != ARPOP_REPLY) 1159 if (type != ARPOP_REPLY)
1150 return; 1160 return;
1151 1161
1152 batadv_dbg_arp(bat_priv, skb, type, hdr_size, 1162 batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing outgoing ARP REPLY");
1153 "Parsing outgoing ARP REPLY");
1154 1163
1155 hw_src = batadv_arp_hw_src(skb, hdr_size); 1164 hw_src = batadv_arp_hw_src(skb, hdr_size);
1156 ip_src = batadv_arp_ip_src(skb, hdr_size); 1165 ip_src = batadv_arp_ip_src(skb, hdr_size);
@@ -1195,8 +1204,7 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
1195 if (type != ARPOP_REPLY) 1204 if (type != ARPOP_REPLY)
1196 goto out; 1205 goto out;
1197 1206
1198 batadv_dbg_arp(bat_priv, skb, type, hdr_size, 1207 batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing incoming ARP REPLY");
1199 "Parsing incoming ARP REPLY");
1200 1208
1201 hw_src = batadv_arp_hw_src(skb, hdr_size); 1209 hw_src = batadv_arp_hw_src(skb, hdr_size);
1202 ip_src = batadv_arp_ip_src(skb, hdr_size); 1210 ip_src = batadv_arp_ip_src(skb, hdr_size);
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0934730fb7ff..9c561e683f4b 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -20,6 +20,7 @@
20 20
21#include <linux/atomic.h> 21#include <linux/atomic.h>
22#include <linux/byteorder/generic.h> 22#include <linux/byteorder/generic.h>
23#include <linux/errno.h>
23#include <linux/etherdevice.h> 24#include <linux/etherdevice.h>
24#include <linux/fs.h> 25#include <linux/fs.h>
25#include <linux/if_ether.h> 26#include <linux/if_ether.h>
@@ -42,17 +43,23 @@
42/** 43/**
43 * batadv_frag_clear_chain - delete entries in the fragment buffer chain 44 * batadv_frag_clear_chain - delete entries in the fragment buffer chain
44 * @head: head of chain with entries. 45 * @head: head of chain with entries.
46 * @dropped: whether the chain is cleared because all fragments are dropped
45 * 47 *
46 * Free fragments in the passed hlist. Should be called with appropriate lock. 48 * Free fragments in the passed hlist. Should be called with appropriate lock.
47 */ 49 */
48static void batadv_frag_clear_chain(struct hlist_head *head) 50static void batadv_frag_clear_chain(struct hlist_head *head, bool dropped)
49{ 51{
50 struct batadv_frag_list_entry *entry; 52 struct batadv_frag_list_entry *entry;
51 struct hlist_node *node; 53 struct hlist_node *node;
52 54
53 hlist_for_each_entry_safe(entry, node, head, list) { 55 hlist_for_each_entry_safe(entry, node, head, list) {
54 hlist_del(&entry->list); 56 hlist_del(&entry->list);
55 kfree_skb(entry->skb); 57
58 if (dropped)
59 kfree_skb(entry->skb);
60 else
61 consume_skb(entry->skb);
62
56 kfree(entry); 63 kfree(entry);
57 } 64 }
58} 65}
@@ -73,7 +80,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
73 spin_lock_bh(&chain->lock); 80 spin_lock_bh(&chain->lock);
74 81
75 if (!check_cb || check_cb(chain)) { 82 if (!check_cb || check_cb(chain)) {
76 batadv_frag_clear_chain(&chain->head); 83 batadv_frag_clear_chain(&chain->fragment_list, true);
77 chain->size = 0; 84 chain->size = 0;
78 } 85 }
79 86
@@ -117,8 +124,8 @@ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
117 if (chain->seqno == seqno) 124 if (chain->seqno == seqno)
118 return false; 125 return false;
119 126
120 if (!hlist_empty(&chain->head)) 127 if (!hlist_empty(&chain->fragment_list))
121 batadv_frag_clear_chain(&chain->head); 128 batadv_frag_clear_chain(&chain->fragment_list, true);
122 129
123 chain->size = 0; 130 chain->size = 0;
124 chain->seqno = seqno; 131 chain->seqno = seqno;
@@ -176,7 +183,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
176 chain = &orig_node->fragments[bucket]; 183 chain = &orig_node->fragments[bucket];
177 spin_lock_bh(&chain->lock); 184 spin_lock_bh(&chain->lock);
178 if (batadv_frag_init_chain(chain, seqno)) { 185 if (batadv_frag_init_chain(chain, seqno)) {
179 hlist_add_head(&frag_entry_new->list, &chain->head); 186 hlist_add_head(&frag_entry_new->list, &chain->fragment_list);
180 chain->size = skb->len - hdr_size; 187 chain->size = skb->len - hdr_size;
181 chain->timestamp = jiffies; 188 chain->timestamp = jiffies;
182 chain->total_size = ntohs(frag_packet->total_size); 189 chain->total_size = ntohs(frag_packet->total_size);
@@ -185,7 +192,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
185 } 192 }
186 193
187 /* Find the position for the new fragment. */ 194 /* Find the position for the new fragment. */
188 hlist_for_each_entry(frag_entry_curr, &chain->head, list) { 195 hlist_for_each_entry(frag_entry_curr, &chain->fragment_list, list) {
189 /* Drop packet if fragment already exists. */ 196 /* Drop packet if fragment already exists. */
190 if (frag_entry_curr->no == frag_entry_new->no) 197 if (frag_entry_curr->no == frag_entry_new->no)
191 goto err_unlock; 198 goto err_unlock;
@@ -220,11 +227,11 @@ out:
220 * exceeds the maximum size of one merged packet. Don't allow 227 * exceeds the maximum size of one merged packet. Don't allow
221 * packets to have different total_size. 228 * packets to have different total_size.
222 */ 229 */
223 batadv_frag_clear_chain(&chain->head); 230 batadv_frag_clear_chain(&chain->fragment_list, true);
224 chain->size = 0; 231 chain->size = 0;
225 } else if (ntohs(frag_packet->total_size) == chain->size) { 232 } else if (ntohs(frag_packet->total_size) == chain->size) {
226 /* All fragments received. Hand over chain to caller. */ 233 /* All fragments received. Hand over chain to caller. */
227 hlist_move_list(&chain->head, chain_out); 234 hlist_move_list(&chain->fragment_list, chain_out);
228 chain->size = 0; 235 chain->size = 0;
229 } 236 }
230 237
@@ -252,8 +259,9 @@ batadv_frag_merge_packets(struct hlist_head *chain)
252{ 259{
253 struct batadv_frag_packet *packet; 260 struct batadv_frag_packet *packet;
254 struct batadv_frag_list_entry *entry; 261 struct batadv_frag_list_entry *entry;
255 struct sk_buff *skb_out = NULL; 262 struct sk_buff *skb_out;
256 int size, hdr_size = sizeof(struct batadv_frag_packet); 263 int size, hdr_size = sizeof(struct batadv_frag_packet);
264 bool dropped = false;
257 265
258 /* Remove first entry, as this is the destination for the rest of the 266 /* Remove first entry, as this is the destination for the rest of the
259 * fragments. 267 * fragments.
@@ -270,6 +278,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
270 if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) { 278 if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
271 kfree_skb(skb_out); 279 kfree_skb(skb_out);
272 skb_out = NULL; 280 skb_out = NULL;
281 dropped = true;
273 goto free; 282 goto free;
274 } 283 }
275 284
@@ -291,7 +300,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
291 300
292free: 301free:
293 /* Locking is not needed, because 'chain' is not part of any orig. */ 302 /* Locking is not needed, because 'chain' is not part of any orig. */
294 batadv_frag_clear_chain(chain); 303 batadv_frag_clear_chain(chain, dropped);
295 return skb_out; 304 return skb_out;
296} 305}
297 306
@@ -352,7 +361,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
352 struct batadv_orig_node *orig_node_src) 361 struct batadv_orig_node *orig_node_src)
353{ 362{
354 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); 363 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
355 struct batadv_orig_node *orig_node_dst = NULL; 364 struct batadv_orig_node *orig_node_dst;
356 struct batadv_neigh_node *neigh_node = NULL; 365 struct batadv_neigh_node *neigh_node = NULL;
357 struct batadv_frag_packet *packet; 366 struct batadv_frag_packet *packet;
358 u16 total_size; 367 u16 total_size;
@@ -433,8 +442,7 @@ err:
433 * @orig_node: final destination of the created fragments 442 * @orig_node: final destination of the created fragments
434 * @neigh_node: next-hop of the created fragments 443 * @neigh_node: next-hop of the created fragments
435 * 444 *
436 * Return: the netdev tx status or -1 in case of error. 445 * Return: the netdev tx status or a negative errno code on a failure
437 * When -1 is returned the skb is not consumed.
438 */ 446 */
439int batadv_frag_send_packet(struct sk_buff *skb, 447int batadv_frag_send_packet(struct sk_buff *skb,
440 struct batadv_orig_node *orig_node, 448 struct batadv_orig_node *orig_node,
@@ -447,7 +455,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
447 unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; 455 unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
448 unsigned int header_size = sizeof(frag_header); 456 unsigned int header_size = sizeof(frag_header);
449 unsigned int max_fragment_size, max_packet_size; 457 unsigned int max_fragment_size, max_packet_size;
450 int ret = -1; 458 int ret;
451 459
452 /* To avoid merge and refragmentation at next-hops we never send 460 /* To avoid merge and refragmentation at next-hops we never send
453 * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE 461 * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
@@ -457,13 +465,17 @@ int batadv_frag_send_packet(struct sk_buff *skb,
457 max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS; 465 max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
458 466
459 /* Don't even try to fragment, if we need more than 16 fragments */ 467 /* Don't even try to fragment, if we need more than 16 fragments */
460 if (skb->len > max_packet_size) 468 if (skb->len > max_packet_size) {
461 goto out; 469 ret = -EAGAIN;
470 goto free_skb;
471 }
462 472
463 bat_priv = orig_node->bat_priv; 473 bat_priv = orig_node->bat_priv;
464 primary_if = batadv_primary_if_get_selected(bat_priv); 474 primary_if = batadv_primary_if_get_selected(bat_priv);
465 if (!primary_if) 475 if (!primary_if) {
466 goto out; 476 ret = -EINVAL;
477 goto put_primary_if;
478 }
467 479
468 /* Create one header to be copied to all fragments */ 480 /* Create one header to be copied to all fragments */
469 frag_header.packet_type = BATADV_UNICAST_FRAG; 481 frag_header.packet_type = BATADV_UNICAST_FRAG;
@@ -488,34 +500,35 @@ int batadv_frag_send_packet(struct sk_buff *skb,
488 /* Eat and send fragments from the tail of skb */ 500 /* Eat and send fragments from the tail of skb */
489 while (skb->len > max_fragment_size) { 501 while (skb->len > max_fragment_size) {
490 skb_fragment = batadv_frag_create(skb, &frag_header, mtu); 502 skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
491 if (!skb_fragment) 503 if (!skb_fragment) {
492 goto out; 504 ret = -ENOMEM;
505 goto free_skb;
506 }
493 507
494 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); 508 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
495 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, 509 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
496 skb_fragment->len + ETH_HLEN); 510 skb_fragment->len + ETH_HLEN);
497 ret = batadv_send_unicast_skb(skb_fragment, neigh_node); 511 ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
498 if (ret != NET_XMIT_SUCCESS) { 512 if (ret != NET_XMIT_SUCCESS) {
499 /* return -1 so that the caller can free the original 513 ret = NET_XMIT_DROP;
500 * skb 514 goto free_skb;
501 */
502 ret = -1;
503 goto out;
504 } 515 }
505 516
506 frag_header.no++; 517 frag_header.no++;
507 518
508 /* The initial check in this function should cover this case */ 519 /* The initial check in this function should cover this case */
509 if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) { 520 if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) {
510 ret = -1; 521 ret = -EINVAL;
511 goto out; 522 goto free_skb;
512 } 523 }
513 } 524 }
514 525
515 /* Make room for the fragment header. */ 526 /* Make room for the fragment header. */
516 if (batadv_skb_head_push(skb, header_size) < 0 || 527 if (batadv_skb_head_push(skb, header_size) < 0 ||
517 pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) 528 pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) {
518 goto out; 529 ret = -ENOMEM;
530 goto free_skb;
531 }
519 532
520 memcpy(skb->data, &frag_header, header_size); 533 memcpy(skb->data, &frag_header, header_size);
521 534
@@ -524,10 +537,13 @@ int batadv_frag_send_packet(struct sk_buff *skb,
524 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, 537 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
525 skb->len + ETH_HLEN); 538 skb->len + ETH_HLEN);
526 ret = batadv_send_unicast_skb(skb, neigh_node); 539 ret = batadv_send_unicast_skb(skb, neigh_node);
540 /* skb was consumed */
541 skb = NULL;
527 542
528out: 543put_primary_if:
529 if (primary_if) 544 batadv_hardif_put(primary_if);
530 batadv_hardif_put(primary_if); 545free_skb:
546 kfree_skb(skb);
531 547
532 return ret; 548 return ret;
533} 549}
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 3202fe329e63..b95f619606af 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -47,7 +47,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
47static inline bool 47static inline bool
48batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry) 48batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry)
49{ 49{
50 if (!hlist_empty(&frags_entry->head) && 50 if (!hlist_empty(&frags_entry->fragment_list) &&
51 batadv_has_timed_out(frags_entry->timestamp, BATADV_FRAG_TIMEOUT)) 51 batadv_has_timed_out(frags_entry->timestamp, BATADV_FRAG_TIMEOUT))
52 return true; 52 return true;
53 return false; 53 return false;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index de055d64debe..52b8bd6ec431 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -348,7 +348,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
348 348
349 spin_lock_bh(&bat_priv->gw.list_lock); 349 spin_lock_bh(&bat_priv->gw.list_lock);
350 kref_get(&gw_node->refcount); 350 kref_get(&gw_node->refcount);
351 hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list); 351 hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.gateway_list);
352 spin_unlock_bh(&bat_priv->gw.list_lock); 352 spin_unlock_bh(&bat_priv->gw.list_lock);
353 353
354 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 354 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -376,7 +376,8 @@ struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
376 struct batadv_gw_node *gw_node_tmp, *gw_node = NULL; 376 struct batadv_gw_node *gw_node_tmp, *gw_node = NULL;
377 377
378 rcu_read_lock(); 378 rcu_read_lock();
379 hlist_for_each_entry_rcu(gw_node_tmp, &bat_priv->gw.list, list) { 379 hlist_for_each_entry_rcu(gw_node_tmp, &bat_priv->gw.gateway_list,
380 list) {
380 if (gw_node_tmp->orig_node != orig_node) 381 if (gw_node_tmp->orig_node != orig_node)
381 continue; 382 continue;
382 383
@@ -475,7 +476,7 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
475 476
476 spin_lock_bh(&bat_priv->gw.list_lock); 477 spin_lock_bh(&bat_priv->gw.list_lock);
477 hlist_for_each_entry_safe(gw_node, node_tmp, 478 hlist_for_each_entry_safe(gw_node, node_tmp,
478 &bat_priv->gw.list, list) { 479 &bat_priv->gw.gateway_list, list) {
479 hlist_del_init_rcu(&gw_node->list); 480 hlist_del_init_rcu(&gw_node->list);
480 batadv_gw_node_put(gw_node); 481 batadv_gw_node_put(gw_node);
481 } 482 }
@@ -704,7 +705,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
704{ 705{
705 struct batadv_neigh_node *neigh_curr = NULL; 706 struct batadv_neigh_node *neigh_curr = NULL;
706 struct batadv_neigh_node *neigh_old = NULL; 707 struct batadv_neigh_node *neigh_old = NULL;
707 struct batadv_orig_node *orig_dst_node = NULL; 708 struct batadv_orig_node *orig_dst_node;
708 struct batadv_gw_node *gw_node = NULL; 709 struct batadv_gw_node *gw_node = NULL;
709 struct batadv_gw_node *curr_gw = NULL; 710 struct batadv_gw_node *curr_gw = NULL;
710 struct batadv_neigh_ifinfo *curr_ifinfo, *old_ifinfo; 711 struct batadv_neigh_ifinfo *curr_ifinfo, *old_ifinfo;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 08ce36147c4c..61a431a9772b 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -92,8 +92,8 @@ out:
92 * 92 *
93 * Return: result of rtnl_link_ops->get_link_net or @fallback_net 93 * Return: result of rtnl_link_ops->get_link_net or @fallback_net
94 */ 94 */
95static const struct net *batadv_getlink_net(const struct net_device *netdev, 95static struct net *batadv_getlink_net(const struct net_device *netdev,
96 const struct net *fallback_net) 96 struct net *fallback_net)
97{ 97{
98 if (!netdev->rtnl_link_ops) 98 if (!netdev->rtnl_link_ops)
99 return fallback_net; 99 return fallback_net;
@@ -116,9 +116,9 @@ static const struct net *batadv_getlink_net(const struct net_device *netdev,
116 * Return: true if the devices are each others parent, otherwise false 116 * Return: true if the devices are each others parent, otherwise false
117 */ 117 */
118static bool batadv_mutual_parents(const struct net_device *dev1, 118static bool batadv_mutual_parents(const struct net_device *dev1,
119 const struct net *net1, 119 struct net *net1,
120 const struct net_device *dev2, 120 const struct net_device *dev2,
121 const struct net *net2) 121 struct net *net2)
122{ 122{
123 int dev1_parent_iflink = dev_get_iflink(dev1); 123 int dev1_parent_iflink = dev_get_iflink(dev1);
124 int dev2_parent_iflink = dev_get_iflink(dev2); 124 int dev2_parent_iflink = dev_get_iflink(dev2);
@@ -154,7 +154,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
154{ 154{
155 struct net *net = dev_net(net_dev); 155 struct net *net = dev_net(net_dev);
156 struct net_device *parent_dev; 156 struct net_device *parent_dev;
157 const struct net *parent_net; 157 struct net *parent_net;
158 bool ret; 158 bool ret;
159 159
160 /* check if this is a batman-adv mesh interface */ 160 /* check if this is a batman-adv mesh interface */
@@ -202,13 +202,77 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev)
202} 202}
203 203
204/** 204/**
205 * batadv_is_wifi_netdev - check if the given net_device struct is a wifi 205 * batadv_get_real_netdevice - check if the given netdev struct is a virtual
206 * interface 206 * interface on top of another 'real' interface
207 * @netdev: the device to check
208 *
209 * Callers must hold the rtnl semaphore. You may want batadv_get_real_netdev()
210 * instead of this.
211 *
212 * Return: the 'real' net device or the original net device and NULL in case
213 * of an error.
214 */
215static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
216{
217 struct batadv_hard_iface *hard_iface = NULL;
218 struct net_device *real_netdev = NULL;
219 struct net *real_net;
220 struct net *net;
221 int ifindex;
222
223 ASSERT_RTNL();
224
225 if (!netdev)
226 return NULL;
227
228 if (netdev->ifindex == dev_get_iflink(netdev)) {
229 dev_hold(netdev);
230 return netdev;
231 }
232
233 hard_iface = batadv_hardif_get_by_netdev(netdev);
234 if (!hard_iface || !hard_iface->soft_iface)
235 goto out;
236
237 net = dev_net(hard_iface->soft_iface);
238 ifindex = dev_get_iflink(netdev);
239 real_net = batadv_getlink_net(netdev, net);
240 real_netdev = dev_get_by_index(real_net, ifindex);
241
242out:
243 if (hard_iface)
244 batadv_hardif_put(hard_iface);
245 return real_netdev;
246}
247
248/**
249 * batadv_get_real_netdev - check if the given net_device struct is a virtual
250 * interface on top of another 'real' interface
207 * @net_device: the device to check 251 * @net_device: the device to check
208 * 252 *
209 * Return: true if the net device is a 802.11 wireless device, false otherwise. 253 * Return: the 'real' net device or the original net device and NULL in case
254 * of an error.
210 */ 255 */
211bool batadv_is_wifi_netdev(struct net_device *net_device) 256struct net_device *batadv_get_real_netdev(struct net_device *net_device)
257{
258 struct net_device *real_netdev;
259
260 rtnl_lock();
261 real_netdev = batadv_get_real_netdevice(net_device);
262 rtnl_unlock();
263
264 return real_netdev;
265}
266
267/**
268 * batadv_is_wext_netdev - check if the given net_device struct is a
269 * wext wifi interface
270 * @net_device: the device to check
271 *
272 * Return: true if the net device is a wext wireless device, false
273 * otherwise.
274 */
275static bool batadv_is_wext_netdev(struct net_device *net_device)
212{ 276{
213 if (!net_device) 277 if (!net_device)
214 return false; 278 return false;
@@ -221,6 +285,22 @@ bool batadv_is_wifi_netdev(struct net_device *net_device)
221 return true; 285 return true;
222#endif 286#endif
223 287
288 return false;
289}
290
291/**
292 * batadv_is_cfg80211_netdev - check if the given net_device struct is a
293 * cfg80211 wifi interface
294 * @net_device: the device to check
295 *
296 * Return: true if the net device is a cfg80211 wireless device, false
297 * otherwise.
298 */
299static bool batadv_is_cfg80211_netdev(struct net_device *net_device)
300{
301 if (!net_device)
302 return false;
303
224 /* cfg80211 drivers have to set ieee80211_ptr */ 304 /* cfg80211 drivers have to set ieee80211_ptr */
225 if (net_device->ieee80211_ptr) 305 if (net_device->ieee80211_ptr)
226 return true; 306 return true;
@@ -228,6 +308,125 @@ bool batadv_is_wifi_netdev(struct net_device *net_device)
228 return false; 308 return false;
229} 309}
230 310
311/**
312 * batadv_wifi_flags_evaluate - calculate wifi flags for net_device
313 * @net_device: the device to check
314 *
315 * Return: batadv_hard_iface_wifi_flags flags of the device
316 */
317static u32 batadv_wifi_flags_evaluate(struct net_device *net_device)
318{
319 u32 wifi_flags = 0;
320 struct net_device *real_netdev;
321
322 if (batadv_is_wext_netdev(net_device))
323 wifi_flags |= BATADV_HARDIF_WIFI_WEXT_DIRECT;
324
325 if (batadv_is_cfg80211_netdev(net_device))
326 wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT;
327
328 real_netdev = batadv_get_real_netdevice(net_device);
329 if (!real_netdev)
330 return wifi_flags;
331
332 if (real_netdev == net_device)
333 goto out;
334
335 if (batadv_is_wext_netdev(real_netdev))
336 wifi_flags |= BATADV_HARDIF_WIFI_WEXT_INDIRECT;
337
338 if (batadv_is_cfg80211_netdev(real_netdev))
339 wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_INDIRECT;
340
341out:
342 dev_put(real_netdev);
343 return wifi_flags;
344}
345
346/**
347 * batadv_is_cfg80211_hardif - check if the given hardif is a cfg80211 wifi
348 * interface
349 * @hard_iface: the device to check
350 *
351 * Return: true if the net device is a cfg80211 wireless device, false
352 * otherwise.
353 */
354bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface)
355{
356 u32 allowed_flags = 0;
357
358 allowed_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT;
359 allowed_flags |= BATADV_HARDIF_WIFI_CFG80211_INDIRECT;
360
361 return !!(hard_iface->wifi_flags & allowed_flags);
362}
363
364/**
365 * batadv_is_wifi_hardif - check if the given hardif is a wifi interface
366 * @hard_iface: the device to check
367 *
368 * Return: true if the net device is a 802.11 wireless device, false otherwise.
369 */
370bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface)
371{
372 if (!hard_iface)
373 return false;
374
375 return hard_iface->wifi_flags != 0;
376}
377
378/**
379 * batadv_hardif_no_broadcast - check whether (re)broadcast is necessary
380 * @if_outgoing: the outgoing interface checked and considered for (re)broadcast
381 * @orig_addr: the originator of this packet
382 * @orig_neigh: originator address of the forwarder we just got the packet from
383 * (NULL if we originated)
384 *
385 * Checks whether a packet needs to be (re)broadcasted on the given interface.
386 *
387 * Return:
388 * BATADV_HARDIF_BCAST_NORECIPIENT: No neighbor on interface
389 * BATADV_HARDIF_BCAST_DUPFWD: Just one neighbor, but it is the forwarder
390 * BATADV_HARDIF_BCAST_DUPORIG: Just one neighbor, but it is the originator
391 * BATADV_HARDIF_BCAST_OK: Several neighbors, must broadcast
392 */
393int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
394 u8 *orig_addr, u8 *orig_neigh)
395{
396 struct batadv_hardif_neigh_node *hardif_neigh;
397 struct hlist_node *first;
398 int ret = BATADV_HARDIF_BCAST_OK;
399
400 rcu_read_lock();
401
402 /* 0 neighbors -> no (re)broadcast */
403 first = rcu_dereference(hlist_first_rcu(&if_outgoing->neigh_list));
404 if (!first) {
405 ret = BATADV_HARDIF_BCAST_NORECIPIENT;
406 goto out;
407 }
408
409 /* >1 neighbors -> (re)brodcast */
410 if (rcu_dereference(hlist_next_rcu(first)))
411 goto out;
412
413 hardif_neigh = hlist_entry(first, struct batadv_hardif_neigh_node,
414 list);
415
416 /* 1 neighbor, is the originator -> no rebroadcast */
417 if (orig_addr && batadv_compare_eth(hardif_neigh->orig, orig_addr)) {
418 ret = BATADV_HARDIF_BCAST_DUPORIG;
419 /* 1 neighbor, is the one we received from -> no rebroadcast */
420 } else if (orig_neigh &&
421 batadv_compare_eth(hardif_neigh->orig, orig_neigh)) {
422 ret = BATADV_HARDIF_BCAST_DUPFWD;
423 }
424
425out:
426 rcu_read_unlock();
427 return ret;
428}
429
231static struct batadv_hard_iface * 430static struct batadv_hard_iface *
232batadv_hardif_get_active(const struct net_device *soft_iface) 431batadv_hardif_get_active(const struct net_device *soft_iface)
233{ 432{
@@ -697,7 +896,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
697 kref_init(&hard_iface->refcount); 896 kref_init(&hard_iface->refcount);
698 897
699 hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; 898 hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT;
700 if (batadv_is_wifi_netdev(net_dev)) 899 hard_iface->wifi_flags = batadv_wifi_flags_evaluate(net_dev);
900 if (batadv_is_wifi_hardif(hard_iface))
701 hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; 901 hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
702 902
703 batadv_v_hardif_init(hard_iface); 903 batadv_v_hardif_init(hard_iface);
@@ -806,6 +1006,11 @@ static int batadv_hard_if_event(struct notifier_block *this,
806 if (hard_iface == primary_if) 1006 if (hard_iface == primary_if)
807 batadv_primary_if_update_addr(bat_priv, NULL); 1007 batadv_primary_if_update_addr(bat_priv, NULL);
808 break; 1008 break;
1009 case NETDEV_CHANGEUPPER:
1010 hard_iface->wifi_flags = batadv_wifi_flags_evaluate(net_dev);
1011 if (batadv_is_wifi_hardif(hard_iface))
1012 hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
1013 break;
809 default: 1014 default:
810 break; 1015 break;
811 } 1016 }
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index a76724d369bf..d6309a423629 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -40,6 +40,20 @@ enum batadv_hard_if_state {
40}; 40};
41 41
42/** 42/**
43 * enum batadv_hard_if_bcast - broadcast avoidance options
44 * @BATADV_HARDIF_BCAST_OK: Do broadcast on according hard interface
45 * @BATADV_HARDIF_BCAST_NORECIPIENT: Broadcast not needed, there is no recipient
46 * @BATADV_HARDIF_BCAST_DUPFWD: There is just the neighbor we got it from
47 * @BATADV_HARDIF_BCAST_DUPORIG: There is just the originator
48 */
49enum batadv_hard_if_bcast {
50 BATADV_HARDIF_BCAST_OK = 0,
51 BATADV_HARDIF_BCAST_NORECIPIENT,
52 BATADV_HARDIF_BCAST_DUPFWD,
53 BATADV_HARDIF_BCAST_DUPORIG,
54};
55
56/**
43 * enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal 57 * enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal
44 * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface 58 * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface
45 * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed 59 * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed
@@ -51,8 +65,9 @@ enum batadv_hard_if_cleanup {
51 65
52extern struct notifier_block batadv_hard_if_notifier; 66extern struct notifier_block batadv_hard_if_notifier;
53 67
54bool batadv_is_wifi_netdev(struct net_device *net_device); 68struct net_device *batadv_get_real_netdev(struct net_device *net_device);
55bool batadv_is_wifi_iface(int ifindex); 69bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface);
70bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface);
56struct batadv_hard_iface* 71struct batadv_hard_iface*
57batadv_hardif_get_by_netdev(const struct net_device *net_dev); 72batadv_hardif_get_by_netdev(const struct net_device *net_dev);
58int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, 73int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
@@ -63,6 +78,8 @@ void batadv_hardif_remove_interfaces(void);
63int batadv_hardif_min_mtu(struct net_device *soft_iface); 78int batadv_hardif_min_mtu(struct net_device *soft_iface);
64void batadv_update_min_mtu(struct net_device *soft_iface); 79void batadv_update_min_mtu(struct net_device *soft_iface);
65void batadv_hardif_release(struct kref *ref); 80void batadv_hardif_release(struct kref *ref);
81int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
82 u8 *orig_addr, u8 *orig_neigh);
66 83
67/** 84/**
68 * batadv_hardif_put - decrement the hard interface refcounter and possibly 85 * batadv_hardif_put - decrement the hard interface refcounter and possibly
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index cbbf87075f06..557a7044cfbc 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -61,36 +61,6 @@ void batadv_hash_set_lock_class(struct batadv_hashtable *hash,
61/* free only the hashtable and the hash itself. */ 61/* free only the hashtable and the hash itself. */
62void batadv_hash_destroy(struct batadv_hashtable *hash); 62void batadv_hash_destroy(struct batadv_hashtable *hash);
63 63
64/* remove the hash structure. if hashdata_free_cb != NULL, this function will be
65 * called to remove the elements inside of the hash. if you don't remove the
66 * elements, memory might be leaked.
67 */
68static inline void batadv_hash_delete(struct batadv_hashtable *hash,
69 batadv_hashdata_free_cb free_cb,
70 void *arg)
71{
72 struct hlist_head *head;
73 struct hlist_node *node, *node_tmp;
74 spinlock_t *list_lock; /* spinlock to protect write access */
75 u32 i;
76
77 for (i = 0; i < hash->size; i++) {
78 head = &hash->table[i];
79 list_lock = &hash->list_locks[i];
80
81 spin_lock_bh(list_lock);
82 hlist_for_each_safe(node, node_tmp, head) {
83 hlist_del_rcu(node);
84
85 if (free_cb)
86 free_cb(node, arg);
87 }
88 spin_unlock_bh(list_lock);
89 }
90
91 batadv_hash_destroy(hash);
92}
93
94/** 64/**
95 * batadv_hash_add - adds data to the hashtable 65 * batadv_hash_add - adds data to the hashtable
96 * @hash: storage hash table 66 * @hash: storage hash table
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 378cc1119d66..b310f381ae02 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -38,7 +38,6 @@
38#include <linux/skbuff.h> 38#include <linux/skbuff.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/spinlock.h> 40#include <linux/spinlock.h>
41#include <linux/stat.h>
42#include <linux/stddef.h> 41#include <linux/stddef.h>
43#include <linux/string.h> 42#include <linux/string.h>
44#include <linux/uaccess.h> 43#include <linux/uaccess.h>
@@ -322,8 +321,8 @@ int batadv_socket_setup(struct batadv_priv *bat_priv)
322 if (!bat_priv->debug_dir) 321 if (!bat_priv->debug_dir)
323 goto err; 322 goto err;
324 323
325 d = debugfs_create_file(BATADV_ICMP_SOCKET, S_IFREG | S_IWUSR | S_IRUSR, 324 d = debugfs_create_file(BATADV_ICMP_SOCKET, 0600, bat_priv->debug_dir,
326 bat_priv->debug_dir, bat_priv, &batadv_fops); 325 bat_priv, &batadv_fops);
327 if (!d) 326 if (!d)
328 goto err; 327 goto err;
329 328
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 56dc532f7a2c..c73c31769aba 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -31,7 +31,6 @@
31#include <linux/sched.h> /* for linux/wait.h */ 31#include <linux/sched.h> /* for linux/wait.h */
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/spinlock.h> 33#include <linux/spinlock.h>
34#include <linux/stat.h>
35#include <linux/stddef.h> 34#include <linux/stddef.h>
36#include <linux/types.h> 35#include <linux/types.h>
37#include <linux/uaccess.h> 36#include <linux/uaccess.h>
@@ -212,8 +211,7 @@ int batadv_debug_log_setup(struct batadv_priv *bat_priv)
212 spin_lock_init(&bat_priv->debug_log->lock); 211 spin_lock_init(&bat_priv->debug_log->lock);
213 init_waitqueue_head(&bat_priv->debug_log->queue_wait); 212 init_waitqueue_head(&bat_priv->debug_log->queue_wait);
214 213
215 d = debugfs_create_file("log", S_IFREG | S_IRUSR, 214 d = debugfs_create_file("log", 0400, bat_priv->debug_dir, bat_priv,
216 bat_priv->debug_dir, bat_priv,
217 &batadv_log_fops); 215 &batadv_log_fops);
218 if (!d) 216 if (!d)
219 goto err; 217 goto err;
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index d2905a855d1b..3284a7b0325d 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -71,12 +71,12 @@ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
71__printf(2, 3); 71__printf(2, 3);
72 72
73/* possibly ratelimited debug output */ 73/* possibly ratelimited debug output */
74#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \ 74#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
75 do { \ 75 do { \
76 if (atomic_read(&bat_priv->log_level) & type && \ 76 if (atomic_read(&(bat_priv)->log_level) & (type) && \
77 (!ratelimited || net_ratelimit())) \ 77 (!(ratelimited) || net_ratelimit())) \
78 batadv_debug_log(bat_priv, fmt, ## arg);\ 78 batadv_debug_log(bat_priv, fmt, ## arg); \
79 } \ 79 } \
80 while (0) 80 while (0)
81#else /* !CONFIG_BATMAN_ADV_DEBUG */ 81#else /* !CONFIG_BATMAN_ADV_DEBUG */
82__printf(4, 5) 82__printf(4, 5)
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 2c017ab47557..d46415edd3be 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -23,6 +23,7 @@
23#include <linux/crc32c.h> 23#include <linux/crc32c.h>
24#include <linux/errno.h> 24#include <linux/errno.h>
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/genetlink.h>
26#include <linux/if_ether.h> 27#include <linux/if_ether.h>
27#include <linux/if_vlan.h> 28#include <linux/if_vlan.h>
28#include <linux/init.h> 29#include <linux/init.h>
@@ -44,6 +45,7 @@
44#include <linux/workqueue.h> 45#include <linux/workqueue.h>
45#include <net/dsfield.h> 46#include <net/dsfield.h>
46#include <net/rtnetlink.h> 47#include <net/rtnetlink.h>
48#include <uapi/linux/batman_adv.h>
47 49
48#include "bat_algo.h" 50#include "bat_algo.h"
49#include "bat_iv_ogm.h" 51#include "bat_iv_ogm.h"
@@ -160,7 +162,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
160 162
161 INIT_HLIST_HEAD(&bat_priv->forw_bat_list); 163 INIT_HLIST_HEAD(&bat_priv->forw_bat_list);
162 INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); 164 INIT_HLIST_HEAD(&bat_priv->forw_bcast_list);
163 INIT_HLIST_HEAD(&bat_priv->gw.list); 165 INIT_HLIST_HEAD(&bat_priv->gw.gateway_list);
164#ifdef CONFIG_BATMAN_ADV_MCAST 166#ifdef CONFIG_BATMAN_ADV_MCAST
165 INIT_HLIST_HEAD(&bat_priv->mcast.want_all_unsnoopables_list); 167 INIT_HLIST_HEAD(&bat_priv->mcast.want_all_unsnoopables_list);
166 INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv4_list); 168 INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv4_list);
@@ -402,6 +404,8 @@ void batadv_skb_set_priority(struct sk_buff *skb, int offset)
402static int batadv_recv_unhandled_packet(struct sk_buff *skb, 404static int batadv_recv_unhandled_packet(struct sk_buff *skb,
403 struct batadv_hard_iface *recv_if) 405 struct batadv_hard_iface *recv_if)
404{ 406{
407 kfree_skb(skb);
408
405 return NET_RX_DROP; 409 return NET_RX_DROP;
406} 410}
407 411
@@ -416,7 +420,6 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
416 struct batadv_ogm_packet *batadv_ogm_packet; 420 struct batadv_ogm_packet *batadv_ogm_packet;
417 struct batadv_hard_iface *hard_iface; 421 struct batadv_hard_iface *hard_iface;
418 u8 idx; 422 u8 idx;
419 int ret;
420 423
421 hard_iface = container_of(ptype, struct batadv_hard_iface, 424 hard_iface = container_of(ptype, struct batadv_hard_iface,
422 batman_adv_ptype); 425 batman_adv_ptype);
@@ -466,14 +469,8 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
466 /* reset control block to avoid left overs from previous users */ 469 /* reset control block to avoid left overs from previous users */
467 memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); 470 memset(skb->cb, 0, sizeof(struct batadv_skb_cb));
468 471
469 /* all receive handlers return whether they received or reused
470 * the supplied skb. if not, we have to free the skb.
471 */
472 idx = batadv_ogm_packet->packet_type; 472 idx = batadv_ogm_packet->packet_type;
473 ret = (*batadv_rx_handler[idx])(skb, hard_iface); 473 (*batadv_rx_handler[idx])(skb, hard_iface);
474
475 if (ret == NET_RX_DROP)
476 kfree_skb(skb);
477 474
478 batadv_hardif_put(hard_iface); 475 batadv_hardif_put(hard_iface);
479 476
@@ -653,3 +650,4 @@ MODULE_DESCRIPTION(BATADV_DRIVER_DESC);
653MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE); 650MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE);
654MODULE_VERSION(BATADV_SOURCE_VERSION); 651MODULE_VERSION(BATADV_SOURCE_VERSION);
655MODULE_ALIAS_RTNL_LINK("batadv"); 652MODULE_ALIAS_RTNL_LINK("batadv");
653MODULE_ALIAS_GENL_FAMILY(BATADV_NL_NAME);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 09af21e27639..a6cc8040a21d 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
24#define BATADV_DRIVER_DEVICE "batman-adv" 24#define BATADV_DRIVER_DEVICE "batman-adv"
25 25
26#ifndef BATADV_SOURCE_VERSION 26#ifndef BATADV_SOURCE_VERSION
27#define BATADV_SOURCE_VERSION "2016.4" 27#define BATADV_SOURCE_VERSION "2016.5"
28#endif 28#endif
29 29
30/* B.A.T.M.A.N. parameters */ 30/* B.A.T.M.A.N. parameters */
@@ -48,6 +48,7 @@
48#define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */ 48#define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */
49#define BATADV_TT_WORK_PERIOD 5000 /* 5 seconds */ 49#define BATADV_TT_WORK_PERIOD 5000 /* 5 seconds */
50#define BATADV_ORIG_WORK_PERIOD 1000 /* 1 second */ 50#define BATADV_ORIG_WORK_PERIOD 1000 /* 1 second */
51#define BATADV_MCAST_WORK_PERIOD 500 /* 0.5 seconds */
51#define BATADV_DAT_ENTRY_TIMEOUT (5 * 60000) /* 5 mins in milliseconds */ 52#define BATADV_DAT_ENTRY_TIMEOUT (5 * 60000) /* 5 mins in milliseconds */
52/* sliding packet range of received originator messages in sequence numbers 53/* sliding packet range of received originator messages in sequence numbers
53 * (should be a multiple of our word size) 54 * (should be a multiple of our word size)
@@ -185,7 +186,6 @@ enum batadv_uev_type {
185 186
186#include <linux/bitops.h> /* for packet.h */ 187#include <linux/bitops.h> /* for packet.h */
187#include <linux/compiler.h> 188#include <linux/compiler.h>
188#include <linux/cpumask.h>
189#include <linux/etherdevice.h> 189#include <linux/etherdevice.h>
190#include <linux/if_ether.h> /* for packet.h */ 190#include <linux/if_ether.h> /* for packet.h */
191#include <linux/if_vlan.h> 191#include <linux/if_vlan.h>
@@ -200,8 +200,8 @@ struct packet_type;
200struct seq_file; 200struct seq_file;
201struct sk_buff; 201struct sk_buff;
202 202
203#define BATADV_PRINT_VID(vid) ((vid & BATADV_VLAN_HAS_TAG) ? \ 203#define BATADV_PRINT_VID(vid) (((vid) & BATADV_VLAN_HAS_TAG) ? \
204 (int)(vid & VLAN_VID_MASK) : -1) 204 (int)((vid) & VLAN_VID_MASK) : -1)
205 205
206extern struct list_head batadv_hardif_list; 206extern struct list_head batadv_hardif_list;
207 207
@@ -284,26 +284,6 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx,
284 284
285#define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) 285#define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1)
286 286
287/**
288 * batadv_sum_counter - Sum the cpu-local counters for index 'idx'
289 * @bat_priv: the bat priv with all the soft interface information
290 * @idx: index of counter to sum up
291 *
292 * Return: sum of all cpu-local counters
293 */
294static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
295{
296 u64 *counters, sum = 0;
297 int cpu;
298
299 for_each_possible_cpu(cpu) {
300 counters = per_cpu_ptr(bat_priv->bat_counters, cpu);
301 sum += counters[idx];
302 }
303
304 return sum;
305}
306
307/* Define a macro to reach the control buffer of the skb. The members of the 287/* Define a macro to reach the control buffer of the skb. The members of the
308 * control buffer are defined in struct batadv_skb_cb in types.h. 288 * control buffer are defined in struct batadv_skb_cb in types.h.
309 * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h. 289 * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h.
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 13661f43386f..090a69fc342e 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -33,6 +33,7 @@
33#include <linux/in6.h> 33#include <linux/in6.h>
34#include <linux/ip.h> 34#include <linux/ip.h>
35#include <linux/ipv6.h> 35#include <linux/ipv6.h>
36#include <linux/jiffies.h>
36#include <linux/kernel.h> 37#include <linux/kernel.h>
37#include <linux/kref.h> 38#include <linux/kref.h>
38#include <linux/list.h> 39#include <linux/list.h>
@@ -48,6 +49,7 @@
48#include <linux/stddef.h> 49#include <linux/stddef.h>
49#include <linux/string.h> 50#include <linux/string.h>
50#include <linux/types.h> 51#include <linux/types.h>
52#include <linux/workqueue.h>
51#include <net/addrconf.h> 53#include <net/addrconf.h>
52#include <net/if_inet6.h> 54#include <net/if_inet6.h>
53#include <net/ip.h> 55#include <net/ip.h>
@@ -60,6 +62,18 @@
60#include "translation-table.h" 62#include "translation-table.h"
61#include "tvlv.h" 63#include "tvlv.h"
62 64
65static void batadv_mcast_mla_update(struct work_struct *work);
66
67/**
68 * batadv_mcast_start_timer - schedule the multicast periodic worker
69 * @bat_priv: the bat priv with all the soft interface information
70 */
71static void batadv_mcast_start_timer(struct batadv_priv *bat_priv)
72{
73 queue_delayed_work(batadv_event_workqueue, &bat_priv->mcast.work,
74 msecs_to_jiffies(BATADV_MCAST_WORK_PERIOD));
75}
76
63/** 77/**
64 * batadv_mcast_get_bridge - get the bridge on top of the softif if it exists 78 * batadv_mcast_get_bridge - get the bridge on top of the softif if it exists
65 * @soft_iface: netdev struct of the mesh interface 79 * @soft_iface: netdev struct of the mesh interface
@@ -231,19 +245,15 @@ out:
231 245
232/** 246/**
233 * batadv_mcast_mla_list_free - free a list of multicast addresses 247 * batadv_mcast_mla_list_free - free a list of multicast addresses
234 * @bat_priv: the bat priv with all the soft interface information
235 * @mcast_list: the list to free 248 * @mcast_list: the list to free
236 * 249 *
237 * Removes and frees all items in the given mcast_list. 250 * Removes and frees all items in the given mcast_list.
238 */ 251 */
239static void batadv_mcast_mla_list_free(struct batadv_priv *bat_priv, 252static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list)
240 struct hlist_head *mcast_list)
241{ 253{
242 struct batadv_hw_addr *mcast_entry; 254 struct batadv_hw_addr *mcast_entry;
243 struct hlist_node *tmp; 255 struct hlist_node *tmp;
244 256
245 lockdep_assert_held(&bat_priv->tt.commit_lock);
246
247 hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) { 257 hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) {
248 hlist_del(&mcast_entry->list); 258 hlist_del(&mcast_entry->list);
249 kfree(mcast_entry); 259 kfree(mcast_entry);
@@ -259,6 +269,8 @@ static void batadv_mcast_mla_list_free(struct batadv_priv *bat_priv,
259 * translation table except the ones listed in the given mcast_list. 269 * translation table except the ones listed in the given mcast_list.
260 * 270 *
261 * If mcast_list is NULL then all are retracted. 271 * If mcast_list is NULL then all are retracted.
272 *
273 * Do not call outside of the mcast worker! (or cancel mcast worker first)
262 */ 274 */
263static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, 275static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
264 struct hlist_head *mcast_list) 276 struct hlist_head *mcast_list)
@@ -266,7 +278,7 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
266 struct batadv_hw_addr *mcast_entry; 278 struct batadv_hw_addr *mcast_entry;
267 struct hlist_node *tmp; 279 struct hlist_node *tmp;
268 280
269 lockdep_assert_held(&bat_priv->tt.commit_lock); 281 WARN_ON(delayed_work_pending(&bat_priv->mcast.work));
270 282
271 hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list, 283 hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list,
272 list) { 284 list) {
@@ -291,6 +303,8 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
291 * 303 *
292 * Adds multicast listener announcements from the given mcast_list to the 304 * Adds multicast listener announcements from the given mcast_list to the
293 * translation table if they have not been added yet. 305 * translation table if they have not been added yet.
306 *
307 * Do not call outside of the mcast worker! (or cancel mcast worker first)
294 */ 308 */
295static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, 309static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
296 struct hlist_head *mcast_list) 310 struct hlist_head *mcast_list)
@@ -298,7 +312,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
298 struct batadv_hw_addr *mcast_entry; 312 struct batadv_hw_addr *mcast_entry;
299 struct hlist_node *tmp; 313 struct hlist_node *tmp;
300 314
301 lockdep_assert_held(&bat_priv->tt.commit_lock); 315 WARN_ON(delayed_work_pending(&bat_priv->mcast.work));
302 316
303 if (!mcast_list) 317 if (!mcast_list)
304 return; 318 return;
@@ -532,13 +546,18 @@ update:
532} 546}
533 547
534/** 548/**
535 * batadv_mcast_mla_update - update the own MLAs 549 * __batadv_mcast_mla_update - update the own MLAs
536 * @bat_priv: the bat priv with all the soft interface information 550 * @bat_priv: the bat priv with all the soft interface information
537 * 551 *
538 * Updates the own multicast listener announcements in the translation 552 * Updates the own multicast listener announcements in the translation
539 * table as well as the own, announced multicast tvlv container. 553 * table as well as the own, announced multicast tvlv container.
554 *
555 * Note that non-conflicting reads and writes to bat_priv->mcast.mla_list
556 * in batadv_mcast_mla_tt_retract() and batadv_mcast_mla_tt_add() are
557 * ensured by the non-parallel execution of the worker this function
558 * belongs to.
540 */ 559 */
541void batadv_mcast_mla_update(struct batadv_priv *bat_priv) 560static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv)
542{ 561{
543 struct net_device *soft_iface = bat_priv->soft_iface; 562 struct net_device *soft_iface = bat_priv->soft_iface;
544 struct hlist_head mcast_list = HLIST_HEAD_INIT; 563 struct hlist_head mcast_list = HLIST_HEAD_INIT;
@@ -560,7 +579,30 @@ update:
560 batadv_mcast_mla_tt_add(bat_priv, &mcast_list); 579 batadv_mcast_mla_tt_add(bat_priv, &mcast_list);
561 580
562out: 581out:
563 batadv_mcast_mla_list_free(bat_priv, &mcast_list); 582 batadv_mcast_mla_list_free(&mcast_list);
583}
584
585/**
586 * batadv_mcast_mla_update - update the own MLAs
587 * @work: kernel work struct
588 *
589 * Updates the own multicast listener announcements in the translation
590 * table as well as the own, announced multicast tvlv container.
591 *
592 * In the end, reschedules the work timer.
593 */
594static void batadv_mcast_mla_update(struct work_struct *work)
595{
596 struct delayed_work *delayed_work;
597 struct batadv_priv_mcast *priv_mcast;
598 struct batadv_priv *bat_priv;
599
600 delayed_work = to_delayed_work(work);
601 priv_mcast = container_of(delayed_work, struct batadv_priv_mcast, work);
602 bat_priv = container_of(priv_mcast, struct batadv_priv, mcast);
603
604 __batadv_mcast_mla_update(bat_priv);
605 batadv_mcast_start_timer(bat_priv);
564} 606}
565 607
566/** 608/**
@@ -1132,6 +1174,9 @@ void batadv_mcast_init(struct batadv_priv *bat_priv)
1132 batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler, 1174 batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler,
1133 NULL, BATADV_TVLV_MCAST, 2, 1175 NULL, BATADV_TVLV_MCAST, 2,
1134 BATADV_TVLV_HANDLER_OGM_CIFNOTFND); 1176 BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
1177
1178 INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update);
1179 batadv_mcast_start_timer(bat_priv);
1135} 1180}
1136 1181
1137#ifdef CONFIG_BATMAN_ADV_DEBUGFS 1182#ifdef CONFIG_BATMAN_ADV_DEBUGFS
@@ -1243,12 +1288,13 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
1243 */ 1288 */
1244void batadv_mcast_free(struct batadv_priv *bat_priv) 1289void batadv_mcast_free(struct batadv_priv *bat_priv)
1245{ 1290{
1291 cancel_delayed_work_sync(&bat_priv->mcast.work);
1292
1246 batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2); 1293 batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
1247 batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2); 1294 batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
1248 1295
1249 spin_lock_bh(&bat_priv->tt.commit_lock); 1296 /* safely calling outside of worker, as worker was canceled above */
1250 batadv_mcast_mla_tt_retract(bat_priv, NULL); 1297 batadv_mcast_mla_tt_retract(bat_priv, NULL);
1251 spin_unlock_bh(&bat_priv->tt.commit_lock);
1252} 1298}
1253 1299
1254/** 1300/**
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 1fb00ba84907..2cddaf52a21d 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -39,8 +39,6 @@ enum batadv_forw_mode {
39 39
40#ifdef CONFIG_BATMAN_ADV_MCAST 40#ifdef CONFIG_BATMAN_ADV_MCAST
41 41
42void batadv_mcast_mla_update(struct batadv_priv *bat_priv);
43
44enum batadv_forw_mode 42enum batadv_forw_mode
45batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, 43batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
46 struct batadv_orig_node **mcast_single_orig); 44 struct batadv_orig_node **mcast_single_orig);
@@ -55,10 +53,6 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
55 53
56#else 54#else
57 55
58static inline void batadv_mcast_mla_update(struct batadv_priv *bat_priv)
59{
60}
61
62static inline enum batadv_forw_mode 56static inline enum batadv_forw_mode
63batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, 57batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
64 struct batadv_orig_node **mcast_single_orig) 58 struct batadv_orig_node **mcast_single_orig)
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 64cb6acbe0a6..062738163bdc 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -20,11 +20,14 @@
20 20
21#include <linux/atomic.h> 21#include <linux/atomic.h>
22#include <linux/byteorder/generic.h> 22#include <linux/byteorder/generic.h>
23#include <linux/cache.h>
23#include <linux/errno.h> 24#include <linux/errno.h>
25#include <linux/export.h>
24#include <linux/fs.h> 26#include <linux/fs.h>
25#include <linux/genetlink.h> 27#include <linux/genetlink.h>
26#include <linux/if_ether.h> 28#include <linux/if_ether.h>
27#include <linux/init.h> 29#include <linux/init.h>
30#include <linux/kernel.h>
28#include <linux/netdevice.h> 31#include <linux/netdevice.h>
29#include <linux/netlink.h> 32#include <linux/netlink.h>
30#include <linux/printk.h> 33#include <linux/printk.h>
@@ -48,14 +51,7 @@
48#include "tp_meter.h" 51#include "tp_meter.h"
49#include "translation-table.h" 52#include "translation-table.h"
50 53
51struct genl_family batadv_netlink_family = { 54struct genl_family batadv_netlink_family;
52 .id = GENL_ID_GENERATE,
53 .hdrsize = 0,
54 .name = BATADV_NL_NAME,
55 .version = 1,
56 .maxattr = BATADV_ATTR_MAX,
57 .netnsok = true,
58};
59 55
60/* multicast groups */ 56/* multicast groups */
61enum batadv_netlink_multicast_groups { 57enum batadv_netlink_multicast_groups {
@@ -534,7 +530,7 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
534 return msg->len; 530 return msg->len;
535} 531}
536 532
537static struct genl_ops batadv_netlink_ops[] = { 533static const struct genl_ops batadv_netlink_ops[] = {
538 { 534 {
539 .cmd = BATADV_CMD_GET_MESH_INFO, 535 .cmd = BATADV_CMD_GET_MESH_INFO,
540 .flags = GENL_ADMIN_PERM, 536 .flags = GENL_ADMIN_PERM,
@@ -610,6 +606,19 @@ static struct genl_ops batadv_netlink_ops[] = {
610 606
611}; 607};
612 608
609struct genl_family batadv_netlink_family __ro_after_init = {
610 .hdrsize = 0,
611 .name = BATADV_NL_NAME,
612 .version = 1,
613 .maxattr = BATADV_ATTR_MAX,
614 .netnsok = true,
615 .module = THIS_MODULE,
616 .ops = batadv_netlink_ops,
617 .n_ops = ARRAY_SIZE(batadv_netlink_ops),
618 .mcgrps = batadv_netlink_mcgrps,
619 .n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps),
620};
621
613/** 622/**
614 * batadv_netlink_register - register batadv genl netlink family 623 * batadv_netlink_register - register batadv genl netlink family
615 */ 624 */
@@ -617,9 +626,7 @@ void __init batadv_netlink_register(void)
617{ 626{
618 int ret; 627 int ret;
619 628
620 ret = genl_register_family_with_ops_groups(&batadv_netlink_family, 629 ret = genl_register_family(&batadv_netlink_family);
621 batadv_netlink_ops,
622 batadv_netlink_mcgrps);
623 if (ret) 630 if (ret)
624 pr_warn("unable to register netlink family"); 631 pr_warn("unable to register netlink family");
625} 632}
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index e3baf697a35c..ab5a3bf0765f 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -44,7 +44,6 @@
44#include <linux/skbuff.h> 44#include <linux/skbuff.h>
45#include <linux/slab.h> 45#include <linux/slab.h>
46#include <linux/spinlock.h> 46#include <linux/spinlock.h>
47#include <linux/stat.h>
48#include <linux/stddef.h> 47#include <linux/stddef.h>
49#include <linux/string.h> 48#include <linux/string.h>
50#include <linux/workqueue.h> 49#include <linux/workqueue.h>
@@ -261,10 +260,16 @@ static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
261/** 260/**
262 * batadv_nc_packet_free - frees nc packet 261 * batadv_nc_packet_free - frees nc packet
263 * @nc_packet: the nc packet to free 262 * @nc_packet: the nc packet to free
263 * @dropped: whether the packet is freed because is is dropped
264 */ 264 */
265static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet) 265static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet,
266 bool dropped)
266{ 267{
267 kfree_skb(nc_packet->skb); 268 if (dropped)
269 kfree_skb(nc_packet->skb);
270 else
271 consume_skb(nc_packet->skb);
272
268 batadv_nc_path_put(nc_packet->nc_path); 273 batadv_nc_path_put(nc_packet->nc_path);
269 kfree(nc_packet); 274 kfree(nc_packet);
270} 275}
@@ -577,7 +582,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
577{ 582{
578 batadv_send_unicast_skb(nc_packet->skb, nc_packet->neigh_node); 583 batadv_send_unicast_skb(nc_packet->skb, nc_packet->neigh_node);
579 nc_packet->skb = NULL; 584 nc_packet->skb = NULL;
580 batadv_nc_packet_free(nc_packet); 585 batadv_nc_packet_free(nc_packet, false);
581} 586}
582 587
583/** 588/**
@@ -611,7 +616,7 @@ static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv,
611 616
612 /* purge nc packet */ 617 /* purge nc packet */
613 list_del(&nc_packet->list); 618 list_del(&nc_packet->list);
614 batadv_nc_packet_free(nc_packet); 619 batadv_nc_packet_free(nc_packet, true);
615 620
616 res = true; 621 res = true;
617 622
@@ -1209,11 +1214,11 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1209 } 1214 }
1210 1215
1211 /* skb_src is now coded into skb_dest, so free it */ 1216 /* skb_src is now coded into skb_dest, so free it */
1212 kfree_skb(skb_src); 1217 consume_skb(skb_src);
1213 1218
1214 /* avoid duplicate free of skb from nc_packet */ 1219 /* avoid duplicate free of skb from nc_packet */
1215 nc_packet->skb = NULL; 1220 nc_packet->skb = NULL;
1216 batadv_nc_packet_free(nc_packet); 1221 batadv_nc_packet_free(nc_packet, false);
1217 1222
1218 /* Send the coded packet and return true */ 1223 /* Send the coded packet and return true */
1219 batadv_send_unicast_skb(skb_dest, first_dest); 1224 batadv_send_unicast_skb(skb_dest, first_dest);
@@ -1400,7 +1405,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
1400 /* batadv_nc_skb_store_for_decoding() clones the skb, so we must free 1405 /* batadv_nc_skb_store_for_decoding() clones the skb, so we must free
1401 * our ref 1406 * our ref
1402 */ 1407 */
1403 kfree_skb(skb); 1408 consume_skb(skb);
1404} 1409}
1405 1410
1406/** 1411/**
@@ -1724,7 +1729,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
1724 ether_addr_copy(unicast_packet->dest, orig_dest); 1729 ether_addr_copy(unicast_packet->dest, orig_dest);
1725 unicast_packet->ttvn = ttvn; 1730 unicast_packet->ttvn = ttvn;
1726 1731
1727 batadv_nc_packet_free(nc_packet); 1732 batadv_nc_packet_free(nc_packet, false);
1728 return unicast_packet; 1733 return unicast_packet;
1729} 1734}
1730 1735
@@ -1814,11 +1819,11 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
1814 1819
1815 /* Check if network coding is enabled */ 1820 /* Check if network coding is enabled */
1816 if (!atomic_read(&bat_priv->network_coding)) 1821 if (!atomic_read(&bat_priv->network_coding))
1817 return NET_RX_DROP; 1822 goto free_skb;
1818 1823
1819 /* Make sure we can access (and remove) header */ 1824 /* Make sure we can access (and remove) header */
1820 if (unlikely(!pskb_may_pull(skb, hdr_size))) 1825 if (unlikely(!pskb_may_pull(skb, hdr_size)))
1821 return NET_RX_DROP; 1826 goto free_skb;
1822 1827
1823 coded_packet = (struct batadv_coded_packet *)skb->data; 1828 coded_packet = (struct batadv_coded_packet *)skb->data;
1824 ethhdr = eth_hdr(skb); 1829 ethhdr = eth_hdr(skb);
@@ -1826,7 +1831,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
1826 /* Verify frame is destined for us */ 1831 /* Verify frame is destined for us */
1827 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && 1832 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) &&
1828 !batadv_is_my_mac(bat_priv, coded_packet->second_dest)) 1833 !batadv_is_my_mac(bat_priv, coded_packet->second_dest))
1829 return NET_RX_DROP; 1834 goto free_skb;
1830 1835
1831 /* Update stat counter */ 1836 /* Update stat counter */
1832 if (batadv_is_my_mac(bat_priv, coded_packet->second_dest)) 1837 if (batadv_is_my_mac(bat_priv, coded_packet->second_dest))
@@ -1836,7 +1841,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
1836 coded_packet); 1841 coded_packet);
1837 if (!nc_packet) { 1842 if (!nc_packet) {
1838 batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); 1843 batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED);
1839 return NET_RX_DROP; 1844 goto free_skb;
1840 } 1845 }
1841 1846
1842 /* Make skb's linear, because decoding accesses the entire buffer */ 1847 /* Make skb's linear, because decoding accesses the entire buffer */
@@ -1861,7 +1866,10 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
1861 return batadv_recv_unicast_packet(skb, recv_if); 1866 return batadv_recv_unicast_packet(skb, recv_if);
1862 1867
1863free_nc_packet: 1868free_nc_packet:
1864 batadv_nc_packet_free(nc_packet); 1869 batadv_nc_packet_free(nc_packet, true);
1870free_skb:
1871 kfree_skb(skb);
1872
1865 return NET_RX_DROP; 1873 return NET_RX_DROP;
1866} 1874}
1867 1875
@@ -1961,17 +1969,16 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
1961 if (!nc_dir) 1969 if (!nc_dir)
1962 goto out; 1970 goto out;
1963 1971
1964 file = debugfs_create_u8("min_tq", S_IRUGO | S_IWUSR, nc_dir, 1972 file = debugfs_create_u8("min_tq", 0644, nc_dir, &bat_priv->nc.min_tq);
1965 &bat_priv->nc.min_tq);
1966 if (!file) 1973 if (!file)
1967 goto out; 1974 goto out;
1968 1975
1969 file = debugfs_create_u32("max_fwd_delay", S_IRUGO | S_IWUSR, nc_dir, 1976 file = debugfs_create_u32("max_fwd_delay", 0644, nc_dir,
1970 &bat_priv->nc.max_fwd_delay); 1977 &bat_priv->nc.max_fwd_delay);
1971 if (!file) 1978 if (!file)
1972 goto out; 1979 goto out;
1973 1980
1974 file = debugfs_create_u32("max_buffer_time", S_IRUGO | S_IWUSR, nc_dir, 1981 file = debugfs_create_u32("max_buffer_time", 0644, nc_dir,
1975 &bat_priv->nc.max_buffer_time); 1982 &bat_priv->nc.max_buffer_time);
1976 if (!file) 1983 if (!file)
1977 goto out; 1984 goto out;
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 7c8d16086f0f..8f3b2969cc4e 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -364,7 +364,7 @@ struct batadv_orig_ifinfo *
364batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, 364batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
365 struct batadv_hard_iface *if_outgoing) 365 struct batadv_hard_iface *if_outgoing)
366{ 366{
367 struct batadv_orig_ifinfo *orig_ifinfo = NULL; 367 struct batadv_orig_ifinfo *orig_ifinfo;
368 unsigned long reset_time; 368 unsigned long reset_time;
369 369
370 spin_lock_bh(&orig_node->neigh_list_lock); 370 spin_lock_bh(&orig_node->neigh_list_lock);
@@ -512,15 +512,17 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
512 * batadv_hardif_neigh_create - create a hardif neighbour node 512 * batadv_hardif_neigh_create - create a hardif neighbour node
513 * @hard_iface: the interface this neighbour is connected to 513 * @hard_iface: the interface this neighbour is connected to
514 * @neigh_addr: the interface address of the neighbour to retrieve 514 * @neigh_addr: the interface address of the neighbour to retrieve
515 * @orig_node: originator object representing the neighbour
515 * 516 *
516 * Return: the hardif neighbour node if found or created or NULL otherwise. 517 * Return: the hardif neighbour node if found or created or NULL otherwise.
517 */ 518 */
518static struct batadv_hardif_neigh_node * 519static struct batadv_hardif_neigh_node *
519batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, 520batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
520 const u8 *neigh_addr) 521 const u8 *neigh_addr,
522 struct batadv_orig_node *orig_node)
521{ 523{
522 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); 524 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
523 struct batadv_hardif_neigh_node *hardif_neigh = NULL; 525 struct batadv_hardif_neigh_node *hardif_neigh;
524 526
525 spin_lock_bh(&hard_iface->neigh_list_lock); 527 spin_lock_bh(&hard_iface->neigh_list_lock);
526 528
@@ -536,6 +538,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
536 kref_get(&hard_iface->refcount); 538 kref_get(&hard_iface->refcount);
537 INIT_HLIST_NODE(&hardif_neigh->list); 539 INIT_HLIST_NODE(&hardif_neigh->list);
538 ether_addr_copy(hardif_neigh->addr, neigh_addr); 540 ether_addr_copy(hardif_neigh->addr, neigh_addr);
541 ether_addr_copy(hardif_neigh->orig, orig_node->orig);
539 hardif_neigh->if_incoming = hard_iface; 542 hardif_neigh->if_incoming = hard_iface;
540 hardif_neigh->last_seen = jiffies; 543 hardif_neigh->last_seen = jiffies;
541 544
@@ -556,21 +559,23 @@ out:
556 * node 559 * node
557 * @hard_iface: the interface this neighbour is connected to 560 * @hard_iface: the interface this neighbour is connected to
558 * @neigh_addr: the interface address of the neighbour to retrieve 561 * @neigh_addr: the interface address of the neighbour to retrieve
562 * @orig_node: originator object representing the neighbour
559 * 563 *
560 * Return: the hardif neighbour node if found or created or NULL otherwise. 564 * Return: the hardif neighbour node if found or created or NULL otherwise.
561 */ 565 */
562static struct batadv_hardif_neigh_node * 566static struct batadv_hardif_neigh_node *
563batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, 567batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
564 const u8 *neigh_addr) 568 const u8 *neigh_addr,
569 struct batadv_orig_node *orig_node)
565{ 570{
566 struct batadv_hardif_neigh_node *hardif_neigh = NULL; 571 struct batadv_hardif_neigh_node *hardif_neigh;
567 572
568 /* first check without locking to avoid the overhead */ 573 /* first check without locking to avoid the overhead */
569 hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr); 574 hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr);
570 if (hardif_neigh) 575 if (hardif_neigh)
571 return hardif_neigh; 576 return hardif_neigh;
572 577
573 return batadv_hardif_neigh_create(hard_iface, neigh_addr); 578 return batadv_hardif_neigh_create(hard_iface, neigh_addr, orig_node);
574} 579}
575 580
576/** 581/**
@@ -630,7 +635,7 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node,
630 goto out; 635 goto out;
631 636
632 hardif_neigh = batadv_hardif_neigh_get_or_create(hard_iface, 637 hardif_neigh = batadv_hardif_neigh_get_or_create(hard_iface,
633 neigh_addr); 638 neigh_addr, orig_node);
634 if (!hardif_neigh) 639 if (!hardif_neigh)
635 goto out; 640 goto out;
636 641
@@ -683,7 +688,7 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
683 struct batadv_hard_iface *hard_iface, 688 struct batadv_hard_iface *hard_iface,
684 const u8 *neigh_addr) 689 const u8 *neigh_addr)
685{ 690{
686 struct batadv_neigh_node *neigh_node = NULL; 691 struct batadv_neigh_node *neigh_node;
687 692
688 /* first check without locking to avoid the overhead */ 693 /* first check without locking to avoid the overhead */
689 neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr); 694 neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr);
@@ -1021,7 +1026,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
1021 batadv_orig_node_vlan_put(vlan); 1026 batadv_orig_node_vlan_put(vlan);
1022 1027
1023 for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) { 1028 for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
1024 INIT_HLIST_HEAD(&orig_node->fragments[i].head); 1029 INIT_HLIST_HEAD(&orig_node->fragments[i].fragment_list);
1025 spin_lock_init(&orig_node->fragments[i].lock); 1030 spin_lock_init(&orig_node->fragments[i].lock);
1026 orig_node->fragments[i].size = 0; 1031 orig_node->fragments[i].size = 0;
1027 } 1032 }
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 6afc0b86950e..7a36bcfa0ba0 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -21,7 +21,7 @@
21#include <asm/byteorder.h> 21#include <asm/byteorder.h>
22#include <linux/types.h> 22#include <linux/types.h>
23 23
24#define batadv_tp_is_error(n) ((u8)n > 127 ? 1 : 0) 24#define batadv_tp_is_error(n) ((u8)(n) > 127 ? 1 : 0)
25 25
26/** 26/**
27 * enum batadv_packettype - types for batman-adv encapsulated packets 27 * enum batadv_packettype - types for batman-adv encapsulated packets
@@ -252,16 +252,6 @@ struct batadv_elp_packet {
252#define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet) 252#define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet)
253 253
254/** 254/**
255 * enum batadv_icmp_user_cmd_type - types for batman-adv icmp cmd modes
256 * @BATADV_TP_START: start a throughput meter run
257 * @BATADV_TP_STOP: stop a throughput meter run
258 */
259enum batadv_icmp_user_cmd_type {
260 BATADV_TP_START = 0,
261 BATADV_TP_STOP = 2,
262};
263
264/**
265 * struct batadv_icmp_header - common members among all the ICMP packets 255 * struct batadv_icmp_header - common members among all the ICMP packets
266 * @packet_type: batman-adv packet type, part of the general header 256 * @packet_type: batman-adv packet type, part of the general header
267 * @version: batman-adv protocol version, part of the genereal header 257 * @version: batman-adv protocol version, part of the genereal header
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 7e8dc648b95a..6713bdf414cd 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -196,8 +196,8 @@ bool batadv_check_management_packet(struct sk_buff *skb,
196 if (!is_broadcast_ether_addr(ethhdr->h_dest)) 196 if (!is_broadcast_ether_addr(ethhdr->h_dest))
197 return false; 197 return false;
198 198
199 /* packet with broadcast sender address */ 199 /* packet with invalid sender address */
200 if (is_broadcast_ether_addr(ethhdr->h_source)) 200 if (!is_valid_ether_addr(ethhdr->h_source))
201 return false; 201 return false;
202 202
203 /* create a copy of the skb, if needed, to modify it. */ 203 /* create a copy of the skb, if needed, to modify it. */
@@ -262,11 +262,11 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
262 icmph->ttl = BATADV_TTL; 262 icmph->ttl = BATADV_TTL;
263 263
264 res = batadv_send_skb_to_orig(skb, orig_node, NULL); 264 res = batadv_send_skb_to_orig(skb, orig_node, NULL);
265 if (res == -1) 265 if (res == NET_XMIT_SUCCESS)
266 goto out; 266 ret = NET_RX_SUCCESS;
267
268 ret = NET_RX_SUCCESS;
269 267
268 /* skb was consumed */
269 skb = NULL;
270 break; 270 break;
271 case BATADV_TP: 271 case BATADV_TP:
272 if (!pskb_may_pull(skb, sizeof(struct batadv_icmp_tp_packet))) 272 if (!pskb_may_pull(skb, sizeof(struct batadv_icmp_tp_packet)))
@@ -274,6 +274,8 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
274 274
275 batadv_tp_meter_recv(bat_priv, skb); 275 batadv_tp_meter_recv(bat_priv, skb);
276 ret = NET_RX_SUCCESS; 276 ret = NET_RX_SUCCESS;
277 /* skb was consumed */
278 skb = NULL;
277 goto out; 279 goto out;
278 default: 280 default:
279 /* drop unknown type */ 281 /* drop unknown type */
@@ -284,6 +286,9 @@ out:
284 batadv_hardif_put(primary_if); 286 batadv_hardif_put(primary_if);
285 if (orig_node) 287 if (orig_node)
286 batadv_orig_node_put(orig_node); 288 batadv_orig_node_put(orig_node);
289
290 kfree_skb(skb);
291
287 return ret; 292 return ret;
288} 293}
289 294
@@ -325,14 +330,20 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
325 icmp_packet->ttl = BATADV_TTL; 330 icmp_packet->ttl = BATADV_TTL;
326 331
327 res = batadv_send_skb_to_orig(skb, orig_node, NULL); 332 res = batadv_send_skb_to_orig(skb, orig_node, NULL);
328 if (res != -1) 333 if (res == NET_RX_SUCCESS)
329 ret = NET_RX_SUCCESS; 334 ret = NET_XMIT_SUCCESS;
335
336 /* skb was consumed */
337 skb = NULL;
330 338
331out: 339out:
332 if (primary_if) 340 if (primary_if)
333 batadv_hardif_put(primary_if); 341 batadv_hardif_put(primary_if);
334 if (orig_node) 342 if (orig_node)
335 batadv_orig_node_put(orig_node); 343 batadv_orig_node_put(orig_node);
344
345 kfree_skb(skb);
346
336 return ret; 347 return ret;
337} 348}
338 349
@@ -349,21 +360,21 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
349 360
350 /* drop packet if it has not necessary minimum size */ 361 /* drop packet if it has not necessary minimum size */
351 if (unlikely(!pskb_may_pull(skb, hdr_size))) 362 if (unlikely(!pskb_may_pull(skb, hdr_size)))
352 goto out; 363 goto free_skb;
353 364
354 ethhdr = eth_hdr(skb); 365 ethhdr = eth_hdr(skb);
355 366
356 /* packet with unicast indication but broadcast recipient */ 367 /* packet with unicast indication but non-unicast recipient */
357 if (is_broadcast_ether_addr(ethhdr->h_dest)) 368 if (!is_valid_ether_addr(ethhdr->h_dest))
358 goto out; 369 goto free_skb;
359 370
360 /* packet with broadcast sender address */ 371 /* packet with broadcast/multicast sender address */
361 if (is_broadcast_ether_addr(ethhdr->h_source)) 372 if (is_multicast_ether_addr(ethhdr->h_source))
362 goto out; 373 goto free_skb;
363 374
364 /* not for me */ 375 /* not for me */
365 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) 376 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
366 goto out; 377 goto free_skb;
367 378
368 icmph = (struct batadv_icmp_header *)skb->data; 379 icmph = (struct batadv_icmp_header *)skb->data;
369 380
@@ -372,17 +383,17 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
372 icmph->msg_type == BATADV_ECHO_REQUEST) && 383 icmph->msg_type == BATADV_ECHO_REQUEST) &&
373 (skb->len >= sizeof(struct batadv_icmp_packet_rr))) { 384 (skb->len >= sizeof(struct batadv_icmp_packet_rr))) {
374 if (skb_linearize(skb) < 0) 385 if (skb_linearize(skb) < 0)
375 goto out; 386 goto free_skb;
376 387
377 /* create a copy of the skb, if needed, to modify it. */ 388 /* create a copy of the skb, if needed, to modify it. */
378 if (skb_cow(skb, ETH_HLEN) < 0) 389 if (skb_cow(skb, ETH_HLEN) < 0)
379 goto out; 390 goto free_skb;
380 391
381 ethhdr = eth_hdr(skb); 392 ethhdr = eth_hdr(skb);
382 icmph = (struct batadv_icmp_header *)skb->data; 393 icmph = (struct batadv_icmp_header *)skb->data;
383 icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph; 394 icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph;
384 if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN) 395 if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN)
385 goto out; 396 goto free_skb;
386 397
387 ether_addr_copy(icmp_packet_rr->rr[icmp_packet_rr->rr_cur], 398 ether_addr_copy(icmp_packet_rr->rr[icmp_packet_rr->rr_cur],
388 ethhdr->h_dest); 399 ethhdr->h_dest);
@@ -400,11 +411,11 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
400 /* get routing information */ 411 /* get routing information */
401 orig_node = batadv_orig_hash_find(bat_priv, icmph->dst); 412 orig_node = batadv_orig_hash_find(bat_priv, icmph->dst);
402 if (!orig_node) 413 if (!orig_node)
403 goto out; 414 goto free_skb;
404 415
405 /* create a copy of the skb, if needed, to modify it. */ 416 /* create a copy of the skb, if needed, to modify it. */
406 if (skb_cow(skb, ETH_HLEN) < 0) 417 if (skb_cow(skb, ETH_HLEN) < 0)
407 goto out; 418 goto put_orig_node;
408 419
409 icmph = (struct batadv_icmp_header *)skb->data; 420 icmph = (struct batadv_icmp_header *)skb->data;
410 421
@@ -413,12 +424,18 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
413 424
414 /* route it */ 425 /* route it */
415 res = batadv_send_skb_to_orig(skb, orig_node, recv_if); 426 res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
416 if (res != -1) 427 if (res == NET_XMIT_SUCCESS)
417 ret = NET_RX_SUCCESS; 428 ret = NET_RX_SUCCESS;
418 429
419out: 430 /* skb was consumed */
431 skb = NULL;
432
433put_orig_node:
420 if (orig_node) 434 if (orig_node)
421 batadv_orig_node_put(orig_node); 435 batadv_orig_node_put(orig_node);
436free_skb:
437 kfree_skb(skb);
438
422 return ret; 439 return ret;
423} 440}
424 441
@@ -445,12 +462,12 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
445 462
446 ethhdr = eth_hdr(skb); 463 ethhdr = eth_hdr(skb);
447 464
448 /* packet with unicast indication but broadcast recipient */ 465 /* packet with unicast indication but non-unicast recipient */
449 if (is_broadcast_ether_addr(ethhdr->h_dest)) 466 if (!is_valid_ether_addr(ethhdr->h_dest))
450 return -EBADR; 467 return -EBADR;
451 468
452 /* packet with broadcast sender address */ 469 /* packet with broadcast/multicast sender address */
453 if (is_broadcast_ether_addr(ethhdr->h_source)) 470 if (is_multicast_ether_addr(ethhdr->h_source))
454 return -EBADR; 471 return -EBADR;
455 472
456 /* not for me */ 473 /* not for me */
@@ -667,18 +684,18 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
667 if (unicast_packet->ttl < 2) { 684 if (unicast_packet->ttl < 2) {
668 pr_debug("Warning - can't forward unicast packet from %pM to %pM: ttl exceeded\n", 685 pr_debug("Warning - can't forward unicast packet from %pM to %pM: ttl exceeded\n",
669 ethhdr->h_source, unicast_packet->dest); 686 ethhdr->h_source, unicast_packet->dest);
670 goto out; 687 goto free_skb;
671 } 688 }
672 689
673 /* get routing information */ 690 /* get routing information */
674 orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest); 691 orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest);
675 692
676 if (!orig_node) 693 if (!orig_node)
677 goto out; 694 goto free_skb;
678 695
679 /* create a copy of the skb, if needed, to modify it. */ 696 /* create a copy of the skb, if needed, to modify it. */
680 if (skb_cow(skb, ETH_HLEN) < 0) 697 if (skb_cow(skb, ETH_HLEN) < 0)
681 goto out; 698 goto put_orig_node;
682 699
683 /* decrement ttl */ 700 /* decrement ttl */
684 unicast_packet = (struct batadv_unicast_packet *)skb->data; 701 unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -702,8 +719,11 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
702 719
703 len = skb->len; 720 len = skb->len;
704 res = batadv_send_skb_to_orig(skb, orig_node, recv_if); 721 res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
705 if (res == -1) 722 if (res == NET_XMIT_SUCCESS)
706 goto out; 723 ret = NET_RX_SUCCESS;
724
725 /* skb was consumed */
726 skb = NULL;
707 727
708 /* translate transmit result into receive result */ 728 /* translate transmit result into receive result */
709 if (res == NET_XMIT_SUCCESS) { 729 if (res == NET_XMIT_SUCCESS) {
@@ -713,11 +733,11 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
713 len + ETH_HLEN); 733 len + ETH_HLEN);
714 } 734 }
715 735
716 ret = NET_RX_SUCCESS; 736put_orig_node:
737 batadv_orig_node_put(orig_node);
738free_skb:
739 kfree_skb(skb);
717 740
718out:
719 if (orig_node)
720 batadv_orig_node_put(orig_node);
721 return ret; 741 return ret;
722} 742}
723 743
@@ -902,14 +922,18 @@ int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
902 922
903 check = batadv_check_unicast_packet(bat_priv, skb, hdr_size); 923 check = batadv_check_unicast_packet(bat_priv, skb, hdr_size);
904 if (check < 0) 924 if (check < 0)
905 return NET_RX_DROP; 925 goto free_skb;
906 926
907 /* we don't know about this type, drop it. */ 927 /* we don't know about this type, drop it. */
908 unicast_packet = (struct batadv_unicast_packet *)skb->data; 928 unicast_packet = (struct batadv_unicast_packet *)skb->data;
909 if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) 929 if (batadv_is_my_mac(bat_priv, unicast_packet->dest))
910 return NET_RX_DROP; 930 goto free_skb;
911 931
912 return batadv_route_unicast_packet(skb, recv_if); 932 return batadv_route_unicast_packet(skb, recv_if);
933
934free_skb:
935 kfree_skb(skb);
936 return NET_RX_DROP;
913} 937}
914 938
915int batadv_recv_unicast_packet(struct sk_buff *skb, 939int batadv_recv_unicast_packet(struct sk_buff *skb,
@@ -923,6 +947,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
923 int check, hdr_size = sizeof(*unicast_packet); 947 int check, hdr_size = sizeof(*unicast_packet);
924 enum batadv_subtype subtype; 948 enum batadv_subtype subtype;
925 bool is4addr; 949 bool is4addr;
950 int ret = NET_RX_DROP;
926 951
927 unicast_packet = (struct batadv_unicast_packet *)skb->data; 952 unicast_packet = (struct batadv_unicast_packet *)skb->data;
928 unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; 953 unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
@@ -942,9 +967,9 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
942 batadv_nc_skb_store_sniffed_unicast(bat_priv, skb); 967 batadv_nc_skb_store_sniffed_unicast(bat_priv, skb);
943 968
944 if (check < 0) 969 if (check < 0)
945 return NET_RX_DROP; 970 goto free_skb;
946 if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size)) 971 if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
947 return NET_RX_DROP; 972 goto free_skb;
948 973
949 /* packet for me */ 974 /* packet for me */
950 if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { 975 if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
@@ -982,7 +1007,14 @@ rx_success:
982 return NET_RX_SUCCESS; 1007 return NET_RX_SUCCESS;
983 } 1008 }
984 1009
985 return batadv_route_unicast_packet(skb, recv_if); 1010 ret = batadv_route_unicast_packet(skb, recv_if);
1011 /* skb was consumed */
1012 skb = NULL;
1013
1014free_skb:
1015 kfree_skb(skb);
1016
1017 return ret;
986} 1018}
987 1019
988/** 1020/**
@@ -1004,15 +1036,15 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
1004 int ret = NET_RX_DROP; 1036 int ret = NET_RX_DROP;
1005 1037
1006 if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) 1038 if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
1007 return NET_RX_DROP; 1039 goto free_skb;
1008 1040
1009 /* the header is likely to be modified while forwarding */ 1041 /* the header is likely to be modified while forwarding */
1010 if (skb_cow(skb, hdr_size) < 0) 1042 if (skb_cow(skb, hdr_size) < 0)
1011 return NET_RX_DROP; 1043 goto free_skb;
1012 1044
1013 /* packet needs to be linearized to access the tvlv content */ 1045 /* packet needs to be linearized to access the tvlv content */
1014 if (skb_linearize(skb) < 0) 1046 if (skb_linearize(skb) < 0)
1015 return NET_RX_DROP; 1047 goto free_skb;
1016 1048
1017 unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)skb->data; 1049 unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)skb->data;
1018 1050
@@ -1020,17 +1052,21 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
1020 tvlv_buff_len = ntohs(unicast_tvlv_packet->tvlv_len); 1052 tvlv_buff_len = ntohs(unicast_tvlv_packet->tvlv_len);
1021 1053
1022 if (tvlv_buff_len > skb->len - hdr_size) 1054 if (tvlv_buff_len > skb->len - hdr_size)
1023 return NET_RX_DROP; 1055 goto free_skb;
1024 1056
1025 ret = batadv_tvlv_containers_process(bat_priv, false, NULL, 1057 ret = batadv_tvlv_containers_process(bat_priv, false, NULL,
1026 unicast_tvlv_packet->src, 1058 unicast_tvlv_packet->src,
1027 unicast_tvlv_packet->dst, 1059 unicast_tvlv_packet->dst,
1028 tvlv_buff, tvlv_buff_len); 1060 tvlv_buff, tvlv_buff_len);
1029 1061
1030 if (ret != NET_RX_SUCCESS) 1062 if (ret != NET_RX_SUCCESS) {
1031 ret = batadv_route_unicast_packet(skb, recv_if); 1063 ret = batadv_route_unicast_packet(skb, recv_if);
1032 else 1064 /* skb was consumed */
1033 consume_skb(skb); 1065 skb = NULL;
1066 }
1067
1068free_skb:
1069 kfree_skb(skb);
1034 1070
1035 return ret; 1071 return ret;
1036} 1072}
@@ -1056,20 +1092,22 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
1056 1092
1057 if (batadv_check_unicast_packet(bat_priv, skb, 1093 if (batadv_check_unicast_packet(bat_priv, skb,
1058 sizeof(*frag_packet)) < 0) 1094 sizeof(*frag_packet)) < 0)
1059 goto out; 1095 goto free_skb;
1060 1096
1061 frag_packet = (struct batadv_frag_packet *)skb->data; 1097 frag_packet = (struct batadv_frag_packet *)skb->data;
1062 orig_node_src = batadv_orig_hash_find(bat_priv, frag_packet->orig); 1098 orig_node_src = batadv_orig_hash_find(bat_priv, frag_packet->orig);
1063 if (!orig_node_src) 1099 if (!orig_node_src)
1064 goto out; 1100 goto free_skb;
1065 1101
1066 skb->priority = frag_packet->priority + 256; 1102 skb->priority = frag_packet->priority + 256;
1067 1103
1068 /* Route the fragment if it is not for us and too big to be merged. */ 1104 /* Route the fragment if it is not for us and too big to be merged. */
1069 if (!batadv_is_my_mac(bat_priv, frag_packet->dest) && 1105 if (!batadv_is_my_mac(bat_priv, frag_packet->dest) &&
1070 batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) { 1106 batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) {
1107 /* skb was consumed */
1108 skb = NULL;
1071 ret = NET_RX_SUCCESS; 1109 ret = NET_RX_SUCCESS;
1072 goto out; 1110 goto put_orig_node;
1073 } 1111 }
1074 1112
1075 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_RX); 1113 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_RX);
@@ -1077,20 +1115,24 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
1077 1115
1078 /* Add fragment to buffer and merge if possible. */ 1116 /* Add fragment to buffer and merge if possible. */
1079 if (!batadv_frag_skb_buffer(&skb, orig_node_src)) 1117 if (!batadv_frag_skb_buffer(&skb, orig_node_src))
1080 goto out; 1118 goto put_orig_node;
1081 1119
1082 /* Deliver merged packet to the appropriate handler, if it was 1120 /* Deliver merged packet to the appropriate handler, if it was
1083 * merged 1121 * merged
1084 */ 1122 */
1085 if (skb) 1123 if (skb) {
1086 batadv_batman_skb_recv(skb, recv_if->net_dev, 1124 batadv_batman_skb_recv(skb, recv_if->net_dev,
1087 &recv_if->batman_adv_ptype, NULL); 1125 &recv_if->batman_adv_ptype, NULL);
1126 /* skb was consumed */
1127 skb = NULL;
1128 }
1088 1129
1089 ret = NET_RX_SUCCESS; 1130 ret = NET_RX_SUCCESS;
1090 1131
1091out: 1132put_orig_node:
1092 if (orig_node_src) 1133 batadv_orig_node_put(orig_node_src);
1093 batadv_orig_node_put(orig_node_src); 1134free_skb:
1135 kfree_skb(skb);
1094 1136
1095 return ret; 1137 return ret;
1096} 1138}
@@ -1109,35 +1151,35 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
1109 1151
1110 /* drop packet if it has not necessary minimum size */ 1152 /* drop packet if it has not necessary minimum size */
1111 if (unlikely(!pskb_may_pull(skb, hdr_size))) 1153 if (unlikely(!pskb_may_pull(skb, hdr_size)))
1112 goto out; 1154 goto free_skb;
1113 1155
1114 ethhdr = eth_hdr(skb); 1156 ethhdr = eth_hdr(skb);
1115 1157
1116 /* packet with broadcast indication but unicast recipient */ 1158 /* packet with broadcast indication but unicast recipient */
1117 if (!is_broadcast_ether_addr(ethhdr->h_dest)) 1159 if (!is_broadcast_ether_addr(ethhdr->h_dest))
1118 goto out; 1160 goto free_skb;
1119 1161
1120 /* packet with broadcast sender address */ 1162 /* packet with broadcast/multicast sender address */
1121 if (is_broadcast_ether_addr(ethhdr->h_source)) 1163 if (is_multicast_ether_addr(ethhdr->h_source))
1122 goto out; 1164 goto free_skb;
1123 1165
1124 /* ignore broadcasts sent by myself */ 1166 /* ignore broadcasts sent by myself */
1125 if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) 1167 if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
1126 goto out; 1168 goto free_skb;
1127 1169
1128 bcast_packet = (struct batadv_bcast_packet *)skb->data; 1170 bcast_packet = (struct batadv_bcast_packet *)skb->data;
1129 1171
1130 /* ignore broadcasts originated by myself */ 1172 /* ignore broadcasts originated by myself */
1131 if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) 1173 if (batadv_is_my_mac(bat_priv, bcast_packet->orig))
1132 goto out; 1174 goto free_skb;
1133 1175
1134 if (bcast_packet->ttl < 2) 1176 if (bcast_packet->ttl < 2)
1135 goto out; 1177 goto free_skb;
1136 1178
1137 orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig); 1179 orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig);
1138 1180
1139 if (!orig_node) 1181 if (!orig_node)
1140 goto out; 1182 goto free_skb;
1141 1183
1142 spin_lock_bh(&orig_node->bcast_seqno_lock); 1184 spin_lock_bh(&orig_node->bcast_seqno_lock);
1143 1185
@@ -1165,18 +1207,18 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
1165 1207
1166 /* check whether this has been sent by another originator before */ 1208 /* check whether this has been sent by another originator before */
1167 if (batadv_bla_check_bcast_duplist(bat_priv, skb)) 1209 if (batadv_bla_check_bcast_duplist(bat_priv, skb))
1168 goto out; 1210 goto free_skb;
1169 1211
1170 batadv_skb_set_priority(skb, sizeof(struct batadv_bcast_packet)); 1212 batadv_skb_set_priority(skb, sizeof(struct batadv_bcast_packet));
1171 1213
1172 /* rebroadcast packet */ 1214 /* rebroadcast packet */
1173 batadv_add_bcast_packet_to_list(bat_priv, skb, 1); 1215 batadv_add_bcast_packet_to_list(bat_priv, skb, 1, false);
1174 1216
1175 /* don't hand the broadcast up if it is from an originator 1217 /* don't hand the broadcast up if it is from an originator
1176 * from the same backbone. 1218 * from the same backbone.
1177 */ 1219 */
1178 if (batadv_bla_is_backbone_gw(skb, orig_node, hdr_size)) 1220 if (batadv_bla_is_backbone_gw(skb, orig_node, hdr_size))
1179 goto out; 1221 goto free_skb;
1180 1222
1181 if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, hdr_size)) 1223 if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, hdr_size))
1182 goto rx_success; 1224 goto rx_success;
@@ -1192,6 +1234,8 @@ rx_success:
1192 1234
1193spin_unlock: 1235spin_unlock:
1194 spin_unlock_bh(&orig_node->bcast_seqno_lock); 1236 spin_unlock_bh(&orig_node->bcast_seqno_lock);
1237free_skb:
1238 kfree_skb(skb);
1195out: 1239out:
1196 if (orig_node) 1240 if (orig_node)
1197 batadv_orig_node_put(orig_node); 1241 batadv_orig_node_put(orig_node);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 8d4e1f578574..49021b7124f3 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -19,6 +19,7 @@
19#include "main.h" 19#include "main.h"
20 20
21#include <linux/atomic.h> 21#include <linux/atomic.h>
22#include <linux/bug.h>
22#include <linux/byteorder/generic.h> 23#include <linux/byteorder/generic.h>
23#include <linux/errno.h> 24#include <linux/errno.h>
24#include <linux/etherdevice.h> 25#include <linux/etherdevice.h>
@@ -64,8 +65,11 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
64 * If neigh_node is NULL, then the packet is broadcasted using hard_iface, 65 * If neigh_node is NULL, then the packet is broadcasted using hard_iface,
65 * otherwise it is sent as unicast to the given neighbor. 66 * otherwise it is sent as unicast to the given neighbor.
66 * 67 *
67 * Return: NET_TX_DROP in case of error or the result of dev_queue_xmit(skb) 68 * Regardless of the return value, the skb is consumed.
68 * otherwise 69 *
70 * Return: A negative errno code is returned on a failure. A success does not
71 * guarantee the frame will be transmitted as it may be dropped due
72 * to congestion or traffic shaping.
69 */ 73 */
70int batadv_send_skb_packet(struct sk_buff *skb, 74int batadv_send_skb_packet(struct sk_buff *skb,
71 struct batadv_hard_iface *hard_iface, 75 struct batadv_hard_iface *hard_iface,
@@ -73,7 +77,6 @@ int batadv_send_skb_packet(struct sk_buff *skb,
73{ 77{
74 struct batadv_priv *bat_priv; 78 struct batadv_priv *bat_priv;
75 struct ethhdr *ethhdr; 79 struct ethhdr *ethhdr;
76 int ret;
77 80
78 bat_priv = netdev_priv(hard_iface->soft_iface); 81 bat_priv = netdev_priv(hard_iface->soft_iface);
79 82
@@ -111,15 +114,8 @@ int batadv_send_skb_packet(struct sk_buff *skb,
111 /* dev_queue_xmit() returns a negative result on error. However on 114 /* dev_queue_xmit() returns a negative result on error. However on
112 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP 115 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP
113 * (which is > 0). This will not be treated as an error. 116 * (which is > 0). This will not be treated as an error.
114 *
115 * a negative value cannot be returned because it could be interepreted
116 * as not consumed skb by callers of batadv_send_skb_to_orig.
117 */ 117 */
118 ret = dev_queue_xmit(skb); 118 return dev_queue_xmit(skb);
119 if (ret < 0)
120 ret = NET_XMIT_DROP;
121
122 return ret;
123send_skb_err: 119send_skb_err:
124 kfree_skb(skb); 120 kfree_skb(skb);
125 return NET_XMIT_DROP; 121 return NET_XMIT_DROP;
@@ -165,11 +161,9 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
165 * host, NULL can be passed as recv_if and no interface alternating is 161 * host, NULL can be passed as recv_if and no interface alternating is
166 * attempted. 162 * attempted.
167 * 163 *
168 * Return: -1 on failure (and the skb is not consumed), -EINPROGRESS if the 164 * Return: negative errno code on a failure, -EINPROGRESS if the skb is
169 * skb is buffered for later transmit or the NET_XMIT status returned by the 165 * buffered for later transmit or the NET_XMIT status returned by the
170 * lower routine if the packet has been passed down. 166 * lower routine if the packet has been passed down.
171 *
172 * If the returning value is not -1 the skb has been consumed.
173 */ 167 */
174int batadv_send_skb_to_orig(struct sk_buff *skb, 168int batadv_send_skb_to_orig(struct sk_buff *skb,
175 struct batadv_orig_node *orig_node, 169 struct batadv_orig_node *orig_node,
@@ -177,12 +171,14 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
177{ 171{
178 struct batadv_priv *bat_priv = orig_node->bat_priv; 172 struct batadv_priv *bat_priv = orig_node->bat_priv;
179 struct batadv_neigh_node *neigh_node; 173 struct batadv_neigh_node *neigh_node;
180 int ret = -1; 174 int ret;
181 175
182 /* batadv_find_router() increases neigh_nodes refcount if found. */ 176 /* batadv_find_router() increases neigh_nodes refcount if found. */
183 neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); 177 neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
184 if (!neigh_node) 178 if (!neigh_node) {
185 goto out; 179 ret = -EINVAL;
180 goto free_skb;
181 }
186 182
187 /* Check if the skb is too large to send in one piece and fragment 183 /* Check if the skb is too large to send in one piece and fragment
188 * it if needed. 184 * it if needed.
@@ -191,8 +187,10 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
191 skb->len > neigh_node->if_incoming->net_dev->mtu) { 187 skb->len > neigh_node->if_incoming->net_dev->mtu) {
192 /* Fragment and send packet. */ 188 /* Fragment and send packet. */
193 ret = batadv_frag_send_packet(skb, orig_node, neigh_node); 189 ret = batadv_frag_send_packet(skb, orig_node, neigh_node);
190 /* skb was consumed */
191 skb = NULL;
194 192
195 goto out; 193 goto put_neigh_node;
196 } 194 }
197 195
198 /* try to network code the packet, if it is received on an interface 196 /* try to network code the packet, if it is received on an interface
@@ -204,9 +202,13 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
204 else 202 else
205 ret = batadv_send_unicast_skb(skb, neigh_node); 203 ret = batadv_send_unicast_skb(skb, neigh_node);
206 204
207out: 205 /* skb was consumed */
208 if (neigh_node) 206 skb = NULL;
209 batadv_neigh_node_put(neigh_node); 207
208put_neigh_node:
209 batadv_neigh_node_put(neigh_node);
210free_skb:
211 kfree_skb(skb);
210 212
211 return ret; 213 return ret;
212} 214}
@@ -327,7 +329,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
327{ 329{
328 struct batadv_unicast_packet *unicast_packet; 330 struct batadv_unicast_packet *unicast_packet;
329 struct ethhdr *ethhdr; 331 struct ethhdr *ethhdr;
330 int res, ret = NET_XMIT_DROP; 332 int ret = NET_XMIT_DROP;
331 333
332 if (!orig_node) 334 if (!orig_node)
333 goto out; 335 goto out;
@@ -364,13 +366,12 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
364 if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) 366 if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid))
365 unicast_packet->ttvn = unicast_packet->ttvn - 1; 367 unicast_packet->ttvn = unicast_packet->ttvn - 1;
366 368
367 res = batadv_send_skb_to_orig(skb, orig_node, NULL); 369 ret = batadv_send_skb_to_orig(skb, orig_node, NULL);
368 if (res != -1) 370 /* skb was consumed */
369 ret = NET_XMIT_SUCCESS; 371 skb = NULL;
370 372
371out: 373out:
372 if (ret == NET_XMIT_DROP) 374 kfree_skb(skb);
373 kfree_skb(skb);
374 return ret; 375 return ret;
375} 376}
376 377
@@ -451,13 +452,19 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
451/** 452/**
452 * batadv_forw_packet_free - free a forwarding packet 453 * batadv_forw_packet_free - free a forwarding packet
453 * @forw_packet: The packet to free 454 * @forw_packet: The packet to free
455 * @dropped: whether the packet is freed because is is dropped
454 * 456 *
455 * This frees a forwarding packet and releases any resources it might 457 * This frees a forwarding packet and releases any resources it might
456 * have claimed. 458 * have claimed.
457 */ 459 */
458void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) 460void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
461 bool dropped)
459{ 462{
460 kfree_skb(forw_packet->skb); 463 if (dropped)
464 kfree_skb(forw_packet->skb);
465 else
466 consume_skb(forw_packet->skb);
467
461 if (forw_packet->if_incoming) 468 if (forw_packet->if_incoming)
462 batadv_hardif_put(forw_packet->if_incoming); 469 batadv_hardif_put(forw_packet->if_incoming);
463 if (forw_packet->if_outgoing) 470 if (forw_packet->if_outgoing)
@@ -514,6 +521,8 @@ batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
514 if (if_outgoing) 521 if (if_outgoing)
515 kref_get(&if_outgoing->refcount); 522 kref_get(&if_outgoing->refcount);
516 523
524 INIT_HLIST_NODE(&forw_packet->list);
525 INIT_HLIST_NODE(&forw_packet->cleanup_list);
517 forw_packet->skb = NULL; 526 forw_packet->skb = NULL;
518 forw_packet->queue_left = queue_left; 527 forw_packet->queue_left = queue_left;
519 forw_packet->if_incoming = if_incoming; 528 forw_packet->if_incoming = if_incoming;
@@ -529,19 +538,191 @@ err:
529 return NULL; 538 return NULL;
530} 539}
531 540
541/**
542 * batadv_forw_packet_was_stolen - check whether someone stole this packet
543 * @forw_packet: the forwarding packet to check
544 *
545 * This function checks whether the given forwarding packet was claimed by
546 * someone else for free().
547 *
548 * Return: True if someone stole it, false otherwise.
549 */
550static bool
551batadv_forw_packet_was_stolen(struct batadv_forw_packet *forw_packet)
552{
553 return !hlist_unhashed(&forw_packet->cleanup_list);
554}
555
556/**
557 * batadv_forw_packet_steal - claim a forw_packet for free()
558 * @forw_packet: the forwarding packet to steal
559 * @lock: a key to the store to steal from (e.g. forw_{bat,bcast}_list_lock)
560 *
561 * This function tries to steal a specific forw_packet from global
562 * visibility for the purpose of getting it for free(). That means
563 * the caller is *not* allowed to requeue it afterwards.
564 *
565 * Return: True if stealing was successful. False if someone else stole it
566 * before us.
567 */
568bool batadv_forw_packet_steal(struct batadv_forw_packet *forw_packet,
569 spinlock_t *lock)
570{
571 /* did purging routine steal it earlier? */
572 spin_lock_bh(lock);
573 if (batadv_forw_packet_was_stolen(forw_packet)) {
574 spin_unlock_bh(lock);
575 return false;
576 }
577
578 hlist_del_init(&forw_packet->list);
579
580 /* Just to spot misuse of this function */
581 hlist_add_fake(&forw_packet->cleanup_list);
582
583 spin_unlock_bh(lock);
584 return true;
585}
586
587/**
588 * batadv_forw_packet_list_steal - claim a list of forward packets for free()
589 * @forw_list: the to be stolen forward packets
590 * @cleanup_list: a backup pointer, to be able to dispose the packet later
591 * @hard_iface: the interface to steal forward packets from
592 *
593 * This function claims responsibility to free any forw_packet queued on the
594 * given hard_iface. If hard_iface is NULL forwarding packets on all hard
595 * interfaces will be claimed.
596 *
597 * The packets are being moved from the forw_list to the cleanup_list and
598 * by that allows already running threads to notice the claiming.
599 */
532static void 600static void
533_batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, 601batadv_forw_packet_list_steal(struct hlist_head *forw_list,
534 struct batadv_forw_packet *forw_packet, 602 struct hlist_head *cleanup_list,
535 unsigned long send_time) 603 const struct batadv_hard_iface *hard_iface)
536{ 604{
537 /* add new packet to packet list */ 605 struct batadv_forw_packet *forw_packet;
538 spin_lock_bh(&bat_priv->forw_bcast_list_lock); 606 struct hlist_node *safe_tmp_node;
539 hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list); 607
540 spin_unlock_bh(&bat_priv->forw_bcast_list_lock); 608 hlist_for_each_entry_safe(forw_packet, safe_tmp_node,
609 forw_list, list) {
610 /* if purge_outstanding_packets() was called with an argument
611 * we delete only packets belonging to the given interface
612 */
613 if (hard_iface &&
614 (forw_packet->if_incoming != hard_iface) &&
615 (forw_packet->if_outgoing != hard_iface))
616 continue;
617
618 hlist_del(&forw_packet->list);
619 hlist_add_head(&forw_packet->cleanup_list, cleanup_list);
620 }
621}
622
623/**
624 * batadv_forw_packet_list_free - free a list of forward packets
625 * @head: a list of to be freed forw_packets
626 *
627 * This function cancels the scheduling of any packet in the provided list,
628 * waits for any possibly running packet forwarding thread to finish and
629 * finally, safely frees this forward packet.
630 *
631 * This function might sleep.
632 */
633static void batadv_forw_packet_list_free(struct hlist_head *head)
634{
635 struct batadv_forw_packet *forw_packet;
636 struct hlist_node *safe_tmp_node;
637
638 hlist_for_each_entry_safe(forw_packet, safe_tmp_node, head,
639 cleanup_list) {
640 cancel_delayed_work_sync(&forw_packet->delayed_work);
641
642 hlist_del(&forw_packet->cleanup_list);
643 batadv_forw_packet_free(forw_packet, true);
644 }
645}
646
647/**
648 * batadv_forw_packet_queue - try to queue a forwarding packet
649 * @forw_packet: the forwarding packet to queue
650 * @lock: a key to the store (e.g. forw_{bat,bcast}_list_lock)
651 * @head: the shelve to queue it on (e.g. forw_{bat,bcast}_list)
652 * @send_time: timestamp (jiffies) when the packet is to be sent
653 *
654 * This function tries to (re)queue a forwarding packet. Requeuing
655 * is prevented if the according interface is shutting down
656 * (e.g. if batadv_forw_packet_list_steal() was called for this
657 * packet earlier).
658 *
659 * Calling batadv_forw_packet_queue() after a call to
660 * batadv_forw_packet_steal() is forbidden!
661 *
662 * Caller needs to ensure that forw_packet->delayed_work was initialized.
663 */
664static void batadv_forw_packet_queue(struct batadv_forw_packet *forw_packet,
665 spinlock_t *lock, struct hlist_head *head,
666 unsigned long send_time)
667{
668 spin_lock_bh(lock);
669
670 /* did purging routine steal it from us? */
671 if (batadv_forw_packet_was_stolen(forw_packet)) {
672 /* If you got it for free() without trouble, then
673 * don't get back into the queue after stealing...
674 */
675 WARN_ONCE(hlist_fake(&forw_packet->cleanup_list),
676 "Requeuing after batadv_forw_packet_steal() not allowed!\n");
541 677
542 /* start timer for this packet */ 678 spin_unlock_bh(lock);
543 queue_delayed_work(batadv_event_workqueue, &forw_packet->delayed_work, 679 return;
544 send_time); 680 }
681
682 hlist_del_init(&forw_packet->list);
683 hlist_add_head(&forw_packet->list, head);
684
685 queue_delayed_work(batadv_event_workqueue,
686 &forw_packet->delayed_work,
687 send_time - jiffies);
688 spin_unlock_bh(lock);
689}
690
691/**
692 * batadv_forw_packet_bcast_queue - try to queue a broadcast packet
693 * @bat_priv: the bat priv with all the soft interface information
694 * @forw_packet: the forwarding packet to queue
695 * @send_time: timestamp (jiffies) when the packet is to be sent
696 *
697 * This function tries to (re)queue a broadcast packet.
698 *
699 * Caller needs to ensure that forw_packet->delayed_work was initialized.
700 */
701static void
702batadv_forw_packet_bcast_queue(struct batadv_priv *bat_priv,
703 struct batadv_forw_packet *forw_packet,
704 unsigned long send_time)
705{
706 batadv_forw_packet_queue(forw_packet, &bat_priv->forw_bcast_list_lock,
707 &bat_priv->forw_bcast_list, send_time);
708}
709
710/**
711 * batadv_forw_packet_ogmv1_queue - try to queue an OGMv1 packet
712 * @bat_priv: the bat priv with all the soft interface information
713 * @forw_packet: the forwarding packet to queue
714 * @send_time: timestamp (jiffies) when the packet is to be sent
715 *
716 * This function tries to (re)queue an OGMv1 packet.
717 *
718 * Caller needs to ensure that forw_packet->delayed_work was initialized.
719 */
720void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
721 struct batadv_forw_packet *forw_packet,
722 unsigned long send_time)
723{
724 batadv_forw_packet_queue(forw_packet, &bat_priv->forw_bat_list_lock,
725 &bat_priv->forw_bat_list, send_time);
545} 726}
546 727
547/** 728/**
@@ -549,6 +730,7 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
549 * @bat_priv: the bat priv with all the soft interface information 730 * @bat_priv: the bat priv with all the soft interface information
550 * @skb: broadcast packet to add 731 * @skb: broadcast packet to add
551 * @delay: number of jiffies to wait before sending 732 * @delay: number of jiffies to wait before sending
733 * @own_packet: true if it is a self-generated broadcast packet
552 * 734 *
553 * add a broadcast packet to the queue and setup timers. broadcast packets 735 * add a broadcast packet to the queue and setup timers. broadcast packets
554 * are sent multiple times to increase probability for being received. 736 * are sent multiple times to increase probability for being received.
@@ -560,9 +742,10 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
560 */ 742 */
561int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, 743int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
562 const struct sk_buff *skb, 744 const struct sk_buff *skb,
563 unsigned long delay) 745 unsigned long delay,
746 bool own_packet)
564{ 747{
565 struct batadv_hard_iface *primary_if = NULL; 748 struct batadv_hard_iface *primary_if;
566 struct batadv_forw_packet *forw_packet; 749 struct batadv_forw_packet *forw_packet;
567 struct batadv_bcast_packet *bcast_packet; 750 struct batadv_bcast_packet *bcast_packet;
568 struct sk_buff *newskb; 751 struct sk_buff *newskb;
@@ -586,18 +769,17 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
586 bcast_packet = (struct batadv_bcast_packet *)newskb->data; 769 bcast_packet = (struct batadv_bcast_packet *)newskb->data;
587 bcast_packet->ttl--; 770 bcast_packet->ttl--;
588 771
589 skb_reset_mac_header(newskb);
590
591 forw_packet->skb = newskb; 772 forw_packet->skb = newskb;
773 forw_packet->own = own_packet;
592 774
593 INIT_DELAYED_WORK(&forw_packet->delayed_work, 775 INIT_DELAYED_WORK(&forw_packet->delayed_work,
594 batadv_send_outstanding_bcast_packet); 776 batadv_send_outstanding_bcast_packet);
595 777
596 _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay); 778 batadv_forw_packet_bcast_queue(bat_priv, forw_packet, jiffies + delay);
597 return NETDEV_TX_OK; 779 return NETDEV_TX_OK;
598 780
599err_packet_free: 781err_packet_free:
600 batadv_forw_packet_free(forw_packet); 782 batadv_forw_packet_free(forw_packet, true);
601err: 783err:
602 return NETDEV_TX_BUSY; 784 return NETDEV_TX_BUSY;
603} 785}
@@ -605,11 +787,18 @@ err:
605static void batadv_send_outstanding_bcast_packet(struct work_struct *work) 787static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
606{ 788{
607 struct batadv_hard_iface *hard_iface; 789 struct batadv_hard_iface *hard_iface;
790 struct batadv_hardif_neigh_node *neigh_node;
608 struct delayed_work *delayed_work; 791 struct delayed_work *delayed_work;
609 struct batadv_forw_packet *forw_packet; 792 struct batadv_forw_packet *forw_packet;
793 struct batadv_bcast_packet *bcast_packet;
610 struct sk_buff *skb1; 794 struct sk_buff *skb1;
611 struct net_device *soft_iface; 795 struct net_device *soft_iface;
612 struct batadv_priv *bat_priv; 796 struct batadv_priv *bat_priv;
797 unsigned long send_time = jiffies + msecs_to_jiffies(5);
798 bool dropped = false;
799 u8 *neigh_addr;
800 u8 *orig_neigh;
801 int ret = 0;
613 802
614 delayed_work = to_delayed_work(work); 803 delayed_work = to_delayed_work(work);
615 forw_packet = container_of(delayed_work, struct batadv_forw_packet, 804 forw_packet = container_of(delayed_work, struct batadv_forw_packet,
@@ -617,15 +806,17 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
617 soft_iface = forw_packet->if_incoming->soft_iface; 806 soft_iface = forw_packet->if_incoming->soft_iface;
618 bat_priv = netdev_priv(soft_iface); 807 bat_priv = netdev_priv(soft_iface);
619 808
620 spin_lock_bh(&bat_priv->forw_bcast_list_lock); 809 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) {
621 hlist_del(&forw_packet->list); 810 dropped = true;
622 spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
623
624 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
625 goto out; 811 goto out;
812 }
626 813
627 if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet)) 814 if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet)) {
815 dropped = true;
628 goto out; 816 goto out;
817 }
818
819 bcast_packet = (struct batadv_bcast_packet *)forw_packet->skb->data;
629 820
630 /* rebroadcast packet */ 821 /* rebroadcast packet */
631 rcu_read_lock(); 822 rcu_read_lock();
@@ -636,6 +827,49 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
636 if (forw_packet->num_packets >= hard_iface->num_bcasts) 827 if (forw_packet->num_packets >= hard_iface->num_bcasts)
637 continue; 828 continue;
638 829
830 if (forw_packet->own) {
831 neigh_node = NULL;
832 } else {
833 neigh_addr = eth_hdr(forw_packet->skb)->h_source;
834 neigh_node = batadv_hardif_neigh_get(hard_iface,
835 neigh_addr);
836 }
837
838 orig_neigh = neigh_node ? neigh_node->orig : NULL;
839
840 ret = batadv_hardif_no_broadcast(hard_iface, bcast_packet->orig,
841 orig_neigh);
842
843 if (ret) {
844 char *type;
845
846 switch (ret) {
847 case BATADV_HARDIF_BCAST_NORECIPIENT:
848 type = "no neighbor";
849 break;
850 case BATADV_HARDIF_BCAST_DUPFWD:
851 type = "single neighbor is source";
852 break;
853 case BATADV_HARDIF_BCAST_DUPORIG:
854 type = "single neighbor is originator";
855 break;
856 default:
857 type = "unknown";
858 }
859
860 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s surpressed: %s\n",
861 bcast_packet->orig,
862 hard_iface->net_dev->name, type);
863
864 if (neigh_node)
865 batadv_hardif_neigh_put(neigh_node);
866
867 continue;
868 }
869
870 if (neigh_node)
871 batadv_hardif_neigh_put(neigh_node);
872
639 if (!kref_get_unless_zero(&hard_iface->refcount)) 873 if (!kref_get_unless_zero(&hard_iface->refcount))
640 continue; 874 continue;
641 875
@@ -652,22 +886,34 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
652 886
653 /* if we still have some more bcasts to send */ 887 /* if we still have some more bcasts to send */
654 if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) { 888 if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) {
655 _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, 889 batadv_forw_packet_bcast_queue(bat_priv, forw_packet,
656 msecs_to_jiffies(5)); 890 send_time);
657 return; 891 return;
658 } 892 }
659 893
660out: 894out:
661 batadv_forw_packet_free(forw_packet); 895 /* do we get something for free()? */
896 if (batadv_forw_packet_steal(forw_packet,
897 &bat_priv->forw_bcast_list_lock))
898 batadv_forw_packet_free(forw_packet, dropped);
662} 899}
663 900
901/**
902 * batadv_purge_outstanding_packets - stop/purge scheduled bcast/OGMv1 packets
903 * @bat_priv: the bat priv with all the soft interface information
904 * @hard_iface: the hard interface to cancel and purge bcast/ogm packets on
905 *
906 * This method cancels and purges any broadcast and OGMv1 packet on the given
907 * hard_iface. If hard_iface is NULL, broadcast and OGMv1 packets on all hard
908 * interfaces will be canceled and purged.
909 *
910 * This function might sleep.
911 */
664void 912void
665batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, 913batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
666 const struct batadv_hard_iface *hard_iface) 914 const struct batadv_hard_iface *hard_iface)
667{ 915{
668 struct batadv_forw_packet *forw_packet; 916 struct hlist_head head = HLIST_HEAD_INIT;
669 struct hlist_node *safe_tmp_node;
670 bool pending;
671 917
672 if (hard_iface) 918 if (hard_iface)
673 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 919 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -677,57 +923,18 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
677 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 923 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
678 "purge_outstanding_packets()\n"); 924 "purge_outstanding_packets()\n");
679 925
680 /* free bcast list */ 926 /* claim bcast list for free() */
681 spin_lock_bh(&bat_priv->forw_bcast_list_lock); 927 spin_lock_bh(&bat_priv->forw_bcast_list_lock);
682 hlist_for_each_entry_safe(forw_packet, safe_tmp_node, 928 batadv_forw_packet_list_steal(&bat_priv->forw_bcast_list, &head,
683 &bat_priv->forw_bcast_list, list) { 929 hard_iface);
684 /* if purge_outstanding_packets() was called with an argument
685 * we delete only packets belonging to the given interface
686 */
687 if ((hard_iface) &&
688 (forw_packet->if_incoming != hard_iface) &&
689 (forw_packet->if_outgoing != hard_iface))
690 continue;
691
692 spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
693
694 /* batadv_send_outstanding_bcast_packet() will lock the list to
695 * delete the item from the list
696 */
697 pending = cancel_delayed_work_sync(&forw_packet->delayed_work);
698 spin_lock_bh(&bat_priv->forw_bcast_list_lock);
699
700 if (pending) {
701 hlist_del(&forw_packet->list);
702 batadv_forw_packet_free(forw_packet);
703 }
704 }
705 spin_unlock_bh(&bat_priv->forw_bcast_list_lock); 930 spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
706 931
707 /* free batman packet list */ 932 /* claim batman packet list for free() */
708 spin_lock_bh(&bat_priv->forw_bat_list_lock); 933 spin_lock_bh(&bat_priv->forw_bat_list_lock);
709 hlist_for_each_entry_safe(forw_packet, safe_tmp_node, 934 batadv_forw_packet_list_steal(&bat_priv->forw_bat_list, &head,
710 &bat_priv->forw_bat_list, list) { 935 hard_iface);
711 /* if purge_outstanding_packets() was called with an argument
712 * we delete only packets belonging to the given interface
713 */
714 if ((hard_iface) &&
715 (forw_packet->if_incoming != hard_iface) &&
716 (forw_packet->if_outgoing != hard_iface))
717 continue;
718
719 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
720
721 /* send_outstanding_bat_packet() will lock the list to
722 * delete the item from the list
723 */
724 pending = cancel_delayed_work_sync(&forw_packet->delayed_work);
725 spin_lock_bh(&bat_priv->forw_bat_list_lock);
726
727 if (pending) {
728 hlist_del(&forw_packet->list);
729 batadv_forw_packet_free(forw_packet);
730 }
731 }
732 spin_unlock_bh(&bat_priv->forw_bat_list_lock); 936 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
937
938 /* then cancel or wait for packet workers to finish and free */
939 batadv_forw_packet_list_free(&head);
733} 940}
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 999f78683d9e..a94e1e8639ca 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -21,18 +21,24 @@
21#include "main.h" 21#include "main.h"
22 22
23#include <linux/compiler.h> 23#include <linux/compiler.h>
24#include <linux/spinlock.h>
24#include <linux/types.h> 25#include <linux/types.h>
25 26
26#include "packet.h" 27#include "packet.h"
27 28
28struct sk_buff; 29struct sk_buff;
29 30
30void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet); 31void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
32 bool dropped);
31struct batadv_forw_packet * 33struct batadv_forw_packet *
32batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, 34batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
33 struct batadv_hard_iface *if_outgoing, 35 struct batadv_hard_iface *if_outgoing,
34 atomic_t *queue_left, 36 atomic_t *queue_left,
35 struct batadv_priv *bat_priv); 37 struct batadv_priv *bat_priv);
38bool batadv_forw_packet_steal(struct batadv_forw_packet *packet, spinlock_t *l);
39void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
40 struct batadv_forw_packet *forw_packet,
41 unsigned long send_time);
36 42
37int batadv_send_skb_to_orig(struct sk_buff *skb, 43int batadv_send_skb_to_orig(struct sk_buff *skb,
38 struct batadv_orig_node *orig_node, 44 struct batadv_orig_node *orig_node,
@@ -46,7 +52,8 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
46 struct batadv_neigh_node *neigh_node); 52 struct batadv_neigh_node *neigh_node);
47int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, 53int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
48 const struct sk_buff *skb, 54 const struct sk_buff *skb,
49 unsigned long delay); 55 unsigned long delay,
56 bool own_packet);
50void 57void
51batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, 58batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
52 const struct batadv_hard_iface *hard_iface); 59 const struct batadv_hard_iface *hard_iface);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 49e16b6e0ba3..7b3494ae6ad9 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -22,6 +22,7 @@
22#include <linux/byteorder/generic.h> 22#include <linux/byteorder/generic.h>
23#include <linux/cache.h> 23#include <linux/cache.h>
24#include <linux/compiler.h> 24#include <linux/compiler.h>
25#include <linux/cpumask.h>
25#include <linux/errno.h> 26#include <linux/errno.h>
26#include <linux/etherdevice.h> 27#include <linux/etherdevice.h>
27#include <linux/ethtool.h> 28#include <linux/ethtool.h>
@@ -116,6 +117,26 @@ static int batadv_interface_release(struct net_device *dev)
116 return 0; 117 return 0;
117} 118}
118 119
120/**
121 * batadv_sum_counter - Sum the cpu-local counters for index 'idx'
122 * @bat_priv: the bat priv with all the soft interface information
123 * @idx: index of counter to sum up
124 *
125 * Return: sum of all cpu-local counters
126 */
127static u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
128{
129 u64 *counters, sum = 0;
130 int cpu;
131
132 for_each_possible_cpu(cpu) {
133 counters = per_cpu_ptr(bat_priv->bat_counters, cpu);
134 sum += counters[idx];
135 }
136
137 return sum;
138}
139
119static struct net_device_stats *batadv_interface_stats(struct net_device *dev) 140static struct net_device_stats *batadv_interface_stats(struct net_device *dev)
120{ 141{
121 struct batadv_priv *bat_priv = netdev_priv(dev); 142 struct batadv_priv *bat_priv = netdev_priv(dev);
@@ -336,12 +357,12 @@ send:
336 seqno = atomic_inc_return(&bat_priv->bcast_seqno); 357 seqno = atomic_inc_return(&bat_priv->bcast_seqno);
337 bcast_packet->seqno = htonl(seqno); 358 bcast_packet->seqno = htonl(seqno);
338 359
339 batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay); 360 batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay, true);
340 361
341 /* a copy is stored in the bcast list, therefore removing 362 /* a copy is stored in the bcast list, therefore removing
342 * the original skb. 363 * the original skb.
343 */ 364 */
344 kfree_skb(skb); 365 consume_skb(skb);
345 366
346 /* unicast packet */ 367 /* unicast packet */
347 } else { 368 } else {
@@ -365,7 +386,7 @@ send:
365 ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint, 386 ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint,
366 vid); 387 vid);
367 } 388 }
368 if (ret == NET_XMIT_DROP) 389 if (ret != NET_XMIT_SUCCESS)
369 goto dropped_freed; 390 goto dropped_freed;
370 } 391 }
371 392
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 02d96f224c60..17c844196eb2 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -33,7 +33,6 @@
33#include <linux/rcupdate.h> 33#include <linux/rcupdate.h>
34#include <linux/rtnetlink.h> 34#include <linux/rtnetlink.h>
35#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/stat.h>
37#include <linux/stddef.h> 36#include <linux/stddef.h>
38#include <linux/string.h> 37#include <linux/string.h>
39#include <linux/stringify.h> 38#include <linux/stringify.h>
@@ -666,41 +665,36 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj,
666 return count; 665 return count;
667} 666}
668 667
669BATADV_ATTR_SIF_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL); 668BATADV_ATTR_SIF_BOOL(aggregated_ogms, 0644, NULL);
670BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); 669BATADV_ATTR_SIF_BOOL(bonding, 0644, NULL);
671#ifdef CONFIG_BATMAN_ADV_BLA 670#ifdef CONFIG_BATMAN_ADV_BLA
672BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, 671BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, 0644, batadv_bla_status_update);
673 batadv_bla_status_update);
674#endif 672#endif
675#ifdef CONFIG_BATMAN_ADV_DAT 673#ifdef CONFIG_BATMAN_ADV_DAT
676BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR, 674BATADV_ATTR_SIF_BOOL(distributed_arp_table, 0644, batadv_dat_status_update);
677 batadv_dat_status_update);
678#endif 675#endif
679BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu); 676BATADV_ATTR_SIF_BOOL(fragmentation, 0644, batadv_update_min_mtu);
680static BATADV_ATTR(routing_algo, S_IRUGO, batadv_show_bat_algo, NULL); 677static BATADV_ATTR(routing_algo, 0444, batadv_show_bat_algo, NULL);
681static BATADV_ATTR(gw_mode, S_IRUGO | S_IWUSR, batadv_show_gw_mode, 678static BATADV_ATTR(gw_mode, 0644, batadv_show_gw_mode, batadv_store_gw_mode);
682 batadv_store_gw_mode); 679BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, 0644, 2 * BATADV_JITTER,
683BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR, 680 INT_MAX, NULL);
684 2 * BATADV_JITTER, INT_MAX, NULL); 681BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, 0644, 0, BATADV_TQ_MAX_VALUE,
685BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0, 682 NULL);
686 BATADV_TQ_MAX_VALUE, NULL); 683static BATADV_ATTR(gw_sel_class, 0644, batadv_show_gw_sel_class,
687static BATADV_ATTR(gw_sel_class, S_IRUGO | S_IWUSR, batadv_show_gw_sel_class,
688 batadv_store_gw_sel_class); 684 batadv_store_gw_sel_class);
689static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, 685static BATADV_ATTR(gw_bandwidth, 0644, batadv_show_gw_bwidth,
690 batadv_store_gw_bwidth); 686 batadv_store_gw_bwidth);
691#ifdef CONFIG_BATMAN_ADV_MCAST 687#ifdef CONFIG_BATMAN_ADV_MCAST
692BATADV_ATTR_SIF_BOOL(multicast_mode, S_IRUGO | S_IWUSR, NULL); 688BATADV_ATTR_SIF_BOOL(multicast_mode, 0644, NULL);
693#endif 689#endif
694#ifdef CONFIG_BATMAN_ADV_DEBUG 690#ifdef CONFIG_BATMAN_ADV_DEBUG
695BATADV_ATTR_SIF_UINT(log_level, log_level, S_IRUGO | S_IWUSR, 0, 691BATADV_ATTR_SIF_UINT(log_level, log_level, 0644, 0, BATADV_DBG_ALL, NULL);
696 BATADV_DBG_ALL, NULL);
697#endif 692#endif
698#ifdef CONFIG_BATMAN_ADV_NC 693#ifdef CONFIG_BATMAN_ADV_NC
699BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, 694BATADV_ATTR_SIF_BOOL(network_coding, 0644, batadv_nc_status_update);
700 batadv_nc_status_update);
701#endif 695#endif
702static BATADV_ATTR(isolation_mark, S_IRUGO | S_IWUSR, 696static BATADV_ATTR(isolation_mark, 0644, batadv_show_isolation_mark,
703 batadv_show_isolation_mark, batadv_store_isolation_mark); 697 batadv_store_isolation_mark);
704 698
705static struct batadv_attribute *batadv_mesh_attrs[] = { 699static struct batadv_attribute *batadv_mesh_attrs[] = {
706 &batadv_attr_aggregated_ogms, 700 &batadv_attr_aggregated_ogms,
@@ -731,7 +725,7 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
731 NULL, 725 NULL,
732}; 726};
733 727
734BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); 728BATADV_ATTR_VLAN_BOOL(ap_isolation, 0644, NULL);
735 729
736/* array of vlan specific sysfs attributes */ 730/* array of vlan specific sysfs attributes */
737static struct batadv_attribute *batadv_vlan_attrs[] = { 731static struct batadv_attribute *batadv_vlan_attrs[] = {
@@ -1116,14 +1110,13 @@ static ssize_t batadv_show_throughput_override(struct kobject *kobj,
1116 1110
1117#endif 1111#endif
1118 1112
1119static BATADV_ATTR(mesh_iface, S_IRUGO | S_IWUSR, batadv_show_mesh_iface, 1113static BATADV_ATTR(mesh_iface, 0644, batadv_show_mesh_iface,
1120 batadv_store_mesh_iface); 1114 batadv_store_mesh_iface);
1121static BATADV_ATTR(iface_status, S_IRUGO, batadv_show_iface_status, NULL); 1115static BATADV_ATTR(iface_status, 0444, batadv_show_iface_status, NULL);
1122#ifdef CONFIG_BATMAN_ADV_BATMAN_V 1116#ifdef CONFIG_BATMAN_ADV_BATMAN_V
1123BATADV_ATTR_HIF_UINT(elp_interval, bat_v.elp_interval, S_IRUGO | S_IWUSR, 1117BATADV_ATTR_HIF_UINT(elp_interval, bat_v.elp_interval, 0644,
1124 2 * BATADV_JITTER, INT_MAX, NULL); 1118 2 * BATADV_JITTER, INT_MAX, NULL);
1125static BATADV_ATTR(throughput_override, S_IRUGO | S_IWUSR, 1119static BATADV_ATTR(throughput_override, 0644, batadv_show_throughput_override,
1126 batadv_show_throughput_override,
1127 batadv_store_throughput_override); 1120 batadv_store_throughput_override);
1128#endif 1121#endif
1129 1122
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 8af1611b8ab2..981e8c5b07e9 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -615,9 +615,6 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src,
615 batadv_tp_fill_prerandom(tp_vars, data, data_len); 615 batadv_tp_fill_prerandom(tp_vars, data, data_len);
616 616
617 r = batadv_send_skb_to_orig(skb, orig_node, NULL); 617 r = batadv_send_skb_to_orig(skb, orig_node, NULL);
618 if (r == -1)
619 kfree_skb(skb);
620
621 if (r == NET_XMIT_SUCCESS) 618 if (r == NET_XMIT_SUCCESS)
622 return 0; 619 return 0;
623 620
@@ -1207,9 +1204,6 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
1207 1204
1208 /* send the ack */ 1205 /* send the ack */
1209 r = batadv_send_skb_to_orig(skb, orig_node, NULL); 1206 r = batadv_send_skb_to_orig(skb, orig_node, NULL);
1210 if (r == -1)
1211 kfree_skb(skb);
1212
1213 if (unlikely(r < 0) || (r == NET_XMIT_DROP)) { 1207 if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
1214 ret = BATADV_TP_REASON_DST_UNREACHABLE; 1208 ret = BATADV_TP_REASON_DST_UNREACHABLE;
1215 goto out; 1209 goto out;
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 0dc85eb1cb7a..30ecbfb40adf 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -56,7 +56,6 @@
56#include "hard-interface.h" 56#include "hard-interface.h"
57#include "hash.h" 57#include "hash.h"
58#include "log.h" 58#include "log.h"
59#include "multicast.h"
60#include "netlink.h" 59#include "netlink.h"
61#include "originator.h" 60#include "originator.h"
62#include "packet.h" 61#include "packet.h"
@@ -647,6 +646,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
647 struct net *net = dev_net(soft_iface); 646 struct net *net = dev_net(soft_iface);
648 struct batadv_softif_vlan *vlan; 647 struct batadv_softif_vlan *vlan;
649 struct net_device *in_dev = NULL; 648 struct net_device *in_dev = NULL;
649 struct batadv_hard_iface *in_hardif = NULL;
650 struct hlist_head *head; 650 struct hlist_head *head;
651 struct batadv_tt_orig_list_entry *orig_entry; 651 struct batadv_tt_orig_list_entry *orig_entry;
652 int hash_added, table_size, packet_size_max; 652 int hash_added, table_size, packet_size_max;
@@ -658,6 +658,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
658 if (ifindex != BATADV_NULL_IFINDEX) 658 if (ifindex != BATADV_NULL_IFINDEX)
659 in_dev = dev_get_by_index(net, ifindex); 659 in_dev = dev_get_by_index(net, ifindex);
660 660
661 if (in_dev)
662 in_hardif = batadv_hardif_get_by_netdev(in_dev);
663
661 tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid); 664 tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid);
662 665
663 if (!is_multicast_ether_addr(addr)) 666 if (!is_multicast_ether_addr(addr))
@@ -731,7 +734,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
731 */ 734 */
732 tt_local->common.flags = BATADV_TT_CLIENT_NEW; 735 tt_local->common.flags = BATADV_TT_CLIENT_NEW;
733 tt_local->common.vid = vid; 736 tt_local->common.vid = vid;
734 if (batadv_is_wifi_netdev(in_dev)) 737 if (batadv_is_wifi_hardif(in_hardif))
735 tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; 738 tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
736 kref_init(&tt_local->common.refcount); 739 kref_init(&tt_local->common.refcount);
737 tt_local->last_seen = jiffies; 740 tt_local->last_seen = jiffies;
@@ -791,7 +794,7 @@ check_roaming:
791 */ 794 */
792 remote_flags = tt_local->common.flags & BATADV_TT_REMOTE_MASK; 795 remote_flags = tt_local->common.flags & BATADV_TT_REMOTE_MASK;
793 796
794 if (batadv_is_wifi_netdev(in_dev)) 797 if (batadv_is_wifi_hardif(in_hardif))
795 tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; 798 tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
796 else 799 else
797 tt_local->common.flags &= ~BATADV_TT_CLIENT_WIFI; 800 tt_local->common.flags &= ~BATADV_TT_CLIENT_WIFI;
@@ -815,6 +818,8 @@ check_roaming:
815 818
816 ret = true; 819 ret = true;
817out: 820out:
821 if (in_hardif)
822 batadv_hardif_put(in_hardif);
818 if (in_dev) 823 if (in_dev)
819 dev_put(in_dev); 824 dev_put(in_dev);
820 if (tt_local) 825 if (tt_local)
@@ -3795,9 +3800,6 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv)
3795{ 3800{
3796 lockdep_assert_held(&bat_priv->tt.commit_lock); 3801 lockdep_assert_held(&bat_priv->tt.commit_lock);
3797 3802
3798 /* Update multicast addresses in local translation table */
3799 batadv_mcast_mla_update(bat_priv);
3800
3801 if (atomic_read(&bat_priv->tt.local_changes) < 1) { 3803 if (atomic_read(&bat_priv->tt.local_changes) < 1) {
3802 if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt)) 3804 if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt))
3803 batadv_tt_tvlv_container_update(bat_priv); 3805 batadv_tt_tvlv_container_update(bat_priv);
@@ -3835,8 +3837,8 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
3835bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, 3837bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
3836 unsigned short vid) 3838 unsigned short vid)
3837{ 3839{
3838 struct batadv_tt_local_entry *tt_local_entry = NULL; 3840 struct batadv_tt_local_entry *tt_local_entry;
3839 struct batadv_tt_global_entry *tt_global_entry = NULL; 3841 struct batadv_tt_global_entry *tt_global_entry;
3840 struct batadv_softif_vlan *vlan; 3842 struct batadv_softif_vlan *vlan;
3841 bool ret = false; 3843 bool ret = false;
3842 3844
@@ -3845,27 +3847,24 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
3845 return false; 3847 return false;
3846 3848
3847 if (!atomic_read(&vlan->ap_isolation)) 3849 if (!atomic_read(&vlan->ap_isolation))
3848 goto out; 3850 goto vlan_put;
3849 3851
3850 tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst, vid); 3852 tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst, vid);
3851 if (!tt_local_entry) 3853 if (!tt_local_entry)
3852 goto out; 3854 goto vlan_put;
3853 3855
3854 tt_global_entry = batadv_tt_global_hash_find(bat_priv, src, vid); 3856 tt_global_entry = batadv_tt_global_hash_find(bat_priv, src, vid);
3855 if (!tt_global_entry) 3857 if (!tt_global_entry)
3856 goto out; 3858 goto local_entry_put;
3857 3859
3858 if (!_batadv_is_ap_isolated(tt_local_entry, tt_global_entry)) 3860 if (_batadv_is_ap_isolated(tt_local_entry, tt_global_entry))
3859 goto out; 3861 ret = true;
3860
3861 ret = true;
3862 3862
3863out: 3863 batadv_tt_global_entry_put(tt_global_entry);
3864local_entry_put:
3865 batadv_tt_local_entry_put(tt_local_entry);
3866vlan_put:
3864 batadv_softif_vlan_put(vlan); 3867 batadv_softif_vlan_put(vlan);
3865 if (tt_global_entry)
3866 batadv_tt_global_entry_put(tt_global_entry);
3867 if (tt_local_entry)
3868 batadv_tt_local_entry_put(tt_local_entry);
3869 return ret; 3868 return ret;
3870} 3869}
3871 3870
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 77654f055f24..a783420356ae 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -600,7 +600,6 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
600 unsigned char *tvlv_buff; 600 unsigned char *tvlv_buff;
601 unsigned int tvlv_len; 601 unsigned int tvlv_len;
602 ssize_t hdr_len = sizeof(*unicast_tvlv_packet); 602 ssize_t hdr_len = sizeof(*unicast_tvlv_packet);
603 int res;
604 603
605 orig_node = batadv_orig_hash_find(bat_priv, dst); 604 orig_node = batadv_orig_hash_find(bat_priv, dst);
606 if (!orig_node) 605 if (!orig_node)
@@ -633,9 +632,7 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
633 tvlv_buff += sizeof(*tvlv_hdr); 632 tvlv_buff += sizeof(*tvlv_hdr);
634 memcpy(tvlv_buff, tvlv_value, tvlv_value_len); 633 memcpy(tvlv_buff, tvlv_value, tvlv_value_len);
635 634
636 res = batadv_send_skb_to_orig(skb, orig_node, NULL); 635 batadv_send_skb_to_orig(skb, orig_node, NULL);
637 if (res == -1)
638 kfree_skb(skb);
639out: 636out:
640 batadv_orig_node_put(orig_node); 637 batadv_orig_node_put(orig_node);
641} 638}
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index b3dd1a381aad..e913aee28c98 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -119,12 +119,28 @@ struct batadv_hard_iface_bat_v {
119}; 119};
120 120
121/** 121/**
122 * enum batadv_hard_iface_wifi_flags - Flags describing the wifi configuration
123 * of a batadv_hard_iface
124 * @BATADV_HARDIF_WIFI_WEXT_DIRECT: it is a wext wifi device
125 * @BATADV_HARDIF_WIFI_CFG80211_DIRECT: it is a cfg80211 wifi device
126 * @BATADV_HARDIF_WIFI_WEXT_INDIRECT: link device is a wext wifi device
127 * @BATADV_HARDIF_WIFI_CFG80211_INDIRECT: link device is a cfg80211 wifi device
128 */
129enum batadv_hard_iface_wifi_flags {
130 BATADV_HARDIF_WIFI_WEXT_DIRECT = BIT(0),
131 BATADV_HARDIF_WIFI_CFG80211_DIRECT = BIT(1),
132 BATADV_HARDIF_WIFI_WEXT_INDIRECT = BIT(2),
133 BATADV_HARDIF_WIFI_CFG80211_INDIRECT = BIT(3),
134};
135
136/**
122 * struct batadv_hard_iface - network device known to batman-adv 137 * struct batadv_hard_iface - network device known to batman-adv
123 * @list: list node for batadv_hardif_list 138 * @list: list node for batadv_hardif_list
124 * @if_num: identificator of the interface 139 * @if_num: identificator of the interface
125 * @if_status: status of the interface for batman-adv 140 * @if_status: status of the interface for batman-adv
126 * @net_dev: pointer to the net_device
127 * @num_bcasts: number of payload re-broadcasts on this interface (ARQ) 141 * @num_bcasts: number of payload re-broadcasts on this interface (ARQ)
142 * @wifi_flags: flags whether this is (directly or indirectly) a wifi interface
143 * @net_dev: pointer to the net_device
128 * @hardif_obj: kobject of the per interface sysfs "mesh" directory 144 * @hardif_obj: kobject of the per interface sysfs "mesh" directory
129 * @refcount: number of contexts the object is used 145 * @refcount: number of contexts the object is used
130 * @batman_adv_ptype: packet type describing packets that should be processed by 146 * @batman_adv_ptype: packet type describing packets that should be processed by
@@ -141,8 +157,9 @@ struct batadv_hard_iface {
141 struct list_head list; 157 struct list_head list;
142 s16 if_num; 158 s16 if_num;
143 char if_status; 159 char if_status;
144 struct net_device *net_dev;
145 u8 num_bcasts; 160 u8 num_bcasts;
161 u32 wifi_flags;
162 struct net_device *net_dev;
146 struct kobject *hardif_obj; 163 struct kobject *hardif_obj;
147 struct kref refcount; 164 struct kref refcount;
148 struct packet_type batman_adv_ptype; 165 struct packet_type batman_adv_ptype;
@@ -184,7 +201,7 @@ struct batadv_orig_ifinfo {
184 201
185/** 202/**
186 * struct batadv_frag_table_entry - head in the fragment buffer table 203 * struct batadv_frag_table_entry - head in the fragment buffer table
187 * @head: head of list with fragments 204 * @fragment_list: head of list with fragments
188 * @lock: lock to protect the list of fragments 205 * @lock: lock to protect the list of fragments
189 * @timestamp: time (jiffie) of last received fragment 206 * @timestamp: time (jiffie) of last received fragment
190 * @seqno: sequence number of the fragments in the list 207 * @seqno: sequence number of the fragments in the list
@@ -192,8 +209,8 @@ struct batadv_orig_ifinfo {
192 * @total_size: expected size of the assembled packet 209 * @total_size: expected size of the assembled packet
193 */ 210 */
194struct batadv_frag_table_entry { 211struct batadv_frag_table_entry {
195 struct hlist_head head; 212 struct hlist_head fragment_list;
196 spinlock_t lock; /* protects head */ 213 spinlock_t lock; /* protects fragment_list */
197 unsigned long timestamp; 214 unsigned long timestamp;
198 u16 seqno; 215 u16 seqno;
199 u16 size; 216 u16 size;
@@ -408,6 +425,7 @@ struct batadv_hardif_neigh_node_bat_v {
408 * struct batadv_hardif_neigh_node - unique neighbor per hard-interface 425 * struct batadv_hardif_neigh_node - unique neighbor per hard-interface
409 * @list: list node for batadv_hard_iface::neigh_list 426 * @list: list node for batadv_hard_iface::neigh_list
410 * @addr: the MAC address of the neighboring interface 427 * @addr: the MAC address of the neighboring interface
428 * @orig: the address of the originator this neighbor node belongs to
411 * @if_incoming: pointer to incoming hard-interface 429 * @if_incoming: pointer to incoming hard-interface
412 * @last_seen: when last packet via this neighbor was received 430 * @last_seen: when last packet via this neighbor was received
413 * @bat_v: B.A.T.M.A.N. V private data 431 * @bat_v: B.A.T.M.A.N. V private data
@@ -417,6 +435,7 @@ struct batadv_hardif_neigh_node_bat_v {
417struct batadv_hardif_neigh_node { 435struct batadv_hardif_neigh_node {
418 struct hlist_node list; 436 struct hlist_node list;
419 u8 addr[ETH_ALEN]; 437 u8 addr[ETH_ALEN];
438 u8 orig[ETH_ALEN];
420 struct batadv_hard_iface *if_incoming; 439 struct batadv_hard_iface *if_incoming;
421 unsigned long last_seen; 440 unsigned long last_seen;
422#ifdef CONFIG_BATMAN_ADV_BATMAN_V 441#ifdef CONFIG_BATMAN_ADV_BATMAN_V
@@ -706,8 +725,8 @@ struct batadv_priv_debug_log {
706 725
707/** 726/**
708 * struct batadv_priv_gw - per mesh interface gateway data 727 * struct batadv_priv_gw - per mesh interface gateway data
709 * @list: list of available gateway nodes 728 * @gateway_list: list of available gateway nodes
710 * @list_lock: lock protecting gw_list & curr_gw 729 * @list_lock: lock protecting gateway_list & curr_gw
711 * @curr_gw: pointer to currently selected gateway node 730 * @curr_gw: pointer to currently selected gateway node
712 * @mode: gateway operation: off, client or server (see batadv_gw_modes) 731 * @mode: gateway operation: off, client or server (see batadv_gw_modes)
713 * @sel_class: gateway selection class (applies if gw_mode client) 732 * @sel_class: gateway selection class (applies if gw_mode client)
@@ -716,8 +735,8 @@ struct batadv_priv_debug_log {
716 * @reselect: bool indicating a gateway re-selection is in progress 735 * @reselect: bool indicating a gateway re-selection is in progress
717 */ 736 */
718struct batadv_priv_gw { 737struct batadv_priv_gw {
719 struct hlist_head list; 738 struct hlist_head gateway_list;
720 spinlock_t list_lock; /* protects gw_list & curr_gw */ 739 spinlock_t list_lock; /* protects gateway_list & curr_gw */
721 struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ 740 struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */
722 atomic_t mode; 741 atomic_t mode;
723 atomic_t sel_class; 742 atomic_t sel_class;
@@ -785,9 +804,10 @@ struct batadv_mcast_querier_state {
785 * @num_want_all_ipv6: counter for items in want_all_ipv6_list 804 * @num_want_all_ipv6: counter for items in want_all_ipv6_list
786 * @want_lists_lock: lock for protecting modifications to mcast want lists 805 * @want_lists_lock: lock for protecting modifications to mcast want lists
787 * (traversals are rcu-locked) 806 * (traversals are rcu-locked)
807 * @work: work queue callback item for multicast TT and TVLV updates
788 */ 808 */
789struct batadv_priv_mcast { 809struct batadv_priv_mcast {
790 struct hlist_head mla_list; 810 struct hlist_head mla_list; /* see __batadv_mcast_mla_update() */
791 struct hlist_head want_all_unsnoopables_list; 811 struct hlist_head want_all_unsnoopables_list;
792 struct hlist_head want_all_ipv4_list; 812 struct hlist_head want_all_ipv4_list;
793 struct hlist_head want_all_ipv6_list; 813 struct hlist_head want_all_ipv6_list;
@@ -802,6 +822,7 @@ struct batadv_priv_mcast {
802 atomic_t num_want_all_ipv6; 822 atomic_t num_want_all_ipv6;
803 /* protects want_all_{unsnoopables,ipv4,ipv6}_list */ 823 /* protects want_all_{unsnoopables,ipv4,ipv6}_list */
804 spinlock_t want_lists_lock; 824 spinlock_t want_lists_lock;
825 struct delayed_work work;
805}; 826};
806#endif 827#endif
807 828
@@ -1363,7 +1384,8 @@ struct batadv_skb_cb {
1363 1384
1364/** 1385/**
1365 * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded 1386 * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded
1366 * @list: list node for batadv_socket_client::queue_list 1387 * @list: list node for batadv_priv::forw_{bat,bcast}_list
1388 * @cleanup_list: list node for purging functions
1367 * @send_time: execution time for delayed_work (packet sending) 1389 * @send_time: execution time for delayed_work (packet sending)
1368 * @own: bool for locally generated packets (local OGMs are re-scheduled after 1390 * @own: bool for locally generated packets (local OGMs are re-scheduled after
1369 * sending) 1391 * sending)
@@ -1380,6 +1402,7 @@ struct batadv_skb_cb {
1380 */ 1402 */
1381struct batadv_forw_packet { 1403struct batadv_forw_packet {
1382 struct hlist_node list; 1404 struct hlist_node list;
1405 struct hlist_node cleanup_list;
1383 unsigned long send_time; 1406 unsigned long send_time;
1384 u8 own; 1407 u8 own;
1385 struct sk_buff *skb; 1408 struct sk_buff *skb;
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index b3ff12eb9b6d..4bfaa19a5573 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -20,5 +20,3 @@ bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
20bluetooth-$(CONFIG_BT_LEDS) += leds.o 20bluetooth-$(CONFIG_BT_LEDS) += leds.o
21bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o 21bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o
22bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o 22bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o
23
24subdir-ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index f4fcb4a9d5c1..2b875edf77e1 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -211,7 +211,6 @@ static const struct net_device_ops bnep_netdev_ops = {
211 .ndo_set_rx_mode = bnep_net_set_mc_list, 211 .ndo_set_rx_mode = bnep_net_set_mc_list,
212 .ndo_set_mac_address = bnep_net_set_mac_addr, 212 .ndo_set_mac_address = bnep_net_set_mac_addr,
213 .ndo_tx_timeout = bnep_net_timeout, 213 .ndo_tx_timeout = bnep_net_timeout,
214 .ndo_change_mtu = eth_change_mtu,
215 214
216}; 215};
217 216
@@ -222,6 +221,8 @@ void bnep_net_setup(struct net_device *dev)
222 dev->addr_len = ETH_ALEN; 221 dev->addr_len = ETH_ALEN;
223 222
224 ether_setup(dev); 223 ether_setup(dev);
224 dev->min_mtu = 0;
225 dev->max_mtu = ETH_MAX_MTU;
225 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 226 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
226 dev->netdev_ops = &bnep_netdev_ops; 227 dev->netdev_ops = &bnep_netdev_ops;
227 228
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 577f1c01454a..ce0b5dd01953 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -2127,7 +2127,7 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
2127 struct sk_buff **frag; 2127 struct sk_buff **frag;
2128 int sent = 0; 2128 int sent = 0;
2129 2129
2130 if (copy_from_iter(skb_put(skb, count), count, &msg->msg_iter) != count) 2130 if (!copy_from_iter_full(skb_put(skb, count), count, &msg->msg_iter))
2131 return -EFAULT; 2131 return -EFAULT;
2132 2132
2133 sent += count; 2133 sent += count;
@@ -2147,8 +2147,8 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
2147 2147
2148 *frag = tmp; 2148 *frag = tmp;
2149 2149
2150 if (copy_from_iter(skb_put(*frag, count), count, 2150 if (!copy_from_iter_full(skb_put(*frag, count), count,
2151 &msg->msg_iter) != count) 2151 &msg->msg_iter))
2152 return -EFAULT; 2152 return -EFAULT;
2153 2153
2154 sent += count; 2154 sent += count;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 43faf2aea2ab..fae391f1871f 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -57,7 +57,7 @@
57#define SMP_TIMEOUT msecs_to_jiffies(30000) 57#define SMP_TIMEOUT msecs_to_jiffies(30000)
58 58
59#define AUTH_REQ_MASK(dev) (hci_dev_test_flag(dev, HCI_SC_ENABLED) ? \ 59#define AUTH_REQ_MASK(dev) (hci_dev_test_flag(dev, HCI_SC_ENABLED) ? \
60 0x1f : 0x07) 60 0x3f : 0x07)
61#define KEY_DIST_MASK 0x07 61#define KEY_DIST_MASK 0x07
62 62
63/* Maximum message length that can be passed to aes_cmac */ 63/* Maximum message length that can be passed to aes_cmac */
@@ -76,6 +76,7 @@ enum {
76 SMP_FLAG_DHKEY_PENDING, 76 SMP_FLAG_DHKEY_PENDING,
77 SMP_FLAG_REMOTE_OOB, 77 SMP_FLAG_REMOTE_OOB,
78 SMP_FLAG_LOCAL_OOB, 78 SMP_FLAG_LOCAL_OOB,
79 SMP_FLAG_CT2,
79}; 80};
80 81
81struct smp_dev { 82struct smp_dev {
@@ -357,6 +358,22 @@ static int smp_h6(struct crypto_shash *tfm_cmac, const u8 w[16],
357 return err; 358 return err;
358} 359}
359 360
361static int smp_h7(struct crypto_shash *tfm_cmac, const u8 w[16],
362 const u8 salt[16], u8 res[16])
363{
364 int err;
365
366 SMP_DBG("w %16phN salt %16phN", w, salt);
367
368 err = aes_cmac(tfm_cmac, salt, w, 16, res);
369 if (err)
370 return err;
371
372 SMP_DBG("res %16phN", res);
373
374 return err;
375}
376
360/* The following functions map to the legacy SMP crypto functions e, c1, 377/* The following functions map to the legacy SMP crypto functions e, c1,
361 * s1 and ah. 378 * s1 and ah.
362 */ 379 */
@@ -1130,20 +1147,31 @@ static void sc_add_ltk(struct smp_chan *smp)
1130 1147
1131static void sc_generate_link_key(struct smp_chan *smp) 1148static void sc_generate_link_key(struct smp_chan *smp)
1132{ 1149{
1133 /* These constants are as specified in the core specification. 1150 /* From core spec. Spells out in ASCII as 'lebr'. */
1134 * In ASCII they spell out to 'tmp1' and 'lebr'.
1135 */
1136 const u8 tmp1[4] = { 0x31, 0x70, 0x6d, 0x74 };
1137 const u8 lebr[4] = { 0x72, 0x62, 0x65, 0x6c }; 1151 const u8 lebr[4] = { 0x72, 0x62, 0x65, 0x6c };
1138 1152
1139 smp->link_key = kzalloc(16, GFP_KERNEL); 1153 smp->link_key = kzalloc(16, GFP_KERNEL);
1140 if (!smp->link_key) 1154 if (!smp->link_key)
1141 return; 1155 return;
1142 1156
1143 if (smp_h6(smp->tfm_cmac, smp->tk, tmp1, smp->link_key)) { 1157 if (test_bit(SMP_FLAG_CT2, &smp->flags)) {
1144 kzfree(smp->link_key); 1158 /* SALT = 0x00000000000000000000000000000000746D7031 */
1145 smp->link_key = NULL; 1159 const u8 salt[16] = { 0x31, 0x70, 0x6d, 0x74 };
1146 return; 1160
1161 if (smp_h7(smp->tfm_cmac, smp->tk, salt, smp->link_key)) {
1162 kzfree(smp->link_key);
1163 smp->link_key = NULL;
1164 return;
1165 }
1166 } else {
1167 /* From core spec. Spells out in ASCII as 'tmp1'. */
1168 const u8 tmp1[4] = { 0x31, 0x70, 0x6d, 0x74 };
1169
1170 if (smp_h6(smp->tfm_cmac, smp->tk, tmp1, smp->link_key)) {
1171 kzfree(smp->link_key);
1172 smp->link_key = NULL;
1173 return;
1174 }
1147 } 1175 }
1148 1176
1149 if (smp_h6(smp->tfm_cmac, smp->link_key, lebr, smp->link_key)) { 1177 if (smp_h6(smp->tfm_cmac, smp->link_key, lebr, smp->link_key)) {
@@ -1169,10 +1197,7 @@ static void smp_allow_key_dist(struct smp_chan *smp)
1169 1197
1170static void sc_generate_ltk(struct smp_chan *smp) 1198static void sc_generate_ltk(struct smp_chan *smp)
1171{ 1199{
1172 /* These constants are as specified in the core specification. 1200 /* From core spec. Spells out in ASCII as 'brle'. */
1173 * In ASCII they spell out to 'tmp2' and 'brle'.
1174 */
1175 const u8 tmp2[4] = { 0x32, 0x70, 0x6d, 0x74 };
1176 const u8 brle[4] = { 0x65, 0x6c, 0x72, 0x62 }; 1201 const u8 brle[4] = { 0x65, 0x6c, 0x72, 0x62 };
1177 struct hci_conn *hcon = smp->conn->hcon; 1202 struct hci_conn *hcon = smp->conn->hcon;
1178 struct hci_dev *hdev = hcon->hdev; 1203 struct hci_dev *hdev = hcon->hdev;
@@ -1187,8 +1212,19 @@ static void sc_generate_ltk(struct smp_chan *smp)
1187 if (key->type == HCI_LK_DEBUG_COMBINATION) 1212 if (key->type == HCI_LK_DEBUG_COMBINATION)
1188 set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); 1213 set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags);
1189 1214
1190 if (smp_h6(smp->tfm_cmac, key->val, tmp2, smp->tk)) 1215 if (test_bit(SMP_FLAG_CT2, &smp->flags)) {
1191 return; 1216 /* SALT = 0x00000000000000000000000000000000746D7032 */
1217 const u8 salt[16] = { 0x32, 0x70, 0x6d, 0x74 };
1218
1219 if (smp_h7(smp->tfm_cmac, key->val, salt, smp->tk))
1220 return;
1221 } else {
1222 /* From core spec. Spells out in ASCII as 'tmp2'. */
1223 const u8 tmp2[4] = { 0x32, 0x70, 0x6d, 0x74 };
1224
1225 if (smp_h6(smp->tfm_cmac, key->val, tmp2, smp->tk))
1226 return;
1227 }
1192 1228
1193 if (smp_h6(smp->tfm_cmac, smp->tk, brle, smp->tk)) 1229 if (smp_h6(smp->tfm_cmac, smp->tk, brle, smp->tk))
1194 return; 1230 return;
@@ -1669,6 +1705,7 @@ static void build_bredr_pairing_cmd(struct smp_chan *smp,
1669 if (!rsp) { 1705 if (!rsp) {
1670 memset(req, 0, sizeof(*req)); 1706 memset(req, 0, sizeof(*req));
1671 1707
1708 req->auth_req = SMP_AUTH_CT2;
1672 req->init_key_dist = local_dist; 1709 req->init_key_dist = local_dist;
1673 req->resp_key_dist = remote_dist; 1710 req->resp_key_dist = remote_dist;
1674 req->max_key_size = conn->hcon->enc_key_size; 1711 req->max_key_size = conn->hcon->enc_key_size;
@@ -1680,6 +1717,7 @@ static void build_bredr_pairing_cmd(struct smp_chan *smp,
1680 1717
1681 memset(rsp, 0, sizeof(*rsp)); 1718 memset(rsp, 0, sizeof(*rsp));
1682 1719
1720 rsp->auth_req = SMP_AUTH_CT2;
1683 rsp->max_key_size = conn->hcon->enc_key_size; 1721 rsp->max_key_size = conn->hcon->enc_key_size;
1684 rsp->init_key_dist = req->init_key_dist & remote_dist; 1722 rsp->init_key_dist = req->init_key_dist & remote_dist;
1685 rsp->resp_key_dist = req->resp_key_dist & local_dist; 1723 rsp->resp_key_dist = req->resp_key_dist & local_dist;
@@ -1744,6 +1782,9 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
1744 1782
1745 build_bredr_pairing_cmd(smp, req, &rsp); 1783 build_bredr_pairing_cmd(smp, req, &rsp);
1746 1784
1785 if (req->auth_req & SMP_AUTH_CT2)
1786 set_bit(SMP_FLAG_CT2, &smp->flags);
1787
1747 key_size = min(req->max_key_size, rsp.max_key_size); 1788 key_size = min(req->max_key_size, rsp.max_key_size);
1748 if (check_enc_key_size(conn, key_size)) 1789 if (check_enc_key_size(conn, key_size))
1749 return SMP_ENC_KEY_SIZE; 1790 return SMP_ENC_KEY_SIZE;
@@ -1761,9 +1802,13 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
1761 1802
1762 build_pairing_cmd(conn, req, &rsp, auth); 1803 build_pairing_cmd(conn, req, &rsp, auth);
1763 1804
1764 if (rsp.auth_req & SMP_AUTH_SC) 1805 if (rsp.auth_req & SMP_AUTH_SC) {
1765 set_bit(SMP_FLAG_SC, &smp->flags); 1806 set_bit(SMP_FLAG_SC, &smp->flags);
1766 1807
1808 if (rsp.auth_req & SMP_AUTH_CT2)
1809 set_bit(SMP_FLAG_CT2, &smp->flags);
1810 }
1811
1767 if (conn->hcon->io_capability == HCI_IO_NO_INPUT_OUTPUT) 1812 if (conn->hcon->io_capability == HCI_IO_NO_INPUT_OUTPUT)
1768 sec_level = BT_SECURITY_MEDIUM; 1813 sec_level = BT_SECURITY_MEDIUM;
1769 else 1814 else
@@ -1917,6 +1962,9 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
1917 */ 1962 */
1918 smp->remote_key_dist &= rsp->resp_key_dist; 1963 smp->remote_key_dist &= rsp->resp_key_dist;
1919 1964
1965 if ((req->auth_req & SMP_AUTH_CT2) && (auth & SMP_AUTH_CT2))
1966 set_bit(SMP_FLAG_CT2, &smp->flags);
1967
1920 /* For BR/EDR this means we're done and can start phase 3 */ 1968 /* For BR/EDR this means we're done and can start phase 3 */
1921 if (conn->hcon->type == ACL_LINK) { 1969 if (conn->hcon->type == ACL_LINK) {
1922 /* Clear bits which are generated but not distributed */ 1970 /* Clear bits which are generated but not distributed */
@@ -2312,8 +2360,11 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
2312 2360
2313 authreq = seclevel_to_authreq(sec_level); 2361 authreq = seclevel_to_authreq(sec_level);
2314 2362
2315 if (hci_dev_test_flag(hcon->hdev, HCI_SC_ENABLED)) 2363 if (hci_dev_test_flag(hcon->hdev, HCI_SC_ENABLED)) {
2316 authreq |= SMP_AUTH_SC; 2364 authreq |= SMP_AUTH_SC;
2365 if (hci_dev_test_flag(hcon->hdev, HCI_SSP_ENABLED))
2366 authreq |= SMP_AUTH_CT2;
2367 }
2317 2368
2318 /* Require MITM if IO Capability allows or the security level 2369 /* Require MITM if IO Capability allows or the security level
2319 * requires it. 2370 * requires it.
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index ffcc70b6b199..0ff6247eaa6c 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -57,6 +57,7 @@ struct smp_cmd_pairing {
57#define SMP_AUTH_MITM 0x04 57#define SMP_AUTH_MITM 0x04
58#define SMP_AUTH_SC 0x08 58#define SMP_AUTH_SC 0x08
59#define SMP_AUTH_KEYPRESS 0x10 59#define SMP_AUTH_KEYPRESS 0x10
60#define SMP_AUTH_CT2 0x20
60 61
61#define SMP_CMD_PAIRING_CONFIRM 0x03 62#define SMP_CMD_PAIRING_CONFIRM 0x03
62struct smp_cmd_pairing_confirm { 63struct smp_cmd_pairing_confirm {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 89a687f3c0a3..ed3b3192fb00 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -19,7 +19,7 @@
19#include <linux/list.h> 19#include <linux/list.h>
20#include <linux/netfilter_bridge.h> 20#include <linux/netfilter_bridge.h>
21 21
22#include <asm/uaccess.h> 22#include <linux/uaccess.h>
23#include "br_private.h" 23#include "br_private.h"
24 24
25#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ 25#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \
@@ -185,7 +185,7 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev,
185static int br_change_mtu(struct net_device *dev, int new_mtu) 185static int br_change_mtu(struct net_device *dev, int new_mtu)
186{ 186{
187 struct net_bridge *br = netdev_priv(dev); 187 struct net_bridge *br = netdev_priv(dev);
188 if (new_mtu < 68 || new_mtu > br_min_mtu(br)) 188 if (new_mtu > br_min_mtu(br))
189 return -EINVAL; 189 return -EINVAL;
190 190
191 dev->mtu = new_mtu; 191 dev->mtu = new_mtu;
@@ -409,7 +409,8 @@ void br_dev_setup(struct net_device *dev)
409 br->bridge_max_age = br->max_age = 20 * HZ; 409 br->bridge_max_age = br->max_age = 20 * HZ;
410 br->bridge_hello_time = br->hello_time = 2 * HZ; 410 br->bridge_hello_time = br->hello_time = 2 * HZ;
411 br->bridge_forward_delay = br->forward_delay = 15 * HZ; 411 br->bridge_forward_delay = br->forward_delay = 15 * HZ;
412 br->ageing_time = BR_DEFAULT_AGEING_TIME; 412 br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME;
413 dev->max_mtu = ETH_MAX_MTU;
413 414
414 br_netfilter_rtable_init(br); 415 br_netfilter_rtable_init(br);
415 br_stp_timer_init(br); 416 br_stp_timer_init(br);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6b43c8c88f19..e4a4176171c9 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -535,9 +535,8 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
535 */ 535 */
536 if (fdb->is_local) 536 if (fdb->is_local)
537 return 0; 537 return 0;
538 br_warn(br, "adding interface %s with same address " 538 br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
539 "as a received packet\n", 539 source ? source->dev->name : br->dev->name, addr, vid);
540 source ? source->dev->name : br->dev->name);
541 fdb_delete(br, fdb); 540 fdb_delete(br, fdb);
542 } 541 }
543 542
@@ -583,9 +582,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
583 /* attempt to update an entry for a local interface */ 582 /* attempt to update an entry for a local interface */
584 if (unlikely(fdb->is_local)) { 583 if (unlikely(fdb->is_local)) {
585 if (net_ratelimit()) 584 if (net_ratelimit())
586 br_warn(br, "received packet on %s with " 585 br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n",
587 "own address as source address\n", 586 source->dev->name, addr, vid);
588 source->dev->name);
589 } else { 587 } else {
590 /* fastpath: update of existing entry */ 588 /* fastpath: update of existing entry */
591 if (unlikely(source != fdb->dst)) { 589 if (unlikely(source != fdb->dst)) {
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index d99b2009771a..da8157c57eb1 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -18,7 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/times.h> 19#include <linux/times.h>
20#include <net/net_namespace.h> 20#include <net/net_namespace.h>
21#include <asm/uaccess.h> 21#include <linux/uaccess.h>
22#include "br_private.h" 22#include "br_private.h"
23 23
24static int get_bridge_ifindices(struct net *net, int *indices, int num) 24static int get_bridge_ifindices(struct net *net, int *indices, int num)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2136e45f5277..b30e77e8427c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -25,6 +25,7 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/timer.h> 26#include <linux/timer.h>
27#include <linux/inetdevice.h> 27#include <linux/inetdevice.h>
28#include <linux/mroute.h>
28#include <net/ip.h> 29#include <net/ip.h>
29#if IS_ENABLED(CONFIG_IPV6) 30#if IS_ENABLED(CONFIG_IPV6)
30#include <net/ipv6.h> 31#include <net/ipv6.h>
@@ -364,13 +365,18 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
364 __be32 group, 365 __be32 group,
365 u8 *igmp_type) 366 u8 *igmp_type)
366{ 367{
368 struct igmpv3_query *ihv3;
369 size_t igmp_hdr_size;
367 struct sk_buff *skb; 370 struct sk_buff *skb;
368 struct igmphdr *ih; 371 struct igmphdr *ih;
369 struct ethhdr *eth; 372 struct ethhdr *eth;
370 struct iphdr *iph; 373 struct iphdr *iph;
371 374
375 igmp_hdr_size = sizeof(*ih);
376 if (br->multicast_igmp_version == 3)
377 igmp_hdr_size = sizeof(*ihv3);
372 skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) + 378 skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) +
373 sizeof(*ih) + 4); 379 igmp_hdr_size + 4);
374 if (!skb) 380 if (!skb)
375 goto out; 381 goto out;
376 382
@@ -395,7 +401,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
395 iph->version = 4; 401 iph->version = 4;
396 iph->ihl = 6; 402 iph->ihl = 6;
397 iph->tos = 0xc0; 403 iph->tos = 0xc0;
398 iph->tot_len = htons(sizeof(*iph) + sizeof(*ih) + 4); 404 iph->tot_len = htons(sizeof(*iph) + igmp_hdr_size + 4);
399 iph->id = 0; 405 iph->id = 0;
400 iph->frag_off = htons(IP_DF); 406 iph->frag_off = htons(IP_DF);
401 iph->ttl = 1; 407 iph->ttl = 1;
@@ -411,17 +417,37 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
411 skb_put(skb, 24); 417 skb_put(skb, 24);
412 418
413 skb_set_transport_header(skb, skb->len); 419 skb_set_transport_header(skb, skb->len);
414 ih = igmp_hdr(skb);
415 *igmp_type = IGMP_HOST_MEMBERSHIP_QUERY; 420 *igmp_type = IGMP_HOST_MEMBERSHIP_QUERY;
416 ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
417 ih->code = (group ? br->multicast_last_member_interval :
418 br->multicast_query_response_interval) /
419 (HZ / IGMP_TIMER_SCALE);
420 ih->group = group;
421 ih->csum = 0;
422 ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr));
423 skb_put(skb, sizeof(*ih));
424 421
422 switch (br->multicast_igmp_version) {
423 case 2:
424 ih = igmp_hdr(skb);
425 ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
426 ih->code = (group ? br->multicast_last_member_interval :
427 br->multicast_query_response_interval) /
428 (HZ / IGMP_TIMER_SCALE);
429 ih->group = group;
430 ih->csum = 0;
431 ih->csum = ip_compute_csum((void *)ih, sizeof(*ih));
432 break;
433 case 3:
434 ihv3 = igmpv3_query_hdr(skb);
435 ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY;
436 ihv3->code = (group ? br->multicast_last_member_interval :
437 br->multicast_query_response_interval) /
438 (HZ / IGMP_TIMER_SCALE);
439 ihv3->group = group;
440 ihv3->qqic = br->multicast_query_interval / HZ;
441 ihv3->nsrcs = 0;
442 ihv3->resv = 0;
443 ihv3->suppress = 0;
444 ihv3->qrv = 2;
445 ihv3->csum = 0;
446 ihv3->csum = ip_compute_csum((void *)ihv3, sizeof(*ihv3));
447 break;
448 }
449
450 skb_put(skb, igmp_hdr_size);
425 __skb_pull(skb, sizeof(*eth)); 451 __skb_pull(skb, sizeof(*eth));
426 452
427out: 453out:
@@ -433,15 +459,20 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
433 const struct in6_addr *grp, 459 const struct in6_addr *grp,
434 u8 *igmp_type) 460 u8 *igmp_type)
435{ 461{
436 struct sk_buff *skb; 462 struct mld2_query *mld2q;
463 unsigned long interval;
437 struct ipv6hdr *ip6h; 464 struct ipv6hdr *ip6h;
438 struct mld_msg *mldq; 465 struct mld_msg *mldq;
466 size_t mld_hdr_size;
467 struct sk_buff *skb;
439 struct ethhdr *eth; 468 struct ethhdr *eth;
440 u8 *hopopt; 469 u8 *hopopt;
441 unsigned long interval;
442 470
471 mld_hdr_size = sizeof(*mldq);
472 if (br->multicast_mld_version == 2)
473 mld_hdr_size = sizeof(*mld2q);
443 skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) + 474 skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) +
444 8 + sizeof(*mldq)); 475 8 + mld_hdr_size);
445 if (!skb) 476 if (!skb)
446 goto out; 477 goto out;
447 478
@@ -460,7 +491,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
460 ip6h = ipv6_hdr(skb); 491 ip6h = ipv6_hdr(skb);
461 492
462 *(__force __be32 *)ip6h = htonl(0x60000000); 493 *(__force __be32 *)ip6h = htonl(0x60000000);
463 ip6h->payload_len = htons(8 + sizeof(*mldq)); 494 ip6h->payload_len = htons(8 + mld_hdr_size);
464 ip6h->nexthdr = IPPROTO_HOPOPTS; 495 ip6h->nexthdr = IPPROTO_HOPOPTS;
465 ip6h->hop_limit = 1; 496 ip6h->hop_limit = 1;
466 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); 497 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
@@ -488,26 +519,47 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
488 519
489 /* ICMPv6 */ 520 /* ICMPv6 */
490 skb_set_transport_header(skb, skb->len); 521 skb_set_transport_header(skb, skb->len);
491 mldq = (struct mld_msg *) icmp6_hdr(skb);
492
493 interval = ipv6_addr_any(grp) ? 522 interval = ipv6_addr_any(grp) ?
494 br->multicast_query_response_interval : 523 br->multicast_query_response_interval :
495 br->multicast_last_member_interval; 524 br->multicast_last_member_interval;
496
497 *igmp_type = ICMPV6_MGM_QUERY; 525 *igmp_type = ICMPV6_MGM_QUERY;
498 mldq->mld_type = ICMPV6_MGM_QUERY; 526 switch (br->multicast_mld_version) {
499 mldq->mld_code = 0; 527 case 1:
500 mldq->mld_cksum = 0; 528 mldq = (struct mld_msg *)icmp6_hdr(skb);
501 mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); 529 mldq->mld_type = ICMPV6_MGM_QUERY;
502 mldq->mld_reserved = 0; 530 mldq->mld_code = 0;
503 mldq->mld_mca = *grp; 531 mldq->mld_cksum = 0;
504 532 mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
505 /* checksum */ 533 mldq->mld_reserved = 0;
506 mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 534 mldq->mld_mca = *grp;
507 sizeof(*mldq), IPPROTO_ICMPV6, 535 mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
508 csum_partial(mldq, 536 sizeof(*mldq), IPPROTO_ICMPV6,
509 sizeof(*mldq), 0)); 537 csum_partial(mldq,
510 skb_put(skb, sizeof(*mldq)); 538 sizeof(*mldq),
539 0));
540 break;
541 case 2:
542 mld2q = (struct mld2_query *)icmp6_hdr(skb);
543 mld2q->mld2q_mrc = ntohs((u16)jiffies_to_msecs(interval));
544 mld2q->mld2q_type = ICMPV6_MGM_QUERY;
545 mld2q->mld2q_code = 0;
546 mld2q->mld2q_cksum = 0;
547 mld2q->mld2q_resv1 = 0;
548 mld2q->mld2q_resv2 = 0;
549 mld2q->mld2q_suppress = 0;
550 mld2q->mld2q_qrv = 2;
551 mld2q->mld2q_nsrcs = 0;
552 mld2q->mld2q_qqic = br->multicast_query_interval / HZ;
553 mld2q->mld2q_mca = *grp;
554 mld2q->mld2q_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
555 sizeof(*mld2q),
556 IPPROTO_ICMPV6,
557 csum_partial(mld2q,
558 sizeof(*mld2q),
559 0));
560 break;
561 }
562 skb_put(skb, mld_hdr_size);
511 563
512 __skb_pull(skb, sizeof(*eth)); 564 __skb_pull(skb, sizeof(*eth));
513 565
@@ -607,7 +659,8 @@ err:
607} 659}
608 660
609struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, 661struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
610 struct net_bridge_port *port, struct br_ip *group) 662 struct net_bridge_port *p,
663 struct br_ip *group)
611{ 664{
612 struct net_bridge_mdb_htable *mdb; 665 struct net_bridge_mdb_htable *mdb;
613 struct net_bridge_mdb_entry *mp; 666 struct net_bridge_mdb_entry *mp;
@@ -623,7 +676,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
623 } 676 }
624 677
625 hash = br_ip_hash(mdb, group); 678 hash = br_ip_hash(mdb, group);
626 mp = br_multicast_get_group(br, port, group, hash); 679 mp = br_multicast_get_group(br, p, group, hash);
627 switch (PTR_ERR(mp)) { 680 switch (PTR_ERR(mp)) {
628 case 0: 681 case 0:
629 break; 682 break;
@@ -680,9 +733,9 @@ static int br_multicast_add_group(struct net_bridge *br,
680 struct net_bridge_port *port, 733 struct net_bridge_port *port,
681 struct br_ip *group) 734 struct br_ip *group)
682{ 735{
683 struct net_bridge_mdb_entry *mp;
684 struct net_bridge_port_group *p;
685 struct net_bridge_port_group __rcu **pp; 736 struct net_bridge_port_group __rcu **pp;
737 struct net_bridge_port_group *p;
738 struct net_bridge_mdb_entry *mp;
686 unsigned long now = jiffies; 739 unsigned long now = jiffies;
687 int err; 740 int err;
688 741
@@ -860,9 +913,9 @@ static void br_multicast_send_query(struct net_bridge *br,
860 struct net_bridge_port *port, 913 struct net_bridge_port *port,
861 struct bridge_mcast_own_query *own_query) 914 struct bridge_mcast_own_query *own_query)
862{ 915{
863 unsigned long time;
864 struct br_ip br_group;
865 struct bridge_mcast_other_query *other_query = NULL; 916 struct bridge_mcast_other_query *other_query = NULL;
917 struct br_ip br_group;
918 unsigned long time;
866 919
867 if (!netif_running(br->dev) || br->multicast_disabled || 920 if (!netif_running(br->dev) || br->multicast_disabled ||
868 !br->multicast_querier) 921 !br->multicast_querier)
@@ -1638,6 +1691,21 @@ static void br_multicast_err_count(const struct net_bridge *br,
1638 u64_stats_update_end(&pstats->syncp); 1691 u64_stats_update_end(&pstats->syncp);
1639} 1692}
1640 1693
1694static void br_multicast_pim(struct net_bridge *br,
1695 struct net_bridge_port *port,
1696 const struct sk_buff *skb)
1697{
1698 unsigned int offset = skb_transport_offset(skb);
1699 struct pimhdr *pimhdr, _pimhdr;
1700
1701 pimhdr = skb_header_pointer(skb, offset, sizeof(_pimhdr), &_pimhdr);
1702 if (!pimhdr || pim_hdr_version(pimhdr) != PIM_VERSION ||
1703 pim_hdr_type(pimhdr) != PIM_TYPE_HELLO)
1704 return;
1705
1706 br_multicast_mark_router(br, port);
1707}
1708
1641static int br_multicast_ipv4_rcv(struct net_bridge *br, 1709static int br_multicast_ipv4_rcv(struct net_bridge *br,
1642 struct net_bridge_port *port, 1710 struct net_bridge_port *port,
1643 struct sk_buff *skb, 1711 struct sk_buff *skb,
@@ -1650,8 +1718,12 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
1650 err = ip_mc_check_igmp(skb, &skb_trimmed); 1718 err = ip_mc_check_igmp(skb, &skb_trimmed);
1651 1719
1652 if (err == -ENOMSG) { 1720 if (err == -ENOMSG) {
1653 if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) 1721 if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) {
1654 BR_INPUT_SKB_CB(skb)->mrouters_only = 1; 1722 BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
1723 } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) {
1724 if (ip_hdr(skb)->protocol == IPPROTO_PIM)
1725 br_multicast_pim(br, port, skb);
1726 }
1655 return 0; 1727 return 0;
1656 } else if (err < 0) { 1728 } else if (err < 0) {
1657 br_multicast_err_count(br, port, skb->protocol); 1729 br_multicast_err_count(br, port, skb->protocol);
@@ -1811,7 +1883,9 @@ void br_multicast_init(struct net_bridge *br)
1811 1883
1812 br->ip4_other_query.delay_time = 0; 1884 br->ip4_other_query.delay_time = 0;
1813 br->ip4_querier.port = NULL; 1885 br->ip4_querier.port = NULL;
1886 br->multicast_igmp_version = 2;
1814#if IS_ENABLED(CONFIG_IPV6) 1887#if IS_ENABLED(CONFIG_IPV6)
1888 br->multicast_mld_version = 1;
1815 br->ip6_other_query.delay_time = 0; 1889 br->ip6_other_query.delay_time = 0;
1816 br->ip6_querier.port = NULL; 1890 br->ip6_querier.port = NULL;
1817#endif 1891#endif
@@ -2112,6 +2186,44 @@ unlock:
2112 return err; 2186 return err;
2113} 2187}
2114 2188
2189int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
2190{
2191 /* Currently we support only version 2 and 3 */
2192 switch (val) {
2193 case 2:
2194 case 3:
2195 break;
2196 default:
2197 return -EINVAL;
2198 }
2199
2200 spin_lock_bh(&br->multicast_lock);
2201 br->multicast_igmp_version = val;
2202 spin_unlock_bh(&br->multicast_lock);
2203
2204 return 0;
2205}
2206
2207#if IS_ENABLED(CONFIG_IPV6)
2208int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val)
2209{
2210 /* Currently we support version 1 and 2 */
2211 switch (val) {
2212 case 1:
2213 case 2:
2214 break;
2215 default:
2216 return -EINVAL;
2217 }
2218
2219 spin_lock_bh(&br->multicast_lock);
2220 br->multicast_mld_version = val;
2221 spin_unlock_bh(&br->multicast_lock);
2222
2223 return 0;
2224}
2225#endif
2226
2115/** 2227/**
2116 * br_multicast_list_adjacent - Returns snooped multicast addresses 2228 * br_multicast_list_adjacent - Returns snooped multicast addresses
2117 * @dev: The bridge port adjacent to which to retrieve addresses 2229 * @dev: The bridge port adjacent to which to retrieve addresses
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2fe9345c1407..8ca6a929bf12 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -40,13 +40,13 @@
40#include <net/netfilter/br_netfilter.h> 40#include <net/netfilter/br_netfilter.h>
41#include <net/netns/generic.h> 41#include <net/netns/generic.h>
42 42
43#include <asm/uaccess.h> 43#include <linux/uaccess.h>
44#include "br_private.h" 44#include "br_private.h"
45#ifdef CONFIG_SYSCTL 45#ifdef CONFIG_SYSCTL
46#include <linux/sysctl.h> 46#include <linux/sysctl.h>
47#endif 47#endif
48 48
49static int brnf_net_id __read_mostly; 49static unsigned int brnf_net_id __read_mostly;
50 50
51struct brnf_net { 51struct brnf_net {
52 bool enabled; 52 bool enabled;
@@ -561,8 +561,8 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff
561 } 561 }
562 nf_bridge_push_encap_header(skb); 562 nf_bridge_push_encap_header(skb);
563 563
564 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb, 564 br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev,
565 in, skb->dev, br_forward_finish, 1); 565 br_forward_finish);
566 return 0; 566 return 0;
567} 567}
568 568
@@ -845,8 +845,10 @@ static unsigned int ip_sabotage_in(void *priv,
845 struct sk_buff *skb, 845 struct sk_buff *skb,
846 const struct nf_hook_state *state) 846 const struct nf_hook_state *state)
847{ 847{
848 if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) 848 if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) {
849 return NF_STOP; 849 state->okfn(state->net, state->sk, skb);
850 return NF_STOLEN;
851 }
850 852
851 return NF_ACCEPT; 853 return NF_ACCEPT;
852} 854}
@@ -1006,20 +1008,20 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
1006 struct nf_hook_state state; 1008 struct nf_hook_state state;
1007 int ret; 1009 int ret;
1008 1010
1009 elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); 1011 for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
1010 1012 elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF;
1011 while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF)) 1013 elem = rcu_dereference(elem->next))
1012 elem = rcu_dereference(elem->next); 1014 ;
1013 1015
1014 if (!elem) 1016 if (!elem)
1015 return okfn(net, sk, skb); 1017 return okfn(net, sk, skb);
1016 1018
1017 /* We may already have this, but read-locks nest anyway */ 1019 /* We may already have this, but read-locks nest anyway */
1018 rcu_read_lock(); 1020 rcu_read_lock();
1019 nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1, 1021 nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
1020 NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); 1022 sk, net, okfn);
1021 1023
1022 ret = nf_hook_slow(skb, &state); 1024 ret = nf_hook_slow(skb, &state, elem);
1023 rcu_read_unlock(); 1025 rcu_read_unlock();
1024 if (ret == 1) 1026 if (ret == 1)
1025 ret = okfn(net, sk, skb); 1027 ret = okfn(net, sk, skb);
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 5989661c659f..96c072e71ea2 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -38,7 +38,7 @@
38#include <net/route.h> 38#include <net/route.h>
39#include <net/netfilter/br_netfilter.h> 39#include <net/netfilter/br_netfilter.h>
40 40
41#include <asm/uaccess.h> 41#include <linux/uaccess.h>
42#include "br_private.h" 42#include "br_private.h"
43#ifdef CONFIG_SYSCTL 43#ifdef CONFIG_SYSCTL
44#include <linux/sysctl.h> 44#include <linux/sysctl.h>
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e99037c6f7b7..71c7453268c1 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -858,6 +858,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
858 [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 }, 858 [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 },
859 [IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 }, 859 [IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 },
860 [IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 }, 860 [IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 },
861 [IFLA_BR_MCAST_IGMP_VERSION] = { .type = NLA_U8 },
862 [IFLA_BR_MCAST_MLD_VERSION] = { .type = NLA_U8 },
861}; 863};
862 864
863static int br_changelink(struct net_device *brdev, struct nlattr *tb[], 865static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1069,6 +1071,26 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
1069 mcast_stats = nla_get_u8(data[IFLA_BR_MCAST_STATS_ENABLED]); 1071 mcast_stats = nla_get_u8(data[IFLA_BR_MCAST_STATS_ENABLED]);
1070 br->multicast_stats_enabled = !!mcast_stats; 1072 br->multicast_stats_enabled = !!mcast_stats;
1071 } 1073 }
1074
1075 if (data[IFLA_BR_MCAST_IGMP_VERSION]) {
1076 __u8 igmp_version;
1077
1078 igmp_version = nla_get_u8(data[IFLA_BR_MCAST_IGMP_VERSION]);
1079 err = br_multicast_set_igmp_version(br, igmp_version);
1080 if (err)
1081 return err;
1082 }
1083
1084#if IS_ENABLED(CONFIG_IPV6)
1085 if (data[IFLA_BR_MCAST_MLD_VERSION]) {
1086 __u8 mld_version;
1087
1088 mld_version = nla_get_u8(data[IFLA_BR_MCAST_MLD_VERSION]);
1089 err = br_multicast_set_mld_version(br, mld_version);
1090 if (err)
1091 return err;
1092 }
1093#endif
1072#endif 1094#endif
1073#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 1095#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
1074 if (data[IFLA_BR_NF_CALL_IPTABLES]) { 1096 if (data[IFLA_BR_NF_CALL_IPTABLES]) {
@@ -1135,6 +1157,8 @@ static size_t br_get_size(const struct net_device *brdev)
1135 nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */ 1157 nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */
1136 nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */ 1158 nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */
1137 nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */ 1159 nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */
1160 nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_IGMP_VERSION */
1161 nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_MLD_VERSION */
1138#endif 1162#endif
1139#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 1163#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
1140 nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */ 1164 nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */
@@ -1210,9 +1234,15 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
1210 nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT, 1234 nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT,
1211 br->multicast_last_member_count) || 1235 br->multicast_last_member_count) ||
1212 nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT, 1236 nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT,
1213 br->multicast_startup_query_count)) 1237 br->multicast_startup_query_count) ||
1238 nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION,
1239 br->multicast_igmp_version))
1214 return -EMSGSIZE; 1240 return -EMSGSIZE;
1215 1241#if IS_ENABLED(CONFIG_IPV6)
1242 if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION,
1243 br->multicast_mld_version))
1244 return -EMSGSIZE;
1245#endif
1216 clockval = jiffies_to_clock_t(br->multicast_last_member_interval); 1246 clockval = jiffies_to_clock_t(br->multicast_last_member_interval);
1217 if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval, 1247 if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval,
1218 IFLA_BR_PAD)) 1248 IFLA_BR_PAD))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1b63177e0ccd..8ce621e8345c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -300,10 +300,11 @@ struct net_bridge
300 unsigned long max_age; 300 unsigned long max_age;
301 unsigned long hello_time; 301 unsigned long hello_time;
302 unsigned long forward_delay; 302 unsigned long forward_delay;
303 unsigned long bridge_max_age;
304 unsigned long ageing_time; 303 unsigned long ageing_time;
304 unsigned long bridge_max_age;
305 unsigned long bridge_hello_time; 305 unsigned long bridge_hello_time;
306 unsigned long bridge_forward_delay; 306 unsigned long bridge_forward_delay;
307 unsigned long bridge_ageing_time;
307 308
308 u8 group_addr[ETH_ALEN]; 309 u8 group_addr[ETH_ALEN];
309 bool group_addr_set; 310 bool group_addr_set;
@@ -333,6 +334,8 @@ struct net_bridge
333 u32 multicast_last_member_count; 334 u32 multicast_last_member_count;
334 u32 multicast_startup_query_count; 335 u32 multicast_startup_query_count;
335 336
337 u8 multicast_igmp_version;
338
336 unsigned long multicast_last_member_interval; 339 unsigned long multicast_last_member_interval;
337 unsigned long multicast_membership_interval; 340 unsigned long multicast_membership_interval;
338 unsigned long multicast_querier_interval; 341 unsigned long multicast_querier_interval;
@@ -353,6 +356,7 @@ struct net_bridge
353 struct bridge_mcast_other_query ip6_other_query; 356 struct bridge_mcast_other_query ip6_other_query;
354 struct bridge_mcast_own_query ip6_own_query; 357 struct bridge_mcast_own_query ip6_own_query;
355 struct bridge_mcast_querier ip6_querier; 358 struct bridge_mcast_querier ip6_querier;
359 u8 multicast_mld_version;
356#endif /* IS_ENABLED(CONFIG_IPV6) */ 360#endif /* IS_ENABLED(CONFIG_IPV6) */
357#endif 361#endif
358 362
@@ -582,6 +586,10 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val);
582int br_multicast_toggle(struct net_bridge *br, unsigned long val); 586int br_multicast_toggle(struct net_bridge *br, unsigned long val);
583int br_multicast_set_querier(struct net_bridge *br, unsigned long val); 587int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
584int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); 588int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
589int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val);
590#if IS_ENABLED(CONFIG_IPV6)
591int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val);
592#endif
585struct net_bridge_mdb_entry * 593struct net_bridge_mdb_entry *
586br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, struct br_ip *dst); 594br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, struct br_ip *dst);
587struct net_bridge_mdb_entry * 595struct net_bridge_mdb_entry *
@@ -992,6 +1000,7 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t);
992int br_set_forward_delay(struct net_bridge *br, unsigned long x); 1000int br_set_forward_delay(struct net_bridge *br, unsigned long x);
993int br_set_hello_time(struct net_bridge *br, unsigned long x); 1001int br_set_hello_time(struct net_bridge *br, unsigned long x);
994int br_set_max_age(struct net_bridge *br, unsigned long x); 1002int br_set_max_age(struct net_bridge *br, unsigned long x);
1003int __set_ageing_time(struct net_device *dev, unsigned long t);
995int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time); 1004int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time);
996 1005
997 1006
diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h
index 2fe910c4e170..3f7543a29b76 100644
--- a/net/bridge/br_private_stp.h
+++ b/net/bridge/br_private_stp.h
@@ -61,6 +61,7 @@ void br_received_tcn_bpdu(struct net_bridge_port *p);
61void br_transmit_config(struct net_bridge_port *p); 61void br_transmit_config(struct net_bridge_port *p);
62void br_transmit_tcn(struct net_bridge *br); 62void br_transmit_tcn(struct net_bridge *br);
63void br_topology_change_detection(struct net_bridge *br); 63void br_topology_change_detection(struct net_bridge *br);
64void __br_set_topology_change(struct net_bridge *br, unsigned char val);
64 65
65/* br_stp_bpdu.c */ 66/* br_stp_bpdu.c */
66void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *); 67void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 9258b8ef14ff..71fd1a4e63cc 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -234,7 +234,7 @@ static void br_record_config_timeout_values(struct net_bridge *br,
234 br->max_age = bpdu->max_age; 234 br->max_age = bpdu->max_age;
235 br->hello_time = bpdu->hello_time; 235 br->hello_time = bpdu->hello_time;
236 br->forward_delay = bpdu->forward_delay; 236 br->forward_delay = bpdu->forward_delay;
237 br->topology_change = bpdu->topology_change; 237 __br_set_topology_change(br, bpdu->topology_change);
238} 238}
239 239
240/* called under bridge lock */ 240/* called under bridge lock */
@@ -344,7 +344,7 @@ void br_topology_change_detection(struct net_bridge *br)
344 isroot ? "propagating" : "sending tcn bpdu"); 344 isroot ? "propagating" : "sending tcn bpdu");
345 345
346 if (isroot) { 346 if (isroot) {
347 br->topology_change = 1; 347 __br_set_topology_change(br, 1);
348 mod_timer(&br->topology_change_timer, jiffies 348 mod_timer(&br->topology_change_timer, jiffies
349 + br->bridge_forward_delay + br->bridge_max_age); 349 + br->bridge_forward_delay + br->bridge_max_age);
350 } else if (!br->topology_change_detected) { 350 } else if (!br->topology_change_detected) {
@@ -562,6 +562,24 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
562 562
563} 563}
564 564
565/* called under bridge lock */
566int __set_ageing_time(struct net_device *dev, unsigned long t)
567{
568 struct switchdev_attr attr = {
569 .orig_dev = dev,
570 .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
571 .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER,
572 .u.ageing_time = jiffies_to_clock_t(t),
573 };
574 int err;
575
576 err = switchdev_port_attr_set(dev, &attr);
577 if (err && err != -EOPNOTSUPP)
578 return err;
579
580 return 0;
581}
582
565/* Set time interval that dynamic forwarding entries live 583/* Set time interval that dynamic forwarding entries live
566 * For pure software bridge, allow values outside the 802.1 584 * For pure software bridge, allow values outside the 802.1
567 * standard specification for special cases: 585 * standard specification for special cases:
@@ -572,25 +590,52 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
572 */ 590 */
573int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time) 591int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time)
574{ 592{
575 struct switchdev_attr attr = {
576 .orig_dev = br->dev,
577 .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
578 .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
579 .u.ageing_time = ageing_time,
580 };
581 unsigned long t = clock_t_to_jiffies(ageing_time); 593 unsigned long t = clock_t_to_jiffies(ageing_time);
582 int err; 594 int err;
583 595
584 err = switchdev_port_attr_set(br->dev, &attr); 596 err = __set_ageing_time(br->dev, t);
585 if (err && err != -EOPNOTSUPP) 597 if (err)
586 return err; 598 return err;
587 599
600 spin_lock_bh(&br->lock);
601 br->bridge_ageing_time = t;
588 br->ageing_time = t; 602 br->ageing_time = t;
603 spin_unlock_bh(&br->lock);
604
589 mod_timer(&br->gc_timer, jiffies); 605 mod_timer(&br->gc_timer, jiffies);
590 606
591 return 0; 607 return 0;
592} 608}
593 609
610/* called under bridge lock */
611void __br_set_topology_change(struct net_bridge *br, unsigned char val)
612{
613 unsigned long t;
614 int err;
615
616 if (br->stp_enabled == BR_KERNEL_STP && br->topology_change != val) {
617 /* On topology change, set the bridge ageing time to twice the
618 * forward delay. Otherwise, restore its default ageing time.
619 */
620
621 if (val) {
622 t = 2 * br->forward_delay;
623 br_debug(br, "decreasing ageing time to %lu\n", t);
624 } else {
625 t = br->bridge_ageing_time;
626 br_debug(br, "restoring ageing time to %lu\n", t);
627 }
628
629 err = __set_ageing_time(br->dev, t);
630 if (err)
631 br_warn(br, "error offloading ageing time\n");
632 else
633 br->ageing_time = t;
634 }
635
636 br->topology_change = val;
637}
638
594void __br_set_forward_delay(struct net_bridge *br, unsigned long t) 639void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
595{ 640{
596 br->bridge_forward_delay = t; 641 br->bridge_forward_delay = t;
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index d8ad73b38de2..6c1e21411125 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -36,12 +36,6 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no)
36/* called under bridge lock */ 36/* called under bridge lock */
37void br_init_port(struct net_bridge_port *p) 37void br_init_port(struct net_bridge_port *p)
38{ 38{
39 struct switchdev_attr attr = {
40 .orig_dev = p->dev,
41 .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
42 .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER,
43 .u.ageing_time = jiffies_to_clock_t(p->br->ageing_time),
44 };
45 int err; 39 int err;
46 40
47 p->port_id = br_make_port_id(p->priority, p->port_no); 41 p->port_id = br_make_port_id(p->priority, p->port_no);
@@ -50,9 +44,9 @@ void br_init_port(struct net_bridge_port *p)
50 p->topology_change_ack = 0; 44 p->topology_change_ack = 0;
51 p->config_pending = 0; 45 p->config_pending = 0;
52 46
53 err = switchdev_port_attr_set(p->dev, &attr); 47 err = __set_ageing_time(p->dev, p->br->ageing_time);
54 if (err && err != -EOPNOTSUPP) 48 if (err)
55 netdev_err(p->dev, "failed to set HW ageing time\n"); 49 netdev_err(p->dev, "failed to offload ageing time\n");
56} 50}
57 51
58/* NO locks held */ 52/* NO locks held */
@@ -87,7 +81,7 @@ void br_stp_disable_bridge(struct net_bridge *br)
87 81
88 } 82 }
89 83
90 br->topology_change = 0; 84 __br_set_topology_change(br, 0);
91 br->topology_change_detected = 0; 85 br->topology_change_detected = 0;
92 spin_unlock_bh(&br->lock); 86 spin_unlock_bh(&br->lock);
93 87
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index da058b85aa22..7ddb38e0a06e 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -125,7 +125,7 @@ static void br_topology_change_timer_expired(unsigned long arg)
125 br_debug(br, "topo change timer expired\n"); 125 br_debug(br, "topo change timer expired\n");
126 spin_lock(&br->lock); 126 spin_lock(&br->lock);
127 br->topology_change_detected = 0; 127 br->topology_change_detected = 0;
128 br->topology_change = 0; 128 __br_set_topology_change(br, 0);
129 spin_unlock(&br->lock); 129 spin_unlock(&br->lock);
130} 130}
131 131
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index f88c4df3f91e..a18148213b08 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -440,6 +440,23 @@ static ssize_t hash_max_store(struct device *d, struct device_attribute *attr,
440} 440}
441static DEVICE_ATTR_RW(hash_max); 441static DEVICE_ATTR_RW(hash_max);
442 442
443static ssize_t multicast_igmp_version_show(struct device *d,
444 struct device_attribute *attr,
445 char *buf)
446{
447 struct net_bridge *br = to_bridge(d);
448
449 return sprintf(buf, "%u\n", br->multicast_igmp_version);
450}
451
452static ssize_t multicast_igmp_version_store(struct device *d,
453 struct device_attribute *attr,
454 const char *buf, size_t len)
455{
456 return store_bridge_parm(d, buf, len, br_multicast_set_igmp_version);
457}
458static DEVICE_ATTR_RW(multicast_igmp_version);
459
443static ssize_t multicast_last_member_count_show(struct device *d, 460static ssize_t multicast_last_member_count_show(struct device *d,
444 struct device_attribute *attr, 461 struct device_attribute *attr,
445 char *buf) 462 char *buf)
@@ -642,6 +659,25 @@ static ssize_t multicast_stats_enabled_store(struct device *d,
642 return store_bridge_parm(d, buf, len, set_stats_enabled); 659 return store_bridge_parm(d, buf, len, set_stats_enabled);
643} 660}
644static DEVICE_ATTR_RW(multicast_stats_enabled); 661static DEVICE_ATTR_RW(multicast_stats_enabled);
662
663#if IS_ENABLED(CONFIG_IPV6)
664static ssize_t multicast_mld_version_show(struct device *d,
665 struct device_attribute *attr,
666 char *buf)
667{
668 struct net_bridge *br = to_bridge(d);
669
670 return sprintf(buf, "%u\n", br->multicast_mld_version);
671}
672
673static ssize_t multicast_mld_version_store(struct device *d,
674 struct device_attribute *attr,
675 const char *buf, size_t len)
676{
677 return store_bridge_parm(d, buf, len, br_multicast_set_mld_version);
678}
679static DEVICE_ATTR_RW(multicast_mld_version);
680#endif
645#endif 681#endif
646#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 682#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
647static ssize_t nf_call_iptables_show( 683static ssize_t nf_call_iptables_show(
@@ -809,6 +845,10 @@ static struct attribute *bridge_attrs[] = {
809 &dev_attr_multicast_query_response_interval.attr, 845 &dev_attr_multicast_query_response_interval.attr,
810 &dev_attr_multicast_startup_query_interval.attr, 846 &dev_attr_multicast_startup_query_interval.attr,
811 &dev_attr_multicast_stats_enabled.attr, 847 &dev_attr_multicast_stats_enabled.attr,
848 &dev_attr_multicast_igmp_version.attr,
849#if IS_ENABLED(CONFIG_IPV6)
850 &dev_attr_multicast_mld_version.attr,
851#endif
812#endif 852#endif
813#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 853#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
814 &dev_attr_nf_call_iptables.attr, 854 &dev_attr_nf_call_iptables.attr,
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 9cebf47ac840..e7ef1a1ef3a6 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -22,6 +22,7 @@ config NFT_BRIDGE_REJECT
22 22
23config NF_LOG_BRIDGE 23config NF_LOG_BRIDGE
24 tristate "Bridge packet logging" 24 tristate "Bridge packet logging"
25 select NF_LOG_COMMON
25 26
26endif # NF_TABLES_BRIDGE 27endif # NF_TABLES_BRIDGE
27 28
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 070cf134a22f..5929309beaa1 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -51,7 +51,8 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par)
51 if (diptr == NULL) 51 if (diptr == NULL)
52 return EBT_DROP; 52 return EBT_DROP;
53 53
54 arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in, 54 arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr,
55 (struct net_device *)xt_in(par),
55 *diptr, shp, info->mac, shp); 56 *diptr, shp, info->mac, shp);
56 57
57 return info->target; 58 return info->target;
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 9a11086ba6ff..e88bd4827ac1 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -179,7 +179,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
179{ 179{
180 const struct ebt_log_info *info = par->targinfo; 180 const struct ebt_log_info *info = par->targinfo;
181 struct nf_loginfo li; 181 struct nf_loginfo li;
182 struct net *net = par->net; 182 struct net *net = xt_net(par);
183 183
184 li.type = NF_LOG_TYPE_LOG; 184 li.type = NF_LOG_TYPE_LOG;
185 li.u.log.level = info->loglevel; 185 li.u.log.level = info->loglevel;
@@ -190,11 +190,12 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
190 * nf_log_packet() with NFT_LOG_TYPE_LOG here. --Pablo 190 * nf_log_packet() with NFT_LOG_TYPE_LOG here. --Pablo
191 */ 191 */
192 if (info->bitmask & EBT_LOG_NFLOG) 192 if (info->bitmask & EBT_LOG_NFLOG)
193 nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, 193 nf_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb,
194 par->in, par->out, &li, "%s", info->prefix); 194 xt_in(par), xt_out(par), &li, "%s",
195 info->prefix);
195 else 196 else
196 ebt_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in, 197 ebt_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb,
197 par->out, &li, info->prefix); 198 xt_in(par), xt_out(par), &li, info->prefix);
198 return EBT_CONTINUE; 199 return EBT_CONTINUE;
199} 200}
200 201
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 54816150608e..c1dc48686200 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -23,16 +23,16 @@ static unsigned int
23ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) 23ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
24{ 24{
25 const struct ebt_nflog_info *info = par->targinfo; 25 const struct ebt_nflog_info *info = par->targinfo;
26 struct net *net = xt_net(par);
26 struct nf_loginfo li; 27 struct nf_loginfo li;
27 struct net *net = par->net;
28 28
29 li.type = NF_LOG_TYPE_ULOG; 29 li.type = NF_LOG_TYPE_ULOG;
30 li.u.ulog.copy_len = info->len; 30 li.u.ulog.copy_len = info->len;
31 li.u.ulog.group = info->group; 31 li.u.ulog.group = info->group;
32 li.u.ulog.qthreshold = info->threshold; 32 li.u.ulog.qthreshold = info->threshold;
33 33
34 nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in, 34 nf_log_packet(net, PF_BRIDGE, xt_hooknum(par), skb, xt_in(par),
35 par->out, &li, "%s", info->prefix); 35 xt_out(par), &li, "%s", info->prefix);
36 return EBT_CONTINUE; 36 return EBT_CONTINUE;
37} 37}
38 38
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 2e7c4f974340..8d2a85e0594e 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -23,12 +23,12 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
23 if (!skb_make_writable(skb, 0)) 23 if (!skb_make_writable(skb, 0))
24 return EBT_DROP; 24 return EBT_DROP;
25 25
26 if (par->hooknum != NF_BR_BROUTING) 26 if (xt_hooknum(par) != NF_BR_BROUTING)
27 /* rcu_read_lock()ed by nf_hook_thresh */ 27 /* rcu_read_lock()ed by nf_hook_thresh */
28 ether_addr_copy(eth_hdr(skb)->h_dest, 28 ether_addr_copy(eth_hdr(skb)->h_dest,
29 br_port_get_rcu(par->in)->br->dev->dev_addr); 29 br_port_get_rcu(xt_in(par))->br->dev->dev_addr);
30 else 30 else
31 ether_addr_copy(eth_hdr(skb)->h_dest, par->in->dev_addr); 31 ether_addr_copy(eth_hdr(skb)->h_dest, xt_in(par)->dev_addr);
32 skb->pkt_type = PACKET_HOST; 32 skb->pkt_type = PACKET_HOST;
33 return info->target; 33 return info->target;
34} 34}
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index ec94c6f1ae88..8fe36dc3aab2 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -53,7 +53,7 @@ static int ebt_broute(struct sk_buff *skb)
53 struct nf_hook_state state; 53 struct nf_hook_state state;
54 int ret; 54 int ret;
55 55
56 nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN, 56 nf_hook_state_init(&state, NF_BR_BROUTING,
57 NFPROTO_BRIDGE, skb->dev, NULL, NULL, 57 NFPROTO_BRIDGE, skb->dev, NULL, NULL,
58 dev_net(skb->dev), NULL); 58 dev_net(skb->dev), NULL);
59 59
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f5c11bbe27db..537e3d506fc2 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -23,7 +23,7 @@
23#include <linux/spinlock.h> 23#include <linux/spinlock.h>
24#include <linux/mutex.h> 24#include <linux/mutex.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <asm/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/smp.h> 27#include <linux/smp.h>
28#include <linux/cpumask.h> 28#include <linux/cpumask.h>
29#include <linux/audit.h> 29#include <linux/audit.h>
@@ -194,12 +194,8 @@ unsigned int ebt_do_table(struct sk_buff *skb,
194 const struct ebt_table_info *private; 194 const struct ebt_table_info *private;
195 struct xt_action_param acpar; 195 struct xt_action_param acpar;
196 196
197 acpar.family = NFPROTO_BRIDGE; 197 acpar.state = state;
198 acpar.net = state->net;
199 acpar.in = state->in;
200 acpar.out = state->out;
201 acpar.hotdrop = false; 198 acpar.hotdrop = false;
202 acpar.hooknum = hook;
203 199
204 read_lock_bh(&table->lock); 200 read_lock_bh(&table->lock);
205 private = table->private; 201 private = table->private;
diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c
index 1663df598545..bd2b3c78f59b 100644
--- a/net/bridge/netfilter/nf_log_bridge.c
+++ b/net/bridge/netfilter/nf_log_bridge.c
@@ -24,21 +24,8 @@ static void nf_log_bridge_packet(struct net *net, u_int8_t pf,
24 const struct nf_loginfo *loginfo, 24 const struct nf_loginfo *loginfo,
25 const char *prefix) 25 const char *prefix)
26{ 26{
27 switch (eth_hdr(skb)->h_proto) { 27 nf_log_l2packet(net, pf, eth_hdr(skb)->h_proto, hooknum, skb,
28 case htons(ETH_P_IP): 28 in, out, loginfo, prefix);
29 nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out,
30 loginfo, "%s", prefix);
31 break;
32 case htons(ETH_P_IPV6):
33 nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out,
34 loginfo, "%s", prefix);
35 break;
36 case htons(ETH_P_ARP):
37 case htons(ETH_P_RARP):
38 nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out,
39 loginfo, "%s", prefix);
40 break;
41 }
42} 29}
43 30
44static struct nf_logger nf_bridge_logger __read_mostly = { 31static struct nf_logger nf_bridge_logger __read_mostly = {
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index ad47a921b701..5974dbc1ea24 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -23,7 +23,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
23 const struct nft_pktinfo *pkt) 23 const struct nft_pktinfo *pkt)
24{ 24{
25 const struct nft_meta *priv = nft_expr_priv(expr); 25 const struct nft_meta *priv = nft_expr_priv(expr);
26 const struct net_device *in = pkt->in, *out = pkt->out; 26 const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
27 u32 *dest = &regs->data[priv->dreg]; 27 u32 *dest = &regs->data[priv->dreg];
28 const struct net_bridge_port *p; 28 const struct net_bridge_port *p;
29 29
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 4b3df6b0e3b9..206dc266ecd2 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -315,17 +315,20 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
315 case htons(ETH_P_IP): 315 case htons(ETH_P_IP):
316 switch (priv->type) { 316 switch (priv->type) {
317 case NFT_REJECT_ICMP_UNREACH: 317 case NFT_REJECT_ICMP_UNREACH:
318 nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, 318 nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb,
319 pkt->in, pkt->hook, 319 nft_in(pkt),
320 nft_hook(pkt),
320 priv->icmp_code); 321 priv->icmp_code);
321 break; 322 break;
322 case NFT_REJECT_TCP_RST: 323 case NFT_REJECT_TCP_RST:
323 nft_reject_br_send_v4_tcp_reset(pkt->net, pkt->skb, 324 nft_reject_br_send_v4_tcp_reset(nft_net(pkt), pkt->skb,
324 pkt->in, pkt->hook); 325 nft_in(pkt),
326 nft_hook(pkt));
325 break; 327 break;
326 case NFT_REJECT_ICMPX_UNREACH: 328 case NFT_REJECT_ICMPX_UNREACH:
327 nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, 329 nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb,
328 pkt->in, pkt->hook, 330 nft_in(pkt),
331 nft_hook(pkt),
329 nft_reject_icmp_code(priv->icmp_code)); 332 nft_reject_icmp_code(priv->icmp_code));
330 break; 333 break;
331 } 334 }
@@ -333,17 +336,20 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
333 case htons(ETH_P_IPV6): 336 case htons(ETH_P_IPV6):
334 switch (priv->type) { 337 switch (priv->type) {
335 case NFT_REJECT_ICMP_UNREACH: 338 case NFT_REJECT_ICMP_UNREACH:
336 nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, 339 nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb,
337 pkt->in, pkt->hook, 340 nft_in(pkt),
341 nft_hook(pkt),
338 priv->icmp_code); 342 priv->icmp_code);
339 break; 343 break;
340 case NFT_REJECT_TCP_RST: 344 case NFT_REJECT_TCP_RST:
341 nft_reject_br_send_v6_tcp_reset(pkt->net, pkt->skb, 345 nft_reject_br_send_v6_tcp_reset(nft_net(pkt), pkt->skb,
342 pkt->in, pkt->hook); 346 nft_in(pkt),
347 nft_hook(pkt));
343 break; 348 break;
344 case NFT_REJECT_ICMPX_UNREACH: 349 case NFT_REJECT_ICMPX_UNREACH:
345 nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, 350 nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb,
346 pkt->in, pkt->hook, 351 nft_in(pkt),
352 nft_hook(pkt),
347 nft_reject_icmpv6_code(priv->icmp_code)); 353 nft_reject_icmpv6_code(priv->icmp_code));
348 break; 354 break;
349 } 355 }
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index d730a0f68f46..2d38b6e34203 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -52,7 +52,7 @@ struct caif_net {
52 struct caif_device_entry_list caifdevs; 52 struct caif_device_entry_list caifdevs;
53}; 53};
54 54
55static int caif_net_id; 55static unsigned int caif_net_id;
56static int q_high = 50; /* Percent */ 56static int q_high = 50; /* Percent */
57 57
58struct cfcnfg *get_cfcnfg(struct net *net) 58struct cfcnfg *get_cfcnfg(struct net *net)
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index fa39fc298708..273cb07f57d8 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -390,8 +390,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
390 rcu_read_lock(); 390 rcu_read_lock();
391 391
392 if (adapt_layer == NULL) { 392 if (adapt_layer == NULL) {
393 pr_debug("link setup response but no client exist," 393 pr_debug("link setup response but no client exist, send linkdown back\n");
394 "send linkdown back\n");
395 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL); 394 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
396 goto unlock; 395 goto unlock;
397 } 396 }
@@ -401,8 +400,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
401 400
402 phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid); 401 phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid);
403 if (phyinfo == NULL) { 402 if (phyinfo == NULL) {
404 pr_err("ERROR: Link Layer Device disappeared" 403 pr_err("ERROR: Link Layer Device disappeared while connecting\n");
405 "while connecting\n");
406 goto unlock; 404 goto unlock;
407 } 405 }
408 406
@@ -436,8 +434,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
436 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info); 434 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
437 break; 435 break;
438 default: 436 default:
439 pr_err("Protocol error. Link setup response " 437 pr_err("Protocol error. Link setup response - unknown channel type\n");
440 "- unknown channel type\n");
441 goto unlock; 438 goto unlock;
442 } 439 }
443 if (!servicel) 440 if (!servicel)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 436a7537e6a9..21ac75390e3d 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -199,11 +199,11 @@ static int bcm_proc_show(struct seq_file *m, void *v)
199 199
200 seq_printf(m, "%c ", (op->flags & RX_CHECK_DLC) ? 'd' : ' '); 200 seq_printf(m, "%c ", (op->flags & RX_CHECK_DLC) ? 'd' : ' ');
201 201
202 if (op->kt_ival1.tv64) 202 if (op->kt_ival1)
203 seq_printf(m, "timeo=%lld ", 203 seq_printf(m, "timeo=%lld ",
204 (long long)ktime_to_us(op->kt_ival1)); 204 (long long)ktime_to_us(op->kt_ival1));
205 205
206 if (op->kt_ival2.tv64) 206 if (op->kt_ival2)
207 seq_printf(m, "thr=%lld ", 207 seq_printf(m, "thr=%lld ",
208 (long long)ktime_to_us(op->kt_ival2)); 208 (long long)ktime_to_us(op->kt_ival2));
209 209
@@ -226,11 +226,11 @@ static int bcm_proc_show(struct seq_file *m, void *v)
226 else 226 else
227 seq_printf(m, "[%u] ", op->nframes); 227 seq_printf(m, "[%u] ", op->nframes);
228 228
229 if (op->kt_ival1.tv64) 229 if (op->kt_ival1)
230 seq_printf(m, "t1=%lld ", 230 seq_printf(m, "t1=%lld ",
231 (long long)ktime_to_us(op->kt_ival1)); 231 (long long)ktime_to_us(op->kt_ival1));
232 232
233 if (op->kt_ival2.tv64) 233 if (op->kt_ival2)
234 seq_printf(m, "t2=%lld ", 234 seq_printf(m, "t2=%lld ",
235 (long long)ktime_to_us(op->kt_ival2)); 235 (long long)ktime_to_us(op->kt_ival2));
236 236
@@ -365,11 +365,11 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
365 365
366static void bcm_tx_start_timer(struct bcm_op *op) 366static void bcm_tx_start_timer(struct bcm_op *op)
367{ 367{
368 if (op->kt_ival1.tv64 && op->count) 368 if (op->kt_ival1 && op->count)
369 hrtimer_start(&op->timer, 369 hrtimer_start(&op->timer,
370 ktime_add(ktime_get(), op->kt_ival1), 370 ktime_add(ktime_get(), op->kt_ival1),
371 HRTIMER_MODE_ABS); 371 HRTIMER_MODE_ABS);
372 else if (op->kt_ival2.tv64) 372 else if (op->kt_ival2)
373 hrtimer_start(&op->timer, 373 hrtimer_start(&op->timer,
374 ktime_add(ktime_get(), op->kt_ival2), 374 ktime_add(ktime_get(), op->kt_ival2),
375 HRTIMER_MODE_ABS); 375 HRTIMER_MODE_ABS);
@@ -380,7 +380,7 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
380 struct bcm_op *op = (struct bcm_op *)data; 380 struct bcm_op *op = (struct bcm_op *)data;
381 struct bcm_msg_head msg_head; 381 struct bcm_msg_head msg_head;
382 382
383 if (op->kt_ival1.tv64 && (op->count > 0)) { 383 if (op->kt_ival1 && (op->count > 0)) {
384 384
385 op->count--; 385 op->count--;
386 if (!op->count && (op->flags & TX_COUNTEVT)) { 386 if (!op->count && (op->flags & TX_COUNTEVT)) {
@@ -398,7 +398,7 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
398 } 398 }
399 bcm_can_tx(op); 399 bcm_can_tx(op);
400 400
401 } else if (op->kt_ival2.tv64) 401 } else if (op->kt_ival2)
402 bcm_can_tx(op); 402 bcm_can_tx(op);
403 403
404 bcm_tx_start_timer(op); 404 bcm_tx_start_timer(op);
@@ -459,7 +459,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op,
459 lastdata->flags |= (RX_RECV|RX_THR); 459 lastdata->flags |= (RX_RECV|RX_THR);
460 460
461 /* throttling mode inactive ? */ 461 /* throttling mode inactive ? */
462 if (!op->kt_ival2.tv64) { 462 if (!op->kt_ival2) {
463 /* send RX_CHANGED to the user immediately */ 463 /* send RX_CHANGED to the user immediately */
464 bcm_rx_changed(op, lastdata); 464 bcm_rx_changed(op, lastdata);
465 return; 465 return;
@@ -470,7 +470,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op,
470 return; 470 return;
471 471
472 /* first reception with enabled throttling mode */ 472 /* first reception with enabled throttling mode */
473 if (!op->kt_lastmsg.tv64) 473 if (!op->kt_lastmsg)
474 goto rx_changed_settime; 474 goto rx_changed_settime;
475 475
476 /* got a second frame inside a potential throttle period? */ 476 /* got a second frame inside a potential throttle period? */
@@ -537,7 +537,7 @@ static void bcm_rx_starttimer(struct bcm_op *op)
537 if (op->flags & RX_NO_AUTOTIMER) 537 if (op->flags & RX_NO_AUTOTIMER)
538 return; 538 return;
539 539
540 if (op->kt_ival1.tv64) 540 if (op->kt_ival1)
541 hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL); 541 hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL);
542} 542}
543 543
@@ -643,7 +643,7 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer)
643 return HRTIMER_RESTART; 643 return HRTIMER_RESTART;
644 } else { 644 } else {
645 /* rearm throttle handling */ 645 /* rearm throttle handling */
646 op->kt_lastmsg = ktime_set(0, 0); 646 op->kt_lastmsg = 0;
647 return HRTIMER_NORESTART; 647 return HRTIMER_NORESTART;
648 } 648 }
649} 649}
@@ -1005,7 +1005,7 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
1005 op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2); 1005 op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2);
1006 1006
1007 /* disable an active timer due to zero values? */ 1007 /* disable an active timer due to zero values? */
1008 if (!op->kt_ival1.tv64 && !op->kt_ival2.tv64) 1008 if (!op->kt_ival1 && !op->kt_ival2)
1009 hrtimer_cancel(&op->timer); 1009 hrtimer_cancel(&op->timer);
1010 } 1010 }
1011 1011
@@ -1189,19 +1189,19 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
1189 op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2); 1189 op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2);
1190 1190
1191 /* disable an active timer due to zero value? */ 1191 /* disable an active timer due to zero value? */
1192 if (!op->kt_ival1.tv64) 1192 if (!op->kt_ival1)
1193 hrtimer_cancel(&op->timer); 1193 hrtimer_cancel(&op->timer);
1194 1194
1195 /* 1195 /*
1196 * In any case cancel the throttle timer, flush 1196 * In any case cancel the throttle timer, flush
1197 * potentially blocked msgs and reset throttle handling 1197 * potentially blocked msgs and reset throttle handling
1198 */ 1198 */
1199 op->kt_lastmsg = ktime_set(0, 0); 1199 op->kt_lastmsg = 0;
1200 hrtimer_cancel(&op->thrtimer); 1200 hrtimer_cancel(&op->thrtimer);
1201 bcm_rx_thr_flush(op, 1); 1201 bcm_rx_thr_flush(op, 1);
1202 } 1202 }
1203 1203
1204 if ((op->flags & STARTTIMER) && op->kt_ival1.tv64) 1204 if ((op->flags & STARTTIMER) && op->kt_ival1)
1205 hrtimer_start(&op->timer, op->kt_ival1, 1205 hrtimer_start(&op->timer, op->kt_ival1,
1206 HRTIMER_MODE_REL); 1206 HRTIMER_MODE_REL);
1207 } 1207 }
diff --git a/net/can/gw.c b/net/can/gw.c
index 455168718c2e..a54ab0c82104 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -429,7 +429,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
429 429
430 /* clear the skb timestamp if not configured the other way */ 430 /* clear the skb timestamp if not configured the other way */
431 if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP)) 431 if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP))
432 nskb->tstamp.tv64 = 0; 432 nskb->tstamp = 0;
433 433
434 /* send to netdevice */ 434 /* send to netdevice */
435 if (can_send(nskb, gwj->flags & CGW_FLAGS_CAN_ECHO)) 435 if (can_send(nskb, gwj->flags & CGW_FLAGS_CAN_ECHO))
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index c822b3ae1bd3..48bb8d95195b 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -315,13 +315,13 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
315EXPORT_SYMBOL(ceph_auth_update_authorizer); 315EXPORT_SYMBOL(ceph_auth_update_authorizer);
316 316
317int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, 317int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
318 struct ceph_authorizer *a, size_t len) 318 struct ceph_authorizer *a)
319{ 319{
320 int ret = 0; 320 int ret = 0;
321 321
322 mutex_lock(&ac->mutex); 322 mutex_lock(&ac->mutex);
323 if (ac->ops && ac->ops->verify_authorizer_reply) 323 if (ac->ops && ac->ops->verify_authorizer_reply)
324 ret = ac->ops->verify_authorizer_reply(ac, a, len); 324 ret = ac->ops->verify_authorizer_reply(ac, a);
325 mutex_unlock(&ac->mutex); 325 mutex_unlock(&ac->mutex);
326 return ret; 326 return ret;
327} 327}
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index a0905f04bd13..2034fb926670 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -39,56 +39,58 @@ static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
39 return need != 0; 39 return need != 0;
40} 40}
41 41
42static int ceph_x_encrypt_offset(void)
43{
44 return sizeof(u32) + sizeof(struct ceph_x_encrypt_header);
45}
46
42static int ceph_x_encrypt_buflen(int ilen) 47static int ceph_x_encrypt_buflen(int ilen)
43{ 48{
44 return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + 49 return ceph_x_encrypt_offset() + ilen + 16;
45 sizeof(u32);
46} 50}
47 51
48static int ceph_x_encrypt(struct ceph_crypto_key *secret, 52static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf,
49 void *ibuf, int ilen, void *obuf, size_t olen) 53 int buf_len, int plaintext_len)
50{ 54{
51 struct ceph_x_encrypt_header head = { 55 struct ceph_x_encrypt_header *hdr = buf + sizeof(u32);
52 .struct_v = 1, 56 int ciphertext_len;
53 .magic = cpu_to_le64(CEPHX_ENC_MAGIC)
54 };
55 size_t len = olen - sizeof(u32);
56 int ret; 57 int ret;
57 58
58 ret = ceph_encrypt2(secret, obuf + sizeof(u32), &len, 59 hdr->struct_v = 1;
59 &head, sizeof(head), ibuf, ilen); 60 hdr->magic = cpu_to_le64(CEPHX_ENC_MAGIC);
61
62 ret = ceph_crypt(secret, true, buf + sizeof(u32), buf_len - sizeof(u32),
63 plaintext_len + sizeof(struct ceph_x_encrypt_header),
64 &ciphertext_len);
60 if (ret) 65 if (ret)
61 return ret; 66 return ret;
62 ceph_encode_32(&obuf, len); 67
63 return len + sizeof(u32); 68 ceph_encode_32(&buf, ciphertext_len);
69 return sizeof(u32) + ciphertext_len;
64} 70}
65 71
66static int ceph_x_decrypt(struct ceph_crypto_key *secret, 72static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end)
67 void **p, void *end, void **obuf, size_t olen)
68{ 73{
69 struct ceph_x_encrypt_header head; 74 struct ceph_x_encrypt_header *hdr = *p + sizeof(u32);
70 size_t head_len = sizeof(head); 75 int ciphertext_len, plaintext_len;
71 int len, ret; 76 int ret;
72
73 len = ceph_decode_32(p);
74 if (*p + len > end)
75 return -EINVAL;
76 77
77 dout("ceph_x_decrypt len %d\n", len); 78 ceph_decode_32_safe(p, end, ciphertext_len, e_inval);
78 if (*obuf == NULL) { 79 ceph_decode_need(p, end, ciphertext_len, e_inval);
79 *obuf = kmalloc(len, GFP_NOFS);
80 if (!*obuf)
81 return -ENOMEM;
82 olen = len;
83 }
84 80
85 ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len); 81 ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len,
82 &plaintext_len);
86 if (ret) 83 if (ret)
87 return ret; 84 return ret;
88 if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC) 85
86 if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC)
89 return -EPERM; 87 return -EPERM;
90 *p += len; 88
91 return olen; 89 *p += ciphertext_len;
90 return plaintext_len - sizeof(struct ceph_x_encrypt_header);
91
92e_inval:
93 return -EINVAL;
92} 94}
93 95
94/* 96/*
@@ -143,13 +145,10 @@ static int process_one_ticket(struct ceph_auth_client *ac,
143 int type; 145 int type;
144 u8 tkt_struct_v, blob_struct_v; 146 u8 tkt_struct_v, blob_struct_v;
145 struct ceph_x_ticket_handler *th; 147 struct ceph_x_ticket_handler *th;
146 void *dbuf = NULL;
147 void *dp, *dend; 148 void *dp, *dend;
148 int dlen; 149 int dlen;
149 char is_enc; 150 char is_enc;
150 struct timespec validity; 151 struct timespec validity;
151 struct ceph_crypto_key old_key;
152 void *ticket_buf = NULL;
153 void *tp, *tpend; 152 void *tp, *tpend;
154 void **ptp; 153 void **ptp;
155 struct ceph_crypto_key new_session_key; 154 struct ceph_crypto_key new_session_key;
@@ -174,20 +173,17 @@ static int process_one_ticket(struct ceph_auth_client *ac,
174 } 173 }
175 174
176 /* blob for me */ 175 /* blob for me */
177 dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0); 176 dp = *p + ceph_x_encrypt_offset();
178 if (dlen <= 0) { 177 ret = ceph_x_decrypt(secret, p, end);
179 ret = dlen; 178 if (ret < 0)
180 goto out; 179 goto out;
181 } 180 dout(" decrypted %d bytes\n", ret);
182 dout(" decrypted %d bytes\n", dlen); 181 dend = dp + ret;
183 dp = dbuf;
184 dend = dp + dlen;
185 182
186 tkt_struct_v = ceph_decode_8(&dp); 183 tkt_struct_v = ceph_decode_8(&dp);
187 if (tkt_struct_v != 1) 184 if (tkt_struct_v != 1)
188 goto bad; 185 goto bad;
189 186
190 memcpy(&old_key, &th->session_key, sizeof(old_key));
191 ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); 187 ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
192 if (ret) 188 if (ret)
193 goto out; 189 goto out;
@@ -203,15 +199,13 @@ static int process_one_ticket(struct ceph_auth_client *ac,
203 ceph_decode_8_safe(p, end, is_enc, bad); 199 ceph_decode_8_safe(p, end, is_enc, bad);
204 if (is_enc) { 200 if (is_enc) {
205 /* encrypted */ 201 /* encrypted */
206 dout(" encrypted ticket\n"); 202 tp = *p + ceph_x_encrypt_offset();
207 dlen = ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0); 203 ret = ceph_x_decrypt(&th->session_key, p, end);
208 if (dlen < 0) { 204 if (ret < 0)
209 ret = dlen;
210 goto out; 205 goto out;
211 } 206 dout(" encrypted ticket, decrypted %d bytes\n", ret);
212 tp = ticket_buf;
213 ptp = &tp; 207 ptp = &tp;
214 tpend = *ptp + dlen; 208 tpend = tp + ret;
215 } else { 209 } else {
216 /* unencrypted */ 210 /* unencrypted */
217 ptp = p; 211 ptp = p;
@@ -242,8 +236,6 @@ static int process_one_ticket(struct ceph_auth_client *ac,
242 xi->have_keys |= th->service; 236 xi->have_keys |= th->service;
243 237
244out: 238out:
245 kfree(ticket_buf);
246 kfree(dbuf);
247 return ret; 239 return ret;
248 240
249bad: 241bad:
@@ -294,7 +286,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
294{ 286{
295 int maxlen; 287 int maxlen;
296 struct ceph_x_authorize_a *msg_a; 288 struct ceph_x_authorize_a *msg_a;
297 struct ceph_x_authorize_b msg_b; 289 struct ceph_x_authorize_b *msg_b;
298 void *p, *end; 290 void *p, *end;
299 int ret; 291 int ret;
300 int ticket_blob_len = 292 int ticket_blob_len =
@@ -308,8 +300,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
308 if (ret) 300 if (ret)
309 goto out_au; 301 goto out_au;
310 302
311 maxlen = sizeof(*msg_a) + sizeof(msg_b) + 303 maxlen = sizeof(*msg_a) + ticket_blob_len +
312 ceph_x_encrypt_buflen(ticket_blob_len); 304 ceph_x_encrypt_buflen(sizeof(*msg_b));
313 dout(" need len %d\n", maxlen); 305 dout(" need len %d\n", maxlen);
314 if (au->buf && au->buf->alloc_len < maxlen) { 306 if (au->buf && au->buf->alloc_len < maxlen) {
315 ceph_buffer_put(au->buf); 307 ceph_buffer_put(au->buf);
@@ -343,18 +335,19 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
343 p += ticket_blob_len; 335 p += ticket_blob_len;
344 end = au->buf->vec.iov_base + au->buf->vec.iov_len; 336 end = au->buf->vec.iov_base + au->buf->vec.iov_len;
345 337
338 msg_b = p + ceph_x_encrypt_offset();
339 msg_b->struct_v = 1;
346 get_random_bytes(&au->nonce, sizeof(au->nonce)); 340 get_random_bytes(&au->nonce, sizeof(au->nonce));
347 msg_b.struct_v = 1; 341 msg_b->nonce = cpu_to_le64(au->nonce);
348 msg_b.nonce = cpu_to_le64(au->nonce); 342 ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
349 ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b),
350 p, end - p);
351 if (ret < 0) 343 if (ret < 0)
352 goto out_au; 344 goto out_au;
345
353 p += ret; 346 p += ret;
347 WARN_ON(p > end);
354 au->buf->vec.iov_len = p - au->buf->vec.iov_base; 348 au->buf->vec.iov_len = p - au->buf->vec.iov_base;
355 dout(" built authorizer nonce %llx len %d\n", au->nonce, 349 dout(" built authorizer nonce %llx len %d\n", au->nonce,
356 (int)au->buf->vec.iov_len); 350 (int)au->buf->vec.iov_len);
357 BUG_ON(au->buf->vec.iov_len > maxlen);
358 return 0; 351 return 0;
359 352
360out_au: 353out_au:
@@ -452,8 +445,9 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
452 if (need & CEPH_ENTITY_TYPE_AUTH) { 445 if (need & CEPH_ENTITY_TYPE_AUTH) {
453 struct ceph_x_authenticate *auth = (void *)(head + 1); 446 struct ceph_x_authenticate *auth = (void *)(head + 1);
454 void *p = auth + 1; 447 void *p = auth + 1;
455 struct ceph_x_challenge_blob tmp; 448 void *enc_buf = xi->auth_authorizer.enc_buf;
456 char tmp_enc[40]; 449 struct ceph_x_challenge_blob *blob = enc_buf +
450 ceph_x_encrypt_offset();
457 u64 *u; 451 u64 *u;
458 452
459 if (p > end) 453 if (p > end)
@@ -464,16 +458,16 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
464 458
465 /* encrypt and hash */ 459 /* encrypt and hash */
466 get_random_bytes(&auth->client_challenge, sizeof(u64)); 460 get_random_bytes(&auth->client_challenge, sizeof(u64));
467 tmp.client_challenge = auth->client_challenge; 461 blob->client_challenge = auth->client_challenge;
468 tmp.server_challenge = cpu_to_le64(xi->server_challenge); 462 blob->server_challenge = cpu_to_le64(xi->server_challenge);
469 ret = ceph_x_encrypt(&xi->secret, &tmp, sizeof(tmp), 463 ret = ceph_x_encrypt(&xi->secret, enc_buf, CEPHX_AU_ENC_BUF_LEN,
470 tmp_enc, sizeof(tmp_enc)); 464 sizeof(*blob));
471 if (ret < 0) 465 if (ret < 0)
472 return ret; 466 return ret;
473 467
474 auth->struct_v = 1; 468 auth->struct_v = 1;
475 auth->key = 0; 469 auth->key = 0;
476 for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) 470 for (u = (u64 *)enc_buf; u + 1 <= (u64 *)(enc_buf + ret); u++)
477 auth->key ^= *(__le64 *)u; 471 auth->key ^= *(__le64 *)u;
478 dout(" server_challenge %llx client_challenge %llx key %llx\n", 472 dout(" server_challenge %llx client_challenge %llx key %llx\n",
479 xi->server_challenge, le64_to_cpu(auth->client_challenge), 473 xi->server_challenge, le64_to_cpu(auth->client_challenge),
@@ -600,8 +594,8 @@ static int ceph_x_create_authorizer(
600 auth->authorizer = (struct ceph_authorizer *) au; 594 auth->authorizer = (struct ceph_authorizer *) au;
601 auth->authorizer_buf = au->buf->vec.iov_base; 595 auth->authorizer_buf = au->buf->vec.iov_base;
602 auth->authorizer_buf_len = au->buf->vec.iov_len; 596 auth->authorizer_buf_len = au->buf->vec.iov_len;
603 auth->authorizer_reply_buf = au->reply_buf; 597 auth->authorizer_reply_buf = au->enc_buf;
604 auth->authorizer_reply_buf_len = sizeof (au->reply_buf); 598 auth->authorizer_reply_buf_len = CEPHX_AU_ENC_BUF_LEN;
605 auth->sign_message = ac->ops->sign_message; 599 auth->sign_message = ac->ops->sign_message;
606 auth->check_message_signature = ac->ops->check_message_signature; 600 auth->check_message_signature = ac->ops->check_message_signature;
607 601
@@ -629,27 +623,25 @@ static int ceph_x_update_authorizer(
629} 623}
630 624
631static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, 625static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
632 struct ceph_authorizer *a, size_t len) 626 struct ceph_authorizer *a)
633{ 627{
634 struct ceph_x_authorizer *au = (void *)a; 628 struct ceph_x_authorizer *au = (void *)a;
635 int ret = 0; 629 void *p = au->enc_buf;
636 struct ceph_x_authorize_reply reply; 630 struct ceph_x_authorize_reply *reply = p + ceph_x_encrypt_offset();
637 void *preply = &reply; 631 int ret;
638 void *p = au->reply_buf;
639 void *end = p + sizeof(au->reply_buf);
640 632
641 ret = ceph_x_decrypt(&au->session_key, &p, end, &preply, sizeof(reply)); 633 ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN);
642 if (ret < 0) 634 if (ret < 0)
643 return ret; 635 return ret;
644 if (ret != sizeof(reply)) 636 if (ret != sizeof(*reply))
645 return -EPERM; 637 return -EPERM;
646 638
647 if (au->nonce + 1 != le64_to_cpu(reply.nonce_plus_one)) 639 if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one))
648 ret = -EPERM; 640 ret = -EPERM;
649 else 641 else
650 ret = 0; 642 ret = 0;
651 dout("verify_authorizer_reply nonce %llx got %llx ret %d\n", 643 dout("verify_authorizer_reply nonce %llx got %llx ret %d\n",
652 au->nonce, le64_to_cpu(reply.nonce_plus_one), ret); 644 au->nonce, le64_to_cpu(reply->nonce_plus_one), ret);
653 return ret; 645 return ret;
654} 646}
655 647
@@ -704,35 +696,48 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
704 invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH); 696 invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH);
705} 697}
706 698
707static int calcu_signature(struct ceph_x_authorizer *au, 699static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg,
708 struct ceph_msg *msg, __le64 *sig) 700 __le64 *psig)
709{ 701{
702 void *enc_buf = au->enc_buf;
703 struct {
704 __le32 len;
705 __le32 header_crc;
706 __le32 front_crc;
707 __le32 middle_crc;
708 __le32 data_crc;
709 } __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
710 int ret; 710 int ret;
711 char tmp_enc[40]; 711
712 __le32 tmp[5] = { 712 sigblock->len = cpu_to_le32(4*sizeof(u32));
713 cpu_to_le32(16), msg->hdr.crc, msg->footer.front_crc, 713 sigblock->header_crc = msg->hdr.crc;
714 msg->footer.middle_crc, msg->footer.data_crc, 714 sigblock->front_crc = msg->footer.front_crc;
715 }; 715 sigblock->middle_crc = msg->footer.middle_crc;
716 ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp), 716 sigblock->data_crc = msg->footer.data_crc;
717 tmp_enc, sizeof(tmp_enc)); 717 ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN,
718 sizeof(*sigblock));
718 if (ret < 0) 719 if (ret < 0)
719 return ret; 720 return ret;
720 *sig = *(__le64*)(tmp_enc + 4); 721
722 *psig = *(__le64 *)(enc_buf + sizeof(u32));
721 return 0; 723 return 0;
722} 724}
723 725
724static int ceph_x_sign_message(struct ceph_auth_handshake *auth, 726static int ceph_x_sign_message(struct ceph_auth_handshake *auth,
725 struct ceph_msg *msg) 727 struct ceph_msg *msg)
726{ 728{
729 __le64 sig;
727 int ret; 730 int ret;
728 731
729 if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN)) 732 if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
730 return 0; 733 return 0;
731 734
732 ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, 735 ret = calc_signature((struct ceph_x_authorizer *)auth->authorizer,
733 msg, &msg->footer.sig); 736 msg, &sig);
734 if (ret < 0) 737 if (ret)
735 return ret; 738 return ret;
739
740 msg->footer.sig = sig;
736 msg->footer.flags |= CEPH_MSG_FOOTER_SIGNED; 741 msg->footer.flags |= CEPH_MSG_FOOTER_SIGNED;
737 return 0; 742 return 0;
738} 743}
@@ -746,9 +751,9 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
746 if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN)) 751 if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
747 return 0; 752 return 0;
748 753
749 ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, 754 ret = calc_signature((struct ceph_x_authorizer *)auth->authorizer,
750 msg, &sig_check); 755 msg, &sig_check);
751 if (ret < 0) 756 if (ret)
752 return ret; 757 return ret;
753 if (sig_check == msg->footer.sig) 758 if (sig_check == msg->footer.sig)
754 return 0; 759 return 0;
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 21a5af904bae..48e9ad41bd2a 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -24,6 +24,7 @@ struct ceph_x_ticket_handler {
24 unsigned long renew_after, expires; 24 unsigned long renew_after, expires;
25}; 25};
26 26
27#define CEPHX_AU_ENC_BUF_LEN 128 /* big enough for encrypted blob */
27 28
28struct ceph_x_authorizer { 29struct ceph_x_authorizer {
29 struct ceph_authorizer base; 30 struct ceph_authorizer base;
@@ -32,7 +33,7 @@ struct ceph_x_authorizer {
32 unsigned int service; 33 unsigned int service;
33 u64 nonce; 34 u64 nonce;
34 u64 secret_id; 35 u64 secret_id;
35 char reply_buf[128]; /* big enough for encrypted blob */ 36 char enc_buf[CEPHX_AU_ENC_BUF_LEN] __aligned(8);
36}; 37};
37 38
38struct ceph_x_info { 39struct ceph_x_info {
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index a421e905331a..130ab407c5ec 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -17,10 +17,12 @@
17# include <linux/kernel.h> 17# include <linux/kernel.h>
18# include <linux/crush/crush.h> 18# include <linux/crush/crush.h>
19# include <linux/crush/hash.h> 19# include <linux/crush/hash.h>
20# include <linux/crush/mapper.h>
20#else 21#else
21# include "crush_compat.h" 22# include "crush_compat.h"
22# include "crush.h" 23# include "crush.h"
23# include "hash.h" 24# include "hash.h"
25# include "mapper.h"
24#endif 26#endif
25#include "crush_ln_table.h" 27#include "crush_ln_table.h"
26 28
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index db2847ac5f12..3949ce70be07 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -13,14 +13,60 @@
13#include <linux/ceph/decode.h> 13#include <linux/ceph/decode.h>
14#include "crypto.h" 14#include "crypto.h"
15 15
16/*
17 * Set ->key and ->tfm. The rest of the key should be filled in before
18 * this function is called.
19 */
20static int set_secret(struct ceph_crypto_key *key, void *buf)
21{
22 unsigned int noio_flag;
23 int ret;
24
25 key->key = NULL;
26 key->tfm = NULL;
27
28 switch (key->type) {
29 case CEPH_CRYPTO_NONE:
30 return 0; /* nothing to do */
31 case CEPH_CRYPTO_AES:
32 break;
33 default:
34 return -ENOTSUPP;
35 }
36
37 WARN_ON(!key->len);
38 key->key = kmemdup(buf, key->len, GFP_NOIO);
39 if (!key->key) {
40 ret = -ENOMEM;
41 goto fail;
42 }
43
44 /* crypto_alloc_skcipher() allocates with GFP_KERNEL */
45 noio_flag = memalloc_noio_save();
46 key->tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
47 memalloc_noio_restore(noio_flag);
48 if (IS_ERR(key->tfm)) {
49 ret = PTR_ERR(key->tfm);
50 key->tfm = NULL;
51 goto fail;
52 }
53
54 ret = crypto_skcipher_setkey(key->tfm, key->key, key->len);
55 if (ret)
56 goto fail;
57
58 return 0;
59
60fail:
61 ceph_crypto_key_destroy(key);
62 return ret;
63}
64
16int ceph_crypto_key_clone(struct ceph_crypto_key *dst, 65int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
17 const struct ceph_crypto_key *src) 66 const struct ceph_crypto_key *src)
18{ 67{
19 memcpy(dst, src, sizeof(struct ceph_crypto_key)); 68 memcpy(dst, src, sizeof(struct ceph_crypto_key));
20 dst->key = kmemdup(src->key, src->len, GFP_NOFS); 69 return set_secret(dst, src->key);
21 if (!dst->key)
22 return -ENOMEM;
23 return 0;
24} 70}
25 71
26int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) 72int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
@@ -37,16 +83,16 @@ int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
37 83
38int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end) 84int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end)
39{ 85{
86 int ret;
87
40 ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad); 88 ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad);
41 key->type = ceph_decode_16(p); 89 key->type = ceph_decode_16(p);
42 ceph_decode_copy(p, &key->created, sizeof(key->created)); 90 ceph_decode_copy(p, &key->created, sizeof(key->created));
43 key->len = ceph_decode_16(p); 91 key->len = ceph_decode_16(p);
44 ceph_decode_need(p, end, key->len, bad); 92 ceph_decode_need(p, end, key->len, bad);
45 key->key = kmalloc(key->len, GFP_NOFS); 93 ret = set_secret(key, *p);
46 if (!key->key) 94 *p += key->len;
47 return -ENOMEM; 95 return ret;
48 ceph_decode_copy(p, key->key, key->len);
49 return 0;
50 96
51bad: 97bad:
52 dout("failed to decode crypto key\n"); 98 dout("failed to decode crypto key\n");
@@ -80,9 +126,14 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey)
80 return 0; 126 return 0;
81} 127}
82 128
83static struct crypto_skcipher *ceph_crypto_alloc_cipher(void) 129void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
84{ 130{
85 return crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); 131 if (key) {
132 kfree(key->key);
133 key->key = NULL;
134 crypto_free_skcipher(key->tfm);
135 key->tfm = NULL;
136 }
86} 137}
87 138
88static const u8 *aes_iv = (u8 *)CEPH_AES_IV; 139static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
@@ -157,372 +208,82 @@ static void teardown_sgtable(struct sg_table *sgt)
157 sg_free_table(sgt); 208 sg_free_table(sgt);
158} 209}
159 210
160static int ceph_aes_encrypt(const void *key, int key_len, 211static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt,
161 void *dst, size_t *dst_len, 212 void *buf, int buf_len, int in_len, int *pout_len)
162 const void *src, size_t src_len)
163{ 213{
164 struct scatterlist sg_in[2], prealloc_sg; 214 SKCIPHER_REQUEST_ON_STACK(req, key->tfm);
165 struct sg_table sg_out; 215 struct sg_table sgt;
166 struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); 216 struct scatterlist prealloc_sg;
167 SKCIPHER_REQUEST_ON_STACK(req, tfm);
168 int ret;
169 char iv[AES_BLOCK_SIZE]; 217 char iv[AES_BLOCK_SIZE];
170 size_t zero_padding = (0x10 - (src_len & 0x0f)); 218 int pad_byte = AES_BLOCK_SIZE - (in_len & (AES_BLOCK_SIZE - 1));
171 char pad[16]; 219 int crypt_len = encrypt ? in_len + pad_byte : in_len;
172
173 if (IS_ERR(tfm))
174 return PTR_ERR(tfm);
175
176 memset(pad, zero_padding, zero_padding);
177
178 *dst_len = src_len + zero_padding;
179
180 sg_init_table(sg_in, 2);
181 sg_set_buf(&sg_in[0], src, src_len);
182 sg_set_buf(&sg_in[1], pad, zero_padding);
183 ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len);
184 if (ret)
185 goto out_tfm;
186
187 crypto_skcipher_setkey((void *)tfm, key, key_len);
188 memcpy(iv, aes_iv, AES_BLOCK_SIZE);
189
190 skcipher_request_set_tfm(req, tfm);
191 skcipher_request_set_callback(req, 0, NULL, NULL);
192 skcipher_request_set_crypt(req, sg_in, sg_out.sgl,
193 src_len + zero_padding, iv);
194
195 /*
196 print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
197 key, key_len, 1);
198 print_hex_dump(KERN_ERR, "enc src: ", DUMP_PREFIX_NONE, 16, 1,
199 src, src_len, 1);
200 print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
201 pad, zero_padding, 1);
202 */
203 ret = crypto_skcipher_encrypt(req);
204 skcipher_request_zero(req);
205 if (ret < 0) {
206 pr_err("ceph_aes_crypt failed %d\n", ret);
207 goto out_sg;
208 }
209 /*
210 print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1,
211 dst, *dst_len, 1);
212 */
213
214out_sg:
215 teardown_sgtable(&sg_out);
216out_tfm:
217 crypto_free_skcipher(tfm);
218 return ret;
219}
220
221static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
222 size_t *dst_len,
223 const void *src1, size_t src1_len,
224 const void *src2, size_t src2_len)
225{
226 struct scatterlist sg_in[3], prealloc_sg;
227 struct sg_table sg_out;
228 struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
229 SKCIPHER_REQUEST_ON_STACK(req, tfm);
230 int ret; 220 int ret;
231 char iv[AES_BLOCK_SIZE];
232 size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f));
233 char pad[16];
234 221
235 if (IS_ERR(tfm)) 222 WARN_ON(crypt_len > buf_len);
236 return PTR_ERR(tfm); 223 if (encrypt)
237 224 memset(buf + in_len, pad_byte, pad_byte);
238 memset(pad, zero_padding, zero_padding); 225 ret = setup_sgtable(&sgt, &prealloc_sg, buf, crypt_len);
239
240 *dst_len = src1_len + src2_len + zero_padding;
241
242 sg_init_table(sg_in, 3);
243 sg_set_buf(&sg_in[0], src1, src1_len);
244 sg_set_buf(&sg_in[1], src2, src2_len);
245 sg_set_buf(&sg_in[2], pad, zero_padding);
246 ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len);
247 if (ret) 226 if (ret)
248 goto out_tfm; 227 return ret;
249
250 crypto_skcipher_setkey((void *)tfm, key, key_len);
251 memcpy(iv, aes_iv, AES_BLOCK_SIZE);
252
253 skcipher_request_set_tfm(req, tfm);
254 skcipher_request_set_callback(req, 0, NULL, NULL);
255 skcipher_request_set_crypt(req, sg_in, sg_out.sgl,
256 src1_len + src2_len + zero_padding, iv);
257
258 /*
259 print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
260 key, key_len, 1);
261 print_hex_dump(KERN_ERR, "enc src1: ", DUMP_PREFIX_NONE, 16, 1,
262 src1, src1_len, 1);
263 print_hex_dump(KERN_ERR, "enc src2: ", DUMP_PREFIX_NONE, 16, 1,
264 src2, src2_len, 1);
265 print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
266 pad, zero_padding, 1);
267 */
268 ret = crypto_skcipher_encrypt(req);
269 skcipher_request_zero(req);
270 if (ret < 0) {
271 pr_err("ceph_aes_crypt2 failed %d\n", ret);
272 goto out_sg;
273 }
274 /*
275 print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1,
276 dst, *dst_len, 1);
277 */
278
279out_sg:
280 teardown_sgtable(&sg_out);
281out_tfm:
282 crypto_free_skcipher(tfm);
283 return ret;
284}
285
286static int ceph_aes_decrypt(const void *key, int key_len,
287 void *dst, size_t *dst_len,
288 const void *src, size_t src_len)
289{
290 struct sg_table sg_in;
291 struct scatterlist sg_out[2], prealloc_sg;
292 struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
293 SKCIPHER_REQUEST_ON_STACK(req, tfm);
294 char pad[16];
295 char iv[AES_BLOCK_SIZE];
296 int ret;
297 int last_byte;
298
299 if (IS_ERR(tfm))
300 return PTR_ERR(tfm);
301
302 sg_init_table(sg_out, 2);
303 sg_set_buf(&sg_out[0], dst, *dst_len);
304 sg_set_buf(&sg_out[1], pad, sizeof(pad));
305 ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len);
306 if (ret)
307 goto out_tfm;
308 228
309 crypto_skcipher_setkey((void *)tfm, key, key_len);
310 memcpy(iv, aes_iv, AES_BLOCK_SIZE); 229 memcpy(iv, aes_iv, AES_BLOCK_SIZE);
311 230 skcipher_request_set_tfm(req, key->tfm);
312 skcipher_request_set_tfm(req, tfm);
313 skcipher_request_set_callback(req, 0, NULL, NULL); 231 skcipher_request_set_callback(req, 0, NULL, NULL);
314 skcipher_request_set_crypt(req, sg_in.sgl, sg_out, 232 skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv);
315 src_len, iv);
316 233
317 /* 234 /*
318 print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, 235 print_hex_dump(KERN_ERR, "key: ", DUMP_PREFIX_NONE, 16, 1,
319 key, key_len, 1); 236 key->key, key->len, 1);
320 print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, 237 print_hex_dump(KERN_ERR, " in: ", DUMP_PREFIX_NONE, 16, 1,
321 src, src_len, 1); 238 buf, crypt_len, 1);
322 */ 239 */
323 ret = crypto_skcipher_decrypt(req); 240 if (encrypt)
324 skcipher_request_zero(req); 241 ret = crypto_skcipher_encrypt(req);
325 if (ret < 0) {
326 pr_err("ceph_aes_decrypt failed %d\n", ret);
327 goto out_sg;
328 }
329
330 if (src_len <= *dst_len)
331 last_byte = ((char *)dst)[src_len - 1];
332 else 242 else
333 last_byte = pad[src_len - *dst_len - 1]; 243 ret = crypto_skcipher_decrypt(req);
334 if (last_byte <= 16 && src_len >= last_byte) {
335 *dst_len = src_len - last_byte;
336 } else {
337 pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n",
338 last_byte, (int)src_len);
339 return -EPERM; /* bad padding */
340 }
341 /*
342 print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1,
343 dst, *dst_len, 1);
344 */
345
346out_sg:
347 teardown_sgtable(&sg_in);
348out_tfm:
349 crypto_free_skcipher(tfm);
350 return ret;
351}
352
353static int ceph_aes_decrypt2(const void *key, int key_len,
354 void *dst1, size_t *dst1_len,
355 void *dst2, size_t *dst2_len,
356 const void *src, size_t src_len)
357{
358 struct sg_table sg_in;
359 struct scatterlist sg_out[3], prealloc_sg;
360 struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
361 SKCIPHER_REQUEST_ON_STACK(req, tfm);
362 char pad[16];
363 char iv[AES_BLOCK_SIZE];
364 int ret;
365 int last_byte;
366
367 if (IS_ERR(tfm))
368 return PTR_ERR(tfm);
369
370 sg_init_table(sg_out, 3);
371 sg_set_buf(&sg_out[0], dst1, *dst1_len);
372 sg_set_buf(&sg_out[1], dst2, *dst2_len);
373 sg_set_buf(&sg_out[2], pad, sizeof(pad));
374 ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len);
375 if (ret)
376 goto out_tfm;
377
378 crypto_skcipher_setkey((void *)tfm, key, key_len);
379 memcpy(iv, aes_iv, AES_BLOCK_SIZE);
380
381 skcipher_request_set_tfm(req, tfm);
382 skcipher_request_set_callback(req, 0, NULL, NULL);
383 skcipher_request_set_crypt(req, sg_in.sgl, sg_out,
384 src_len, iv);
385
386 /*
387 print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
388 key, key_len, 1);
389 print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
390 src, src_len, 1);
391 */
392 ret = crypto_skcipher_decrypt(req);
393 skcipher_request_zero(req); 244 skcipher_request_zero(req);
394 if (ret < 0) { 245 if (ret) {
395 pr_err("ceph_aes_decrypt failed %d\n", ret); 246 pr_err("%s %scrypt failed: %d\n", __func__,
396 goto out_sg; 247 encrypt ? "en" : "de", ret);
397 } 248 goto out_sgt;
398
399 if (src_len <= *dst1_len)
400 last_byte = ((char *)dst1)[src_len - 1];
401 else if (src_len <= *dst1_len + *dst2_len)
402 last_byte = ((char *)dst2)[src_len - *dst1_len - 1];
403 else
404 last_byte = pad[src_len - *dst1_len - *dst2_len - 1];
405 if (last_byte <= 16 && src_len >= last_byte) {
406 src_len -= last_byte;
407 } else {
408 pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n",
409 last_byte, (int)src_len);
410 return -EPERM; /* bad padding */
411 }
412
413 if (src_len < *dst1_len) {
414 *dst1_len = src_len;
415 *dst2_len = 0;
416 } else {
417 *dst2_len = src_len - *dst1_len;
418 } 249 }
419 /* 250 /*
420 print_hex_dump(KERN_ERR, "dec out1: ", DUMP_PREFIX_NONE, 16, 1, 251 print_hex_dump(KERN_ERR, "out: ", DUMP_PREFIX_NONE, 16, 1,
421 dst1, *dst1_len, 1); 252 buf, crypt_len, 1);
422 print_hex_dump(KERN_ERR, "dec out2: ", DUMP_PREFIX_NONE, 16, 1,
423 dst2, *dst2_len, 1);
424 */ 253 */
425 254
426out_sg: 255 if (encrypt) {
427 teardown_sgtable(&sg_in); 256 *pout_len = crypt_len;
428out_tfm: 257 } else {
429 crypto_free_skcipher(tfm); 258 pad_byte = *(char *)(buf + in_len - 1);
430 return ret; 259 if (pad_byte > 0 && pad_byte <= AES_BLOCK_SIZE &&
431} 260 in_len >= pad_byte) {
432 261 *pout_len = in_len - pad_byte;
433 262 } else {
434int ceph_decrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, 263 pr_err("%s got bad padding %d on in_len %d\n",
435 const void *src, size_t src_len) 264 __func__, pad_byte, in_len);
436{ 265 ret = -EPERM;
437 switch (secret->type) { 266 goto out_sgt;
438 case CEPH_CRYPTO_NONE:
439 if (*dst_len < src_len)
440 return -ERANGE;
441 memcpy(dst, src, src_len);
442 *dst_len = src_len;
443 return 0;
444
445 case CEPH_CRYPTO_AES:
446 return ceph_aes_decrypt(secret->key, secret->len, dst,
447 dst_len, src, src_len);
448
449 default:
450 return -EINVAL;
451 }
452}
453
454int ceph_decrypt2(struct ceph_crypto_key *secret,
455 void *dst1, size_t *dst1_len,
456 void *dst2, size_t *dst2_len,
457 const void *src, size_t src_len)
458{
459 size_t t;
460
461 switch (secret->type) {
462 case CEPH_CRYPTO_NONE:
463 if (*dst1_len + *dst2_len < src_len)
464 return -ERANGE;
465 t = min(*dst1_len, src_len);
466 memcpy(dst1, src, t);
467 *dst1_len = t;
468 src += t;
469 src_len -= t;
470 if (src_len) {
471 t = min(*dst2_len, src_len);
472 memcpy(dst2, src, t);
473 *dst2_len = t;
474 } 267 }
475 return 0;
476
477 case CEPH_CRYPTO_AES:
478 return ceph_aes_decrypt2(secret->key, secret->len,
479 dst1, dst1_len, dst2, dst2_len,
480 src, src_len);
481
482 default:
483 return -EINVAL;
484 } 268 }
485}
486
487int ceph_encrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
488 const void *src, size_t src_len)
489{
490 switch (secret->type) {
491 case CEPH_CRYPTO_NONE:
492 if (*dst_len < src_len)
493 return -ERANGE;
494 memcpy(dst, src, src_len);
495 *dst_len = src_len;
496 return 0;
497 269
498 case CEPH_CRYPTO_AES: 270out_sgt:
499 return ceph_aes_encrypt(secret->key, secret->len, dst, 271 teardown_sgtable(&sgt);
500 dst_len, src, src_len); 272 return ret;
501
502 default:
503 return -EINVAL;
504 }
505} 273}
506 274
507int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, 275int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt,
508 const void *src1, size_t src1_len, 276 void *buf, int buf_len, int in_len, int *pout_len)
509 const void *src2, size_t src2_len)
510{ 277{
511 switch (secret->type) { 278 switch (key->type) {
512 case CEPH_CRYPTO_NONE: 279 case CEPH_CRYPTO_NONE:
513 if (*dst_len < src1_len + src2_len) 280 *pout_len = in_len;
514 return -ERANGE;
515 memcpy(dst, src1, src1_len);
516 memcpy(dst + src1_len, src2, src2_len);
517 *dst_len = src1_len + src2_len;
518 return 0; 281 return 0;
519
520 case CEPH_CRYPTO_AES: 282 case CEPH_CRYPTO_AES:
521 return ceph_aes_encrypt2(secret->key, secret->len, dst, dst_len, 283 return ceph_aes_crypt(key, encrypt, buf, buf_len, in_len,
522 src1, src1_len, src2, src2_len); 284 pout_len);
523
524 default: 285 default:
525 return -EINVAL; 286 return -ENOTSUPP;
526 } 287 }
527} 288}
528 289
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 2e9cab09f37b..58d83aa7740f 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -12,37 +12,19 @@ struct ceph_crypto_key {
12 struct ceph_timespec created; 12 struct ceph_timespec created;
13 int len; 13 int len;
14 void *key; 14 void *key;
15 struct crypto_skcipher *tfm;
15}; 16};
16 17
17static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
18{
19 if (key) {
20 kfree(key->key);
21 key->key = NULL;
22 }
23}
24
25int ceph_crypto_key_clone(struct ceph_crypto_key *dst, 18int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
26 const struct ceph_crypto_key *src); 19 const struct ceph_crypto_key *src);
27int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end); 20int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end);
28int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end); 21int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end);
29int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in); 22int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in);
23void ceph_crypto_key_destroy(struct ceph_crypto_key *key);
30 24
31/* crypto.c */ 25/* crypto.c */
32int ceph_decrypt(struct ceph_crypto_key *secret, 26int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt,
33 void *dst, size_t *dst_len, 27 void *buf, int buf_len, int in_len, int *pout_len);
34 const void *src, size_t src_len);
35int ceph_encrypt(struct ceph_crypto_key *secret,
36 void *dst, size_t *dst_len,
37 const void *src, size_t src_len);
38int ceph_decrypt2(struct ceph_crypto_key *secret,
39 void *dst1, size_t *dst1_len,
40 void *dst2, size_t *dst2_len,
41 const void *src, size_t src_len);
42int ceph_encrypt2(struct ceph_crypto_key *secret,
43 void *dst, size_t *dst_len,
44 const void *src1, size_t src1_len,
45 const void *src2, size_t src2_len);
46int ceph_crypto_init(void); 28int ceph_crypto_init(void);
47void ceph_crypto_shutdown(void); 29void ceph_crypto_shutdown(void);
48 30
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index a5502898ea33..770c52701efa 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1393,15 +1393,9 @@ static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection
1393 return NULL; 1393 return NULL;
1394 } 1394 }
1395 1395
1396 /* Can't hold the mutex while getting authorizer */
1397 mutex_unlock(&con->mutex);
1398 auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry); 1396 auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
1399 mutex_lock(&con->mutex);
1400
1401 if (IS_ERR(auth)) 1397 if (IS_ERR(auth))
1402 return auth; 1398 return auth;
1403 if (con->state != CON_STATE_NEGOTIATING)
1404 return ERR_PTR(-EAGAIN);
1405 1399
1406 con->auth_reply_buf = auth->authorizer_reply_buf; 1400 con->auth_reply_buf = auth->authorizer_reply_buf;
1407 con->auth_reply_buf_len = auth->authorizer_reply_buf_len; 1401 con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
@@ -2027,6 +2021,19 @@ static int process_connect(struct ceph_connection *con)
2027 2021
2028 dout("process_connect on %p tag %d\n", con, (int)con->in_tag); 2022 dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
2029 2023
2024 if (con->auth_reply_buf) {
2025 /*
2026 * Any connection that defines ->get_authorizer()
2027 * should also define ->verify_authorizer_reply().
2028 * See get_connect_authorizer().
2029 */
2030 ret = con->ops->verify_authorizer_reply(con);
2031 if (ret < 0) {
2032 con->error_msg = "bad authorize reply";
2033 return ret;
2034 }
2035 }
2036
2030 switch (con->in_reply.tag) { 2037 switch (con->in_reply.tag) {
2031 case CEPH_MSGR_TAG_FEATURES: 2038 case CEPH_MSGR_TAG_FEATURES:
2032 pr_err("%s%lld %s feature set mismatch," 2039 pr_err("%s%lld %s feature set mismatch,"
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index a8effc8b7280..29a0ef351c5e 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1028,21 +1028,21 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
1028 err = -ENOMEM; 1028 err = -ENOMEM;
1029 monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, 1029 monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK,
1030 sizeof(struct ceph_mon_subscribe_ack), 1030 sizeof(struct ceph_mon_subscribe_ack),
1031 GFP_NOFS, true); 1031 GFP_KERNEL, true);
1032 if (!monc->m_subscribe_ack) 1032 if (!monc->m_subscribe_ack)
1033 goto out_auth; 1033 goto out_auth;
1034 1034
1035 monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 128, GFP_NOFS, 1035 monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 128,
1036 true); 1036 GFP_KERNEL, true);
1037 if (!monc->m_subscribe) 1037 if (!monc->m_subscribe)
1038 goto out_subscribe_ack; 1038 goto out_subscribe_ack;
1039 1039
1040 monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS, 1040 monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096,
1041 true); 1041 GFP_KERNEL, true);
1042 if (!monc->m_auth_reply) 1042 if (!monc->m_auth_reply)
1043 goto out_subscribe; 1043 goto out_subscribe;
1044 1044
1045 monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS, true); 1045 monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_KERNEL, true);
1046 monc->pending_auth = 0; 1046 monc->pending_auth = 0;
1047 if (!monc->m_auth) 1047 if (!monc->m_auth)
1048 goto out_auth_reply; 1048 goto out_auth_reply;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index e6ae15bc41b7..842f049abb86 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -460,7 +460,7 @@ static void request_init(struct ceph_osd_request *req)
460 460
461 kref_init(&req->r_kref); 461 kref_init(&req->r_kref);
462 init_completion(&req->r_completion); 462 init_completion(&req->r_completion);
463 init_completion(&req->r_safe_completion); 463 init_completion(&req->r_done_completion);
464 RB_CLEAR_NODE(&req->r_node); 464 RB_CLEAR_NODE(&req->r_node);
465 RB_CLEAR_NODE(&req->r_mc_node); 465 RB_CLEAR_NODE(&req->r_mc_node);
466 INIT_LIST_HEAD(&req->r_unsafe_item); 466 INIT_LIST_HEAD(&req->r_unsafe_item);
@@ -1725,7 +1725,7 @@ static void submit_request(struct ceph_osd_request *req, bool wrlocked)
1725 __submit_request(req, wrlocked); 1725 __submit_request(req, wrlocked);
1726} 1726}
1727 1727
1728static void __finish_request(struct ceph_osd_request *req) 1728static void finish_request(struct ceph_osd_request *req)
1729{ 1729{
1730 struct ceph_osd_client *osdc = req->r_osdc; 1730 struct ceph_osd_client *osdc = req->r_osdc;
1731 struct ceph_osd *osd = req->r_osd; 1731 struct ceph_osd *osd = req->r_osd;
@@ -1747,12 +1747,6 @@ static void __finish_request(struct ceph_osd_request *req)
1747 ceph_msg_revoke_incoming(req->r_reply); 1747 ceph_msg_revoke_incoming(req->r_reply);
1748} 1748}
1749 1749
1750static void finish_request(struct ceph_osd_request *req)
1751{
1752 __finish_request(req);
1753 ceph_osdc_put_request(req);
1754}
1755
1756static void __complete_request(struct ceph_osd_request *req) 1750static void __complete_request(struct ceph_osd_request *req)
1757{ 1751{
1758 if (req->r_callback) 1752 if (req->r_callback)
@@ -1770,9 +1764,9 @@ static void complete_request(struct ceph_osd_request *req, int err)
1770 dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err); 1764 dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err);
1771 1765
1772 req->r_result = err; 1766 req->r_result = err;
1773 __finish_request(req); 1767 finish_request(req);
1774 __complete_request(req); 1768 __complete_request(req);
1775 complete_all(&req->r_safe_completion); 1769 complete_all(&req->r_done_completion);
1776 ceph_osdc_put_request(req); 1770 ceph_osdc_put_request(req);
1777} 1771}
1778 1772
@@ -1798,6 +1792,8 @@ static void cancel_request(struct ceph_osd_request *req)
1798 1792
1799 cancel_map_check(req); 1793 cancel_map_check(req);
1800 finish_request(req); 1794 finish_request(req);
1795 complete_all(&req->r_done_completion);
1796 ceph_osdc_put_request(req);
1801} 1797}
1802 1798
1803static void check_pool_dne(struct ceph_osd_request *req) 1799static void check_pool_dne(struct ceph_osd_request *req)
@@ -2808,12 +2804,12 @@ static bool done_request(const struct ceph_osd_request *req,
2808 * ->r_unsafe_callback is set? yes no 2804 * ->r_unsafe_callback is set? yes no
2809 * 2805 *
2810 * first reply is OK (needed r_cb/r_completion, r_cb/r_completion, 2806 * first reply is OK (needed r_cb/r_completion, r_cb/r_completion,
2811 * any or needed/got safe) r_safe_completion r_safe_completion 2807 * any or needed/got safe) r_done_completion r_done_completion
2812 * 2808 *
2813 * first reply is unsafe r_unsafe_cb(true) (nothing) 2809 * first reply is unsafe r_unsafe_cb(true) (nothing)
2814 * 2810 *
2815 * when we get the safe reply r_unsafe_cb(false), r_cb/r_completion, 2811 * when we get the safe reply r_unsafe_cb(false), r_cb/r_completion,
2816 * r_safe_completion r_safe_completion 2812 * r_done_completion r_done_completion
2817 */ 2813 */
2818static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) 2814static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
2819{ 2815{
@@ -2915,7 +2911,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
2915 } 2911 }
2916 2912
2917 if (done_request(req, &m)) { 2913 if (done_request(req, &m)) {
2918 __finish_request(req); 2914 finish_request(req);
2919 if (req->r_linger) { 2915 if (req->r_linger) {
2920 WARN_ON(req->r_unsafe_callback); 2916 WARN_ON(req->r_unsafe_callback);
2921 dout("req %p tid %llu cb (locked)\n", req, req->r_tid); 2917 dout("req %p tid %llu cb (locked)\n", req, req->r_tid);
@@ -2934,8 +2930,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
2934 dout("req %p tid %llu cb\n", req, req->r_tid); 2930 dout("req %p tid %llu cb\n", req, req->r_tid);
2935 __complete_request(req); 2931 __complete_request(req);
2936 } 2932 }
2937 if (m.flags & CEPH_OSD_FLAG_ONDISK) 2933 complete_all(&req->r_done_completion);
2938 complete_all(&req->r_safe_completion);
2939 ceph_osdc_put_request(req); 2934 ceph_osdc_put_request(req);
2940 } else { 2935 } else {
2941 if (req->r_unsafe_callback) { 2936 if (req->r_unsafe_callback) {
@@ -3471,9 +3466,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
3471EXPORT_SYMBOL(ceph_osdc_start_request); 3466EXPORT_SYMBOL(ceph_osdc_start_request);
3472 3467
3473/* 3468/*
3474 * Unregister a registered request. The request is not completed (i.e. 3469 * Unregister a registered request. The request is not completed:
3475 * no callbacks or wakeups) - higher layers are supposed to know what 3470 * ->r_result isn't set and __complete_request() isn't called.
3476 * they are canceling.
3477 */ 3471 */
3478void ceph_osdc_cancel_request(struct ceph_osd_request *req) 3472void ceph_osdc_cancel_request(struct ceph_osd_request *req)
3479{ 3473{
@@ -3500,9 +3494,6 @@ static int wait_request_timeout(struct ceph_osd_request *req,
3500 if (left <= 0) { 3494 if (left <= 0) {
3501 left = left ?: -ETIMEDOUT; 3495 left = left ?: -ETIMEDOUT;
3502 ceph_osdc_cancel_request(req); 3496 ceph_osdc_cancel_request(req);
3503
3504 /* kludge - need to to wake ceph_osdc_sync() */
3505 complete_all(&req->r_safe_completion);
3506 } else { 3497 } else {
3507 left = req->r_result; /* completed */ 3498 left = req->r_result; /* completed */
3508 } 3499 }
@@ -3549,7 +3540,7 @@ again:
3549 up_read(&osdc->lock); 3540 up_read(&osdc->lock);
3550 dout("%s waiting on req %p tid %llu last_tid %llu\n", 3541 dout("%s waiting on req %p tid %llu last_tid %llu\n",
3551 __func__, req, req->r_tid, last_tid); 3542 __func__, req, req->r_tid, last_tid);
3552 wait_for_completion(&req->r_safe_completion); 3543 wait_for_completion(&req->r_done_completion);
3553 ceph_osdc_put_request(req); 3544 ceph_osdc_put_request(req);
3554 goto again; 3545 goto again;
3555 } 3546 }
@@ -4478,13 +4469,13 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
4478} 4469}
4479 4470
4480 4471
4481static int verify_authorizer_reply(struct ceph_connection *con, int len) 4472static int verify_authorizer_reply(struct ceph_connection *con)
4482{ 4473{
4483 struct ceph_osd *o = con->private; 4474 struct ceph_osd *o = con->private;
4484 struct ceph_osd_client *osdc = o->o_osdc; 4475 struct ceph_osd_client *osdc = o->o_osdc;
4485 struct ceph_auth_client *ac = osdc->client->monc.auth; 4476 struct ceph_auth_client *ac = osdc->client->monc.auth;
4486 4477
4487 return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len); 4478 return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer);
4488} 4479}
4489 4480
4490static int invalidate_authorizer(struct ceph_connection *con) 4481static int invalidate_authorizer(struct ceph_connection *con)
diff --git a/net/compat.c b/net/compat.c
index 1cd2ec046164..96c544b05b15 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -28,7 +28,7 @@
28#include <net/sock.h> 28#include <net/sock.h>
29#include <net/ip.h> 29#include <net/ip.h>
30#include <net/ipv6.h> 30#include <net/ipv6.h>
31#include <asm/uaccess.h> 31#include <linux/uaccess.h>
32#include <net/compat.h> 32#include <net/compat.h>
33 33
34int get_compat_msghdr(struct msghdr *kmsg, 34int get_compat_msghdr(struct msghdr *kmsg,
diff --git a/net/core/Makefile b/net/core/Makefile
index d6508c2ddca5..f6761b6e3b29 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
24obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o 24obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
25obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o 25obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
26obj-$(CONFIG_LWTUNNEL) += lwtunnel.o 26obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
27obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
27obj-$(CONFIG_DST_CACHE) += dst_cache.o 28obj-$(CONFIG_DST_CACHE) += dst_cache.o
28obj-$(CONFIG_HWBM) += hwbm.o 29obj-$(CONFIG_HWBM) += hwbm.o
29obj-$(CONFIG_NET_DEVLINK) += devlink.o 30obj-$(CONFIG_NET_DEVLINK) += devlink.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b7de71f8d5d3..662bea587165 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -36,7 +36,7 @@
36#include <linux/module.h> 36#include <linux/module.h>
37#include <linux/types.h> 37#include <linux/types.h>
38#include <linux/kernel.h> 38#include <linux/kernel.h>
39#include <asm/uaccess.h> 39#include <linux/uaccess.h>
40#include <linux/mm.h> 40#include <linux/mm.h>
41#include <linux/interrupt.h> 41#include <linux/interrupt.h>
42#include <linux/errno.h> 42#include <linux/errno.h>
@@ -165,6 +165,7 @@ done:
165 * __skb_try_recv_datagram - Receive a datagram skbuff 165 * __skb_try_recv_datagram - Receive a datagram skbuff
166 * @sk: socket 166 * @sk: socket
167 * @flags: MSG_ flags 167 * @flags: MSG_ flags
168 * @destructor: invoked under the receive lock on successful dequeue
168 * @peeked: returns non-zero if this packet has been seen before 169 * @peeked: returns non-zero if this packet has been seen before
169 * @off: an offset in bytes to peek skb from. Returns an offset 170 * @off: an offset in bytes to peek skb from. Returns an offset
170 * within an skb where data actually starts 171 * within an skb where data actually starts
@@ -197,6 +198,8 @@ done:
197 * the standard around please. 198 * the standard around please.
198 */ 199 */
199struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, 200struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
201 void (*destructor)(struct sock *sk,
202 struct sk_buff *skb),
200 int *peeked, int *off, int *err, 203 int *peeked, int *off, int *err,
201 struct sk_buff **last) 204 struct sk_buff **last)
202{ 205{
@@ -211,6 +214,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
211 if (error) 214 if (error)
212 goto no_packet; 215 goto no_packet;
213 216
217 *peeked = 0;
214 do { 218 do {
215 /* Again only user level code calls this function, so nothing 219 /* Again only user level code calls this function, so nothing
216 * interrupt level will suddenly eat the receive_queue. 220 * interrupt level will suddenly eat the receive_queue.
@@ -224,26 +228,28 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
224 spin_lock_irqsave(&queue->lock, cpu_flags); 228 spin_lock_irqsave(&queue->lock, cpu_flags);
225 skb_queue_walk(queue, skb) { 229 skb_queue_walk(queue, skb) {
226 *last = skb; 230 *last = skb;
227 *peeked = skb->peeked;
228 if (flags & MSG_PEEK) { 231 if (flags & MSG_PEEK) {
229 if (_off >= skb->len && (skb->len || _off || 232 if (_off >= skb->len && (skb->len || _off ||
230 skb->peeked)) { 233 skb->peeked)) {
231 _off -= skb->len; 234 _off -= skb->len;
232 continue; 235 continue;
233 } 236 }
234 237 if (!skb->len) {
235 skb = skb_set_peeked(skb); 238 skb = skb_set_peeked(skb);
236 error = PTR_ERR(skb); 239 if (IS_ERR(skb)) {
237 if (IS_ERR(skb)) { 240 error = PTR_ERR(skb);
238 spin_unlock_irqrestore(&queue->lock, 241 spin_unlock_irqrestore(&queue->lock,
239 cpu_flags); 242 cpu_flags);
240 goto no_packet; 243 goto no_packet;
244 }
241 } 245 }
242 246 *peeked = 1;
243 atomic_inc(&skb->users); 247 atomic_inc(&skb->users);
244 } else 248 } else {
245 __skb_unlink(skb, queue); 249 __skb_unlink(skb, queue);
246 250 if (destructor)
251 destructor(sk, skb);
252 }
247 spin_unlock_irqrestore(&queue->lock, cpu_flags); 253 spin_unlock_irqrestore(&queue->lock, cpu_flags);
248 *off = _off; 254 *off = _off;
249 return skb; 255 return skb;
@@ -262,6 +268,8 @@ no_packet:
262EXPORT_SYMBOL(__skb_try_recv_datagram); 268EXPORT_SYMBOL(__skb_try_recv_datagram);
263 269
264struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, 270struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
271 void (*destructor)(struct sock *sk,
272 struct sk_buff *skb),
265 int *peeked, int *off, int *err) 273 int *peeked, int *off, int *err)
266{ 274{
267 struct sk_buff *skb, *last; 275 struct sk_buff *skb, *last;
@@ -270,8 +278,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
270 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 278 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
271 279
272 do { 280 do {
273 skb = __skb_try_recv_datagram(sk, flags, peeked, off, err, 281 skb = __skb_try_recv_datagram(sk, flags, destructor, peeked,
274 &last); 282 off, err, &last);
275 if (skb) 283 if (skb)
276 return skb; 284 return skb;
277 285
@@ -290,7 +298,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
290 int peeked, off = 0; 298 int peeked, off = 0;
291 299
292 return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), 300 return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
293 &peeked, &off, err); 301 NULL, &peeked, &off, err);
294} 302}
295EXPORT_SYMBOL(skb_recv_datagram); 303EXPORT_SYMBOL(skb_recv_datagram);
296 304
@@ -323,6 +331,27 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
323} 331}
324EXPORT_SYMBOL(__skb_free_datagram_locked); 332EXPORT_SYMBOL(__skb_free_datagram_locked);
325 333
334int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
335 unsigned int flags)
336{
337 int err = 0;
338
339 if (flags & MSG_PEEK) {
340 err = -ENOENT;
341 spin_lock_bh(&sk->sk_receive_queue.lock);
342 if (skb == skb_peek(&sk->sk_receive_queue)) {
343 __skb_unlink(skb, &sk->sk_receive_queue);
344 atomic_dec(&skb->users);
345 err = 0;
346 }
347 spin_unlock_bh(&sk->sk_receive_queue.lock);
348 }
349
350 atomic_inc(&sk->sk_drops);
351 return err;
352}
353EXPORT_SYMBOL(__sk_queue_drop_skb);
354
326/** 355/**
327 * skb_kill_datagram - Free a datagram skbuff forcibly 356 * skb_kill_datagram - Free a datagram skbuff forcibly
328 * @sk: socket 357 * @sk: socket
@@ -346,23 +375,10 @@ EXPORT_SYMBOL(__skb_free_datagram_locked);
346 375
347int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) 376int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
348{ 377{
349 int err = 0; 378 int err = __sk_queue_drop_skb(sk, skb, flags);
350
351 if (flags & MSG_PEEK) {
352 err = -ENOENT;
353 spin_lock_bh(&sk->sk_receive_queue.lock);
354 if (skb == skb_peek(&sk->sk_receive_queue)) {
355 __skb_unlink(skb, &sk->sk_receive_queue);
356 atomic_dec(&skb->users);
357 err = 0;
358 }
359 spin_unlock_bh(&sk->sk_receive_queue.lock);
360 }
361 379
362 kfree_skb(skb); 380 kfree_skb(skb);
363 atomic_inc(&sk->sk_drops);
364 sk_mem_reclaim_partial(sk); 381 sk_mem_reclaim_partial(sk);
365
366 return err; 382 return err;
367} 383}
368EXPORT_SYMBOL(skb_kill_datagram); 384EXPORT_SYMBOL(skb_kill_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6666b28b6815..8db5a0b4b520 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -72,7 +72,7 @@
72 * - netif_rx() feedback 72 * - netif_rx() feedback
73 */ 73 */
74 74
75#include <asm/uaccess.h> 75#include <linux/uaccess.h>
76#include <linux/bitops.h> 76#include <linux/bitops.h>
77#include <linux/capability.h> 77#include <linux/capability.h>
78#include <linux/cpu.h> 78#include <linux/cpu.h>
@@ -139,7 +139,6 @@
139#include <linux/errqueue.h> 139#include <linux/errqueue.h>
140#include <linux/hrtimer.h> 140#include <linux/hrtimer.h>
141#include <linux/netfilter_ingress.h> 141#include <linux/netfilter_ingress.h>
142#include <linux/sctp.h>
143#include <linux/crash_dump.h> 142#include <linux/crash_dump.h>
144 143
145#include "net-sysfs.h" 144#include "net-sysfs.h"
@@ -1732,14 +1731,14 @@ EXPORT_SYMBOL(net_disable_timestamp);
1732 1731
1733static inline void net_timestamp_set(struct sk_buff *skb) 1732static inline void net_timestamp_set(struct sk_buff *skb)
1734{ 1733{
1735 skb->tstamp.tv64 = 0; 1734 skb->tstamp = 0;
1736 if (static_key_false(&netstamp_needed)) 1735 if (static_key_false(&netstamp_needed))
1737 __net_timestamp(skb); 1736 __net_timestamp(skb);
1738} 1737}
1739 1738
1740#define net_timestamp_check(COND, SKB) \ 1739#define net_timestamp_check(COND, SKB) \
1741 if (static_key_false(&netstamp_needed)) { \ 1740 if (static_key_false(&netstamp_needed)) { \
1742 if ((COND) && !(SKB)->tstamp.tv64) \ 1741 if ((COND) && !(SKB)->tstamp) \
1743 __net_timestamp(SKB); \ 1742 __net_timestamp(SKB); \
1744 } \ 1743 } \
1745 1744
@@ -1944,37 +1943,80 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1944 } 1943 }
1945} 1944}
1946 1945
1946int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
1947{
1948 if (dev->num_tc) {
1949 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1950 int i;
1951
1952 for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
1953 if ((txq - tc->offset) < tc->count)
1954 return i;
1955 }
1956
1957 return -1;
1958 }
1959
1960 return 0;
1961}
1962
1947#ifdef CONFIG_XPS 1963#ifdef CONFIG_XPS
1948static DEFINE_MUTEX(xps_map_mutex); 1964static DEFINE_MUTEX(xps_map_mutex);
1949#define xmap_dereference(P) \ 1965#define xmap_dereference(P) \
1950 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) 1966 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
1951 1967
1952static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, 1968static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
1953 int cpu, u16 index) 1969 int tci, u16 index)
1954{ 1970{
1955 struct xps_map *map = NULL; 1971 struct xps_map *map = NULL;
1956 int pos; 1972 int pos;
1957 1973
1958 if (dev_maps) 1974 if (dev_maps)
1959 map = xmap_dereference(dev_maps->cpu_map[cpu]); 1975 map = xmap_dereference(dev_maps->cpu_map[tci]);
1976 if (!map)
1977 return false;
1960 1978
1961 for (pos = 0; map && pos < map->len; pos++) { 1979 for (pos = map->len; pos--;) {
1962 if (map->queues[pos] == index) { 1980 if (map->queues[pos] != index)
1963 if (map->len > 1) { 1981 continue;
1964 map->queues[pos] = map->queues[--map->len]; 1982
1965 } else { 1983 if (map->len > 1) {
1966 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); 1984 map->queues[pos] = map->queues[--map->len];
1967 kfree_rcu(map, rcu);
1968 map = NULL;
1969 }
1970 break; 1985 break;
1971 } 1986 }
1987
1988 RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
1989 kfree_rcu(map, rcu);
1990 return false;
1991 }
1992
1993 return true;
1994}
1995
1996static bool remove_xps_queue_cpu(struct net_device *dev,
1997 struct xps_dev_maps *dev_maps,
1998 int cpu, u16 offset, u16 count)
1999{
2000 int num_tc = dev->num_tc ? : 1;
2001 bool active = false;
2002 int tci;
2003
2004 for (tci = cpu * num_tc; num_tc--; tci++) {
2005 int i, j;
2006
2007 for (i = count, j = offset; i--; j++) {
2008 if (!remove_xps_queue(dev_maps, cpu, j))
2009 break;
2010 }
2011
2012 active |= i < 0;
1972 } 2013 }
1973 2014
1974 return map; 2015 return active;
1975} 2016}
1976 2017
1977static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) 2018static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
2019 u16 count)
1978{ 2020{
1979 struct xps_dev_maps *dev_maps; 2021 struct xps_dev_maps *dev_maps;
1980 int cpu, i; 2022 int cpu, i;
@@ -1986,21 +2028,16 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
1986 if (!dev_maps) 2028 if (!dev_maps)
1987 goto out_no_maps; 2029 goto out_no_maps;
1988 2030
1989 for_each_possible_cpu(cpu) { 2031 for_each_possible_cpu(cpu)
1990 for (i = index; i < dev->num_tx_queues; i++) { 2032 active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
1991 if (!remove_xps_queue(dev_maps, cpu, i)) 2033 offset, count);
1992 break;
1993 }
1994 if (i == dev->num_tx_queues)
1995 active = true;
1996 }
1997 2034
1998 if (!active) { 2035 if (!active) {
1999 RCU_INIT_POINTER(dev->xps_maps, NULL); 2036 RCU_INIT_POINTER(dev->xps_maps, NULL);
2000 kfree_rcu(dev_maps, rcu); 2037 kfree_rcu(dev_maps, rcu);
2001 } 2038 }
2002 2039
2003 for (i = index; i < dev->num_tx_queues; i++) 2040 for (i = offset + (count - 1); count--; i--)
2004 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), 2041 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
2005 NUMA_NO_NODE); 2042 NUMA_NO_NODE);
2006 2043
@@ -2008,6 +2045,11 @@ out_no_maps:
2008 mutex_unlock(&xps_map_mutex); 2045 mutex_unlock(&xps_map_mutex);
2009} 2046}
2010 2047
2048static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
2049{
2050 netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
2051}
2052
2011static struct xps_map *expand_xps_map(struct xps_map *map, 2053static struct xps_map *expand_xps_map(struct xps_map *map,
2012 int cpu, u16 index) 2054 int cpu, u16 index)
2013{ 2055{
@@ -2047,20 +2089,28 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
2047 u16 index) 2089 u16 index)
2048{ 2090{
2049 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; 2091 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
2092 int i, cpu, tci, numa_node_id = -2;
2093 int maps_sz, num_tc = 1, tc = 0;
2050 struct xps_map *map, *new_map; 2094 struct xps_map *map, *new_map;
2051 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
2052 int cpu, numa_node_id = -2;
2053 bool active = false; 2095 bool active = false;
2054 2096
2097 if (dev->num_tc) {
2098 num_tc = dev->num_tc;
2099 tc = netdev_txq_to_tc(dev, index);
2100 if (tc < 0)
2101 return -EINVAL;
2102 }
2103
2104 maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
2105 if (maps_sz < L1_CACHE_BYTES)
2106 maps_sz = L1_CACHE_BYTES;
2107
2055 mutex_lock(&xps_map_mutex); 2108 mutex_lock(&xps_map_mutex);
2056 2109
2057 dev_maps = xmap_dereference(dev->xps_maps); 2110 dev_maps = xmap_dereference(dev->xps_maps);
2058 2111
2059 /* allocate memory for queue storage */ 2112 /* allocate memory for queue storage */
2060 for_each_online_cpu(cpu) { 2113 for_each_cpu_and(cpu, cpu_online_mask, mask) {
2061 if (!cpumask_test_cpu(cpu, mask))
2062 continue;
2063
2064 if (!new_dev_maps) 2114 if (!new_dev_maps)
2065 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); 2115 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
2066 if (!new_dev_maps) { 2116 if (!new_dev_maps) {
@@ -2068,25 +2118,38 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
2068 return -ENOMEM; 2118 return -ENOMEM;
2069 } 2119 }
2070 2120
2071 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : 2121 tci = cpu * num_tc + tc;
2122 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
2072 NULL; 2123 NULL;
2073 2124
2074 map = expand_xps_map(map, cpu, index); 2125 map = expand_xps_map(map, cpu, index);
2075 if (!map) 2126 if (!map)
2076 goto error; 2127 goto error;
2077 2128
2078 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); 2129 RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
2079 } 2130 }
2080 2131
2081 if (!new_dev_maps) 2132 if (!new_dev_maps)
2082 goto out_no_new_maps; 2133 goto out_no_new_maps;
2083 2134
2084 for_each_possible_cpu(cpu) { 2135 for_each_possible_cpu(cpu) {
2136 /* copy maps belonging to foreign traffic classes */
2137 for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
2138 /* fill in the new device map from the old device map */
2139 map = xmap_dereference(dev_maps->cpu_map[tci]);
2140 RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
2141 }
2142
2143 /* We need to explicitly update tci as prevous loop
2144 * could break out early if dev_maps is NULL.
2145 */
2146 tci = cpu * num_tc + tc;
2147
2085 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { 2148 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
2086 /* add queue to CPU maps */ 2149 /* add queue to CPU maps */
2087 int pos = 0; 2150 int pos = 0;
2088 2151
2089 map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 2152 map = xmap_dereference(new_dev_maps->cpu_map[tci]);
2090 while ((pos < map->len) && (map->queues[pos] != index)) 2153 while ((pos < map->len) && (map->queues[pos] != index))
2091 pos++; 2154 pos++;
2092 2155
@@ -2100,26 +2163,36 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
2100#endif 2163#endif
2101 } else if (dev_maps) { 2164 } else if (dev_maps) {
2102 /* fill in the new device map from the old device map */ 2165 /* fill in the new device map from the old device map */
2103 map = xmap_dereference(dev_maps->cpu_map[cpu]); 2166 map = xmap_dereference(dev_maps->cpu_map[tci]);
2104 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); 2167 RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
2105 } 2168 }
2106 2169
2170 /* copy maps belonging to foreign traffic classes */
2171 for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
2172 /* fill in the new device map from the old device map */
2173 map = xmap_dereference(dev_maps->cpu_map[tci]);
2174 RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
2175 }
2107 } 2176 }
2108 2177
2109 rcu_assign_pointer(dev->xps_maps, new_dev_maps); 2178 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
2110 2179
2111 /* Cleanup old maps */ 2180 /* Cleanup old maps */
2112 if (dev_maps) { 2181 if (!dev_maps)
2113 for_each_possible_cpu(cpu) { 2182 goto out_no_old_maps;
2114 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 2183
2115 map = xmap_dereference(dev_maps->cpu_map[cpu]); 2184 for_each_possible_cpu(cpu) {
2185 for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
2186 new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
2187 map = xmap_dereference(dev_maps->cpu_map[tci]);
2116 if (map && map != new_map) 2188 if (map && map != new_map)
2117 kfree_rcu(map, rcu); 2189 kfree_rcu(map, rcu);
2118 } 2190 }
2119
2120 kfree_rcu(dev_maps, rcu);
2121 } 2191 }
2122 2192
2193 kfree_rcu(dev_maps, rcu);
2194
2195out_no_old_maps:
2123 dev_maps = new_dev_maps; 2196 dev_maps = new_dev_maps;
2124 active = true; 2197 active = true;
2125 2198
@@ -2134,11 +2207,12 @@ out_no_new_maps:
2134 2207
2135 /* removes queue from unused CPUs */ 2208 /* removes queue from unused CPUs */
2136 for_each_possible_cpu(cpu) { 2209 for_each_possible_cpu(cpu) {
2137 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) 2210 for (i = tc, tci = cpu * num_tc; i--; tci++)
2138 continue; 2211 active |= remove_xps_queue(dev_maps, tci, index);
2139 2212 if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
2140 if (remove_xps_queue(dev_maps, cpu, index)) 2213 active |= remove_xps_queue(dev_maps, tci, index);
2141 active = true; 2214 for (i = num_tc - tc, tci++; --i; tci++)
2215 active |= remove_xps_queue(dev_maps, tci, index);
2142 } 2216 }
2143 2217
2144 /* free map if not active */ 2218 /* free map if not active */
@@ -2154,11 +2228,14 @@ out_no_maps:
2154error: 2228error:
2155 /* remove any maps that we added */ 2229 /* remove any maps that we added */
2156 for_each_possible_cpu(cpu) { 2230 for_each_possible_cpu(cpu) {
2157 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); 2231 for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
2158 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : 2232 new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
2159 NULL; 2233 map = dev_maps ?
2160 if (new_map && new_map != map) 2234 xmap_dereference(dev_maps->cpu_map[tci]) :
2161 kfree(new_map); 2235 NULL;
2236 if (new_map && new_map != map)
2237 kfree(new_map);
2238 }
2162 } 2239 }
2163 2240
2164 mutex_unlock(&xps_map_mutex); 2241 mutex_unlock(&xps_map_mutex);
@@ -2169,6 +2246,44 @@ error:
2169EXPORT_SYMBOL(netif_set_xps_queue); 2246EXPORT_SYMBOL(netif_set_xps_queue);
2170 2247
2171#endif 2248#endif
2249void netdev_reset_tc(struct net_device *dev)
2250{
2251#ifdef CONFIG_XPS
2252 netif_reset_xps_queues_gt(dev, 0);
2253#endif
2254 dev->num_tc = 0;
2255 memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
2256 memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
2257}
2258EXPORT_SYMBOL(netdev_reset_tc);
2259
2260int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
2261{
2262 if (tc >= dev->num_tc)
2263 return -EINVAL;
2264
2265#ifdef CONFIG_XPS
2266 netif_reset_xps_queues(dev, offset, count);
2267#endif
2268 dev->tc_to_txq[tc].count = count;
2269 dev->tc_to_txq[tc].offset = offset;
2270 return 0;
2271}
2272EXPORT_SYMBOL(netdev_set_tc_queue);
2273
2274int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
2275{
2276 if (num_tc > TC_MAX_QUEUE)
2277 return -EINVAL;
2278
2279#ifdef CONFIG_XPS
2280 netif_reset_xps_queues_gt(dev, 0);
2281#endif
2282 dev->num_tc = num_tc;
2283 return 0;
2284}
2285EXPORT_SYMBOL(netdev_set_num_tc);
2286
2172/* 2287/*
2173 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues 2288 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
2174 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. 2289 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -2487,141 +2602,6 @@ out:
2487} 2602}
2488EXPORT_SYMBOL(skb_checksum_help); 2603EXPORT_SYMBOL(skb_checksum_help);
2489 2604
2490/* skb_csum_offload_check - Driver helper function to determine if a device
2491 * with limited checksum offload capabilities is able to offload the checksum
2492 * for a given packet.
2493 *
2494 * Arguments:
2495 * skb - sk_buff for the packet in question
2496 * spec - contains the description of what device can offload
2497 * csum_encapped - returns true if the checksum being offloaded is
2498 * encpasulated. That is it is checksum for the transport header
2499 * in the inner headers.
2500 * checksum_help - when set indicates that helper function should
2501 * call skb_checksum_help if offload checks fail
2502 *
2503 * Returns:
2504 * true: Packet has passed the checksum checks and should be offloadable to
2505 * the device (a driver may still need to check for additional
2506 * restrictions of its device)
2507 * false: Checksum is not offloadable. If checksum_help was set then
2508 * skb_checksum_help was called to resolve checksum for non-GSO
2509 * packets and when IP protocol is not SCTP
2510 */
2511bool __skb_csum_offload_chk(struct sk_buff *skb,
2512 const struct skb_csum_offl_spec *spec,
2513 bool *csum_encapped,
2514 bool csum_help)
2515{
2516 struct iphdr *iph;
2517 struct ipv6hdr *ipv6;
2518 void *nhdr;
2519 int protocol;
2520 u8 ip_proto;
2521
2522 if (skb->protocol == htons(ETH_P_8021Q) ||
2523 skb->protocol == htons(ETH_P_8021AD)) {
2524 if (!spec->vlan_okay)
2525 goto need_help;
2526 }
2527
2528 /* We check whether the checksum refers to a transport layer checksum in
2529 * the outermost header or an encapsulated transport layer checksum that
2530 * corresponds to the inner headers of the skb. If the checksum is for
2531 * something else in the packet we need help.
2532 */
2533 if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) {
2534 /* Non-encapsulated checksum */
2535 protocol = eproto_to_ipproto(vlan_get_protocol(skb));
2536 nhdr = skb_network_header(skb);
2537 *csum_encapped = false;
2538 if (spec->no_not_encapped)
2539 goto need_help;
2540 } else if (skb->encapsulation && spec->encap_okay &&
2541 skb_checksum_start_offset(skb) ==
2542 skb_inner_transport_offset(skb)) {
2543 /* Encapsulated checksum */
2544 *csum_encapped = true;
2545 switch (skb->inner_protocol_type) {
2546 case ENCAP_TYPE_ETHER:
2547 protocol = eproto_to_ipproto(skb->inner_protocol);
2548 break;
2549 case ENCAP_TYPE_IPPROTO:
2550 protocol = skb->inner_protocol;
2551 break;
2552 }
2553 nhdr = skb_inner_network_header(skb);
2554 } else {
2555 goto need_help;
2556 }
2557
2558 switch (protocol) {
2559 case IPPROTO_IP:
2560 if (!spec->ipv4_okay)
2561 goto need_help;
2562 iph = nhdr;
2563 ip_proto = iph->protocol;
2564 if (iph->ihl != 5 && !spec->ip_options_okay)
2565 goto need_help;
2566 break;
2567 case IPPROTO_IPV6:
2568 if (!spec->ipv6_okay)
2569 goto need_help;
2570 if (spec->no_encapped_ipv6 && *csum_encapped)
2571 goto need_help;
2572 ipv6 = nhdr;
2573 nhdr += sizeof(*ipv6);
2574 ip_proto = ipv6->nexthdr;
2575 break;
2576 default:
2577 goto need_help;
2578 }
2579
2580ip_proto_again:
2581 switch (ip_proto) {
2582 case IPPROTO_TCP:
2583 if (!spec->tcp_okay ||
2584 skb->csum_offset != offsetof(struct tcphdr, check))
2585 goto need_help;
2586 break;
2587 case IPPROTO_UDP:
2588 if (!spec->udp_okay ||
2589 skb->csum_offset != offsetof(struct udphdr, check))
2590 goto need_help;
2591 break;
2592 case IPPROTO_SCTP:
2593 if (!spec->sctp_okay ||
2594 skb->csum_offset != offsetof(struct sctphdr, checksum))
2595 goto cant_help;
2596 break;
2597 case NEXTHDR_HOP:
2598 case NEXTHDR_ROUTING:
2599 case NEXTHDR_DEST: {
2600 u8 *opthdr = nhdr;
2601
2602 if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay)
2603 goto need_help;
2604
2605 ip_proto = opthdr[0];
2606 nhdr += (opthdr[1] + 1) << 3;
2607
2608 goto ip_proto_again;
2609 }
2610 default:
2611 goto need_help;
2612 }
2613
2614 /* Passed the tests for offloading checksum */
2615 return true;
2616
2617need_help:
2618 if (csum_help && !skb_shinfo(skb)->gso_size)
2619 skb_checksum_help(skb);
2620cant_help:
2621 return false;
2622}
2623EXPORT_SYMBOL(__skb_csum_offload_chk);
2624
2625__be16 skb_network_protocol(struct sk_buff *skb, int *depth) 2605__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
2626{ 2606{
2627 __be16 type = skb->protocol; 2607 __be16 type = skb->protocol;
@@ -3216,8 +3196,14 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
3216 rcu_read_lock(); 3196 rcu_read_lock();
3217 dev_maps = rcu_dereference(dev->xps_maps); 3197 dev_maps = rcu_dereference(dev->xps_maps);
3218 if (dev_maps) { 3198 if (dev_maps) {
3219 map = rcu_dereference( 3199 unsigned int tci = skb->sender_cpu - 1;
3220 dev_maps->cpu_map[skb->sender_cpu - 1]); 3200
3201 if (dev->num_tc) {
3202 tci *= dev->num_tc;
3203 tci += netdev_get_prio_tc_map(dev, skb->priority);
3204 }
3205
3206 map = rcu_dereference(dev_maps->cpu_map[tci]);
3221 if (map) { 3207 if (map) {
3222 if (map->len == 1) 3208 if (map->len == 1)
3223 queue_index = map->queues[0]; 3209 queue_index = map->queues[0];
@@ -3461,6 +3447,8 @@ EXPORT_SYMBOL(rps_cpu_mask);
3461 3447
3462struct static_key rps_needed __read_mostly; 3448struct static_key rps_needed __read_mostly;
3463EXPORT_SYMBOL(rps_needed); 3449EXPORT_SYMBOL(rps_needed);
3450struct static_key rfs_needed __read_mostly;
3451EXPORT_SYMBOL(rfs_needed);
3464 3452
3465static struct rps_dev_flow * 3453static struct rps_dev_flow *
3466set_rps_cpu(struct net_device *dev, struct sk_buff *skb, 3454set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
@@ -4491,7 +4479,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
4491 if (!(skb->dev->features & NETIF_F_GRO)) 4479 if (!(skb->dev->features & NETIF_F_GRO))
4492 goto normal; 4480 goto normal;
4493 4481
4494 if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad) 4482 if (skb->csum_bad)
4495 goto normal; 4483 goto normal;
4496 4484
4497 gro_list_prepare(napi, skb); 4485 gro_list_prepare(napi, skb);
@@ -4504,7 +4492,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
4504 skb_set_network_header(skb, skb_gro_offset(skb)); 4492 skb_set_network_header(skb, skb_gro_offset(skb));
4505 skb_reset_mac_len(skb); 4493 skb_reset_mac_len(skb);
4506 NAPI_GRO_CB(skb)->same_flow = 0; 4494 NAPI_GRO_CB(skb)->same_flow = 0;
4507 NAPI_GRO_CB(skb)->flush = 0; 4495 NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
4508 NAPI_GRO_CB(skb)->free = 0; 4496 NAPI_GRO_CB(skb)->free = 0;
4509 NAPI_GRO_CB(skb)->encap_mark = 0; 4497 NAPI_GRO_CB(skb)->encap_mark = 0;
4510 NAPI_GRO_CB(skb)->recursion_counter = 0; 4498 NAPI_GRO_CB(skb)->recursion_counter = 0;
@@ -4912,26 +4900,36 @@ void __napi_schedule_irqoff(struct napi_struct *n)
4912} 4900}
4913EXPORT_SYMBOL(__napi_schedule_irqoff); 4901EXPORT_SYMBOL(__napi_schedule_irqoff);
4914 4902
4915void __napi_complete(struct napi_struct *n) 4903bool __napi_complete(struct napi_struct *n)
4916{ 4904{
4917 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); 4905 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
4918 4906
4907 /* Some drivers call us directly, instead of calling
4908 * napi_complete_done().
4909 */
4910 if (unlikely(test_bit(NAPI_STATE_IN_BUSY_POLL, &n->state)))
4911 return false;
4912
4919 list_del_init(&n->poll_list); 4913 list_del_init(&n->poll_list);
4920 smp_mb__before_atomic(); 4914 smp_mb__before_atomic();
4921 clear_bit(NAPI_STATE_SCHED, &n->state); 4915 clear_bit(NAPI_STATE_SCHED, &n->state);
4916 return true;
4922} 4917}
4923EXPORT_SYMBOL(__napi_complete); 4918EXPORT_SYMBOL(__napi_complete);
4924 4919
4925void napi_complete_done(struct napi_struct *n, int work_done) 4920bool napi_complete_done(struct napi_struct *n, int work_done)
4926{ 4921{
4927 unsigned long flags; 4922 unsigned long flags;
4928 4923
4929 /* 4924 /*
4930 * don't let napi dequeue from the cpu poll list 4925 * 1) Don't let napi dequeue from the cpu poll list
4931 * just in case its running on a different cpu 4926 * just in case its running on a different cpu.
4927 * 2) If we are busy polling, do nothing here, we have
4928 * the guarantee we will be called later.
4932 */ 4929 */
4933 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) 4930 if (unlikely(n->state & (NAPIF_STATE_NPSVC |
4934 return; 4931 NAPIF_STATE_IN_BUSY_POLL)))
4932 return false;
4935 4933
4936 if (n->gro_list) { 4934 if (n->gro_list) {
4937 unsigned long timeout = 0; 4935 unsigned long timeout = 0;
@@ -4953,6 +4951,7 @@ void napi_complete_done(struct napi_struct *n, int work_done)
4953 __napi_complete(n); 4951 __napi_complete(n);
4954 local_irq_restore(flags); 4952 local_irq_restore(flags);
4955 } 4953 }
4954 return true;
4956} 4955}
4957EXPORT_SYMBOL(napi_complete_done); 4956EXPORT_SYMBOL(napi_complete_done);
4958 4957
@@ -4970,13 +4969,41 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
4970} 4969}
4971 4970
4972#if defined(CONFIG_NET_RX_BUSY_POLL) 4971#if defined(CONFIG_NET_RX_BUSY_POLL)
4972
4973#define BUSY_POLL_BUDGET 8 4973#define BUSY_POLL_BUDGET 8
4974
4975static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
4976{
4977 int rc;
4978
4979 clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
4980
4981 local_bh_disable();
4982
4983 /* All we really want here is to re-enable device interrupts.
4984 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
4985 */
4986 rc = napi->poll(napi, BUSY_POLL_BUDGET);
4987 netpoll_poll_unlock(have_poll_lock);
4988 if (rc == BUSY_POLL_BUDGET)
4989 __napi_schedule(napi);
4990 local_bh_enable();
4991 if (local_softirq_pending())
4992 do_softirq();
4993}
4994
4974bool sk_busy_loop(struct sock *sk, int nonblock) 4995bool sk_busy_loop(struct sock *sk, int nonblock)
4975{ 4996{
4976 unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; 4997 unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
4998 int (*napi_poll)(struct napi_struct *napi, int budget);
4977 int (*busy_poll)(struct napi_struct *dev); 4999 int (*busy_poll)(struct napi_struct *dev);
5000 void *have_poll_lock = NULL;
4978 struct napi_struct *napi; 5001 struct napi_struct *napi;
4979 int rc = false; 5002 int rc;
5003
5004restart:
5005 rc = false;
5006 napi_poll = NULL;
4980 5007
4981 rcu_read_lock(); 5008 rcu_read_lock();
4982 5009
@@ -4987,24 +5014,33 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
4987 /* Note: ndo_busy_poll method is optional in linux-4.5 */ 5014 /* Note: ndo_busy_poll method is optional in linux-4.5 */
4988 busy_poll = napi->dev->netdev_ops->ndo_busy_poll; 5015 busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
4989 5016
4990 do { 5017 preempt_disable();
5018 for (;;) {
4991 rc = 0; 5019 rc = 0;
4992 local_bh_disable(); 5020 local_bh_disable();
4993 if (busy_poll) { 5021 if (busy_poll) {
4994 rc = busy_poll(napi); 5022 rc = busy_poll(napi);
4995 } else if (napi_schedule_prep(napi)) { 5023 goto count;
4996 void *have = netpoll_poll_lock(napi); 5024 }
4997 5025 if (!napi_poll) {
4998 if (test_bit(NAPI_STATE_SCHED, &napi->state)) { 5026 unsigned long val = READ_ONCE(napi->state);
4999 rc = napi->poll(napi, BUSY_POLL_BUDGET); 5027
5000 trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); 5028 /* If multiple threads are competing for this napi,
5001 if (rc == BUSY_POLL_BUDGET) { 5029 * we avoid dirtying napi->state as much as we can.
5002 napi_complete_done(napi, rc); 5030 */
5003 napi_schedule(napi); 5031 if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
5004 } 5032 NAPIF_STATE_IN_BUSY_POLL))
5005 } 5033 goto count;
5006 netpoll_poll_unlock(have); 5034 if (cmpxchg(&napi->state, val,
5035 val | NAPIF_STATE_IN_BUSY_POLL |
5036 NAPIF_STATE_SCHED) != val)
5037 goto count;
5038 have_poll_lock = netpoll_poll_lock(napi);
5039 napi_poll = napi->poll;
5007 } 5040 }
5041 rc = napi_poll(napi, BUSY_POLL_BUDGET);
5042 trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
5043count:
5008 if (rc > 0) 5044 if (rc > 0)
5009 __NET_ADD_STATS(sock_net(sk), 5045 __NET_ADD_STATS(sock_net(sk),
5010 LINUX_MIB_BUSYPOLLRXPACKETS, rc); 5046 LINUX_MIB_BUSYPOLLRXPACKETS, rc);
@@ -5013,10 +5049,26 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
5013 if (rc == LL_FLUSH_FAILED) 5049 if (rc == LL_FLUSH_FAILED)
5014 break; /* permanent failure */ 5050 break; /* permanent failure */
5015 5051
5016 cpu_relax(); 5052 if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) ||
5017 } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && 5053 busy_loop_timeout(end_time))
5018 !need_resched() && !busy_loop_timeout(end_time)); 5054 break;
5019 5055
5056 if (unlikely(need_resched())) {
5057 if (napi_poll)
5058 busy_poll_stop(napi, have_poll_lock);
5059 preempt_enable();
5060 rcu_read_unlock();
5061 cond_resched();
5062 rc = !skb_queue_empty(&sk->sk_receive_queue);
5063 if (rc || busy_loop_timeout(end_time))
5064 return rc;
5065 goto restart;
5066 }
5067 cpu_relax();
5068 }
5069 if (napi_poll)
5070 busy_poll_stop(napi, have_poll_lock);
5071 preempt_enable();
5020 rc = !skb_queue_empty(&sk->sk_receive_queue); 5072 rc = !skb_queue_empty(&sk->sk_receive_queue);
5021out: 5073out:
5022 rcu_read_unlock(); 5074 rcu_read_unlock();
@@ -5026,7 +5078,7 @@ EXPORT_SYMBOL(sk_busy_loop);
5026 5078
5027#endif /* CONFIG_NET_RX_BUSY_POLL */ 5079#endif /* CONFIG_NET_RX_BUSY_POLL */
5028 5080
5029void napi_hash_add(struct napi_struct *napi) 5081static void napi_hash_add(struct napi_struct *napi)
5030{ 5082{
5031 if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || 5083 if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
5032 test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) 5084 test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
@@ -5046,7 +5098,6 @@ void napi_hash_add(struct napi_struct *napi)
5046 5098
5047 spin_unlock(&napi_hash_lock); 5099 spin_unlock(&napi_hash_lock);
5048} 5100}
5049EXPORT_SYMBOL_GPL(napi_hash_add);
5050 5101
5051/* Warning : caller is responsible to make sure rcu grace period 5102/* Warning : caller is responsible to make sure rcu grace period
5052 * is respected before freeing memory containing @napi 5103 * is respected before freeing memory containing @napi
@@ -5094,7 +5145,6 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
5094 list_add(&napi->dev_list, &dev->napi_list); 5145 list_add(&napi->dev_list, &dev->napi_list);
5095 napi->dev = dev; 5146 napi->dev = dev;
5096#ifdef CONFIG_NETPOLL 5147#ifdef CONFIG_NETPOLL
5097 spin_lock_init(&napi->poll_lock);
5098 napi->poll_owner = -1; 5148 napi->poll_owner = -1;
5099#endif 5149#endif
5100 set_bit(NAPI_STATE_SCHED, &napi->state); 5150 set_bit(NAPI_STATE_SCHED, &napi->state);
@@ -5212,7 +5262,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
5212 5262
5213 if (list_empty(&list)) { 5263 if (list_empty(&list)) {
5214 if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) 5264 if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
5215 return; 5265 goto out;
5216 break; 5266 break;
5217 } 5267 }
5218 5268
@@ -5230,7 +5280,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
5230 } 5280 }
5231 } 5281 }
5232 5282
5233 __kfree_skb_flush();
5234 local_irq_disable(); 5283 local_irq_disable();
5235 5284
5236 list_splice_tail_init(&sd->poll_list, &list); 5285 list_splice_tail_init(&sd->poll_list, &list);
@@ -5240,6 +5289,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
5240 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 5289 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
5241 5290
5242 net_rps_action_and_irq_enable(sd); 5291 net_rps_action_and_irq_enable(sd);
5292out:
5293 __kfree_skb_flush();
5243} 5294}
5244 5295
5245struct netdev_adjacent { 5296struct netdev_adjacent {
@@ -5270,6 +5321,13 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
5270 return NULL; 5321 return NULL;
5271} 5322}
5272 5323
5324static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data)
5325{
5326 struct net_device *dev = data;
5327
5328 return upper_dev == dev;
5329}
5330
5273/** 5331/**
5274 * netdev_has_upper_dev - Check if device is linked to an upper device 5332 * netdev_has_upper_dev - Check if device is linked to an upper device
5275 * @dev: device 5333 * @dev: device
@@ -5284,11 +5342,30 @@ bool netdev_has_upper_dev(struct net_device *dev,
5284{ 5342{
5285 ASSERT_RTNL(); 5343 ASSERT_RTNL();
5286 5344
5287 return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper); 5345 return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
5346 upper_dev);
5288} 5347}
5289EXPORT_SYMBOL(netdev_has_upper_dev); 5348EXPORT_SYMBOL(netdev_has_upper_dev);
5290 5349
5291/** 5350/**
5351 * netdev_has_upper_dev_all - Check if device is linked to an upper device
5352 * @dev: device
5353 * @upper_dev: upper device to check
5354 *
5355 * Find out if a device is linked to specified upper device and return true
5356 * in case it is. Note that this checks the entire upper device chain.
5357 * The caller must hold rcu lock.
5358 */
5359
5360bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
5361 struct net_device *upper_dev)
5362{
5363 return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
5364 upper_dev);
5365}
5366EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
5367
5368/**
5292 * netdev_has_any_upper_dev - Check if device is linked to some device 5369 * netdev_has_any_upper_dev - Check if device is linked to some device
5293 * @dev: device 5370 * @dev: device
5294 * 5371 *
@@ -5299,7 +5376,7 @@ static bool netdev_has_any_upper_dev(struct net_device *dev)
5299{ 5376{
5300 ASSERT_RTNL(); 5377 ASSERT_RTNL();
5301 5378
5302 return !list_empty(&dev->all_adj_list.upper); 5379 return !list_empty(&dev->adj_list.upper);
5303} 5380}
5304 5381
5305/** 5382/**
@@ -5326,6 +5403,20 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
5326} 5403}
5327EXPORT_SYMBOL(netdev_master_upper_dev_get); 5404EXPORT_SYMBOL(netdev_master_upper_dev_get);
5328 5405
5406/**
5407 * netdev_has_any_lower_dev - Check if device is linked to some device
5408 * @dev: device
5409 *
5410 * Find out if a device is linked to a lower device and return true in case
5411 * it is. The caller must hold the RTNL lock.
5412 */
5413static bool netdev_has_any_lower_dev(struct net_device *dev)
5414{
5415 ASSERT_RTNL();
5416
5417 return !list_empty(&dev->adj_list.lower);
5418}
5419
5329void *netdev_adjacent_get_private(struct list_head *adj_list) 5420void *netdev_adjacent_get_private(struct list_head *adj_list)
5330{ 5421{
5331 struct netdev_adjacent *adj; 5422 struct netdev_adjacent *adj;
@@ -5362,16 +5453,8 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
5362} 5453}
5363EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); 5454EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
5364 5455
5365/** 5456static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
5366 * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list 5457 struct list_head **iter)
5367 * @dev: device
5368 * @iter: list_head ** of the current position
5369 *
5370 * Gets the next device from the dev's upper list, starting from iter
5371 * position. The caller must hold RCU read lock.
5372 */
5373struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
5374 struct list_head **iter)
5375{ 5458{
5376 struct netdev_adjacent *upper; 5459 struct netdev_adjacent *upper;
5377 5460
@@ -5379,14 +5462,41 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
5379 5462
5380 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); 5463 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
5381 5464
5382 if (&upper->list == &dev->all_adj_list.upper) 5465 if (&upper->list == &dev->adj_list.upper)
5383 return NULL; 5466 return NULL;
5384 5467
5385 *iter = &upper->list; 5468 *iter = &upper->list;
5386 5469
5387 return upper->dev; 5470 return upper->dev;
5388} 5471}
5389EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); 5472
5473int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
5474 int (*fn)(struct net_device *dev,
5475 void *data),
5476 void *data)
5477{
5478 struct net_device *udev;
5479 struct list_head *iter;
5480 int ret;
5481
5482 for (iter = &dev->adj_list.upper,
5483 udev = netdev_next_upper_dev_rcu(dev, &iter);
5484 udev;
5485 udev = netdev_next_upper_dev_rcu(dev, &iter)) {
5486 /* first is the upper device itself */
5487 ret = fn(udev, data);
5488 if (ret)
5489 return ret;
5490
5491 /* then look at all of its upper devices */
5492 ret = netdev_walk_all_upper_dev_rcu(udev, fn, data);
5493 if (ret)
5494 return ret;
5495 }
5496
5497 return 0;
5498}
5499EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
5390 5500
5391/** 5501/**
5392 * netdev_lower_get_next_private - Get the next ->private from the 5502 * netdev_lower_get_next_private - Get the next ->private from the
@@ -5469,55 +5579,90 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
5469} 5579}
5470EXPORT_SYMBOL(netdev_lower_get_next); 5580EXPORT_SYMBOL(netdev_lower_get_next);
5471 5581
5472/** 5582static struct net_device *netdev_next_lower_dev(struct net_device *dev,
5473 * netdev_all_lower_get_next - Get the next device from all lower neighbour list 5583 struct list_head **iter)
5474 * @dev: device
5475 * @iter: list_head ** of the current position
5476 *
5477 * Gets the next netdev_adjacent from the dev's all lower neighbour
5478 * list, starting from iter position. The caller must hold RTNL lock or
5479 * its own locking that guarantees that the neighbour all lower
5480 * list will remain unchanged.
5481 */
5482struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
5483{ 5584{
5484 struct netdev_adjacent *lower; 5585 struct netdev_adjacent *lower;
5485 5586
5486 lower = list_entry(*iter, struct netdev_adjacent, list); 5587 lower = list_entry((*iter)->next, struct netdev_adjacent, list);
5487 5588
5488 if (&lower->list == &dev->all_adj_list.lower) 5589 if (&lower->list == &dev->adj_list.lower)
5489 return NULL; 5590 return NULL;
5490 5591
5491 *iter = lower->list.next; 5592 *iter = &lower->list;
5492 5593
5493 return lower->dev; 5594 return lower->dev;
5494} 5595}
5495EXPORT_SYMBOL(netdev_all_lower_get_next);
5496 5596
5497/** 5597int netdev_walk_all_lower_dev(struct net_device *dev,
5498 * netdev_all_lower_get_next_rcu - Get the next device from all 5598 int (*fn)(struct net_device *dev,
5499 * lower neighbour list, RCU variant 5599 void *data),
5500 * @dev: device 5600 void *data)
5501 * @iter: list_head ** of the current position 5601{
5502 * 5602 struct net_device *ldev;
5503 * Gets the next netdev_adjacent from the dev's all lower neighbour 5603 struct list_head *iter;
5504 * list, starting from iter position. The caller must hold RCU read lock. 5604 int ret;
5505 */ 5605
5506struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, 5606 for (iter = &dev->adj_list.lower,
5507 struct list_head **iter) 5607 ldev = netdev_next_lower_dev(dev, &iter);
5608 ldev;
5609 ldev = netdev_next_lower_dev(dev, &iter)) {
5610 /* first is the lower device itself */
5611 ret = fn(ldev, data);
5612 if (ret)
5613 return ret;
5614
5615 /* then look at all of its lower devices */
5616 ret = netdev_walk_all_lower_dev(ldev, fn, data);
5617 if (ret)
5618 return ret;
5619 }
5620
5621 return 0;
5622}
5623EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
5624
5625static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
5626 struct list_head **iter)
5508{ 5627{
5509 struct netdev_adjacent *lower; 5628 struct netdev_adjacent *lower;
5510 5629
5511 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); 5630 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
5512 5631 if (&lower->list == &dev->adj_list.lower)
5513 if (&lower->list == &dev->all_adj_list.lower)
5514 return NULL; 5632 return NULL;
5515 5633
5516 *iter = &lower->list; 5634 *iter = &lower->list;
5517 5635
5518 return lower->dev; 5636 return lower->dev;
5519} 5637}
5520EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); 5638
5639int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
5640 int (*fn)(struct net_device *dev,
5641 void *data),
5642 void *data)
5643{
5644 struct net_device *ldev;
5645 struct list_head *iter;
5646 int ret;
5647
5648 for (iter = &dev->adj_list.lower,
5649 ldev = netdev_next_lower_dev_rcu(dev, &iter);
5650 ldev;
5651 ldev = netdev_next_lower_dev_rcu(dev, &iter)) {
5652 /* first is the lower device itself */
5653 ret = fn(ldev, data);
5654 if (ret)
5655 return ret;
5656
5657 /* then look at all of its lower devices */
5658 ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data);
5659 if (ret)
5660 return ret;
5661 }
5662
5663 return 0;
5664}
5665EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu);
5521 5666
5522/** 5667/**
5523 * netdev_lower_get_first_private_rcu - Get the first ->private from the 5668 * netdev_lower_get_first_private_rcu - Get the first ->private from the
@@ -5590,7 +5735,6 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
5590 5735
5591static int __netdev_adjacent_dev_insert(struct net_device *dev, 5736static int __netdev_adjacent_dev_insert(struct net_device *dev,
5592 struct net_device *adj_dev, 5737 struct net_device *adj_dev,
5593 u16 ref_nr,
5594 struct list_head *dev_list, 5738 struct list_head *dev_list,
5595 void *private, bool master) 5739 void *private, bool master)
5596{ 5740{
@@ -5600,7 +5744,10 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
5600 adj = __netdev_find_adj(adj_dev, dev_list); 5744 adj = __netdev_find_adj(adj_dev, dev_list);
5601 5745
5602 if (adj) { 5746 if (adj) {
5603 adj->ref_nr += ref_nr; 5747 adj->ref_nr += 1;
5748 pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n",
5749 dev->name, adj_dev->name, adj->ref_nr);
5750
5604 return 0; 5751 return 0;
5605 } 5752 }
5606 5753
@@ -5610,12 +5757,12 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
5610 5757
5611 adj->dev = adj_dev; 5758 adj->dev = adj_dev;
5612 adj->master = master; 5759 adj->master = master;
5613 adj->ref_nr = ref_nr; 5760 adj->ref_nr = 1;
5614 adj->private = private; 5761 adj->private = private;
5615 dev_hold(adj_dev); 5762 dev_hold(adj_dev);
5616 5763
5617 pr_debug("dev_hold for %s, because of link added from %s to %s\n", 5764 pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
5618 adj_dev->name, dev->name, adj_dev->name); 5765 dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
5619 5766
5620 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) { 5767 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
5621 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); 5768 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
@@ -5654,17 +5801,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
5654{ 5801{
5655 struct netdev_adjacent *adj; 5802 struct netdev_adjacent *adj;
5656 5803
5804 pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n",
5805 dev->name, adj_dev->name, ref_nr);
5806
5657 adj = __netdev_find_adj(adj_dev, dev_list); 5807 adj = __netdev_find_adj(adj_dev, dev_list);
5658 5808
5659 if (!adj) { 5809 if (!adj) {
5660 pr_err("tried to remove device %s from %s\n", 5810 pr_err("Adjacency does not exist for device %s from %s\n",
5661 dev->name, adj_dev->name); 5811 dev->name, adj_dev->name);
5662 BUG(); 5812 WARN_ON(1);
5813 return;
5663 } 5814 }
5664 5815
5665 if (adj->ref_nr > ref_nr) { 5816 if (adj->ref_nr > ref_nr) {
5666 pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name, 5817 pr_debug("adjacency: %s to %s ref_nr - %d = %d\n",
5667 ref_nr, adj->ref_nr-ref_nr); 5818 dev->name, adj_dev->name, ref_nr,
5819 adj->ref_nr - ref_nr);
5668 adj->ref_nr -= ref_nr; 5820 adj->ref_nr -= ref_nr;
5669 return; 5821 return;
5670 } 5822 }
@@ -5676,7 +5828,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
5676 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); 5828 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
5677 5829
5678 list_del_rcu(&adj->list); 5830 list_del_rcu(&adj->list);
5679 pr_debug("dev_put for %s, because link removed from %s to %s\n", 5831 pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",
5680 adj_dev->name, dev->name, adj_dev->name); 5832 adj_dev->name, dev->name, adj_dev->name);
5681 dev_put(adj_dev); 5833 dev_put(adj_dev);
5682 kfree_rcu(adj, rcu); 5834 kfree_rcu(adj, rcu);
@@ -5684,38 +5836,27 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
5684 5836
5685static int __netdev_adjacent_dev_link_lists(struct net_device *dev, 5837static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
5686 struct net_device *upper_dev, 5838 struct net_device *upper_dev,
5687 u16 ref_nr,
5688 struct list_head *up_list, 5839 struct list_head *up_list,
5689 struct list_head *down_list, 5840 struct list_head *down_list,
5690 void *private, bool master) 5841 void *private, bool master)
5691{ 5842{
5692 int ret; 5843 int ret;
5693 5844
5694 ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list, 5845 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list,
5695 private, master); 5846 private, master);
5696 if (ret) 5847 if (ret)
5697 return ret; 5848 return ret;
5698 5849
5699 ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list, 5850 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list,
5700 private, false); 5851 private, false);
5701 if (ret) { 5852 if (ret) {
5702 __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); 5853 __netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list);
5703 return ret; 5854 return ret;
5704 } 5855 }
5705 5856
5706 return 0; 5857 return 0;
5707} 5858}
5708 5859
5709static int __netdev_adjacent_dev_link(struct net_device *dev,
5710 struct net_device *upper_dev,
5711 u16 ref_nr)
5712{
5713 return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr,
5714 &dev->all_adj_list.upper,
5715 &upper_dev->all_adj_list.lower,
5716 NULL, false);
5717}
5718
5719static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, 5860static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
5720 struct net_device *upper_dev, 5861 struct net_device *upper_dev,
5721 u16 ref_nr, 5862 u16 ref_nr,
@@ -5726,40 +5867,19 @@ static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
5726 __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); 5867 __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
5727} 5868}
5728 5869
5729static void __netdev_adjacent_dev_unlink(struct net_device *dev,
5730 struct net_device *upper_dev,
5731 u16 ref_nr)
5732{
5733 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr,
5734 &dev->all_adj_list.upper,
5735 &upper_dev->all_adj_list.lower);
5736}
5737
5738static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, 5870static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
5739 struct net_device *upper_dev, 5871 struct net_device *upper_dev,
5740 void *private, bool master) 5872 void *private, bool master)
5741{ 5873{
5742 int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1); 5874 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
5743 5875 &dev->adj_list.upper,
5744 if (ret) 5876 &upper_dev->adj_list.lower,
5745 return ret; 5877 private, master);
5746
5747 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1,
5748 &dev->adj_list.upper,
5749 &upper_dev->adj_list.lower,
5750 private, master);
5751 if (ret) {
5752 __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
5753 return ret;
5754 }
5755
5756 return 0;
5757} 5878}
5758 5879
5759static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, 5880static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
5760 struct net_device *upper_dev) 5881 struct net_device *upper_dev)
5761{ 5882{
5762 __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
5763 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, 5883 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
5764 &dev->adj_list.upper, 5884 &dev->adj_list.upper,
5765 &upper_dev->adj_list.lower); 5885 &upper_dev->adj_list.lower);
@@ -5770,7 +5890,6 @@ static int __netdev_upper_dev_link(struct net_device *dev,
5770 void *upper_priv, void *upper_info) 5890 void *upper_priv, void *upper_info)
5771{ 5891{
5772 struct netdev_notifier_changeupper_info changeupper_info; 5892 struct netdev_notifier_changeupper_info changeupper_info;
5773 struct netdev_adjacent *i, *j, *to_i, *to_j;
5774 int ret = 0; 5893 int ret = 0;
5775 5894
5776 ASSERT_RTNL(); 5895 ASSERT_RTNL();
@@ -5779,10 +5898,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,
5779 return -EBUSY; 5898 return -EBUSY;
5780 5899
5781 /* To prevent loops, check if dev is not upper device to upper_dev. */ 5900 /* To prevent loops, check if dev is not upper device to upper_dev. */
5782 if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper)) 5901 if (netdev_has_upper_dev(upper_dev, dev))
5783 return -EBUSY; 5902 return -EBUSY;
5784 5903
5785 if (__netdev_find_adj(upper_dev, &dev->adj_list.upper)) 5904 if (netdev_has_upper_dev(dev, upper_dev))
5786 return -EEXIST; 5905 return -EEXIST;
5787 5906
5788 if (master && netdev_master_upper_dev_get(dev)) 5907 if (master && netdev_master_upper_dev_get(dev))
@@ -5804,80 +5923,15 @@ static int __netdev_upper_dev_link(struct net_device *dev,
5804 if (ret) 5923 if (ret)
5805 return ret; 5924 return ret;
5806 5925
5807 /* Now that we linked these devs, make all the upper_dev's
5808 * all_adj_list.upper visible to every dev's all_adj_list.lower an
5809 * versa, and don't forget the devices itself. All of these
5810 * links are non-neighbours.
5811 */
5812 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5813 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
5814 pr_debug("Interlinking %s with %s, non-neighbour\n",
5815 i->dev->name, j->dev->name);
5816 ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr);
5817 if (ret)
5818 goto rollback_mesh;
5819 }
5820 }
5821
5822 /* add dev to every upper_dev's upper device */
5823 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
5824 pr_debug("linking %s's upper device %s with %s\n",
5825 upper_dev->name, i->dev->name, dev->name);
5826 ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr);
5827 if (ret)
5828 goto rollback_upper_mesh;
5829 }
5830
5831 /* add upper_dev to every dev's lower device */
5832 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5833 pr_debug("linking %s's lower device %s with %s\n", dev->name,
5834 i->dev->name, upper_dev->name);
5835 ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr);
5836 if (ret)
5837 goto rollback_lower_mesh;
5838 }
5839
5840 ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, 5926 ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
5841 &changeupper_info.info); 5927 &changeupper_info.info);
5842 ret = notifier_to_errno(ret); 5928 ret = notifier_to_errno(ret);
5843 if (ret) 5929 if (ret)
5844 goto rollback_lower_mesh; 5930 goto rollback;
5845 5931
5846 return 0; 5932 return 0;
5847 5933
5848rollback_lower_mesh: 5934rollback:
5849 to_i = i;
5850 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5851 if (i == to_i)
5852 break;
5853 __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
5854 }
5855
5856 i = NULL;
5857
5858rollback_upper_mesh:
5859 to_i = i;
5860 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
5861 if (i == to_i)
5862 break;
5863 __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
5864 }
5865
5866 i = j = NULL;
5867
5868rollback_mesh:
5869 to_i = i;
5870 to_j = j;
5871 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5872 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
5873 if (i == to_i && j == to_j)
5874 break;
5875 __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
5876 }
5877 if (i == to_i)
5878 break;
5879 }
5880
5881 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); 5935 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
5882 5936
5883 return ret; 5937 return ret;
@@ -5934,7 +5988,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,
5934 struct net_device *upper_dev) 5988 struct net_device *upper_dev)
5935{ 5989{
5936 struct netdev_notifier_changeupper_info changeupper_info; 5990 struct netdev_notifier_changeupper_info changeupper_info;
5937 struct netdev_adjacent *i, *j;
5938 ASSERT_RTNL(); 5991 ASSERT_RTNL();
5939 5992
5940 changeupper_info.upper_dev = upper_dev; 5993 changeupper_info.upper_dev = upper_dev;
@@ -5946,23 +5999,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,
5946 5999
5947 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); 6000 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
5948 6001
5949 /* Here is the tricky part. We must remove all dev's lower
5950 * devices from all upper_dev's upper devices and vice
5951 * versa, to maintain the graph relationship.
5952 */
5953 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5954 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
5955 __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
5956
5957 /* remove also the devices itself from lower/upper device
5958 * list
5959 */
5960 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5961 __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
5962
5963 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
5964 __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
5965
5966 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, 6002 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
5967 &changeupper_info.info); 6003 &changeupper_info.info);
5968} 6004}
@@ -6500,9 +6536,18 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
6500 if (new_mtu == dev->mtu) 6536 if (new_mtu == dev->mtu)
6501 return 0; 6537 return 0;
6502 6538
6503 /* MTU must be positive. */ 6539 /* MTU must be positive, and in range */
6504 if (new_mtu < 0) 6540 if (new_mtu < 0 || new_mtu < dev->min_mtu) {
6541 net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n",
6542 dev->name, new_mtu, dev->min_mtu);
6505 return -EINVAL; 6543 return -EINVAL;
6544 }
6545
6546 if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
6547 net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n",
6548 dev->name, new_mtu, dev->max_mtu);
6549 return -EINVAL;
6550 }
6506 6551
6507 if (!netif_device_present(dev)) 6552 if (!netif_device_present(dev))
6508 return -ENODEV; 6553 return -ENODEV;
@@ -6649,26 +6694,42 @@ EXPORT_SYMBOL(dev_change_proto_down);
6649 * dev_change_xdp_fd - set or clear a bpf program for a device rx path 6694 * dev_change_xdp_fd - set or clear a bpf program for a device rx path
6650 * @dev: device 6695 * @dev: device
6651 * @fd: new program fd or negative value to clear 6696 * @fd: new program fd or negative value to clear
6697 * @flags: xdp-related flags
6652 * 6698 *
6653 * Set or clear a bpf program for a device 6699 * Set or clear a bpf program for a device
6654 */ 6700 */
6655int dev_change_xdp_fd(struct net_device *dev, int fd) 6701int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
6656{ 6702{
6657 const struct net_device_ops *ops = dev->netdev_ops; 6703 const struct net_device_ops *ops = dev->netdev_ops;
6658 struct bpf_prog *prog = NULL; 6704 struct bpf_prog *prog = NULL;
6659 struct netdev_xdp xdp = {}; 6705 struct netdev_xdp xdp;
6660 int err; 6706 int err;
6661 6707
6708 ASSERT_RTNL();
6709
6662 if (!ops->ndo_xdp) 6710 if (!ops->ndo_xdp)
6663 return -EOPNOTSUPP; 6711 return -EOPNOTSUPP;
6664 if (fd >= 0) { 6712 if (fd >= 0) {
6713 if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
6714 memset(&xdp, 0, sizeof(xdp));
6715 xdp.command = XDP_QUERY_PROG;
6716
6717 err = ops->ndo_xdp(dev, &xdp);
6718 if (err < 0)
6719 return err;
6720 if (xdp.prog_attached)
6721 return -EBUSY;
6722 }
6723
6665 prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); 6724 prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
6666 if (IS_ERR(prog)) 6725 if (IS_ERR(prog))
6667 return PTR_ERR(prog); 6726 return PTR_ERR(prog);
6668 } 6727 }
6669 6728
6729 memset(&xdp, 0, sizeof(xdp));
6670 xdp.command = XDP_SETUP_PROG; 6730 xdp.command = XDP_SETUP_PROG;
6671 xdp.prog = prog; 6731 xdp.prog = prog;
6732
6672 err = ops->ndo_xdp(dev, &xdp); 6733 err = ops->ndo_xdp(dev, &xdp);
6673 if (err < 0 && prog) 6734 if (err < 0 && prog)
6674 bpf_prog_put(prog); 6735 bpf_prog_put(prog);
@@ -6777,6 +6838,7 @@ static void rollback_registered_many(struct list_head *head)
6777 6838
6778 /* Notifier chain MUST detach us all upper devices. */ 6839 /* Notifier chain MUST detach us all upper devices. */
6779 WARN_ON(netdev_has_any_upper_dev(dev)); 6840 WARN_ON(netdev_has_any_upper_dev(dev));
6841 WARN_ON(netdev_has_any_lower_dev(dev));
6780 6842
6781 /* Remove entries from kobject tree */ 6843 /* Remove entries from kobject tree */
6782 netdev_unregister_kobject(dev); 6844 netdev_unregister_kobject(dev);
@@ -7655,8 +7717,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
7655 INIT_LIST_HEAD(&dev->link_watch_list); 7717 INIT_LIST_HEAD(&dev->link_watch_list);
7656 INIT_LIST_HEAD(&dev->adj_list.upper); 7718 INIT_LIST_HEAD(&dev->adj_list.upper);
7657 INIT_LIST_HEAD(&dev->adj_list.lower); 7719 INIT_LIST_HEAD(&dev->adj_list.lower);
7658 INIT_LIST_HEAD(&dev->all_adj_list.upper);
7659 INIT_LIST_HEAD(&dev->all_adj_list.lower);
7660 INIT_LIST_HEAD(&dev->ptype_all); 7720 INIT_LIST_HEAD(&dev->ptype_all);
7661 INIT_LIST_HEAD(&dev->ptype_specific); 7721 INIT_LIST_HEAD(&dev->ptype_specific);
7662#ifdef CONFIG_NET_SCHED 7722#ifdef CONFIG_NET_SCHED
@@ -7667,7 +7727,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
7667 7727
7668 if (!dev->tx_queue_len) { 7728 if (!dev->tx_queue_len) {
7669 dev->priv_flags |= IFF_NO_QUEUE; 7729 dev->priv_flags |= IFF_NO_QUEUE;
7670 dev->tx_queue_len = 1; 7730 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
7671 } 7731 }
7672 7732
7673 dev->num_tx_queues = txqs; 7733 dev->num_tx_queues = txqs;
@@ -7948,18 +8008,13 @@ out:
7948} 8008}
7949EXPORT_SYMBOL_GPL(dev_change_net_namespace); 8009EXPORT_SYMBOL_GPL(dev_change_net_namespace);
7950 8010
7951static int dev_cpu_callback(struct notifier_block *nfb, 8011static int dev_cpu_dead(unsigned int oldcpu)
7952 unsigned long action,
7953 void *ocpu)
7954{ 8012{
7955 struct sk_buff **list_skb; 8013 struct sk_buff **list_skb;
7956 struct sk_buff *skb; 8014 struct sk_buff *skb;
7957 unsigned int cpu, oldcpu = (unsigned long)ocpu; 8015 unsigned int cpu;
7958 struct softnet_data *sd, *oldsd; 8016 struct softnet_data *sd, *oldsd;
7959 8017
7960 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
7961 return NOTIFY_OK;
7962
7963 local_irq_disable(); 8018 local_irq_disable();
7964 cpu = smp_processor_id(); 8019 cpu = smp_processor_id();
7965 sd = &per_cpu(softnet_data, cpu); 8020 sd = &per_cpu(softnet_data, cpu);
@@ -8009,10 +8064,9 @@ static int dev_cpu_callback(struct notifier_block *nfb,
8009 input_queue_head_incr(oldsd); 8064 input_queue_head_incr(oldsd);
8010 } 8065 }
8011 8066
8012 return NOTIFY_OK; 8067 return 0;
8013} 8068}
8014 8069
8015
8016/** 8070/**
8017 * netdev_increment_features - increment feature set by one 8071 * netdev_increment_features - increment feature set by one
8018 * @all: current feature set 8072 * @all: current feature set
@@ -8346,7 +8400,9 @@ static int __init net_dev_init(void)
8346 open_softirq(NET_TX_SOFTIRQ, net_tx_action); 8400 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
8347 open_softirq(NET_RX_SOFTIRQ, net_rx_action); 8401 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
8348 8402
8349 hotcpu_notifier(dev_cpu_callback, 0); 8403 rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead",
8404 NULL, dev_cpu_dead);
8405 WARN_ON(rc < 0);
8350 dst_subsys_init(); 8406 dst_subsys_init();
8351 rc = 0; 8407 rc = 0;
8352out: 8408out:
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 1b5063088f1a..2b5bf9efa720 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -341,15 +341,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
341 mutex_unlock(&devlink_mutex); 341 mutex_unlock(&devlink_mutex);
342} 342}
343 343
344static struct genl_family devlink_nl_family = { 344static struct genl_family devlink_nl_family;
345 .id = GENL_ID_GENERATE,
346 .name = DEVLINK_GENL_NAME,
347 .version = DEVLINK_GENL_VERSION,
348 .maxattr = DEVLINK_ATTR_MAX,
349 .netnsok = true,
350 .pre_doit = devlink_nl_pre_doit,
351 .post_doit = devlink_nl_post_doit,
352};
353 345
354enum devlink_multicast_groups { 346enum devlink_multicast_groups {
355 DEVLINK_MCGRP_CONFIG, 347 DEVLINK_MCGRP_CONFIG,
@@ -608,6 +600,8 @@ static int devlink_port_type_set(struct devlink *devlink,
608 if (devlink->ops && devlink->ops->port_type_set) { 600 if (devlink->ops && devlink->ops->port_type_set) {
609 if (port_type == DEVLINK_PORT_TYPE_NOTSET) 601 if (port_type == DEVLINK_PORT_TYPE_NOTSET)
610 return -EINVAL; 602 return -EINVAL;
603 if (port_type == devlink_port->type)
604 return 0;
611 err = devlink->ops->port_type_set(devlink_port, port_type); 605 err = devlink->ops->port_type_set(devlink_port, port_type);
612 if (err) 606 if (err)
613 return err; 607 return err;
@@ -1400,26 +1394,45 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
1400 1394
1401static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, 1395static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
1402 enum devlink_command cmd, u32 portid, 1396 enum devlink_command cmd, u32 portid,
1403 u32 seq, int flags, u16 mode) 1397 u32 seq, int flags)
1404{ 1398{
1399 const struct devlink_ops *ops = devlink->ops;
1405 void *hdr; 1400 void *hdr;
1401 int err = 0;
1402 u16 mode;
1403 u8 inline_mode;
1406 1404
1407 hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); 1405 hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
1408 if (!hdr) 1406 if (!hdr)
1409 return -EMSGSIZE; 1407 return -EMSGSIZE;
1410 1408
1411 if (devlink_nl_put_handle(msg, devlink)) 1409 err = devlink_nl_put_handle(msg, devlink);
1412 goto nla_put_failure; 1410 if (err)
1411 goto out;
1413 1412
1414 if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode)) 1413 err = ops->eswitch_mode_get(devlink, &mode);
1415 goto nla_put_failure; 1414 if (err)
1415 goto out;
1416 err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
1417 if (err)
1418 goto out;
1419
1420 if (ops->eswitch_inline_mode_get) {
1421 err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
1422 if (err)
1423 goto out;
1424 err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
1425 inline_mode);
1426 if (err)
1427 goto out;
1428 }
1416 1429
1417 genlmsg_end(msg, hdr); 1430 genlmsg_end(msg, hdr);
1418 return 0; 1431 return 0;
1419 1432
1420nla_put_failure: 1433out:
1421 genlmsg_cancel(msg, hdr); 1434 genlmsg_cancel(msg, hdr);
1422 return -EMSGSIZE; 1435 return err;
1423} 1436}
1424 1437
1425static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb, 1438static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
@@ -1428,22 +1441,17 @@ static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
1428 struct devlink *devlink = info->user_ptr[0]; 1441 struct devlink *devlink = info->user_ptr[0];
1429 const struct devlink_ops *ops = devlink->ops; 1442 const struct devlink_ops *ops = devlink->ops;
1430 struct sk_buff *msg; 1443 struct sk_buff *msg;
1431 u16 mode;
1432 int err; 1444 int err;
1433 1445
1434 if (!ops || !ops->eswitch_mode_get) 1446 if (!ops || !ops->eswitch_mode_get)
1435 return -EOPNOTSUPP; 1447 return -EOPNOTSUPP;
1436 1448
1437 err = ops->eswitch_mode_get(devlink, &mode);
1438 if (err)
1439 return err;
1440
1441 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1449 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1442 if (!msg) 1450 if (!msg)
1443 return -ENOMEM; 1451 return -ENOMEM;
1444 1452
1445 err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET, 1453 err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET,
1446 info->snd_portid, info->snd_seq, 0, mode); 1454 info->snd_portid, info->snd_seq, 0);
1447 1455
1448 if (err) { 1456 if (err) {
1449 nlmsg_free(msg); 1457 nlmsg_free(msg);
@@ -1459,15 +1467,32 @@ static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb,
1459 struct devlink *devlink = info->user_ptr[0]; 1467 struct devlink *devlink = info->user_ptr[0];
1460 const struct devlink_ops *ops = devlink->ops; 1468 const struct devlink_ops *ops = devlink->ops;
1461 u16 mode; 1469 u16 mode;
1470 u8 inline_mode;
1471 int err = 0;
1462 1472
1463 if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) 1473 if (!ops)
1464 return -EINVAL; 1474 return -EOPNOTSUPP;
1465 1475
1466 mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); 1476 if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
1477 if (!ops->eswitch_mode_set)
1478 return -EOPNOTSUPP;
1479 mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
1480 err = ops->eswitch_mode_set(devlink, mode);
1481 if (err)
1482 return err;
1483 }
1467 1484
1468 if (ops && ops->eswitch_mode_set) 1485 if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) {
1469 return ops->eswitch_mode_set(devlink, mode); 1486 if (!ops->eswitch_inline_mode_set)
1470 return -EOPNOTSUPP; 1487 return -EOPNOTSUPP;
1488 inline_mode = nla_get_u8(
1489 info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
1490 err = ops->eswitch_inline_mode_set(devlink, inline_mode);
1491 if (err)
1492 return err;
1493 }
1494
1495 return 0;
1471} 1496}
1472 1497
1473static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { 1498static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
@@ -1484,6 +1509,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
1484 [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, 1509 [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
1485 [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, 1510 [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
1486 [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 }, 1511 [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
1512 [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
1487}; 1513};
1488 1514
1489static const struct genl_ops devlink_nl_ops[] = { 1515static const struct genl_ops devlink_nl_ops[] = {
@@ -1618,6 +1644,20 @@ static const struct genl_ops devlink_nl_ops[] = {
1618 }, 1644 },
1619}; 1645};
1620 1646
1647static struct genl_family devlink_nl_family __ro_after_init = {
1648 .name = DEVLINK_GENL_NAME,
1649 .version = DEVLINK_GENL_VERSION,
1650 .maxattr = DEVLINK_ATTR_MAX,
1651 .netnsok = true,
1652 .pre_doit = devlink_nl_pre_doit,
1653 .post_doit = devlink_nl_post_doit,
1654 .module = THIS_MODULE,
1655 .ops = devlink_nl_ops,
1656 .n_ops = ARRAY_SIZE(devlink_nl_ops),
1657 .mcgrps = devlink_nl_mcgrps,
1658 .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
1659};
1660
1621/** 1661/**
1622 * devlink_alloc - Allocate new devlink instance resources 1662 * devlink_alloc - Allocate new devlink instance resources
1623 * 1663 *
@@ -1840,9 +1880,7 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister);
1840 1880
1841static int __init devlink_module_init(void) 1881static int __init devlink_module_init(void)
1842{ 1882{
1843 return genl_register_family_with_ops_groups(&devlink_nl_family, 1883 return genl_register_family(&devlink_nl_family);
1844 devlink_nl_ops,
1845 devlink_nl_mcgrps);
1846} 1884}
1847 1885
1848static void __exit devlink_module_exit(void) 1886static void __exit devlink_module_exit(void)
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 72cfb0c61125..8e0c0635ee97 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -59,12 +59,7 @@ struct dm_hw_stat_delta {
59 unsigned long last_drop_val; 59 unsigned long last_drop_val;
60}; 60};
61 61
62static struct genl_family net_drop_monitor_family = { 62static struct genl_family net_drop_monitor_family;
63 .id = GENL_ID_GENERATE,
64 .hdrsize = 0,
65 .name = "NET_DM",
66 .version = 2,
67};
68 63
69static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); 64static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
70 65
@@ -351,6 +346,17 @@ static const struct genl_ops dropmon_ops[] = {
351 }, 346 },
352}; 347};
353 348
349static struct genl_family net_drop_monitor_family __ro_after_init = {
350 .hdrsize = 0,
351 .name = "NET_DM",
352 .version = 2,
353 .module = THIS_MODULE,
354 .ops = dropmon_ops,
355 .n_ops = ARRAY_SIZE(dropmon_ops),
356 .mcgrps = dropmon_mcgrps,
357 .n_mcgrps = ARRAY_SIZE(dropmon_mcgrps),
358};
359
354static struct notifier_block dropmon_net_notifier = { 360static struct notifier_block dropmon_net_notifier = {
355 .notifier_call = dropmon_net_event 361 .notifier_call = dropmon_net_event
356}; 362};
@@ -367,8 +373,7 @@ static int __init init_net_drop_monitor(void)
367 return -ENOSPC; 373 return -ENOSPC;
368 } 374 }
369 375
370 rc = genl_register_family_with_ops_groups(&net_drop_monitor_family, 376 rc = genl_register_family(&net_drop_monitor_family);
371 dropmon_ops, dropmon_mcgrps);
372 if (rc) { 377 if (rc) {
373 pr_err("Could not create drop monitor netlink family\n"); 378 pr_err("Could not create drop monitor netlink family\n");
374 return rc; 379 return rc;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 047a1752ece1..e23766c7e3ba 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -119,6 +119,12 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
119 [ETHTOOL_TX_COPYBREAK] = "tx-copybreak", 119 [ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
120}; 120};
121 121
122static const char
123phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
124 [ETHTOOL_ID_UNSPEC] = "Unspec",
125 [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift",
126};
127
122static int ethtool_get_features(struct net_device *dev, void __user *useraddr) 128static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
123{ 129{
124 struct ethtool_gfeatures cmd = { 130 struct ethtool_gfeatures cmd = {
@@ -227,6 +233,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
227 if (sset == ETH_SS_TUNABLES) 233 if (sset == ETH_SS_TUNABLES)
228 return ARRAY_SIZE(tunable_strings); 234 return ARRAY_SIZE(tunable_strings);
229 235
236 if (sset == ETH_SS_PHY_TUNABLES)
237 return ARRAY_SIZE(phy_tunable_strings);
238
230 if (sset == ETH_SS_PHY_STATS) { 239 if (sset == ETH_SS_PHY_STATS) {
231 if (dev->phydev) 240 if (dev->phydev)
232 return phy_get_sset_count(dev->phydev); 241 return phy_get_sset_count(dev->phydev);
@@ -253,6 +262,8 @@ static void __ethtool_get_strings(struct net_device *dev,
253 sizeof(rss_hash_func_strings)); 262 sizeof(rss_hash_func_strings));
254 else if (stringset == ETH_SS_TUNABLES) 263 else if (stringset == ETH_SS_TUNABLES)
255 memcpy(data, tunable_strings, sizeof(tunable_strings)); 264 memcpy(data, tunable_strings, sizeof(tunable_strings));
265 else if (stringset == ETH_SS_PHY_TUNABLES)
266 memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings));
256 else if (stringset == ETH_SS_PHY_STATS) { 267 else if (stringset == ETH_SS_PHY_STATS) {
257 struct phy_device *phydev = dev->phydev; 268 struct phy_device *phydev = dev->phydev;
258 269
@@ -2422,6 +2433,85 @@ static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
2422 }; 2433 };
2423} 2434}
2424 2435
2436static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna)
2437{
2438 switch (tuna->id) {
2439 case ETHTOOL_PHY_DOWNSHIFT:
2440 if (tuna->len != sizeof(u8) ||
2441 tuna->type_id != ETHTOOL_TUNABLE_U8)
2442 return -EINVAL;
2443 break;
2444 default:
2445 return -EINVAL;
2446 }
2447
2448 return 0;
2449}
2450
2451static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
2452{
2453 int ret;
2454 struct ethtool_tunable tuna;
2455 struct phy_device *phydev = dev->phydev;
2456 void *data;
2457
2458 if (!(phydev && phydev->drv && phydev->drv->get_tunable))
2459 return -EOPNOTSUPP;
2460
2461 if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
2462 return -EFAULT;
2463 ret = ethtool_phy_tunable_valid(&tuna);
2464 if (ret)
2465 return ret;
2466 data = kmalloc(tuna.len, GFP_USER);
2467 if (!data)
2468 return -ENOMEM;
2469 mutex_lock(&phydev->lock);
2470 ret = phydev->drv->get_tunable(phydev, &tuna, data);
2471 mutex_unlock(&phydev->lock);
2472 if (ret)
2473 goto out;
2474 useraddr += sizeof(tuna);
2475 ret = -EFAULT;
2476 if (copy_to_user(useraddr, data, tuna.len))
2477 goto out;
2478 ret = 0;
2479
2480out:
2481 kfree(data);
2482 return ret;
2483}
2484
2485static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
2486{
2487 int ret;
2488 struct ethtool_tunable tuna;
2489 struct phy_device *phydev = dev->phydev;
2490 void *data;
2491
2492 if (!(phydev && phydev->drv && phydev->drv->set_tunable))
2493 return -EOPNOTSUPP;
2494 if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
2495 return -EFAULT;
2496 ret = ethtool_phy_tunable_valid(&tuna);
2497 if (ret)
2498 return ret;
2499 data = kmalloc(tuna.len, GFP_USER);
2500 if (!data)
2501 return -ENOMEM;
2502 useraddr += sizeof(tuna);
2503 ret = -EFAULT;
2504 if (copy_from_user(data, useraddr, tuna.len))
2505 goto out;
2506 mutex_lock(&phydev->lock);
2507 ret = phydev->drv->set_tunable(phydev, &tuna, data);
2508 mutex_unlock(&phydev->lock);
2509
2510out:
2511 kfree(data);
2512 return ret;
2513}
2514
2425/* The main entry point in this file. Called from net/core/dev_ioctl.c */ 2515/* The main entry point in this file. Called from net/core/dev_ioctl.c */
2426 2516
2427int dev_ethtool(struct net *net, struct ifreq *ifr) 2517int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -2479,6 +2569,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
2479 case ETHTOOL_GET_TS_INFO: 2569 case ETHTOOL_GET_TS_INFO:
2480 case ETHTOOL_GEEE: 2570 case ETHTOOL_GEEE:
2481 case ETHTOOL_GTUNABLE: 2571 case ETHTOOL_GTUNABLE:
2572 case ETHTOOL_PHY_GTUNABLE:
2482 case ETHTOOL_GLINKSETTINGS: 2573 case ETHTOOL_GLINKSETTINGS:
2483 break; 2574 break;
2484 default: 2575 default:
@@ -2685,6 +2776,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
2685 case ETHTOOL_SLINKSETTINGS: 2776 case ETHTOOL_SLINKSETTINGS:
2686 rc = ethtool_set_link_ksettings(dev, useraddr); 2777 rc = ethtool_set_link_ksettings(dev, useraddr);
2687 break; 2778 break;
2779 case ETHTOOL_PHY_GTUNABLE:
2780 rc = get_phy_tunable(dev, useraddr);
2781 break;
2782 case ETHTOOL_PHY_STUNABLE:
2783 rc = set_phy_tunable(dev, useraddr);
2784 break;
2688 default: 2785 default:
2689 rc = -EOPNOTSUPP; 2786 rc = -EOPNOTSUPP;
2690 } 2787 }
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index be4629c344a6..b6791d94841d 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -18,6 +18,11 @@
18#include <net/fib_rules.h> 18#include <net/fib_rules.h>
19#include <net/ip_tunnels.h> 19#include <net/ip_tunnels.h>
20 20
21static const struct fib_kuid_range fib_kuid_range_unset = {
22 KUIDT_INIT(0),
23 KUIDT_INIT(~0),
24};
25
21int fib_default_rule_add(struct fib_rules_ops *ops, 26int fib_default_rule_add(struct fib_rules_ops *ops,
22 u32 pref, u32 table, u32 flags) 27 u32 pref, u32 table, u32 flags)
23{ 28{
@@ -33,6 +38,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
33 r->table = table; 38 r->table = table;
34 r->flags = flags; 39 r->flags = flags;
35 r->fr_net = ops->fro_net; 40 r->fr_net = ops->fro_net;
41 r->uid_range = fib_kuid_range_unset;
36 42
37 r->suppress_prefixlen = -1; 43 r->suppress_prefixlen = -1;
38 r->suppress_ifgroup = -1; 44 r->suppress_ifgroup = -1;
@@ -172,6 +178,34 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
172} 178}
173EXPORT_SYMBOL_GPL(fib_rules_unregister); 179EXPORT_SYMBOL_GPL(fib_rules_unregister);
174 180
181static int uid_range_set(struct fib_kuid_range *range)
182{
183 return uid_valid(range->start) && uid_valid(range->end);
184}
185
186static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb)
187{
188 struct fib_rule_uid_range *in;
189 struct fib_kuid_range out;
190
191 in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]);
192
193 out.start = make_kuid(current_user_ns(), in->start);
194 out.end = make_kuid(current_user_ns(), in->end);
195
196 return out;
197}
198
199static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
200{
201 struct fib_rule_uid_range out = {
202 from_kuid_munged(current_user_ns(), range->start),
203 from_kuid_munged(current_user_ns(), range->end)
204 };
205
206 return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
207}
208
175static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, 209static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
176 struct flowi *fl, int flags, 210 struct flowi *fl, int flags,
177 struct fib_lookup_arg *arg) 211 struct fib_lookup_arg *arg)
@@ -193,6 +227,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
193 if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg)) 227 if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
194 goto out; 228 goto out;
195 229
230 if (uid_lt(fl->flowi_uid, rule->uid_range.start) ||
231 uid_gt(fl->flowi_uid, rule->uid_range.end))
232 goto out;
233
196 ret = ops->match(rule, fl, flags); 234 ret = ops->match(rule, fl, flags);
197out: 235out:
198 return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; 236 return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
@@ -305,6 +343,10 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
305 if (r->l3mdev != rule->l3mdev) 343 if (r->l3mdev != rule->l3mdev)
306 continue; 344 continue;
307 345
346 if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
347 !uid_eq(r->uid_range.end, rule->uid_range.end))
348 continue;
349
308 if (!ops->compare(r, frh, tb)) 350 if (!ops->compare(r, frh, tb))
309 continue; 351 continue;
310 return 1; 352 return 1;
@@ -429,6 +471,21 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
429 if (rule->l3mdev && rule->table) 471 if (rule->l3mdev && rule->table)
430 goto errout_free; 472 goto errout_free;
431 473
474 if (tb[FRA_UID_RANGE]) {
475 if (current_user_ns() != net->user_ns) {
476 err = -EPERM;
477 goto errout_free;
478 }
479
480 rule->uid_range = nla_get_kuid_range(tb);
481
482 if (!uid_range_set(&rule->uid_range) ||
483 !uid_lte(rule->uid_range.start, rule->uid_range.end))
484 goto errout_free;
485 } else {
486 rule->uid_range = fib_kuid_range_unset;
487 }
488
432 if ((nlh->nlmsg_flags & NLM_F_EXCL) && 489 if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
433 rule_exists(ops, frh, tb, rule)) { 490 rule_exists(ops, frh, tb, rule)) {
434 err = -EEXIST; 491 err = -EEXIST;
@@ -497,6 +554,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
497 struct fib_rules_ops *ops = NULL; 554 struct fib_rules_ops *ops = NULL;
498 struct fib_rule *rule, *tmp; 555 struct fib_rule *rule, *tmp;
499 struct nlattr *tb[FRA_MAX+1]; 556 struct nlattr *tb[FRA_MAX+1];
557 struct fib_kuid_range range;
500 int err = -EINVAL; 558 int err = -EINVAL;
501 559
502 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) 560 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
@@ -516,6 +574,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
516 if (err < 0) 574 if (err < 0)
517 goto errout; 575 goto errout;
518 576
577 if (tb[FRA_UID_RANGE]) {
578 range = nla_get_kuid_range(tb);
579 if (!uid_range_set(&range))
580 goto errout;
581 } else {
582 range = fib_kuid_range_unset;
583 }
584
519 list_for_each_entry(rule, &ops->rules_list, list) { 585 list_for_each_entry(rule, &ops->rules_list, list) {
520 if (frh->action && (frh->action != rule->action)) 586 if (frh->action && (frh->action != rule->action))
521 continue; 587 continue;
@@ -552,6 +618,11 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
552 (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV]))) 618 (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
553 continue; 619 continue;
554 620
621 if (uid_range_set(&range) &&
622 (!uid_eq(rule->uid_range.start, range.start) ||
623 !uid_eq(rule->uid_range.end, range.end)))
624 continue;
625
555 if (!ops->compare(rule, frh, tb)) 626 if (!ops->compare(rule, frh, tb))
556 continue; 627 continue;
557 628
@@ -619,7 +690,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
619 + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */ 690 + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
620 + nla_total_size(4) /* FRA_FWMARK */ 691 + nla_total_size(4) /* FRA_FWMARK */
621 + nla_total_size(4) /* FRA_FWMASK */ 692 + nla_total_size(4) /* FRA_FWMASK */
622 + nla_total_size_64bit(8); /* FRA_TUN_ID */ 693 + nla_total_size_64bit(8) /* FRA_TUN_ID */
694 + nla_total_size(sizeof(struct fib_kuid_range));
623 695
624 if (ops->nlmsg_payload) 696 if (ops->nlmsg_payload)
625 payload += ops->nlmsg_payload(rule); 697 payload += ops->nlmsg_payload(rule);
@@ -679,7 +751,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
679 (rule->tun_id && 751 (rule->tun_id &&
680 nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) || 752 nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
681 (rule->l3mdev && 753 (rule->l3mdev &&
682 nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev))) 754 nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
755 (uid_range_set(&rule->uid_range) &&
756 nla_put_uid_range(skb, &rule->uid_range)))
683 goto nla_put_failure; 757 goto nla_put_failure;
684 758
685 if (rule->suppress_ifgroup != -1) { 759 if (rule->suppress_ifgroup != -1) {
diff --git a/net/core/filter.c b/net/core/filter.c
index b391209838ef..e6c412b94dec 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -30,6 +30,7 @@
30#include <linux/inet.h> 30#include <linux/inet.h>
31#include <linux/netdevice.h> 31#include <linux/netdevice.h>
32#include <linux/if_packet.h> 32#include <linux/if_packet.h>
33#include <linux/if_arp.h>
33#include <linux/gfp.h> 34#include <linux/gfp.h>
34#include <net/ip.h> 35#include <net/ip.h>
35#include <net/protocol.h> 36#include <net/protocol.h>
@@ -39,7 +40,7 @@
39#include <net/flow_dissector.h> 40#include <net/flow_dissector.h>
40#include <linux/errno.h> 41#include <linux/errno.h>
41#include <linux/timer.h> 42#include <linux/timer.h>
42#include <asm/uaccess.h> 43#include <linux/uaccess.h>
43#include <asm/unaligned.h> 44#include <asm/unaligned.h>
44#include <linux/filter.h> 45#include <linux/filter.h>
45#include <linux/ratelimit.h> 46#include <linux/ratelimit.h>
@@ -78,6 +79,10 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
78 if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) 79 if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
79 return -ENOMEM; 80 return -ENOMEM;
80 81
82 err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
83 if (err)
84 return err;
85
81 err = security_sock_rcv_skb(sk, skb); 86 err = security_sock_rcv_skb(sk, skb);
82 if (err) 87 if (err)
83 return err; 88 return err;
@@ -1684,6 +1689,12 @@ static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
1684static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev, 1689static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
1685 u32 flags) 1690 u32 flags)
1686{ 1691{
1692 /* Verify that a link layer header is carried */
1693 if (unlikely(skb->mac_header >= skb->network_header)) {
1694 kfree_skb(skb);
1695 return -ERANGE;
1696 }
1697
1687 bpf_push_mac_rcsum(skb); 1698 bpf_push_mac_rcsum(skb);
1688 return flags & BPF_F_INGRESS ? 1699 return flags & BPF_F_INGRESS ?
1689 __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); 1700 __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
@@ -1692,17 +1703,10 @@ static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
1692static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, 1703static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
1693 u32 flags) 1704 u32 flags)
1694{ 1705{
1695 switch (dev->type) { 1706 if (dev_is_mac_header_xmit(dev))
1696 case ARPHRD_TUNNEL:
1697 case ARPHRD_TUNNEL6:
1698 case ARPHRD_SIT:
1699 case ARPHRD_IPGRE:
1700 case ARPHRD_VOID:
1701 case ARPHRD_NONE:
1702 return __bpf_redirect_no_mac(skb, dev, flags);
1703 default:
1704 return __bpf_redirect_common(skb, dev, flags); 1707 return __bpf_redirect_common(skb, dev, flags);
1705 } 1708 else
1709 return __bpf_redirect_no_mac(skb, dev, flags);
1706} 1710}
1707 1711
1708BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) 1712BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
@@ -2190,16 +2194,79 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
2190 .arg3_type = ARG_ANYTHING, 2194 .arg3_type = ARG_ANYTHING,
2191}; 2195};
2192 2196
2193bool bpf_helper_changes_skb_data(void *func) 2197BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
2198 u64, flags)
2199{
2200 u32 max_len = __bpf_skb_max_len(skb);
2201 u32 new_len = skb->len + head_room;
2202 int ret;
2203
2204 if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
2205 new_len < skb->len))
2206 return -EINVAL;
2207
2208 ret = skb_cow(skb, head_room);
2209 if (likely(!ret)) {
2210 /* Idea for this helper is that we currently only
2211 * allow to expand on mac header. This means that
2212 * skb->protocol network header, etc, stay as is.
2213 * Compared to bpf_skb_change_tail(), we're more
2214 * flexible due to not needing to linearize or
2215 * reset GSO. Intention for this helper is to be
2216 * used by an L3 skb that needs to push mac header
2217 * for redirection into L2 device.
2218 */
2219 __skb_push(skb, head_room);
2220 memset(skb->data, 0, head_room);
2221 skb_reset_mac_header(skb);
2222 }
2223
2224 bpf_compute_data_end(skb);
2225 return 0;
2226}
2227
2228static const struct bpf_func_proto bpf_skb_change_head_proto = {
2229 .func = bpf_skb_change_head,
2230 .gpl_only = false,
2231 .ret_type = RET_INTEGER,
2232 .arg1_type = ARG_PTR_TO_CTX,
2233 .arg2_type = ARG_ANYTHING,
2234 .arg3_type = ARG_ANYTHING,
2235};
2236
2237BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
2238{
2239 void *data = xdp->data + offset;
2240
2241 if (unlikely(data < xdp->data_hard_start ||
2242 data > xdp->data_end - ETH_HLEN))
2243 return -EINVAL;
2244
2245 xdp->data = data;
2246
2247 return 0;
2248}
2249
2250static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
2251 .func = bpf_xdp_adjust_head,
2252 .gpl_only = false,
2253 .ret_type = RET_INTEGER,
2254 .arg1_type = ARG_PTR_TO_CTX,
2255 .arg2_type = ARG_ANYTHING,
2256};
2257
2258bool bpf_helper_changes_pkt_data(void *func)
2194{ 2259{
2195 if (func == bpf_skb_vlan_push || 2260 if (func == bpf_skb_vlan_push ||
2196 func == bpf_skb_vlan_pop || 2261 func == bpf_skb_vlan_pop ||
2197 func == bpf_skb_store_bytes || 2262 func == bpf_skb_store_bytes ||
2198 func == bpf_skb_change_proto || 2263 func == bpf_skb_change_proto ||
2264 func == bpf_skb_change_head ||
2199 func == bpf_skb_change_tail || 2265 func == bpf_skb_change_tail ||
2200 func == bpf_skb_pull_data || 2266 func == bpf_skb_pull_data ||
2201 func == bpf_l3_csum_replace || 2267 func == bpf_l3_csum_replace ||
2202 func == bpf_l4_csum_replace) 2268 func == bpf_l4_csum_replace ||
2269 func == bpf_xdp_adjust_head)
2203 return true; 2270 return true;
2204 2271
2205 return false; 2272 return false;
@@ -2544,6 +2611,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
2544 return &bpf_get_prandom_u32_proto; 2611 return &bpf_get_prandom_u32_proto;
2545 case BPF_FUNC_get_smp_processor_id: 2612 case BPF_FUNC_get_smp_processor_id:
2546 return &bpf_get_raw_smp_processor_id_proto; 2613 return &bpf_get_raw_smp_processor_id_proto;
2614 case BPF_FUNC_get_numa_node_id:
2615 return &bpf_get_numa_node_id_proto;
2547 case BPF_FUNC_tail_call: 2616 case BPF_FUNC_tail_call:
2548 return &bpf_tail_call_proto; 2617 return &bpf_tail_call_proto;
2549 case BPF_FUNC_ktime_get_ns: 2618 case BPF_FUNC_ktime_get_ns:
@@ -2623,12 +2692,87 @@ xdp_func_proto(enum bpf_func_id func_id)
2623 return &bpf_xdp_event_output_proto; 2692 return &bpf_xdp_event_output_proto;
2624 case BPF_FUNC_get_smp_processor_id: 2693 case BPF_FUNC_get_smp_processor_id:
2625 return &bpf_get_smp_processor_id_proto; 2694 return &bpf_get_smp_processor_id_proto;
2695 case BPF_FUNC_xdp_adjust_head:
2696 return &bpf_xdp_adjust_head_proto;
2697 default:
2698 return sk_filter_func_proto(func_id);
2699 }
2700}
2701
2702static const struct bpf_func_proto *
2703cg_skb_func_proto(enum bpf_func_id func_id)
2704{
2705 switch (func_id) {
2706 case BPF_FUNC_skb_load_bytes:
2707 return &bpf_skb_load_bytes_proto;
2708 default:
2709 return sk_filter_func_proto(func_id);
2710 }
2711}
2712
2713static const struct bpf_func_proto *
2714lwt_inout_func_proto(enum bpf_func_id func_id)
2715{
2716 switch (func_id) {
2717 case BPF_FUNC_skb_load_bytes:
2718 return &bpf_skb_load_bytes_proto;
2719 case BPF_FUNC_skb_pull_data:
2720 return &bpf_skb_pull_data_proto;
2721 case BPF_FUNC_csum_diff:
2722 return &bpf_csum_diff_proto;
2723 case BPF_FUNC_get_cgroup_classid:
2724 return &bpf_get_cgroup_classid_proto;
2725 case BPF_FUNC_get_route_realm:
2726 return &bpf_get_route_realm_proto;
2727 case BPF_FUNC_get_hash_recalc:
2728 return &bpf_get_hash_recalc_proto;
2729 case BPF_FUNC_perf_event_output:
2730 return &bpf_skb_event_output_proto;
2731 case BPF_FUNC_get_smp_processor_id:
2732 return &bpf_get_smp_processor_id_proto;
2733 case BPF_FUNC_skb_under_cgroup:
2734 return &bpf_skb_under_cgroup_proto;
2626 default: 2735 default:
2627 return sk_filter_func_proto(func_id); 2736 return sk_filter_func_proto(func_id);
2628 } 2737 }
2629} 2738}
2630 2739
2631static bool __is_valid_access(int off, int size, enum bpf_access_type type) 2740static const struct bpf_func_proto *
2741lwt_xmit_func_proto(enum bpf_func_id func_id)
2742{
2743 switch (func_id) {
2744 case BPF_FUNC_skb_get_tunnel_key:
2745 return &bpf_skb_get_tunnel_key_proto;
2746 case BPF_FUNC_skb_set_tunnel_key:
2747 return bpf_get_skb_set_tunnel_proto(func_id);
2748 case BPF_FUNC_skb_get_tunnel_opt:
2749 return &bpf_skb_get_tunnel_opt_proto;
2750 case BPF_FUNC_skb_set_tunnel_opt:
2751 return bpf_get_skb_set_tunnel_proto(func_id);
2752 case BPF_FUNC_redirect:
2753 return &bpf_redirect_proto;
2754 case BPF_FUNC_clone_redirect:
2755 return &bpf_clone_redirect_proto;
2756 case BPF_FUNC_skb_change_tail:
2757 return &bpf_skb_change_tail_proto;
2758 case BPF_FUNC_skb_change_head:
2759 return &bpf_skb_change_head_proto;
2760 case BPF_FUNC_skb_store_bytes:
2761 return &bpf_skb_store_bytes_proto;
2762 case BPF_FUNC_csum_update:
2763 return &bpf_csum_update_proto;
2764 case BPF_FUNC_l3_csum_replace:
2765 return &bpf_l3_csum_replace_proto;
2766 case BPF_FUNC_l4_csum_replace:
2767 return &bpf_l4_csum_replace_proto;
2768 case BPF_FUNC_set_hash_invalid:
2769 return &bpf_set_hash_invalid_proto;
2770 default:
2771 return lwt_inout_func_proto(func_id);
2772 }
2773}
2774
2775static bool __is_valid_access(int off, int size)
2632{ 2776{
2633 if (off < 0 || off >= sizeof(struct __sk_buff)) 2777 if (off < 0 || off >= sizeof(struct __sk_buff))
2634 return false; 2778 return false;
@@ -2662,7 +2806,64 @@ static bool sk_filter_is_valid_access(int off, int size,
2662 } 2806 }
2663 } 2807 }
2664 2808
2665 return __is_valid_access(off, size, type); 2809 return __is_valid_access(off, size);
2810}
2811
2812static bool lwt_is_valid_access(int off, int size,
2813 enum bpf_access_type type,
2814 enum bpf_reg_type *reg_type)
2815{
2816 switch (off) {
2817 case offsetof(struct __sk_buff, tc_classid):
2818 return false;
2819 }
2820
2821 if (type == BPF_WRITE) {
2822 switch (off) {
2823 case offsetof(struct __sk_buff, mark):
2824 case offsetof(struct __sk_buff, priority):
2825 case offsetof(struct __sk_buff, cb[0]) ...
2826 offsetof(struct __sk_buff, cb[4]):
2827 break;
2828 default:
2829 return false;
2830 }
2831 }
2832
2833 switch (off) {
2834 case offsetof(struct __sk_buff, data):
2835 *reg_type = PTR_TO_PACKET;
2836 break;
2837 case offsetof(struct __sk_buff, data_end):
2838 *reg_type = PTR_TO_PACKET_END;
2839 break;
2840 }
2841
2842 return __is_valid_access(off, size);
2843}
2844
2845static bool sock_filter_is_valid_access(int off, int size,
2846 enum bpf_access_type type,
2847 enum bpf_reg_type *reg_type)
2848{
2849 if (type == BPF_WRITE) {
2850 switch (off) {
2851 case offsetof(struct bpf_sock, bound_dev_if):
2852 break;
2853 default:
2854 return false;
2855 }
2856 }
2857
2858 if (off < 0 || off + size > sizeof(struct bpf_sock))
2859 return false;
2860 /* The verifier guarantees that size > 0. */
2861 if (off % size != 0)
2862 return false;
2863 if (size != sizeof(__u32))
2864 return false;
2865
2866 return true;
2666} 2867}
2667 2868
2668static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, 2869static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
@@ -2731,11 +2932,10 @@ static bool tc_cls_act_is_valid_access(int off, int size,
2731 break; 2932 break;
2732 } 2933 }
2733 2934
2734 return __is_valid_access(off, size, type); 2935 return __is_valid_access(off, size);
2735} 2936}
2736 2937
2737static bool __is_valid_xdp_access(int off, int size, 2938static bool __is_valid_xdp_access(int off, int size)
2738 enum bpf_access_type type)
2739{ 2939{
2740 if (off < 0 || off >= sizeof(struct xdp_md)) 2940 if (off < 0 || off >= sizeof(struct xdp_md))
2741 return false; 2941 return false;
@@ -2763,7 +2963,7 @@ static bool xdp_is_valid_access(int off, int size,
2763 break; 2963 break;
2764 } 2964 }
2765 2965
2766 return __is_valid_xdp_access(off, size, type); 2966 return __is_valid_xdp_access(off, size);
2767} 2967}
2768 2968
2769void bpf_warn_invalid_xdp_action(u32 act) 2969void bpf_warn_invalid_xdp_action(u32 act)
@@ -2772,6 +2972,12 @@ void bpf_warn_invalid_xdp_action(u32 act)
2772} 2972}
2773EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); 2973EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
2774 2974
2975void bpf_warn_invalid_xdp_buffer(void)
2976{
2977 WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n");
2978}
2979EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer);
2980
2775static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, 2981static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
2776 int src_reg, int ctx_off, 2982 int src_reg, int ctx_off,
2777 struct bpf_insn *insn_buf, 2983 struct bpf_insn *insn_buf,
@@ -2923,6 +3129,51 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
2923 return insn - insn_buf; 3129 return insn - insn_buf;
2924} 3130}
2925 3131
3132static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
3133 int dst_reg, int src_reg,
3134 int ctx_off,
3135 struct bpf_insn *insn_buf,
3136 struct bpf_prog *prog)
3137{
3138 struct bpf_insn *insn = insn_buf;
3139
3140 switch (ctx_off) {
3141 case offsetof(struct bpf_sock, bound_dev_if):
3142 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
3143
3144 if (type == BPF_WRITE)
3145 *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
3146 offsetof(struct sock, sk_bound_dev_if));
3147 else
3148 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
3149 offsetof(struct sock, sk_bound_dev_if));
3150 break;
3151
3152 case offsetof(struct bpf_sock, family):
3153 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
3154
3155 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
3156 offsetof(struct sock, sk_family));
3157 break;
3158
3159 case offsetof(struct bpf_sock, type):
3160 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
3161 offsetof(struct sock, __sk_flags_offset));
3162 *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_TYPE_MASK);
3163 *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_TYPE_SHIFT);
3164 break;
3165
3166 case offsetof(struct bpf_sock, protocol):
3167 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
3168 offsetof(struct sock, __sk_flags_offset));
3169 *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_PROTO_MASK);
3170 *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_PROTO_SHIFT);
3171 break;
3172 }
3173
3174 return insn - insn_buf;
3175}
3176
2926static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg, 3177static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
2927 int src_reg, int ctx_off, 3178 int src_reg, int ctx_off,
2928 struct bpf_insn *insn_buf, 3179 struct bpf_insn *insn_buf,
@@ -2990,6 +3241,31 @@ static const struct bpf_verifier_ops xdp_ops = {
2990 .convert_ctx_access = xdp_convert_ctx_access, 3241 .convert_ctx_access = xdp_convert_ctx_access,
2991}; 3242};
2992 3243
3244static const struct bpf_verifier_ops cg_skb_ops = {
3245 .get_func_proto = cg_skb_func_proto,
3246 .is_valid_access = sk_filter_is_valid_access,
3247 .convert_ctx_access = sk_filter_convert_ctx_access,
3248};
3249
3250static const struct bpf_verifier_ops lwt_inout_ops = {
3251 .get_func_proto = lwt_inout_func_proto,
3252 .is_valid_access = lwt_is_valid_access,
3253 .convert_ctx_access = sk_filter_convert_ctx_access,
3254};
3255
3256static const struct bpf_verifier_ops lwt_xmit_ops = {
3257 .get_func_proto = lwt_xmit_func_proto,
3258 .is_valid_access = lwt_is_valid_access,
3259 .convert_ctx_access = sk_filter_convert_ctx_access,
3260 .gen_prologue = tc_cls_act_prologue,
3261};
3262
3263static const struct bpf_verifier_ops cg_sock_ops = {
3264 .get_func_proto = sk_filter_func_proto,
3265 .is_valid_access = sock_filter_is_valid_access,
3266 .convert_ctx_access = sock_filter_convert_ctx_access,
3267};
3268
2993static struct bpf_prog_type_list sk_filter_type __read_mostly = { 3269static struct bpf_prog_type_list sk_filter_type __read_mostly = {
2994 .ops = &sk_filter_ops, 3270 .ops = &sk_filter_ops,
2995 .type = BPF_PROG_TYPE_SOCKET_FILTER, 3271 .type = BPF_PROG_TYPE_SOCKET_FILTER,
@@ -3010,12 +3286,42 @@ static struct bpf_prog_type_list xdp_type __read_mostly = {
3010 .type = BPF_PROG_TYPE_XDP, 3286 .type = BPF_PROG_TYPE_XDP,
3011}; 3287};
3012 3288
3289static struct bpf_prog_type_list cg_skb_type __read_mostly = {
3290 .ops = &cg_skb_ops,
3291 .type = BPF_PROG_TYPE_CGROUP_SKB,
3292};
3293
3294static struct bpf_prog_type_list lwt_in_type __read_mostly = {
3295 .ops = &lwt_inout_ops,
3296 .type = BPF_PROG_TYPE_LWT_IN,
3297};
3298
3299static struct bpf_prog_type_list lwt_out_type __read_mostly = {
3300 .ops = &lwt_inout_ops,
3301 .type = BPF_PROG_TYPE_LWT_OUT,
3302};
3303
3304static struct bpf_prog_type_list lwt_xmit_type __read_mostly = {
3305 .ops = &lwt_xmit_ops,
3306 .type = BPF_PROG_TYPE_LWT_XMIT,
3307};
3308
3309static struct bpf_prog_type_list cg_sock_type __read_mostly = {
3310 .ops = &cg_sock_ops,
3311 .type = BPF_PROG_TYPE_CGROUP_SOCK
3312};
3313
3013static int __init register_sk_filter_ops(void) 3314static int __init register_sk_filter_ops(void)
3014{ 3315{
3015 bpf_register_prog_type(&sk_filter_type); 3316 bpf_register_prog_type(&sk_filter_type);
3016 bpf_register_prog_type(&sched_cls_type); 3317 bpf_register_prog_type(&sched_cls_type);
3017 bpf_register_prog_type(&sched_act_type); 3318 bpf_register_prog_type(&sched_act_type);
3018 bpf_register_prog_type(&xdp_type); 3319 bpf_register_prog_type(&xdp_type);
3320 bpf_register_prog_type(&cg_skb_type);
3321 bpf_register_prog_type(&cg_sock_type);
3322 bpf_register_prog_type(&lwt_in_type);
3323 bpf_register_prog_type(&lwt_out_type);
3324 bpf_register_prog_type(&lwt_xmit_type);
3019 3325
3020 return 0; 3326 return 0;
3021} 3327}
diff --git a/net/core/flow.c b/net/core/flow.c
index 18e8893d4be5..f765c11d8df5 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -417,28 +417,20 @@ static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
417 return 0; 417 return 0;
418} 418}
419 419
420static int flow_cache_cpu(struct notifier_block *nfb, 420static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node)
421 unsigned long action,
422 void *hcpu)
423{ 421{
424 struct flow_cache *fc = container_of(nfb, struct flow_cache, 422 struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
425 hotcpu_notifier); 423
426 int res, cpu = (unsigned long) hcpu; 424 return flow_cache_cpu_prepare(fc, cpu);
425}
426
427static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node)
428{
429 struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
427 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); 430 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
428 431
429 switch (action) { 432 __flow_cache_shrink(fc, fcp, 0);
430 case CPU_UP_PREPARE: 433 return 0;
431 case CPU_UP_PREPARE_FROZEN:
432 res = flow_cache_cpu_prepare(fc, cpu);
433 if (res)
434 return notifier_from_errno(res);
435 break;
436 case CPU_DEAD:
437 case CPU_DEAD_FROZEN:
438 __flow_cache_shrink(fc, fcp, 0);
439 break;
440 }
441 return NOTIFY_OK;
442} 434}
443 435
444int flow_cache_init(struct net *net) 436int flow_cache_init(struct net *net)
@@ -465,18 +457,8 @@ int flow_cache_init(struct net *net)
465 if (!fc->percpu) 457 if (!fc->percpu)
466 return -ENOMEM; 458 return -ENOMEM;
467 459
468 cpu_notifier_register_begin(); 460 if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node))
469 461 goto err;
470 for_each_online_cpu(i) {
471 if (flow_cache_cpu_prepare(fc, i))
472 goto err;
473 }
474 fc->hotcpu_notifier = (struct notifier_block){
475 .notifier_call = flow_cache_cpu,
476 };
477 __register_hotcpu_notifier(&fc->hotcpu_notifier);
478
479 cpu_notifier_register_done();
480 462
481 setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, 463 setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
482 (unsigned long) fc); 464 (unsigned long) fc);
@@ -492,8 +474,6 @@ err:
492 fcp->hash_table = NULL; 474 fcp->hash_table = NULL;
493 } 475 }
494 476
495 cpu_notifier_register_done();
496
497 free_percpu(fc->percpu); 477 free_percpu(fc->percpu);
498 fc->percpu = NULL; 478 fc->percpu = NULL;
499 479
@@ -507,7 +487,8 @@ void flow_cache_fini(struct net *net)
507 struct flow_cache *fc = &net->xfrm.flow_cache_global; 487 struct flow_cache *fc = &net->xfrm.flow_cache_global;
508 488
509 del_timer_sync(&fc->rnd_timer); 489 del_timer_sync(&fc->rnd_timer);
510 unregister_hotcpu_notifier(&fc->hotcpu_notifier); 490
491 cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node);
511 492
512 for_each_possible_cpu(i) { 493 for_each_possible_cpu(i) {
513 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); 494 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
@@ -519,3 +500,14 @@ void flow_cache_fini(struct net *net)
519 fc->percpu = NULL; 500 fc->percpu = NULL;
520} 501}
521EXPORT_SYMBOL(flow_cache_fini); 502EXPORT_SYMBOL(flow_cache_fini);
503
504void __init flow_cache_hp_init(void)
505{
506 int ret;
507
508 ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE,
509 "net/flow:prepare",
510 flow_cache_cpu_up_prep,
511 flow_cache_cpu_dead);
512 WARN_ON(ret < 0);
513}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index c6d8207ffa7e..d6447dc10371 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -58,6 +58,28 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
58EXPORT_SYMBOL(skb_flow_dissector_init); 58EXPORT_SYMBOL(skb_flow_dissector_init);
59 59
60/** 60/**
61 * skb_flow_get_be16 - extract be16 entity
62 * @skb: sk_buff to extract from
63 * @poff: offset to extract at
64 * @data: raw buffer pointer to the packet
65 * @hlen: packet header length
66 *
67 * The function will try to retrieve a be32 entity at
68 * offset poff
69 */
70__be16 skb_flow_get_be16(const struct sk_buff *skb, int poff, void *data,
71 int hlen)
72{
73 __be16 *u, _u;
74
75 u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
76 if (u)
77 return *u;
78
79 return 0;
80}
81
82/**
61 * __skb_flow_get_ports - extract the upper layer ports and return them 83 * __skb_flow_get_ports - extract the upper layer ports and return them
62 * @skb: sk_buff to extract the ports from 84 * @skb: sk_buff to extract the ports from
63 * @thoff: transport header offset 85 * @thoff: transport header offset
@@ -117,6 +139,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
117 struct flow_dissector_key_basic *key_basic; 139 struct flow_dissector_key_basic *key_basic;
118 struct flow_dissector_key_addrs *key_addrs; 140 struct flow_dissector_key_addrs *key_addrs;
119 struct flow_dissector_key_ports *key_ports; 141 struct flow_dissector_key_ports *key_ports;
142 struct flow_dissector_key_icmp *key_icmp;
120 struct flow_dissector_key_tags *key_tags; 143 struct flow_dissector_key_tags *key_tags;
121 struct flow_dissector_key_vlan *key_vlan; 144 struct flow_dissector_key_vlan *key_vlan;
122 struct flow_dissector_key_keyid *key_keyid; 145 struct flow_dissector_key_keyid *key_keyid;
@@ -546,6 +569,14 @@ ip_proto_again:
546 data, hlen); 569 data, hlen);
547 } 570 }
548 571
572 if (dissector_uses_key(flow_dissector,
573 FLOW_DISSECTOR_KEY_ICMP)) {
574 key_icmp = skb_flow_dissector_target(flow_dissector,
575 FLOW_DISSECTOR_KEY_ICMP,
576 target_container);
577 key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
578 }
579
549out_good: 580out_good:
550 ret = true; 581 ret = true;
551 582
@@ -726,7 +757,7 @@ EXPORT_SYMBOL(make_flow_keys_digest);
726 757
727static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; 758static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
728 759
729u32 __skb_get_hash_symmetric(struct sk_buff *skb) 760u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
730{ 761{
731 struct flow_keys keys; 762 struct flow_keys keys;
732 763
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index cad8e791f28e..0385dece1f6f 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -7,13 +7,14 @@
7 * 2 of the License, or (at your option) any later version. 7 * 2 of the License, or (at your option) any later version.
8 * 8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 * Eric Dumazet <edumazet@google.com>
10 * 11 *
11 * Changes: 12 * Changes:
12 * Jamal Hadi Salim - moved it to net/core and reshulfed 13 * Jamal Hadi Salim - moved it to net/core and reshulfed
13 * names to make it usable in general net subsystem. 14 * names to make it usable in general net subsystem.
14 */ 15 */
15 16
16#include <asm/uaccess.h> 17#include <linux/uaccess.h>
17#include <linux/bitops.h> 18#include <linux/bitops.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/types.h> 20#include <linux/types.h>
@@ -30,165 +31,79 @@
30#include <linux/skbuff.h> 31#include <linux/skbuff.h>
31#include <linux/rtnetlink.h> 32#include <linux/rtnetlink.h>
32#include <linux/init.h> 33#include <linux/init.h>
33#include <linux/rbtree.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/seqlock.h>
35#include <net/sock.h> 36#include <net/sock.h>
36#include <net/gen_stats.h> 37#include <net/gen_stats.h>
37 38
38/* 39/* This code is NOT intended to be used for statistics collection,
39 This code is NOT intended to be used for statistics collection, 40 * its purpose is to provide a base for statistical multiplexing
40 its purpose is to provide a base for statistical multiplexing 41 * for controlled load service.
41 for controlled load service. 42 * If you need only statistics, run a user level daemon which
42 If you need only statistics, run a user level daemon which 43 * periodically reads byte counters.
43 periodically reads byte counters.
44
45 Unfortunately, rate estimation is not a very easy task.
46 F.e. I did not find a simple way to estimate the current peak rate
47 and even failed to formulate the problem 8)8)
48
49 So I preferred not to built an estimator into the scheduler,
50 but run this task separately.
51 Ideally, it should be kernel thread(s), but for now it runs
52 from timers, which puts apparent top bounds on the number of rated
53 flows, has minimal overhead on small, but is enough
54 to handle controlled load service, sets of aggregates.
55
56 We measure rate over A=(1<<interval) seconds and evaluate EWMA:
57
58 avrate = avrate*(1-W) + rate*W
59
60 where W is chosen as negative power of 2: W = 2^(-ewma_log)
61
62 The resulting time constant is:
63
64 T = A/(-ln(1-W))
65
66
67 NOTES.
68
69 * avbps and avpps are scaled by 2^5.
70 * both values are reported as 32 bit unsigned values. bps can
71 overflow for fast links : max speed being 34360Mbit/sec
72 * Minimal interval is HZ/4=250msec (it is the greatest common divisor
73 for HZ=100 and HZ=1024 8)), maximal interval
74 is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
75 are too expensive, longer ones can be implemented
76 at user level painlessly.
77 */ 44 */
78 45
79#define EST_MAX_INTERVAL 5 46struct net_rate_estimator {
80
81struct gen_estimator
82{
83 struct list_head list;
84 struct gnet_stats_basic_packed *bstats; 47 struct gnet_stats_basic_packed *bstats;
85 struct gnet_stats_rate_est64 *rate_est;
86 spinlock_t *stats_lock; 48 spinlock_t *stats_lock;
87 seqcount_t *running; 49 seqcount_t *running;
88 int ewma_log; 50 struct gnet_stats_basic_cpu __percpu *cpu_bstats;
51 u8 ewma_log;
52 u8 intvl_log; /* period : (250ms << intvl_log) */
53
54 seqcount_t seq;
89 u32 last_packets; 55 u32 last_packets;
90 unsigned long avpps;
91 u64 last_bytes; 56 u64 last_bytes;
57
58 u64 avpps;
92 u64 avbps; 59 u64 avbps;
93 struct rcu_head e_rcu;
94 struct rb_node node;
95 struct gnet_stats_basic_cpu __percpu *cpu_bstats;
96 struct rcu_head head;
97};
98 60
99struct gen_estimator_head 61 unsigned long next_jiffies;
100{ 62 struct timer_list timer;
101 struct timer_list timer; 63 struct rcu_head rcu;
102 struct list_head list;
103}; 64};
104 65
105static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; 66static void est_fetch_counters(struct net_rate_estimator *e,
106 67 struct gnet_stats_basic_packed *b)
107/* Protects against NULL dereference */
108static DEFINE_RWLOCK(est_lock);
109
110/* Protects against soft lockup during large deletion */
111static struct rb_root est_root = RB_ROOT;
112static DEFINE_SPINLOCK(est_tree_lock);
113
114static void est_timer(unsigned long arg)
115{ 68{
116 int idx = (int)arg; 69 if (e->stats_lock)
117 struct gen_estimator *e; 70 spin_lock(e->stats_lock);
118 71
119 rcu_read_lock(); 72 __gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats);
120 list_for_each_entry_rcu(e, &elist[idx].list, list) { 73
121 struct gnet_stats_basic_packed b = {0}; 74 if (e->stats_lock)
122 unsigned long rate; 75 spin_unlock(e->stats_lock);
123 u64 brate;
124
125 if (e->stats_lock)
126 spin_lock(e->stats_lock);
127 read_lock(&est_lock);
128 if (e->bstats == NULL)
129 goto skip;
130
131 __gnet_stats_copy_basic(e->running, &b, e->cpu_bstats, e->bstats);
132
133 brate = (b.bytes - e->last_bytes)<<(7 - idx);
134 e->last_bytes = b.bytes;
135 e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
136 WRITE_ONCE(e->rate_est->bps, (e->avbps + 0xF) >> 5);
137
138 rate = b.packets - e->last_packets;
139 rate <<= (7 - idx);
140 e->last_packets = b.packets;
141 e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
142 WRITE_ONCE(e->rate_est->pps, (e->avpps + 0xF) >> 5);
143skip:
144 read_unlock(&est_lock);
145 if (e->stats_lock)
146 spin_unlock(e->stats_lock);
147 }
148 76
149 if (!list_empty(&elist[idx].list))
150 mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
151 rcu_read_unlock();
152} 77}
153 78
154static void gen_add_node(struct gen_estimator *est) 79static void est_timer(unsigned long arg)
155{ 80{
156 struct rb_node **p = &est_root.rb_node, *parent = NULL; 81 struct net_rate_estimator *est = (struct net_rate_estimator *)arg;
82 struct gnet_stats_basic_packed b;
83 u64 rate, brate;
157 84
158 while (*p) { 85 est_fetch_counters(est, &b);
159 struct gen_estimator *e; 86 brate = (b.bytes - est->last_bytes) << (8 - est->ewma_log);
87 brate -= (est->avbps >> est->ewma_log);
160 88
161 parent = *p; 89 rate = (u64)(b.packets - est->last_packets) << (8 - est->ewma_log);
162 e = rb_entry(parent, struct gen_estimator, node); 90 rate -= (est->avpps >> est->ewma_log);
163 91
164 if (est->bstats > e->bstats) 92 write_seqcount_begin(&est->seq);
165 p = &parent->rb_right; 93 est->avbps += brate;
166 else 94 est->avpps += rate;
167 p = &parent->rb_left; 95 write_seqcount_end(&est->seq);
168 }
169 rb_link_node(&est->node, parent, p);
170 rb_insert_color(&est->node, &est_root);
171}
172 96
173static 97 est->last_bytes = b.bytes;
174struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats, 98 est->last_packets = b.packets;
175 const struct gnet_stats_rate_est64 *rate_est)
176{
177 struct rb_node *p = est_root.rb_node;
178
179 while (p) {
180 struct gen_estimator *e;
181 99
182 e = rb_entry(p, struct gen_estimator, node); 100 est->next_jiffies += ((HZ/4) << est->intvl_log);
183 101
184 if (bstats > e->bstats) 102 if (unlikely(time_after_eq(jiffies, est->next_jiffies))) {
185 p = p->rb_right; 103 /* Ouch... timer was delayed. */
186 else if (bstats < e->bstats || rate_est != e->rate_est) 104 est->next_jiffies = jiffies + 1;
187 p = p->rb_left;
188 else
189 return e;
190 } 105 }
191 return NULL; 106 mod_timer(&est->timer, est->next_jiffies);
192} 107}
193 108
194/** 109/**
@@ -211,83 +126,76 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
211 */ 126 */
212int gen_new_estimator(struct gnet_stats_basic_packed *bstats, 127int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
213 struct gnet_stats_basic_cpu __percpu *cpu_bstats, 128 struct gnet_stats_basic_cpu __percpu *cpu_bstats,
214 struct gnet_stats_rate_est64 *rate_est, 129 struct net_rate_estimator __rcu **rate_est,
215 spinlock_t *stats_lock, 130 spinlock_t *stats_lock,
216 seqcount_t *running, 131 seqcount_t *running,
217 struct nlattr *opt) 132 struct nlattr *opt)
218{ 133{
219 struct gen_estimator *est;
220 struct gnet_estimator *parm = nla_data(opt); 134 struct gnet_estimator *parm = nla_data(opt);
221 struct gnet_stats_basic_packed b = {0}; 135 struct net_rate_estimator *old, *est;
222 int idx; 136 struct gnet_stats_basic_packed b;
137 int intvl_log;
223 138
224 if (nla_len(opt) < sizeof(*parm)) 139 if (nla_len(opt) < sizeof(*parm))
225 return -EINVAL; 140 return -EINVAL;
226 141
142 /* allowed timer periods are :
143 * -2 : 250ms, -1 : 500ms, 0 : 1 sec
144 * 1 : 2 sec, 2 : 4 sec, 3 : 8 sec
145 */
227 if (parm->interval < -2 || parm->interval > 3) 146 if (parm->interval < -2 || parm->interval > 3)
228 return -EINVAL; 147 return -EINVAL;
229 148
230 est = kzalloc(sizeof(*est), GFP_KERNEL); 149 est = kzalloc(sizeof(*est), GFP_KERNEL);
231 if (est == NULL) 150 if (!est)
232 return -ENOBUFS; 151 return -ENOBUFS;
233 152
234 __gnet_stats_copy_basic(running, &b, cpu_bstats, bstats); 153 seqcount_init(&est->seq);
235 154 intvl_log = parm->interval + 2;
236 idx = parm->interval + 2;
237 est->bstats = bstats; 155 est->bstats = bstats;
238 est->rate_est = rate_est;
239 est->stats_lock = stats_lock; 156 est->stats_lock = stats_lock;
240 est->running = running; 157 est->running = running;
241 est->ewma_log = parm->ewma_log; 158 est->ewma_log = parm->ewma_log;
242 est->last_bytes = b.bytes; 159 est->intvl_log = intvl_log;
243 est->avbps = rate_est->bps<<5;
244 est->last_packets = b.packets;
245 est->avpps = rate_est->pps<<10;
246 est->cpu_bstats = cpu_bstats; 160 est->cpu_bstats = cpu_bstats;
247 161
248 spin_lock_bh(&est_tree_lock); 162 est_fetch_counters(est, &b);
249 if (!elist[idx].timer.function) { 163 est->last_bytes = b.bytes;
250 INIT_LIST_HEAD(&elist[idx].list); 164 est->last_packets = b.packets;
251 setup_timer(&elist[idx].timer, est_timer, idx); 165 old = rcu_dereference_protected(*rate_est, 1);
166 if (old) {
167 del_timer_sync(&old->timer);
168 est->avbps = old->avbps;
169 est->avpps = old->avpps;
252 } 170 }
253 171
254 if (list_empty(&elist[idx].list)) 172 est->next_jiffies = jiffies + ((HZ/4) << intvl_log);
255 mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx)); 173 setup_timer(&est->timer, est_timer, (unsigned long)est);
256 174 mod_timer(&est->timer, est->next_jiffies);
257 list_add_rcu(&est->list, &elist[idx].list);
258 gen_add_node(est);
259 spin_unlock_bh(&est_tree_lock);
260 175
176 rcu_assign_pointer(*rate_est, est);
177 if (old)
178 kfree_rcu(old, rcu);
261 return 0; 179 return 0;
262} 180}
263EXPORT_SYMBOL(gen_new_estimator); 181EXPORT_SYMBOL(gen_new_estimator);
264 182
265/** 183/**
266 * gen_kill_estimator - remove a rate estimator 184 * gen_kill_estimator - remove a rate estimator
267 * @bstats: basic statistics 185 * @rate_est: rate estimator
268 * @rate_est: rate estimator statistics
269 * 186 *
270 * Removes the rate estimator specified by &bstats and &rate_est. 187 * Removes the rate estimator.
271 * 188 *
272 * Note : Caller should respect an RCU grace period before freeing stats_lock
273 */ 189 */
274void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, 190void gen_kill_estimator(struct net_rate_estimator __rcu **rate_est)
275 struct gnet_stats_rate_est64 *rate_est)
276{ 191{
277 struct gen_estimator *e; 192 struct net_rate_estimator *est;
278
279 spin_lock_bh(&est_tree_lock);
280 while ((e = gen_find_node(bstats, rate_est))) {
281 rb_erase(&e->node, &est_root);
282 193
283 write_lock(&est_lock); 194 est = xchg((__force struct net_rate_estimator **)rate_est, NULL);
284 e->bstats = NULL; 195 if (est) {
285 write_unlock(&est_lock); 196 del_timer_sync(&est->timer);
286 197 kfree_rcu(est, rcu);
287 list_del_rcu(&e->list);
288 kfree_rcu(e, e_rcu);
289 } 198 }
290 spin_unlock_bh(&est_tree_lock);
291} 199}
292EXPORT_SYMBOL(gen_kill_estimator); 200EXPORT_SYMBOL(gen_kill_estimator);
293 201
@@ -307,33 +215,47 @@ EXPORT_SYMBOL(gen_kill_estimator);
307 */ 215 */
308int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, 216int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
309 struct gnet_stats_basic_cpu __percpu *cpu_bstats, 217 struct gnet_stats_basic_cpu __percpu *cpu_bstats,
310 struct gnet_stats_rate_est64 *rate_est, 218 struct net_rate_estimator __rcu **rate_est,
311 spinlock_t *stats_lock, 219 spinlock_t *stats_lock,
312 seqcount_t *running, struct nlattr *opt) 220 seqcount_t *running, struct nlattr *opt)
313{ 221{
314 gen_kill_estimator(bstats, rate_est); 222 return gen_new_estimator(bstats, cpu_bstats, rate_est,
315 return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt); 223 stats_lock, running, opt);
316} 224}
317EXPORT_SYMBOL(gen_replace_estimator); 225EXPORT_SYMBOL(gen_replace_estimator);
318 226
319/** 227/**
320 * gen_estimator_active - test if estimator is currently in use 228 * gen_estimator_active - test if estimator is currently in use
321 * @bstats: basic statistics 229 * @rate_est: rate estimator
322 * @rate_est: rate estimator statistics
323 * 230 *
324 * Returns true if estimator is active, and false if not. 231 * Returns true if estimator is active, and false if not.
325 */ 232 */
326bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, 233bool gen_estimator_active(struct net_rate_estimator __rcu **rate_est)
327 const struct gnet_stats_rate_est64 *rate_est)
328{ 234{
329 bool res; 235 return !!rcu_access_pointer(*rate_est);
236}
237EXPORT_SYMBOL(gen_estimator_active);
330 238
331 ASSERT_RTNL(); 239bool gen_estimator_read(struct net_rate_estimator __rcu **rate_est,
240 struct gnet_stats_rate_est64 *sample)
241{
242 struct net_rate_estimator *est;
243 unsigned seq;
244
245 rcu_read_lock();
246 est = rcu_dereference(*rate_est);
247 if (!est) {
248 rcu_read_unlock();
249 return false;
250 }
332 251
333 spin_lock_bh(&est_tree_lock); 252 do {
334 res = gen_find_node(bstats, rate_est) != NULL; 253 seq = read_seqcount_begin(&est->seq);
335 spin_unlock_bh(&est_tree_lock); 254 sample->bps = est->avbps >> 8;
255 sample->pps = est->avpps >> 8;
256 } while (read_seqcount_retry(&est->seq, seq));
336 257
337 return res; 258 rcu_read_unlock();
259 return true;
338} 260}
339EXPORT_SYMBOL(gen_estimator_active); 261EXPORT_SYMBOL(gen_estimator_read);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 508e051304fb..87f28557b329 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -194,8 +194,7 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
194/** 194/**
195 * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV 195 * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
196 * @d: dumping handle 196 * @d: dumping handle
197 * @b: basic statistics 197 * @rate_est: rate estimator
198 * @r: rate estimator statistics
199 * 198 *
200 * Appends the rate estimator statistics to the top level TLV created by 199 * Appends the rate estimator statistics to the top level TLV created by
201 * gnet_stats_start_copy(). 200 * gnet_stats_start_copy().
@@ -205,18 +204,17 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
205 */ 204 */
206int 205int
207gnet_stats_copy_rate_est(struct gnet_dump *d, 206gnet_stats_copy_rate_est(struct gnet_dump *d,
208 const struct gnet_stats_basic_packed *b, 207 struct net_rate_estimator __rcu **rate_est)
209 struct gnet_stats_rate_est64 *r)
210{ 208{
209 struct gnet_stats_rate_est64 sample;
211 struct gnet_stats_rate_est est; 210 struct gnet_stats_rate_est est;
212 int res; 211 int res;
213 212
214 if (b && !gen_estimator_active(b, r)) 213 if (!gen_estimator_read(rate_est, &sample))
215 return 0; 214 return 0;
216 215 est.bps = min_t(u64, UINT_MAX, sample.bps);
217 est.bps = min_t(u64, UINT_MAX, r->bps);
218 /* we have some time before reaching 2^32 packets per second */ 216 /* we have some time before reaching 2^32 packets per second */
219 est.pps = r->pps; 217 est.pps = sample.pps;
220 218
221 if (d->compat_tc_stats) { 219 if (d->compat_tc_stats) {
222 d->tc_stats.bps = est.bps; 220 d->tc_stats.bps = est.bps;
@@ -226,11 +224,11 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
226 if (d->tail) { 224 if (d->tail) {
227 res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est), 225 res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est),
228 TCA_STATS_PAD); 226 TCA_STATS_PAD);
229 if (res < 0 || est.bps == r->bps) 227 if (res < 0 || est.bps == sample.bps)
230 return res; 228 return res;
231 /* emit 64bit stats only if needed */ 229 /* emit 64bit stats only if needed */
232 return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r), 230 return gnet_stats_copy(d, TCA_STATS_RATE_EST64, &sample,
233 TCA_STATS_PAD); 231 sizeof(sample), TCA_STATS_PAD);
234 } 232 }
235 233
236 return 0; 234 return 0;
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
new file mode 100644
index 000000000000..71bb3e2eca08
--- /dev/null
+++ b/net/core/lwt_bpf.c
@@ -0,0 +1,396 @@
1/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful, but
8 * WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * General Public License for more details.
11 */
12
13#include <linux/kernel.h>
14#include <linux/module.h>
15#include <linux/skbuff.h>
16#include <linux/types.h>
17#include <linux/bpf.h>
18#include <net/lwtunnel.h>
19
20struct bpf_lwt_prog {
21 struct bpf_prog *prog;
22 char *name;
23};
24
25struct bpf_lwt {
26 struct bpf_lwt_prog in;
27 struct bpf_lwt_prog out;
28 struct bpf_lwt_prog xmit;
29 int family;
30};
31
32#define MAX_PROG_NAME 256
33
34static inline struct bpf_lwt *bpf_lwt_lwtunnel(struct lwtunnel_state *lwt)
35{
36 return (struct bpf_lwt *)lwt->data;
37}
38
39#define NO_REDIRECT false
40#define CAN_REDIRECT true
41
42static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
43 struct dst_entry *dst, bool can_redirect)
44{
45 int ret;
46
47 /* Preempt disable is needed to protect per-cpu redirect_info between
48 * BPF prog and skb_do_redirect(). The call_rcu in bpf_prog_put() and
49 * access to maps strictly require a rcu_read_lock() for protection,
50 * mixing with BH RCU lock doesn't work.
51 */
52 preempt_disable();
53 rcu_read_lock();
54 bpf_compute_data_end(skb);
55 ret = bpf_prog_run_save_cb(lwt->prog, skb);
56 rcu_read_unlock();
57
58 switch (ret) {
59 case BPF_OK:
60 break;
61
62 case BPF_REDIRECT:
63 if (unlikely(!can_redirect)) {
64 pr_warn_once("Illegal redirect return code in prog %s\n",
65 lwt->name ? : "<unknown>");
66 ret = BPF_OK;
67 } else {
68 ret = skb_do_redirect(skb);
69 if (ret == 0)
70 ret = BPF_REDIRECT;
71 }
72 break;
73
74 case BPF_DROP:
75 kfree_skb(skb);
76 ret = -EPERM;
77 break;
78
79 default:
80 pr_warn_once("bpf-lwt: Illegal return value %u, expect packet loss\n", ret);
81 kfree_skb(skb);
82 ret = -EINVAL;
83 break;
84 }
85
86 preempt_enable();
87
88 return ret;
89}
90
91static int bpf_input(struct sk_buff *skb)
92{
93 struct dst_entry *dst = skb_dst(skb);
94 struct bpf_lwt *bpf;
95 int ret;
96
97 bpf = bpf_lwt_lwtunnel(dst->lwtstate);
98 if (bpf->in.prog) {
99 ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
100 if (ret < 0)
101 return ret;
102 }
103
104 if (unlikely(!dst->lwtstate->orig_input)) {
105 pr_warn_once("orig_input not set on dst for prog %s\n",
106 bpf->out.name);
107 kfree_skb(skb);
108 return -EINVAL;
109 }
110
111 return dst->lwtstate->orig_input(skb);
112}
113
114static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
115{
116 struct dst_entry *dst = skb_dst(skb);
117 struct bpf_lwt *bpf;
118 int ret;
119
120 bpf = bpf_lwt_lwtunnel(dst->lwtstate);
121 if (bpf->out.prog) {
122 ret = run_lwt_bpf(skb, &bpf->out, dst, NO_REDIRECT);
123 if (ret < 0)
124 return ret;
125 }
126
127 if (unlikely(!dst->lwtstate->orig_output)) {
128 pr_warn_once("orig_output not set on dst for prog %s\n",
129 bpf->out.name);
130 kfree_skb(skb);
131 return -EINVAL;
132 }
133
134 return dst->lwtstate->orig_output(net, sk, skb);
135}
136
137static int xmit_check_hhlen(struct sk_buff *skb)
138{
139 int hh_len = skb_dst(skb)->dev->hard_header_len;
140
141 if (skb_headroom(skb) < hh_len) {
142 int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
143
144 if (pskb_expand_head(skb, nhead, 0, GFP_ATOMIC))
145 return -ENOMEM;
146 }
147
148 return 0;
149}
150
151static int bpf_xmit(struct sk_buff *skb)
152{
153 struct dst_entry *dst = skb_dst(skb);
154 struct bpf_lwt *bpf;
155
156 bpf = bpf_lwt_lwtunnel(dst->lwtstate);
157 if (bpf->xmit.prog) {
158 int ret;
159
160 ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
161 switch (ret) {
162 case BPF_OK:
163 /* If the header was expanded, headroom might be too
164 * small for L2 header to come, expand as needed.
165 */
166 ret = xmit_check_hhlen(skb);
167 if (unlikely(ret))
168 return ret;
169
170 return LWTUNNEL_XMIT_CONTINUE;
171 case BPF_REDIRECT:
172 return LWTUNNEL_XMIT_DONE;
173 default:
174 return ret;
175 }
176 }
177
178 return LWTUNNEL_XMIT_CONTINUE;
179}
180
181static void bpf_lwt_prog_destroy(struct bpf_lwt_prog *prog)
182{
183 if (prog->prog)
184 bpf_prog_put(prog->prog);
185
186 kfree(prog->name);
187}
188
189static void bpf_destroy_state(struct lwtunnel_state *lwt)
190{
191 struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);
192
193 bpf_lwt_prog_destroy(&bpf->in);
194 bpf_lwt_prog_destroy(&bpf->out);
195 bpf_lwt_prog_destroy(&bpf->xmit);
196}
197
198static const struct nla_policy bpf_prog_policy[LWT_BPF_PROG_MAX + 1] = {
199 [LWT_BPF_PROG_FD] = { .type = NLA_U32, },
200 [LWT_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
201 .len = MAX_PROG_NAME },
202};
203
204static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,
205 enum bpf_prog_type type)
206{
207 struct nlattr *tb[LWT_BPF_PROG_MAX + 1];
208 struct bpf_prog *p;
209 int ret;
210 u32 fd;
211
212 ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy);
213 if (ret < 0)
214 return ret;
215
216 if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME])
217 return -EINVAL;
218
219 prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_KERNEL);
220 if (!prog->name)
221 return -ENOMEM;
222
223 fd = nla_get_u32(tb[LWT_BPF_PROG_FD]);
224 p = bpf_prog_get_type(fd, type);
225 if (IS_ERR(p))
226 return PTR_ERR(p);
227
228 prog->prog = p;
229
230 return 0;
231}
232
233static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = {
234 [LWT_BPF_IN] = { .type = NLA_NESTED, },
235 [LWT_BPF_OUT] = { .type = NLA_NESTED, },
236 [LWT_BPF_XMIT] = { .type = NLA_NESTED, },
237 [LWT_BPF_XMIT_HEADROOM] = { .type = NLA_U32 },
238};
239
240static int bpf_build_state(struct net_device *dev, struct nlattr *nla,
241 unsigned int family, const void *cfg,
242 struct lwtunnel_state **ts)
243{
244 struct nlattr *tb[LWT_BPF_MAX + 1];
245 struct lwtunnel_state *newts;
246 struct bpf_lwt *bpf;
247 int ret;
248
249 if (family != AF_INET && family != AF_INET6)
250 return -EAFNOSUPPORT;
251
252 ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy);
253 if (ret < 0)
254 return ret;
255
256 if (!tb[LWT_BPF_IN] && !tb[LWT_BPF_OUT] && !tb[LWT_BPF_XMIT])
257 return -EINVAL;
258
259 newts = lwtunnel_state_alloc(sizeof(*bpf));
260 if (!newts)
261 return -ENOMEM;
262
263 newts->type = LWTUNNEL_ENCAP_BPF;
264 bpf = bpf_lwt_lwtunnel(newts);
265
266 if (tb[LWT_BPF_IN]) {
267 newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
268 ret = bpf_parse_prog(tb[LWT_BPF_IN], &bpf->in,
269 BPF_PROG_TYPE_LWT_IN);
270 if (ret < 0)
271 goto errout;
272 }
273
274 if (tb[LWT_BPF_OUT]) {
275 newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
276 ret = bpf_parse_prog(tb[LWT_BPF_OUT], &bpf->out,
277 BPF_PROG_TYPE_LWT_OUT);
278 if (ret < 0)
279 goto errout;
280 }
281
282 if (tb[LWT_BPF_XMIT]) {
283 newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
284 ret = bpf_parse_prog(tb[LWT_BPF_XMIT], &bpf->xmit,
285 BPF_PROG_TYPE_LWT_XMIT);
286 if (ret < 0)
287 goto errout;
288 }
289
290 if (tb[LWT_BPF_XMIT_HEADROOM]) {
291 u32 headroom = nla_get_u32(tb[LWT_BPF_XMIT_HEADROOM]);
292
293 if (headroom > LWT_BPF_MAX_HEADROOM) {
294 ret = -ERANGE;
295 goto errout;
296 }
297
298 newts->headroom = headroom;
299 }
300
301 bpf->family = family;
302 *ts = newts;
303
304 return 0;
305
306errout:
307 bpf_destroy_state(newts);
308 kfree(newts);
309 return ret;
310}
311
312static int bpf_fill_lwt_prog(struct sk_buff *skb, int attr,
313 struct bpf_lwt_prog *prog)
314{
315 struct nlattr *nest;
316
317 if (!prog->prog)
318 return 0;
319
320 nest = nla_nest_start(skb, attr);
321 if (!nest)
322 return -EMSGSIZE;
323
324 if (prog->name &&
325 nla_put_string(skb, LWT_BPF_PROG_NAME, prog->name))
326 return -EMSGSIZE;
327
328 return nla_nest_end(skb, nest);
329}
330
331static int bpf_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwt)
332{
333 struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);
334
335 if (bpf_fill_lwt_prog(skb, LWT_BPF_IN, &bpf->in) < 0 ||
336 bpf_fill_lwt_prog(skb, LWT_BPF_OUT, &bpf->out) < 0 ||
337 bpf_fill_lwt_prog(skb, LWT_BPF_XMIT, &bpf->xmit) < 0)
338 return -EMSGSIZE;
339
340 return 0;
341}
342
343static int bpf_encap_nlsize(struct lwtunnel_state *lwtstate)
344{
345 int nest_len = nla_total_size(sizeof(struct nlattr)) +
346 nla_total_size(MAX_PROG_NAME) + /* LWT_BPF_PROG_NAME */
347 0;
348
349 return nest_len + /* LWT_BPF_IN */
350 nest_len + /* LWT_BPF_OUT */
351 nest_len + /* LWT_BPF_XMIT */
352 0;
353}
354
355int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b)
356{
357 /* FIXME:
358 * The LWT state is currently rebuilt for delete requests which
359 * results in a new bpf_prog instance. Comparing names for now.
360 */
361 if (!a->name && !b->name)
362 return 0;
363
364 if (!a->name || !b->name)
365 return 1;
366
367 return strcmp(a->name, b->name);
368}
369
370static int bpf_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
371{
372 struct bpf_lwt *a_bpf = bpf_lwt_lwtunnel(a);
373 struct bpf_lwt *b_bpf = bpf_lwt_lwtunnel(b);
374
375 return bpf_lwt_prog_cmp(&a_bpf->in, &b_bpf->in) ||
376 bpf_lwt_prog_cmp(&a_bpf->out, &b_bpf->out) ||
377 bpf_lwt_prog_cmp(&a_bpf->xmit, &b_bpf->xmit);
378}
379
380static const struct lwtunnel_encap_ops bpf_encap_ops = {
381 .build_state = bpf_build_state,
382 .destroy_state = bpf_destroy_state,
383 .input = bpf_input,
384 .output = bpf_output,
385 .xmit = bpf_xmit,
386 .fill_encap = bpf_fill_encap_info,
387 .get_encap_size = bpf_encap_nlsize,
388 .cmp_encap = bpf_encap_cmp,
389};
390
391static int __init bpf_lwt_init(void)
392{
393 return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
394}
395
396subsys_initcall(bpf_lwt_init)
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index e5f84c26ba1a..a5d4e866ce88 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -39,6 +39,10 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
39 return "MPLS"; 39 return "MPLS";
40 case LWTUNNEL_ENCAP_ILA: 40 case LWTUNNEL_ENCAP_ILA:
41 return "ILA"; 41 return "ILA";
42 case LWTUNNEL_ENCAP_SEG6:
43 return "SEG6";
44 case LWTUNNEL_ENCAP_BPF:
45 return "BPF";
42 case LWTUNNEL_ENCAP_IP6: 46 case LWTUNNEL_ENCAP_IP6:
43 case LWTUNNEL_ENCAP_IP: 47 case LWTUNNEL_ENCAP_IP:
44 case LWTUNNEL_ENCAP_NONE: 48 case LWTUNNEL_ENCAP_NONE:
@@ -130,6 +134,19 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
130} 134}
131EXPORT_SYMBOL(lwtunnel_build_state); 135EXPORT_SYMBOL(lwtunnel_build_state);
132 136
137void lwtstate_free(struct lwtunnel_state *lws)
138{
139 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];
140
141 if (ops->destroy_state) {
142 ops->destroy_state(lws);
143 kfree_rcu(lws, rcu);
144 } else {
145 kfree(lws);
146 }
147}
148EXPORT_SYMBOL(lwtstate_free);
149
133int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) 150int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
134{ 151{
135 const struct lwtunnel_encap_ops *ops; 152 const struct lwtunnel_encap_ops *ops;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2ae929f9bd06..7bb12e07ffef 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -100,6 +100,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
100 neigh->parms->neigh_cleanup(neigh); 100 neigh->parms->neigh_cleanup(neigh);
101 101
102 __neigh_notify(neigh, RTM_DELNEIGH, 0); 102 __neigh_notify(neigh, RTM_DELNEIGH, 0);
103 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
103 neigh_release(neigh); 104 neigh_release(neigh);
104} 105}
105 106
@@ -2291,13 +2292,10 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2291 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; 2292 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2292 n != NULL; 2293 n != NULL;
2293 n = rcu_dereference_bh(n->next)) { 2294 n = rcu_dereference_bh(n->next)) {
2294 if (!net_eq(dev_net(n->dev), net)) 2295 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2295 continue; 2296 goto next;
2296 if (neigh_ifindex_filtered(n->dev, filter_idx)) 2297 if (neigh_ifindex_filtered(n->dev, filter_idx) ||
2297 continue; 2298 neigh_master_filtered(n->dev, filter_master_idx))
2298 if (neigh_master_filtered(n->dev, filter_master_idx))
2299 continue;
2300 if (idx < s_idx)
2301 goto next; 2299 goto next;
2302 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2300 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2303 cb->nlh->nlmsg_seq, 2301 cb->nlh->nlmsg_seq,
@@ -2332,9 +2330,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2332 if (h > s_h) 2330 if (h > s_h)
2333 s_idx = 0; 2331 s_idx = 0;
2334 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { 2332 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2335 if (pneigh_net(n) != net) 2333 if (idx < s_idx || pneigh_net(n) != net)
2336 continue;
2337 if (idx < s_idx)
2338 goto next; 2334 goto next;
2339 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2335 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2340 cb->nlh->nlmsg_seq, 2336 cb->nlh->nlmsg_seq,
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 6e4f34721080..b0c04cf4851d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -950,10 +950,13 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
950 } 950 }
951 951
952 while (--i >= new_num) { 952 while (--i >= new_num) {
953 struct kobject *kobj = &dev->_rx[i].kobj;
954
955 if (!list_empty(&dev_net(dev)->exit_list))
956 kobj->uevent_suppress = 1;
953 if (dev->sysfs_rx_queue_group) 957 if (dev->sysfs_rx_queue_group)
954 sysfs_remove_group(&dev->_rx[i].kobj, 958 sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
955 dev->sysfs_rx_queue_group); 959 kobject_put(kobj);
956 kobject_put(&dev->_rx[i].kobj);
957 } 960 }
958 961
959 return error; 962 return error;
@@ -1021,7 +1024,6 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
1021 return sprintf(buf, "%lu", trans_timeout); 1024 return sprintf(buf, "%lu", trans_timeout);
1022} 1025}
1023 1026
1024#ifdef CONFIG_XPS
1025static unsigned int get_netdev_queue_index(struct netdev_queue *queue) 1027static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
1026{ 1028{
1027 struct net_device *dev = queue->dev; 1029 struct net_device *dev = queue->dev;
@@ -1033,6 +1035,21 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
1033 return i; 1035 return i;
1034} 1036}
1035 1037
1038static ssize_t show_traffic_class(struct netdev_queue *queue,
1039 struct netdev_queue_attribute *attribute,
1040 char *buf)
1041{
1042 struct net_device *dev = queue->dev;
1043 int index = get_netdev_queue_index(queue);
1044 int tc = netdev_txq_to_tc(dev, index);
1045
1046 if (tc < 0)
1047 return -EINVAL;
1048
1049 return sprintf(buf, "%u\n", tc);
1050}
1051
1052#ifdef CONFIG_XPS
1036static ssize_t show_tx_maxrate(struct netdev_queue *queue, 1053static ssize_t show_tx_maxrate(struct netdev_queue *queue,
1037 struct netdev_queue_attribute *attribute, 1054 struct netdev_queue_attribute *attribute,
1038 char *buf) 1055 char *buf)
@@ -1075,6 +1092,9 @@ static struct netdev_queue_attribute queue_tx_maxrate =
1075static struct netdev_queue_attribute queue_trans_timeout = 1092static struct netdev_queue_attribute queue_trans_timeout =
1076 __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); 1093 __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
1077 1094
1095static struct netdev_queue_attribute queue_traffic_class =
1096 __ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL);
1097
1078#ifdef CONFIG_BQL 1098#ifdef CONFIG_BQL
1079/* 1099/*
1080 * Byte queue limits sysfs structures and functions. 1100 * Byte queue limits sysfs structures and functions.
@@ -1190,29 +1210,38 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
1190 struct netdev_queue_attribute *attribute, char *buf) 1210 struct netdev_queue_attribute *attribute, char *buf)
1191{ 1211{
1192 struct net_device *dev = queue->dev; 1212 struct net_device *dev = queue->dev;
1213 int cpu, len, num_tc = 1, tc = 0;
1193 struct xps_dev_maps *dev_maps; 1214 struct xps_dev_maps *dev_maps;
1194 cpumask_var_t mask; 1215 cpumask_var_t mask;
1195 unsigned long index; 1216 unsigned long index;
1196 int i, len;
1197 1217
1198 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) 1218 if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
1199 return -ENOMEM; 1219 return -ENOMEM;
1200 1220
1201 index = get_netdev_queue_index(queue); 1221 index = get_netdev_queue_index(queue);
1202 1222
1223 if (dev->num_tc) {
1224 num_tc = dev->num_tc;
1225 tc = netdev_txq_to_tc(dev, index);
1226 if (tc < 0)
1227 return -EINVAL;
1228 }
1229
1203 rcu_read_lock(); 1230 rcu_read_lock();
1204 dev_maps = rcu_dereference(dev->xps_maps); 1231 dev_maps = rcu_dereference(dev->xps_maps);
1205 if (dev_maps) { 1232 if (dev_maps) {
1206 for_each_possible_cpu(i) { 1233 for_each_possible_cpu(cpu) {
1207 struct xps_map *map = 1234 int i, tci = cpu * num_tc + tc;
1208 rcu_dereference(dev_maps->cpu_map[i]); 1235 struct xps_map *map;
1209 if (map) { 1236
1210 int j; 1237 map = rcu_dereference(dev_maps->cpu_map[tci]);
1211 for (j = 0; j < map->len; j++) { 1238 if (!map)
1212 if (map->queues[j] == index) { 1239 continue;
1213 cpumask_set_cpu(i, mask); 1240
1214 break; 1241 for (i = map->len; i--;) {
1215 } 1242 if (map->queues[i] == index) {
1243 cpumask_set_cpu(cpu, mask);
1244 break;
1216 } 1245 }
1217 } 1246 }
1218 } 1247 }
@@ -1260,6 +1289,7 @@ static struct netdev_queue_attribute xps_cpus_attribute =
1260 1289
1261static struct attribute *netdev_queue_default_attrs[] = { 1290static struct attribute *netdev_queue_default_attrs[] = {
1262 &queue_trans_timeout.attr, 1291 &queue_trans_timeout.attr,
1292 &queue_traffic_class.attr,
1263#ifdef CONFIG_XPS 1293#ifdef CONFIG_XPS
1264 &xps_cpus_attribute.attr, 1294 &xps_cpus_attribute.attr,
1265 &queue_tx_maxrate.attr, 1295 &queue_tx_maxrate.attr,
@@ -1340,6 +1370,8 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
1340 while (--i >= new_num) { 1370 while (--i >= new_num) {
1341 struct netdev_queue *queue = dev->_tx + i; 1371 struct netdev_queue *queue = dev->_tx + i;
1342 1372
1373 if (!list_empty(&dev_net(dev)->exit_list))
1374 queue->kobj.uevent_suppress = 1;
1343#ifdef CONFIG_BQL 1375#ifdef CONFIG_BQL
1344 sysfs_remove_group(&queue->kobj, &dql_group); 1376 sysfs_remove_group(&queue->kobj, &dql_group);
1345#endif 1377#endif
@@ -1525,6 +1557,9 @@ void netdev_unregister_kobject(struct net_device *ndev)
1525{ 1557{
1526 struct device *dev = &(ndev->dev); 1558 struct device *dev = &(ndev->dev);
1527 1559
1560 if (!list_empty(&dev_net(ndev)->exit_list))
1561 dev_set_uevent_suppress(dev, 1);
1562
1528 kobject_get(&dev->kobj); 1563 kobject_get(&dev->kobj);
1529 1564
1530 remove_queue_kobjects(ndev); 1565 remove_queue_kobjects(ndev);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7001da910c6b..3c4bbec39713 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -39,6 +39,9 @@ EXPORT_SYMBOL(init_net);
39 39
40static bool init_net_initialized; 40static bool init_net_initialized;
41 41
42#define MIN_PERNET_OPS_ID \
43 ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
44
42#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 45#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
43 46
44static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; 47static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
@@ -46,27 +49,28 @@ static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
46static struct net_generic *net_alloc_generic(void) 49static struct net_generic *net_alloc_generic(void)
47{ 50{
48 struct net_generic *ng; 51 struct net_generic *ng;
49 size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); 52 unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
50 53
51 ng = kzalloc(generic_size, GFP_KERNEL); 54 ng = kzalloc(generic_size, GFP_KERNEL);
52 if (ng) 55 if (ng)
53 ng->len = max_gen_ptrs; 56 ng->s.len = max_gen_ptrs;
54 57
55 return ng; 58 return ng;
56} 59}
57 60
58static int net_assign_generic(struct net *net, int id, void *data) 61static int net_assign_generic(struct net *net, unsigned int id, void *data)
59{ 62{
60 struct net_generic *ng, *old_ng; 63 struct net_generic *ng, *old_ng;
61 64
62 BUG_ON(!mutex_is_locked(&net_mutex)); 65 BUG_ON(!mutex_is_locked(&net_mutex));
63 BUG_ON(id == 0); 66 BUG_ON(id < MIN_PERNET_OPS_ID);
64 67
65 old_ng = rcu_dereference_protected(net->gen, 68 old_ng = rcu_dereference_protected(net->gen,
66 lockdep_is_held(&net_mutex)); 69 lockdep_is_held(&net_mutex));
67 ng = old_ng; 70 if (old_ng->s.len > id) {
68 if (old_ng->len >= id) 71 old_ng->ptr[id] = data;
69 goto assign; 72 return 0;
73 }
70 74
71 ng = net_alloc_generic(); 75 ng = net_alloc_generic();
72 if (ng == NULL) 76 if (ng == NULL)
@@ -83,12 +87,12 @@ static int net_assign_generic(struct net *net, int id, void *data)
83 * the old copy for kfree after a grace period. 87 * the old copy for kfree after a grace period.
84 */ 88 */
85 89
86 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); 90 memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
91 (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
92 ng->ptr[id] = data;
87 93
88 rcu_assign_pointer(net->gen, ng); 94 rcu_assign_pointer(net->gen, ng);
89 kfree_rcu(old_ng, rcu); 95 kfree_rcu(old_ng, s.rcu);
90assign:
91 ng->ptr[id - 1] = data;
92 return 0; 96 return 0;
93} 97}
94 98
@@ -122,8 +126,7 @@ out:
122static void ops_free(const struct pernet_operations *ops, struct net *net) 126static void ops_free(const struct pernet_operations *ops, struct net *net)
123{ 127{
124 if (ops->id && ops->size) { 128 if (ops->id && ops->size) {
125 int id = *ops->id; 129 kfree(net_generic(net, *ops->id));
126 kfree(net_generic(net, id));
127 } 130 }
128} 131}
129 132
@@ -215,16 +218,15 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id);
215 */ 218 */
216int peernet2id_alloc(struct net *net, struct net *peer) 219int peernet2id_alloc(struct net *net, struct net *peer)
217{ 220{
218 unsigned long flags;
219 bool alloc; 221 bool alloc;
220 int id; 222 int id;
221 223
222 if (atomic_read(&net->count) == 0) 224 if (atomic_read(&net->count) == 0)
223 return NETNSA_NSID_NOT_ASSIGNED; 225 return NETNSA_NSID_NOT_ASSIGNED;
224 spin_lock_irqsave(&net->nsid_lock, flags); 226 spin_lock_bh(&net->nsid_lock);
225 alloc = atomic_read(&peer->count) == 0 ? false : true; 227 alloc = atomic_read(&peer->count) == 0 ? false : true;
226 id = __peernet2id_alloc(net, peer, &alloc); 228 id = __peernet2id_alloc(net, peer, &alloc);
227 spin_unlock_irqrestore(&net->nsid_lock, flags); 229 spin_unlock_bh(&net->nsid_lock);
228 if (alloc && id >= 0) 230 if (alloc && id >= 0)
229 rtnl_net_notifyid(net, RTM_NEWNSID, id); 231 rtnl_net_notifyid(net, RTM_NEWNSID, id);
230 return id; 232 return id;
@@ -233,12 +235,11 @@ int peernet2id_alloc(struct net *net, struct net *peer)
233/* This function returns, if assigned, the id of a peer netns. */ 235/* This function returns, if assigned, the id of a peer netns. */
234int peernet2id(struct net *net, struct net *peer) 236int peernet2id(struct net *net, struct net *peer)
235{ 237{
236 unsigned long flags;
237 int id; 238 int id;
238 239
239 spin_lock_irqsave(&net->nsid_lock, flags); 240 spin_lock_bh(&net->nsid_lock);
240 id = __peernet2id(net, peer); 241 id = __peernet2id(net, peer);
241 spin_unlock_irqrestore(&net->nsid_lock, flags); 242 spin_unlock_bh(&net->nsid_lock);
242 return id; 243 return id;
243} 244}
244EXPORT_SYMBOL(peernet2id); 245EXPORT_SYMBOL(peernet2id);
@@ -253,18 +254,17 @@ bool peernet_has_id(struct net *net, struct net *peer)
253 254
254struct net *get_net_ns_by_id(struct net *net, int id) 255struct net *get_net_ns_by_id(struct net *net, int id)
255{ 256{
256 unsigned long flags;
257 struct net *peer; 257 struct net *peer;
258 258
259 if (id < 0) 259 if (id < 0)
260 return NULL; 260 return NULL;
261 261
262 rcu_read_lock(); 262 rcu_read_lock();
263 spin_lock_irqsave(&net->nsid_lock, flags); 263 spin_lock_bh(&net->nsid_lock);
264 peer = idr_find(&net->netns_ids, id); 264 peer = idr_find(&net->netns_ids, id);
265 if (peer) 265 if (peer)
266 get_net(peer); 266 get_net(peer);
267 spin_unlock_irqrestore(&net->nsid_lock, flags); 267 spin_unlock_bh(&net->nsid_lock);
268 rcu_read_unlock(); 268 rcu_read_unlock();
269 269
270 return peer; 270 return peer;
@@ -384,7 +384,14 @@ struct net *copy_net_ns(unsigned long flags,
384 384
385 get_user_ns(user_ns); 385 get_user_ns(user_ns);
386 386
387 mutex_lock(&net_mutex); 387 rv = mutex_lock_killable(&net_mutex);
388 if (rv < 0) {
389 net_free(net);
390 dec_net_namespaces(ucounts);
391 put_user_ns(user_ns);
392 return ERR_PTR(rv);
393 }
394
388 net->ucounts = ucounts; 395 net->ucounts = ucounts;
389 rv = setup_net(net, user_ns); 396 rv = setup_net(net, user_ns);
390 if (rv == 0) { 397 if (rv == 0) {
@@ -427,17 +434,17 @@ static void cleanup_net(struct work_struct *work)
427 for_each_net(tmp) { 434 for_each_net(tmp) {
428 int id; 435 int id;
429 436
430 spin_lock_irq(&tmp->nsid_lock); 437 spin_lock_bh(&tmp->nsid_lock);
431 id = __peernet2id(tmp, net); 438 id = __peernet2id(tmp, net);
432 if (id >= 0) 439 if (id >= 0)
433 idr_remove(&tmp->netns_ids, id); 440 idr_remove(&tmp->netns_ids, id);
434 spin_unlock_irq(&tmp->nsid_lock); 441 spin_unlock_bh(&tmp->nsid_lock);
435 if (id >= 0) 442 if (id >= 0)
436 rtnl_net_notifyid(tmp, RTM_DELNSID, id); 443 rtnl_net_notifyid(tmp, RTM_DELNSID, id);
437 } 444 }
438 spin_lock_irq(&net->nsid_lock); 445 spin_lock_bh(&net->nsid_lock);
439 idr_destroy(&net->netns_ids); 446 idr_destroy(&net->netns_ids);
440 spin_unlock_irq(&net->nsid_lock); 447 spin_unlock_bh(&net->nsid_lock);
441 448
442 } 449 }
443 rtnl_unlock(); 450 rtnl_unlock();
@@ -566,7 +573,6 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
566{ 573{
567 struct net *net = sock_net(skb->sk); 574 struct net *net = sock_net(skb->sk);
568 struct nlattr *tb[NETNSA_MAX + 1]; 575 struct nlattr *tb[NETNSA_MAX + 1];
569 unsigned long flags;
570 struct net *peer; 576 struct net *peer;
571 int nsid, err; 577 int nsid, err;
572 578
@@ -587,15 +593,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
587 if (IS_ERR(peer)) 593 if (IS_ERR(peer))
588 return PTR_ERR(peer); 594 return PTR_ERR(peer);
589 595
590 spin_lock_irqsave(&net->nsid_lock, flags); 596 spin_lock_bh(&net->nsid_lock);
591 if (__peernet2id(net, peer) >= 0) { 597 if (__peernet2id(net, peer) >= 0) {
592 spin_unlock_irqrestore(&net->nsid_lock, flags); 598 spin_unlock_bh(&net->nsid_lock);
593 err = -EEXIST; 599 err = -EEXIST;
594 goto out; 600 goto out;
595 } 601 }
596 602
597 err = alloc_netid(net, peer, nsid); 603 err = alloc_netid(net, peer, nsid);
598 spin_unlock_irqrestore(&net->nsid_lock, flags); 604 spin_unlock_bh(&net->nsid_lock);
599 if (err >= 0) { 605 if (err >= 0) {
600 rtnl_net_notifyid(net, RTM_NEWNSID, err); 606 rtnl_net_notifyid(net, RTM_NEWNSID, err);
601 err = 0; 607 err = 0;
@@ -717,11 +723,10 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
717 .idx = 0, 723 .idx = 0,
718 .s_idx = cb->args[0], 724 .s_idx = cb->args[0],
719 }; 725 };
720 unsigned long flags;
721 726
722 spin_lock_irqsave(&net->nsid_lock, flags); 727 spin_lock_bh(&net->nsid_lock);
723 idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); 728 idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
724 spin_unlock_irqrestore(&net->nsid_lock, flags); 729 spin_unlock_bh(&net->nsid_lock);
725 730
726 cb->args[0] = net_cb.idx; 731 cb->args[0] = net_cb.idx;
727 return skb->len; 732 return skb->len;
@@ -868,7 +873,7 @@ static int register_pernet_operations(struct list_head *list,
868 873
869 if (ops->id) { 874 if (ops->id) {
870again: 875again:
871 error = ida_get_new_above(&net_generic_ids, 1, ops->id); 876 error = ida_get_new_above(&net_generic_ids, MIN_PERNET_OPS_ID, ops->id);
872 if (error < 0) { 877 if (error < 0) {
873 if (error == -EAGAIN) { 878 if (error == -EAGAIN) {
874 ida_pre_get(&net_generic_ids, GFP_KERNEL); 879 ida_pre_get(&net_generic_ids, GFP_KERNEL);
@@ -876,7 +881,7 @@ again:
876 } 881 }
877 return error; 882 return error;
878 } 883 }
879 max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id); 884 max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
880 } 885 }
881 error = __register_pernet_operations(list, ops); 886 error = __register_pernet_operations(list, ops);
882 if (error) { 887 if (error) {
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 53599bd0c82d..9424673009c1 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -171,12 +171,12 @@ static void poll_one_napi(struct napi_struct *napi)
171static void poll_napi(struct net_device *dev) 171static void poll_napi(struct net_device *dev)
172{ 172{
173 struct napi_struct *napi; 173 struct napi_struct *napi;
174 int cpu = smp_processor_id();
174 175
175 list_for_each_entry(napi, &dev->napi_list, dev_list) { 176 list_for_each_entry(napi, &dev->napi_list, dev_list) {
176 if (napi->poll_owner != smp_processor_id() && 177 if (cmpxchg(&napi->poll_owner, -1, cpu) == -1) {
177 spin_trylock(&napi->poll_lock)) {
178 poll_one_napi(napi); 178 poll_one_napi(napi);
179 spin_unlock(&napi->poll_lock); 179 smp_store_release(&napi->poll_owner, -1);
180 } 180 }
181 } 181 }
182} 182}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 306b8f0e03c1..8e69ce472236 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -413,7 +413,7 @@ struct pktgen_hdr {
413}; 413};
414 414
415 415
416static int pg_net_id __read_mostly; 416static unsigned int pg_net_id __read_mostly;
417 417
418struct pktgen_net { 418struct pktgen_net {
419 struct net *net; 419 struct net *net;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a6196cf844f6..18b5aae99bec 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -40,7 +40,7 @@
40#include <linux/pci.h> 40#include <linux/pci.h>
41#include <linux/etherdevice.h> 41#include <linux/etherdevice.h>
42 42
43#include <asm/uaccess.h> 43#include <linux/uaccess.h>
44 44
45#include <linux/inet.h> 45#include <linux/inet.h>
46#include <linux/netdevice.h> 46#include <linux/netdevice.h>
@@ -1505,6 +1505,7 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
1505static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = { 1505static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
1506 [IFLA_XDP_FD] = { .type = NLA_S32 }, 1506 [IFLA_XDP_FD] = { .type = NLA_S32 },
1507 [IFLA_XDP_ATTACHED] = { .type = NLA_U8 }, 1507 [IFLA_XDP_ATTACHED] = { .type = NLA_U8 },
1508 [IFLA_XDP_FLAGS] = { .type = NLA_U32 },
1508}; 1509};
1509 1510
1510static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla) 1511static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
@@ -2164,6 +2165,7 @@ static int do_setlink(const struct sk_buff *skb,
2164 2165
2165 if (tb[IFLA_XDP]) { 2166 if (tb[IFLA_XDP]) {
2166 struct nlattr *xdp[IFLA_XDP_MAX + 1]; 2167 struct nlattr *xdp[IFLA_XDP_MAX + 1];
2168 u32 xdp_flags = 0;
2167 2169
2168 err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP], 2170 err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP],
2169 ifla_xdp_policy); 2171 ifla_xdp_policy);
@@ -2174,9 +2176,19 @@ static int do_setlink(const struct sk_buff *skb,
2174 err = -EINVAL; 2176 err = -EINVAL;
2175 goto errout; 2177 goto errout;
2176 } 2178 }
2179
2180 if (xdp[IFLA_XDP_FLAGS]) {
2181 xdp_flags = nla_get_u32(xdp[IFLA_XDP_FLAGS]);
2182 if (xdp_flags & ~XDP_FLAGS_MASK) {
2183 err = -EINVAL;
2184 goto errout;
2185 }
2186 }
2187
2177 if (xdp[IFLA_XDP_FD]) { 2188 if (xdp[IFLA_XDP_FD]) {
2178 err = dev_change_xdp_fd(dev, 2189 err = dev_change_xdp_fd(dev,
2179 nla_get_s32(xdp[IFLA_XDP_FD])); 2190 nla_get_s32(xdp[IFLA_XDP_FD]),
2191 xdp_flags);
2180 if (err) 2192 if (err)
2181 goto errout; 2193 goto errout;
2182 status |= DO_SETLINK_NOTIFY; 2194 status |= DO_SETLINK_NOTIFY;
@@ -3165,7 +3177,7 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
3165 err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc); 3177 err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc);
3166 if (err) 3178 if (err)
3167 goto out; 3179 goto out;
3168 nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc); 3180 err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc);
3169out: 3181out:
3170 netif_addr_unlock_bh(dev); 3182 netif_addr_unlock_bh(dev);
3171 return err; 3183 return err;
@@ -3671,7 +3683,7 @@ static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev,
3671 if (!size) 3683 if (!size)
3672 continue; 3684 continue;
3673 3685
3674 if (!dev->netdev_ops->ndo_has_offload_stats(attr_id)) 3686 if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
3675 continue; 3687 continue;
3676 3688
3677 attr = nla_reserve_64bit(skb, attr_id, size, 3689 attr = nla_reserve_64bit(skb, attr_id, size,
@@ -3712,7 +3724,7 @@ static int rtnl_get_offload_stats_size(const struct net_device *dev)
3712 3724
3713 for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; 3725 for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
3714 attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { 3726 attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
3715 if (!dev->netdev_ops->ndo_has_offload_stats(attr_id)) 3727 if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
3716 continue; 3728 continue;
3717 size = rtnl_get_offload_stats_attr_size(attr_id); 3729 size = rtnl_get_offload_stats_attr_size(attr_id);
3718 nla_size += nla_total_size_64bit(size); 3730 nla_size += nla_total_size_64bit(size);
diff --git a/net/core/scm.c b/net/core/scm.c
index 2696aefdc148..d8820438ba37 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -29,7 +29,7 @@
29#include <linux/nsproxy.h> 29#include <linux/nsproxy.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31 31
32#include <asm/uaccess.h> 32#include <linux/uaccess.h>
33 33
34#include <net/protocol.h> 34#include <net/protocol.h>
35#include <linux/skbuff.h> 35#include <linux/skbuff.h>
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index fd3ce461fbe6..88a8e429fc3e 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -12,6 +12,7 @@
12#include <net/secure_seq.h> 12#include <net/secure_seq.h>
13 13
14#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET) 14#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
15#include <net/tcp.h>
15#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) 16#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
16 17
17static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; 18static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
@@ -40,8 +41,8 @@ static u32 seq_scale(u32 seq)
40#endif 41#endif
41 42
42#if IS_ENABLED(CONFIG_IPV6) 43#if IS_ENABLED(CONFIG_IPV6)
43__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, 44u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
44 __be16 sport, __be16 dport) 45 __be16 sport, __be16 dport, u32 *tsoff)
45{ 46{
46 u32 secret[MD5_MESSAGE_BYTES / 4]; 47 u32 secret[MD5_MESSAGE_BYTES / 4];
47 u32 hash[MD5_DIGEST_WORDS]; 48 u32 hash[MD5_DIGEST_WORDS];
@@ -58,6 +59,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
58 59
59 md5_transform(hash, secret); 60 md5_transform(hash, secret);
60 61
62 *tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0;
61 return seq_scale(hash[0]); 63 return seq_scale(hash[0]);
62} 64}
63EXPORT_SYMBOL(secure_tcpv6_sequence_number); 65EXPORT_SYMBOL(secure_tcpv6_sequence_number);
@@ -86,8 +88,8 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
86 88
87#ifdef CONFIG_INET 89#ifdef CONFIG_INET
88 90
89__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, 91u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
90 __be16 sport, __be16 dport) 92 __be16 sport, __be16 dport, u32 *tsoff)
91{ 93{
92 u32 hash[MD5_DIGEST_WORDS]; 94 u32 hash[MD5_DIGEST_WORDS];
93 95
@@ -99,6 +101,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
99 101
100 md5_transform(hash, net_secret); 102 md5_transform(hash, net_secret);
101 103
104 *tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0;
102 return seq_scale(hash[0]); 105 return seq_scale(hash[0]);
103} 106}
104 107
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1e3e0087245b..5a03730fbc1a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -72,7 +72,7 @@
72#include <net/ip6_checksum.h> 72#include <net/ip6_checksum.h>
73#include <net/xfrm.h> 73#include <net/xfrm.h>
74 74
75#include <asm/uaccess.h> 75#include <linux/uaccess.h>
76#include <trace/events/skb.h> 76#include <trace/events/skb.h>
77#include <linux/highmem.h> 77#include <linux/highmem.h>
78#include <linux/capability.h> 78#include <linux/capability.h>
@@ -354,7 +354,7 @@ EXPORT_SYMBOL(build_skb);
354 354
355struct napi_alloc_cache { 355struct napi_alloc_cache {
356 struct page_frag_cache page; 356 struct page_frag_cache page;
357 size_t skb_count; 357 unsigned int skb_count;
358 void *skb_cache[NAPI_SKB_CACHE_SIZE]; 358 void *skb_cache[NAPI_SKB_CACHE_SIZE];
359}; 359};
360 360
@@ -2656,7 +2656,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
2656 struct skb_frag_struct *fragfrom, *fragto; 2656 struct skb_frag_struct *fragfrom, *fragto;
2657 2657
2658 BUG_ON(shiftlen > skb->len); 2658 BUG_ON(shiftlen > skb->len);
2659 BUG_ON(skb_headlen(skb)); /* Would corrupt stream */ 2659
2660 if (skb_headlen(skb))
2661 return 0;
2660 2662
2661 todo = shiftlen; 2663 todo = shiftlen;
2662 from = 0; 2664 from = 0;
@@ -3712,21 +3714,29 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
3712} 3714}
3713EXPORT_SYMBOL(sock_queue_err_skb); 3715EXPORT_SYMBOL(sock_queue_err_skb);
3714 3716
3717static bool is_icmp_err_skb(const struct sk_buff *skb)
3718{
3719 return skb && (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
3720 SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP6);
3721}
3722
3715struct sk_buff *sock_dequeue_err_skb(struct sock *sk) 3723struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
3716{ 3724{
3717 struct sk_buff_head *q = &sk->sk_error_queue; 3725 struct sk_buff_head *q = &sk->sk_error_queue;
3718 struct sk_buff *skb, *skb_next; 3726 struct sk_buff *skb, *skb_next = NULL;
3727 bool icmp_next = false;
3719 unsigned long flags; 3728 unsigned long flags;
3720 int err = 0;
3721 3729
3722 spin_lock_irqsave(&q->lock, flags); 3730 spin_lock_irqsave(&q->lock, flags);
3723 skb = __skb_dequeue(q); 3731 skb = __skb_dequeue(q);
3724 if (skb && (skb_next = skb_peek(q))) 3732 if (skb && (skb_next = skb_peek(q)))
3725 err = SKB_EXT_ERR(skb_next)->ee.ee_errno; 3733 icmp_next = is_icmp_err_skb(skb_next);
3726 spin_unlock_irqrestore(&q->lock, flags); 3734 spin_unlock_irqrestore(&q->lock, flags);
3727 3735
3728 sk->sk_err = err; 3736 if (is_icmp_err_skb(skb) && !icmp_next)
3729 if (err) 3737 sk->sk_err = 0;
3738
3739 if (skb_next)
3730 sk->sk_error_report(sk); 3740 sk->sk_error_report(sk);
3731 3741
3732 return skb; 3742 return skb;
@@ -3838,10 +3848,18 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
3838 if (!skb_may_tx_timestamp(sk, tsonly)) 3848 if (!skb_may_tx_timestamp(sk, tsonly))
3839 return; 3849 return;
3840 3850
3841 if (tsonly) 3851 if (tsonly) {
3842 skb = alloc_skb(0, GFP_ATOMIC); 3852#ifdef CONFIG_INET
3843 else 3853 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
3854 sk->sk_protocol == IPPROTO_TCP &&
3855 sk->sk_type == SOCK_STREAM)
3856 skb = tcp_get_timestamping_opt_stats(sk);
3857 else
3858#endif
3859 skb = alloc_skb(0, GFP_ATOMIC);
3860 } else {
3844 skb = skb_clone(orig_skb, GFP_ATOMIC); 3861 skb = skb_clone(orig_skb, GFP_ATOMIC);
3862 }
3845 if (!skb) 3863 if (!skb)
3846 return; 3864 return;
3847 3865
@@ -4350,7 +4368,7 @@ EXPORT_SYMBOL(skb_try_coalesce);
4350 */ 4368 */
4351void skb_scrub_packet(struct sk_buff *skb, bool xnet) 4369void skb_scrub_packet(struct sk_buff *skb, bool xnet)
4352{ 4370{
4353 skb->tstamp.tv64 = 0; 4371 skb->tstamp = 0;
4354 skb->pkt_type = PACKET_HOST; 4372 skb->pkt_type = PACKET_HOST;
4355 skb->skb_iif = 0; 4373 skb->skb_iif = 0;
4356 skb->ignore_df = 0; 4374 skb->ignore_df = 0;
@@ -4913,3 +4931,35 @@ struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
4913 return clone; 4931 return clone;
4914} 4932}
4915EXPORT_SYMBOL(pskb_extract); 4933EXPORT_SYMBOL(pskb_extract);
4934
4935/**
4936 * skb_condense - try to get rid of fragments/frag_list if possible
4937 * @skb: buffer
4938 *
4939 * Can be used to save memory before skb is added to a busy queue.
4940 * If packet has bytes in frags and enough tail room in skb->head,
4941 * pull all of them, so that we can free the frags right now and adjust
4942 * truesize.
4943 * Notes:
4944 * We do not reallocate skb->head thus can not fail.
4945 * Caller must re-evaluate skb->truesize if needed.
4946 */
4947void skb_condense(struct sk_buff *skb)
4948{
4949 if (skb->data_len) {
4950 if (skb->data_len > skb->end - skb->tail ||
4951 skb_cloned(skb))
4952 return;
4953
4954 /* Nice, we can free page frag(s) right now */
4955 __pskb_pull_tail(skb, skb->data_len);
4956 }
4957 /* At this point, skb->truesize might be over estimated,
4958 * because skb had a fragment, and fragments do not tell
4959 * their truesize.
4960 * When we pulled its content into skb->head, fragment
4961 * was freed, but __pskb_pull_tail() could not possibly
4962 * adjust skb->truesize, not knowing the frag truesize.
4963 */
4964 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
4965}
diff --git a/net/core/sock.c b/net/core/sock.c
index 00a074dbfe9b..f560e0826009 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -118,7 +118,7 @@
118#include <linux/memcontrol.h> 118#include <linux/memcontrol.h>
119#include <linux/prefetch.h> 119#include <linux/prefetch.h>
120 120
121#include <asm/uaccess.h> 121#include <linux/uaccess.h>
122 122
123#include <linux/netdevice.h> 123#include <linux/netdevice.h>
124#include <net/protocol.h> 124#include <net/protocol.h>
@@ -854,6 +854,13 @@ set_rcvbuf:
854 sk->sk_tskey = 0; 854 sk->sk_tskey = 0;
855 } 855 }
856 } 856 }
857
858 if (val & SOF_TIMESTAMPING_OPT_STATS &&
859 !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
860 ret = -EINVAL;
861 break;
862 }
863
857 sk->sk_tsflags = val; 864 sk->sk_tsflags = val;
858 if (val & SOF_TIMESTAMPING_RX_SOFTWARE) 865 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
859 sock_enable_timestamp(sk, 866 sock_enable_timestamp(sk,
@@ -2080,37 +2087,31 @@ void __sk_flush_backlog(struct sock *sk)
2080 */ 2087 */
2081int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) 2088int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
2082{ 2089{
2090 DEFINE_WAIT_FUNC(wait, woken_wake_function);
2083 int rc; 2091 int rc;
2084 DEFINE_WAIT(wait);
2085 2092
2086 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 2093 add_wait_queue(sk_sleep(sk), &wait);
2087 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2094 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2088 rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); 2095 rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
2089 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2096 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2090 finish_wait(sk_sleep(sk), &wait); 2097 remove_wait_queue(sk_sleep(sk), &wait);
2091 return rc; 2098 return rc;
2092} 2099}
2093EXPORT_SYMBOL(sk_wait_data); 2100EXPORT_SYMBOL(sk_wait_data);
2094 2101
2095/** 2102/**
2096 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated 2103 * __sk_mem_raise_allocated - increase memory_allocated
2097 * @sk: socket 2104 * @sk: socket
2098 * @size: memory size to allocate 2105 * @size: memory size to allocate
2106 * @amt: pages to allocate
2099 * @kind: allocation type 2107 * @kind: allocation type
2100 * 2108 *
2101 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means 2109 * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
2102 * rmem allocation. This function assumes that protocols which have
2103 * memory_pressure use sk_wmem_queued as write buffer accounting.
2104 */ 2110 */
2105int __sk_mem_schedule(struct sock *sk, int size, int kind) 2111int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
2106{ 2112{
2107 struct proto *prot = sk->sk_prot; 2113 struct proto *prot = sk->sk_prot;
2108 int amt = sk_mem_pages(size); 2114 long allocated = sk_memory_allocated_add(sk, amt);
2109 long allocated;
2110
2111 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
2112
2113 allocated = sk_memory_allocated_add(sk, amt);
2114 2115
2115 if (mem_cgroup_sockets_enabled && sk->sk_memcg && 2116 if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
2116 !mem_cgroup_charge_skmem(sk->sk_memcg, amt)) 2117 !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
@@ -2171,9 +2172,6 @@ suppress_allocation:
2171 2172
2172 trace_sock_exceed_buf_limit(sk, prot, allocated); 2173 trace_sock_exceed_buf_limit(sk, prot, allocated);
2173 2174
2174 /* Alas. Undo changes. */
2175 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
2176
2177 sk_memory_allocated_sub(sk, amt); 2175 sk_memory_allocated_sub(sk, amt);
2178 2176
2179 if (mem_cgroup_sockets_enabled && sk->sk_memcg) 2177 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
@@ -2181,18 +2179,40 @@ suppress_allocation:
2181 2179
2182 return 0; 2180 return 0;
2183} 2181}
2182EXPORT_SYMBOL(__sk_mem_raise_allocated);
2183
2184/**
2185 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2186 * @sk: socket
2187 * @size: memory size to allocate
2188 * @kind: allocation type
2189 *
2190 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2191 * rmem allocation. This function assumes that protocols which have
2192 * memory_pressure use sk_wmem_queued as write buffer accounting.
2193 */
2194int __sk_mem_schedule(struct sock *sk, int size, int kind)
2195{
2196 int ret, amt = sk_mem_pages(size);
2197
2198 sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
2199 ret = __sk_mem_raise_allocated(sk, size, amt, kind);
2200 if (!ret)
2201 sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
2202 return ret;
2203}
2184EXPORT_SYMBOL(__sk_mem_schedule); 2204EXPORT_SYMBOL(__sk_mem_schedule);
2185 2205
2186/** 2206/**
2187 * __sk_mem_reclaim - reclaim memory_allocated 2207 * __sk_mem_reduce_allocated - reclaim memory_allocated
2188 * @sk: socket 2208 * @sk: socket
2189 * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) 2209 * @amount: number of quanta
2210 *
2211 * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
2190 */ 2212 */
2191void __sk_mem_reclaim(struct sock *sk, int amount) 2213void __sk_mem_reduce_allocated(struct sock *sk, int amount)
2192{ 2214{
2193 amount >>= SK_MEM_QUANTUM_SHIFT;
2194 sk_memory_allocated_sub(sk, amount); 2215 sk_memory_allocated_sub(sk, amount);
2195 sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2196 2216
2197 if (mem_cgroup_sockets_enabled && sk->sk_memcg) 2217 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2198 mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); 2218 mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
@@ -2201,6 +2221,19 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
2201 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) 2221 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2202 sk_leave_memory_pressure(sk); 2222 sk_leave_memory_pressure(sk);
2203} 2223}
2224EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2225
2226/**
2227 * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
2228 * @sk: socket
2229 * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
2230 */
2231void __sk_mem_reclaim(struct sock *sk, int amount)
2232{
2233 amount >>= SK_MEM_QUANTUM_SHIFT;
2234 sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2235 __sk_mem_reduce_allocated(sk, amount);
2236}
2204EXPORT_SYMBOL(__sk_mem_reclaim); 2237EXPORT_SYMBOL(__sk_mem_reclaim);
2205 2238
2206int sk_set_peek_off(struct sock *sk, int val) 2239int sk_set_peek_off(struct sock *sk, int val)
@@ -2436,8 +2469,11 @@ void sock_init_data(struct socket *sock, struct sock *sk)
2436 sk->sk_type = sock->type; 2469 sk->sk_type = sock->type;
2437 sk->sk_wq = sock->wq; 2470 sk->sk_wq = sock->wq;
2438 sock->sk = sk; 2471 sock->sk = sk;
2439 } else 2472 sk->sk_uid = SOCK_INODE(sock)->i_uid;
2473 } else {
2440 sk->sk_wq = NULL; 2474 sk->sk_wq = NULL;
2475 sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0);
2476 }
2441 2477
2442 rwlock_init(&sk->sk_callback_lock); 2478 rwlock_init(&sk->sk_callback_lock);
2443 lockdep_set_class_and_name(&sk->sk_callback_lock, 2479 lockdep_set_class_and_name(&sk->sk_callback_lock,
diff --git a/net/core/stream.c b/net/core/stream.c
index 1086c8b280a8..f575bcf64af2 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -53,8 +53,8 @@ void sk_stream_write_space(struct sock *sk)
53 */ 53 */
54int sk_stream_wait_connect(struct sock *sk, long *timeo_p) 54int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
55{ 55{
56 DEFINE_WAIT_FUNC(wait, woken_wake_function);
56 struct task_struct *tsk = current; 57 struct task_struct *tsk = current;
57 DEFINE_WAIT(wait);
58 int done; 58 int done;
59 59
60 do { 60 do {
@@ -68,13 +68,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
68 if (signal_pending(tsk)) 68 if (signal_pending(tsk))
69 return sock_intr_errno(*timeo_p); 69 return sock_intr_errno(*timeo_p);
70 70
71 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 71 add_wait_queue(sk_sleep(sk), &wait);
72 sk->sk_write_pending++; 72 sk->sk_write_pending++;
73 done = sk_wait_event(sk, timeo_p, 73 done = sk_wait_event(sk, timeo_p,
74 !sk->sk_err && 74 !sk->sk_err &&
75 !((1 << sk->sk_state) & 75 !((1 << sk->sk_state) &
76 ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); 76 ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait);
77 finish_wait(sk_sleep(sk), &wait); 77 remove_wait_queue(sk_sleep(sk), &wait);
78 sk->sk_write_pending--; 78 sk->sk_write_pending--;
79 } while (!done); 79 } while (!done);
80 return 0; 80 return 0;
@@ -94,16 +94,16 @@ static inline int sk_stream_closing(struct sock *sk)
94void sk_stream_wait_close(struct sock *sk, long timeout) 94void sk_stream_wait_close(struct sock *sk, long timeout)
95{ 95{
96 if (timeout) { 96 if (timeout) {
97 DEFINE_WAIT(wait); 97 DEFINE_WAIT_FUNC(wait, woken_wake_function);
98
99 add_wait_queue(sk_sleep(sk), &wait);
98 100
99 do { 101 do {
100 prepare_to_wait(sk_sleep(sk), &wait, 102 if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk), &wait))
101 TASK_INTERRUPTIBLE);
102 if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
103 break; 103 break;
104 } while (!signal_pending(current) && timeout); 104 } while (!signal_pending(current) && timeout);
105 105
106 finish_wait(sk_sleep(sk), &wait); 106 remove_wait_queue(sk_sleep(sk), &wait);
107 } 107 }
108} 108}
109EXPORT_SYMBOL(sk_stream_wait_close); 109EXPORT_SYMBOL(sk_stream_wait_close);
@@ -119,16 +119,16 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
119 long vm_wait = 0; 119 long vm_wait = 0;
120 long current_timeo = *timeo_p; 120 long current_timeo = *timeo_p;
121 bool noblock = (*timeo_p ? false : true); 121 bool noblock = (*timeo_p ? false : true);
122 DEFINE_WAIT(wait); 122 DEFINE_WAIT_FUNC(wait, woken_wake_function);
123 123
124 if (sk_stream_memory_free(sk)) 124 if (sk_stream_memory_free(sk))
125 current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2; 125 current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
126 126
127 add_wait_queue(sk_sleep(sk), &wait);
128
127 while (1) { 129 while (1) {
128 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 130 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
129 131
130 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
131
132 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 132 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
133 goto do_error; 133 goto do_error;
134 if (!*timeo_p) { 134 if (!*timeo_p) {
@@ -147,7 +147,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
147 sk_wait_event(sk, &current_timeo, sk->sk_err || 147 sk_wait_event(sk, &current_timeo, sk->sk_err ||
148 (sk->sk_shutdown & SEND_SHUTDOWN) || 148 (sk->sk_shutdown & SEND_SHUTDOWN) ||
149 (sk_stream_memory_free(sk) && 149 (sk_stream_memory_free(sk) &&
150 !vm_wait)); 150 !vm_wait), &wait);
151 sk->sk_write_pending--; 151 sk->sk_write_pending--;
152 152
153 if (vm_wait) { 153 if (vm_wait) {
@@ -161,7 +161,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
161 *timeo_p = current_timeo; 161 *timeo_p = current_timeo;
162 } 162 }
163out: 163out:
164 finish_wait(sk_sleep(sk), &wait); 164 remove_wait_queue(sk_sleep(sk), &wait);
165 return err; 165 return err;
166 166
167do_error: 167do_error:
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 0df2aa652530..2a46e4009f62 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -79,10 +79,13 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
79 79
80 if (sock_table != orig_sock_table) { 80 if (sock_table != orig_sock_table) {
81 rcu_assign_pointer(rps_sock_flow_table, sock_table); 81 rcu_assign_pointer(rps_sock_flow_table, sock_table);
82 if (sock_table) 82 if (sock_table) {
83 static_key_slow_inc(&rps_needed); 83 static_key_slow_inc(&rps_needed);
84 static_key_slow_inc(&rfs_needed);
85 }
84 if (orig_sock_table) { 86 if (orig_sock_table) {
85 static_key_slow_dec(&rps_needed); 87 static_key_slow_dec(&rps_needed);
88 static_key_slow_dec(&rfs_needed);
86 synchronize_rcu(); 89 synchronize_rcu();
87 vfree(orig_sock_table); 90 vfree(orig_sock_table);
88 } 91 }
diff --git a/net/core/utils.c b/net/core/utils.c
index cf5622b9ccc4..6592d7bbed39 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -31,7 +31,7 @@
31#include <net/net_ratelimit.h> 31#include <net/net_ratelimit.h>
32 32
33#include <asm/byteorder.h> 33#include <asm/byteorder.h>
34#include <asm/uaccess.h> 34#include <linux/uaccess.h>
35 35
36DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10); 36DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10);
37/* 37/*
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index edbe59d203ef..d859a5c36e70 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -590,13 +590,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
590 if (inet_csk_reqsk_queue_is_full(sk)) 590 if (inet_csk_reqsk_queue_is_full(sk))
591 goto drop; 591 goto drop;
592 592
593 /* 593 if (sk_acceptq_is_full(sk))
594 * Accept backlog is full. If we have already queued enough
595 * of warm entries in syn queue, drop request. It is better than
596 * clogging syn queue with openreqs with exponentially increasing
597 * timeout.
598 */
599 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
600 goto drop; 594 goto drop;
601 595
602 req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true); 596 req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 715e5d1dc107..adfc790f7193 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -326,7 +326,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
326 if (inet_csk_reqsk_queue_is_full(sk)) 326 if (inet_csk_reqsk_queue_is_full(sk))
327 goto drop; 327 goto drop;
328 328
329 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 329 if (sk_acceptq_is_full(sk))
330 goto drop; 330 goto drop;
331 331
332 req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true); 332 req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 13d6b1a6e0fc..a90ed67027b0 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1718,7 +1718,7 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
1718 * See if there is data ready to read, sleep if there isn't 1718 * See if there is data ready to read, sleep if there isn't
1719 */ 1719 */
1720 for(;;) { 1720 for(;;) {
1721 DEFINE_WAIT(wait); 1721 DEFINE_WAIT_FUNC(wait, woken_wake_function);
1722 1722
1723 if (sk->sk_err) 1723 if (sk->sk_err)
1724 goto out; 1724 goto out;
@@ -1749,11 +1749,11 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
1749 goto out; 1749 goto out;
1750 } 1750 }
1751 1751
1752 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1752 add_wait_queue(sk_sleep(sk), &wait);
1753 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 1753 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1754 sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); 1754 sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target), &wait);
1755 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 1755 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1756 finish_wait(sk_sleep(sk), &wait); 1756 remove_wait_queue(sk_sleep(sk), &wait);
1757 } 1757 }
1758 1758
1759 skb_queue_walk_safe(queue, skb, n) { 1759 skb_queue_walk_safe(queue, skb, n) {
@@ -1999,19 +1999,19 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
1999 * size. 1999 * size.
2000 */ 2000 */
2001 if (dn_queue_too_long(scp, queue, flags)) { 2001 if (dn_queue_too_long(scp, queue, flags)) {
2002 DEFINE_WAIT(wait); 2002 DEFINE_WAIT_FUNC(wait, woken_wake_function);
2003 2003
2004 if (flags & MSG_DONTWAIT) { 2004 if (flags & MSG_DONTWAIT) {
2005 err = -EWOULDBLOCK; 2005 err = -EWOULDBLOCK;
2006 goto out; 2006 goto out;
2007 } 2007 }
2008 2008
2009 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 2009 add_wait_queue(sk_sleep(sk), &wait);
2010 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2010 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2011 sk_wait_event(sk, &timeo, 2011 sk_wait_event(sk, &timeo,
2012 !dn_queue_too_long(scp, queue, flags)); 2012 !dn_queue_too_long(scp, queue, flags), &wait);
2013 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2013 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2014 finish_wait(sk_sleep(sk), &wait); 2014 remove_wait_queue(sk_sleep(sk), &wait);
2015 continue; 2015 continue;
2016 } 2016 }
2017 2017
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index b2c26b081134..8fdd9f492b0e 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -42,7 +42,7 @@
42#include <linux/notifier.h> 42#include <linux/notifier.h>
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/jiffies.h> 44#include <linux/jiffies.h>
45#include <asm/uaccess.h> 45#include <linux/uaccess.h>
46#include <net/net_namespace.h> 46#include <net/net_namespace.h>
47#include <net/neighbour.h> 47#include <net/neighbour.h>
48#include <net/dst.h> 48#include <net/dst.h>
@@ -201,7 +201,7 @@ static struct dn_dev_sysctl_table {
201 .extra1 = &min_t3, 201 .extra1 = &min_t3,
202 .extra2 = &max_t3 202 .extra2 = &max_t3
203 }, 203 },
204 {0} 204 { }
205 }, 205 },
206}; 206};
207 207
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index a796fc7cbc35..7af0ba6157a1 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -31,7 +31,7 @@
31#include <linux/timer.h> 31#include <linux/timer.h>
32#include <linux/spinlock.h> 32#include <linux/spinlock.h>
33#include <linux/atomic.h> 33#include <linux/atomic.h>
34#include <asm/uaccess.h> 34#include <linux/uaccess.h>
35#include <net/neighbour.h> 35#include <net/neighbour.h>
36#include <net/dst.h> 36#include <net/dst.h>
37#include <net/flow.h> 37#include <net/flow.h>
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 1540b506e3e0..232675480756 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -25,7 +25,7 @@
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <linux/spinlock.h> 26#include <linux/spinlock.h>
27#include <linux/atomic.h> 27#include <linux/atomic.h>
28#include <asm/uaccess.h> 28#include <linux/uaccess.h>
29#include <linux/route.h> /* RTF_xxx */ 29#include <linux/route.h> /* RTF_xxx */
30#include <net/neighbour.h> 30#include <net/neighbour.h>
31#include <net/netlink.h> 31#include <net/netlink.h>
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 5325b541c526..6c7da6c29bf0 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -22,7 +22,7 @@
22#include <net/dst.h> 22#include <net/dst.h>
23#include <net/flow.h> 23#include <net/flow.h>
24 24
25#include <asm/uaccess.h> 25#include <linux/uaccess.h>
26 26
27#include <net/dn.h> 27#include <net/dn.h>
28#include <net/dn_dev.h> 28#include <net/dn_dev.h>
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 30e2e21d7619..68c9eea00518 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -641,7 +641,8 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
641 641
642/* ethtool operations *******************************************************/ 642/* ethtool operations *******************************************************/
643static int 643static int
644dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) 644dsa_slave_get_link_ksettings(struct net_device *dev,
645 struct ethtool_link_ksettings *cmd)
645{ 646{
646 struct dsa_slave_priv *p = netdev_priv(dev); 647 struct dsa_slave_priv *p = netdev_priv(dev);
647 int err; 648 int err;
@@ -650,19 +651,20 @@ dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
650 if (p->phy != NULL) { 651 if (p->phy != NULL) {
651 err = phy_read_status(p->phy); 652 err = phy_read_status(p->phy);
652 if (err == 0) 653 if (err == 0)
653 err = phy_ethtool_gset(p->phy, cmd); 654 err = phy_ethtool_ksettings_get(p->phy, cmd);
654 } 655 }
655 656
656 return err; 657 return err;
657} 658}
658 659
659static int 660static int
660dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) 661dsa_slave_set_link_ksettings(struct net_device *dev,
662 const struct ethtool_link_ksettings *cmd)
661{ 663{
662 struct dsa_slave_priv *p = netdev_priv(dev); 664 struct dsa_slave_priv *p = netdev_priv(dev);
663 665
664 if (p->phy != NULL) 666 if (p->phy != NULL)
665 return phy_ethtool_sset(p->phy, cmd); 667 return phy_ethtool_ksettings_set(p->phy, cmd);
666 668
667 return -EOPNOTSUPP; 669 return -EOPNOTSUPP;
668} 670}
@@ -990,8 +992,6 @@ void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
990} 992}
991 993
992static const struct ethtool_ops dsa_slave_ethtool_ops = { 994static const struct ethtool_ops dsa_slave_ethtool_ops = {
993 .get_settings = dsa_slave_get_settings,
994 .set_settings = dsa_slave_set_settings,
995 .get_drvinfo = dsa_slave_get_drvinfo, 995 .get_drvinfo = dsa_slave_get_drvinfo,
996 .get_regs_len = dsa_slave_get_regs_len, 996 .get_regs_len = dsa_slave_get_regs_len,
997 .get_regs = dsa_slave_get_regs, 997 .get_regs = dsa_slave_get_regs,
@@ -1007,6 +1007,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
1007 .get_wol = dsa_slave_get_wol, 1007 .get_wol = dsa_slave_get_wol,
1008 .set_eee = dsa_slave_set_eee, 1008 .set_eee = dsa_slave_set_eee,
1009 .get_eee = dsa_slave_get_eee, 1009 .get_eee = dsa_slave_get_eee,
1010 .get_link_ksettings = dsa_slave_get_link_ksettings,
1011 .set_link_ksettings = dsa_slave_set_link_ksettings,
1010}; 1012};
1011 1013
1012static const struct net_device_ops dsa_slave_netdev_ops = { 1014static const struct net_device_ops dsa_slave_netdev_ops = {
@@ -1250,6 +1252,8 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
1250 slave_dev->priv_flags |= IFF_NO_QUEUE; 1252 slave_dev->priv_flags |= IFF_NO_QUEUE;
1251 slave_dev->netdev_ops = &dsa_slave_netdev_ops; 1253 slave_dev->netdev_ops = &dsa_slave_netdev_ops;
1252 slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; 1254 slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
1255 slave_dev->min_mtu = 0;
1256 slave_dev->max_mtu = ETH_MAX_MTU;
1253 SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); 1257 SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
1254 1258
1255 netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, 1259 netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 02acfff36028..8c5a479681ca 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -62,6 +62,7 @@
62#include <net/dsa.h> 62#include <net/dsa.h>
63#include <net/flow_dissector.h> 63#include <net/flow_dissector.h>
64#include <linux/uaccess.h> 64#include <linux/uaccess.h>
65#include <net/pkt_sched.h>
65 66
66__setup("ether=", netdev_boot_setup); 67__setup("ether=", netdev_boot_setup);
67 68
@@ -322,8 +323,7 @@ EXPORT_SYMBOL(eth_mac_addr);
322 */ 323 */
323int eth_change_mtu(struct net_device *dev, int new_mtu) 324int eth_change_mtu(struct net_device *dev, int new_mtu)
324{ 325{
325 if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) 326 netdev_warn(dev, "%s is deprecated\n", __func__);
326 return -EINVAL;
327 dev->mtu = new_mtu; 327 dev->mtu = new_mtu;
328 return 0; 328 return 0;
329} 329}
@@ -357,8 +357,10 @@ void ether_setup(struct net_device *dev)
357 dev->type = ARPHRD_ETHER; 357 dev->type = ARPHRD_ETHER;
358 dev->hard_header_len = ETH_HLEN; 358 dev->hard_header_len = ETH_HLEN;
359 dev->mtu = ETH_DATA_LEN; 359 dev->mtu = ETH_DATA_LEN;
360 dev->min_mtu = ETH_MIN_MTU;
361 dev->max_mtu = ETH_DATA_LEN;
360 dev->addr_len = ETH_ALEN; 362 dev->addr_len = ETH_ALEN;
361 dev->tx_queue_len = 1000; /* Ethernet wants good queues */ 363 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
362 dev->flags = IFF_BROADCAST|IFF_MULTICAST; 364 dev->flags = IFF_BROADCAST|IFF_MULTICAST;
363 dev->priv_flags |= IFF_TX_SKB_SHARING; 365 dev->priv_flags |= IFF_TX_SKB_SHARING;
364 366
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 16737cd8dae8..fc65b145f6e7 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -398,6 +398,7 @@ void hsr_dev_setup(struct net_device *dev)
398 random_ether_addr(dev->dev_addr); 398 random_ether_addr(dev->dev_addr);
399 399
400 ether_setup(dev); 400 ether_setup(dev);
401 dev->min_mtu = 0;
401 dev->header_ops = &hsr_header_ops; 402 dev->header_ops = &hsr_header_ops;
402 dev->netdev_ops = &hsr_device_ops; 403 dev->netdev_ops = &hsr_device_ops;
403 SET_NETDEV_DEVTYPE(dev, &hsr_type); 404 SET_NETDEV_DEVTYPE(dev, &hsr_type);
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index d4d1617f43a8..1ab30e7d3f99 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -131,13 +131,7 @@ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = {
131 [HSR_A_IF2_SEQ] = { .type = NLA_U16 }, 131 [HSR_A_IF2_SEQ] = { .type = NLA_U16 },
132}; 132};
133 133
134static struct genl_family hsr_genl_family = { 134static struct genl_family hsr_genl_family;
135 .id = GENL_ID_GENERATE,
136 .hdrsize = 0,
137 .name = "HSR",
138 .version = 1,
139 .maxattr = HSR_A_MAX,
140};
141 135
142static const struct genl_multicast_group hsr_mcgrps[] = { 136static const struct genl_multicast_group hsr_mcgrps[] = {
143 { .name = "hsr-network", }, 137 { .name = "hsr-network", },
@@ -467,6 +461,18 @@ static const struct genl_ops hsr_ops[] = {
467 }, 461 },
468}; 462};
469 463
464static struct genl_family hsr_genl_family __ro_after_init = {
465 .hdrsize = 0,
466 .name = "HSR",
467 .version = 1,
468 .maxattr = HSR_A_MAX,
469 .module = THIS_MODULE,
470 .ops = hsr_ops,
471 .n_ops = ARRAY_SIZE(hsr_ops),
472 .mcgrps = hsr_mcgrps,
473 .n_mcgrps = ARRAY_SIZE(hsr_mcgrps),
474};
475
470int __init hsr_netlink_init(void) 476int __init hsr_netlink_init(void)
471{ 477{
472 int rc; 478 int rc;
@@ -475,8 +481,7 @@ int __init hsr_netlink_init(void)
475 if (rc) 481 if (rc)
476 goto fail_rtnl_link_register; 482 goto fail_rtnl_link_register;
477 483
478 rc = genl_register_family_with_ops_groups(&hsr_genl_family, hsr_ops, 484 rc = genl_register_family(&hsr_genl_family);
479 hsr_mcgrps);
480 if (rc) 485 if (rc)
481 goto fail_genl_register_family; 486 goto fail_genl_register_family;
482 487
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index 5ac778962e4e..ac7c96b73ad5 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -7,7 +7,7 @@
7#include <net/inet_frag.h> 7#include <net/inet_frag.h>
8#include <net/6lowpan.h> 8#include <net/6lowpan.h>
9 9
10typedef unsigned __bitwise__ lowpan_rx_result; 10typedef unsigned __bitwise lowpan_rx_result;
11#define RX_CONTINUE ((__force lowpan_rx_result) 0u) 11#define RX_CONTINUE ((__force lowpan_rx_result) 0u)
12#define RX_DROP_UNUSABLE ((__force lowpan_rx_result) 1u) 12#define RX_DROP_UNUSABLE ((__force lowpan_rx_result) 1u)
13#define RX_DROP ((__force lowpan_rx_result) 2u) 13#define RX_DROP ((__force lowpan_rx_result) 2u)
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index 4adfd4d5471b..9b92ade687a3 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -7,5 +7,3 @@ ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \
7ieee802154_socket-y := socket.o 7ieee802154_socket-y := socket.o
8 8
9CFLAGS_trace.o := -I$(src) 9CFLAGS_trace.o := -I$(src)
10
11ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index c8133c07ceee..6bde9e5a5503 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -28,14 +28,6 @@
28static unsigned int ieee802154_seq_num; 28static unsigned int ieee802154_seq_num;
29static DEFINE_SPINLOCK(ieee802154_seq_lock); 29static DEFINE_SPINLOCK(ieee802154_seq_lock);
30 30
31struct genl_family nl802154_family = {
32 .id = GENL_ID_GENERATE,
33 .hdrsize = 0,
34 .name = IEEE802154_NL_NAME,
35 .version = 1,
36 .maxattr = IEEE802154_ATTR_MAX,
37};
38
39/* Requests to userspace */ 31/* Requests to userspace */
40struct sk_buff *ieee802154_nl_create(int flags, u8 req) 32struct sk_buff *ieee802154_nl_create(int flags, u8 req)
41{ 33{
@@ -139,11 +131,21 @@ static const struct genl_multicast_group ieee802154_mcgrps[] = {
139 [IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, }, 131 [IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, },
140}; 132};
141 133
134struct genl_family nl802154_family __ro_after_init = {
135 .hdrsize = 0,
136 .name = IEEE802154_NL_NAME,
137 .version = 1,
138 .maxattr = IEEE802154_ATTR_MAX,
139 .module = THIS_MODULE,
140 .ops = ieee8021154_ops,
141 .n_ops = ARRAY_SIZE(ieee8021154_ops),
142 .mcgrps = ieee802154_mcgrps,
143 .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps),
144};
145
142int __init ieee802154_nl_init(void) 146int __init ieee802154_nl_init(void)
143{ 147{
144 return genl_register_family_with_ops_groups(&nl802154_family, 148 return genl_register_family(&nl802154_family);
145 ieee8021154_ops,
146 ieee802154_mcgrps);
147} 149}
148 150
149void ieee802154_nl_exit(void) 151void ieee802154_nl_exit(void)
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 77d73014bde3..dc2960be51e0 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -286,9 +286,12 @@ int ieee802154_del_iface(struct sk_buff *skb, struct genl_info *info)
286 if (name[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] != '\0') 286 if (name[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] != '\0')
287 return -EINVAL; /* name should be null-terminated */ 287 return -EINVAL; /* name should be null-terminated */
288 288
289 rc = -ENODEV;
289 dev = dev_get_by_name(genl_info_net(info), name); 290 dev = dev_get_by_name(genl_info_net(info), name);
290 if (!dev) 291 if (!dev)
291 return -ENODEV; 292 return rc;
293 if (dev->type != ARPHRD_IEEE802154)
294 goto out;
292 295
293 phy = dev->ieee802154_ptr->wpan_phy; 296 phy = dev->ieee802154_ptr->wpan_phy;
294 BUG_ON(!phy); 297 BUG_ON(!phy);
@@ -342,6 +345,7 @@ nla_put_failure:
342 nlmsg_free(msg); 345 nlmsg_free(msg);
343out_dev: 346out_dev:
344 wpan_phy_put(phy); 347 wpan_phy_put(phy);
348out:
345 if (dev) 349 if (dev)
346 dev_put(dev); 350 dev_put(dev);
347 351
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index d90a4ed5b8a0..fc60cd061f39 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -26,23 +26,8 @@
26#include "rdev-ops.h" 26#include "rdev-ops.h"
27#include "core.h" 27#include "core.h"
28 28
29static int nl802154_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
30 struct genl_info *info);
31
32static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
33 struct genl_info *info);
34
35/* the netlink family */ 29/* the netlink family */
36static struct genl_family nl802154_fam = { 30static struct genl_family nl802154_fam;
37 .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */
38 .name = NL802154_GENL_NAME, /* have users key off the name instead */
39 .hdrsize = 0, /* no private header */
40 .version = 1, /* no particular meaning now */
41 .maxattr = NL802154_ATTR_MAX,
42 .netnsok = true,
43 .pre_doit = nl802154_pre_doit,
44 .post_doit = nl802154_post_doit,
45};
46 31
47/* multicast groups */ 32/* multicast groups */
48enum nl802154_multicast_groups { 33enum nl802154_multicast_groups {
@@ -263,13 +248,14 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
263 248
264 if (!cb->args[0]) { 249 if (!cb->args[0]) {
265 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, 250 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
266 nl802154_fam.attrbuf, nl802154_fam.maxattr, 251 genl_family_attrbuf(&nl802154_fam),
252 nl802154_fam.maxattr,
267 nl802154_policy); 253 nl802154_policy);
268 if (err) 254 if (err)
269 goto out_unlock; 255 goto out_unlock;
270 256
271 *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk), 257 *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk),
272 nl802154_fam.attrbuf); 258 genl_family_attrbuf(&nl802154_fam));
273 if (IS_ERR(*wpan_dev)) { 259 if (IS_ERR(*wpan_dev)) {
274 err = PTR_ERR(*wpan_dev); 260 err = PTR_ERR(*wpan_dev);
275 goto out_unlock; 261 goto out_unlock;
@@ -575,7 +561,7 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
575 struct netlink_callback *cb, 561 struct netlink_callback *cb,
576 struct nl802154_dump_wpan_phy_state *state) 562 struct nl802154_dump_wpan_phy_state *state)
577{ 563{
578 struct nlattr **tb = nl802154_fam.attrbuf; 564 struct nlattr **tb = genl_family_attrbuf(&nl802154_fam);
579 int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, 565 int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
580 tb, nl802154_fam.maxattr, nl802154_policy); 566 tb, nl802154_fam.maxattr, nl802154_policy);
581 567
@@ -2476,11 +2462,25 @@ static const struct genl_ops nl802154_ops[] = {
2476#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ 2462#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
2477}; 2463};
2478 2464
2465static struct genl_family nl802154_fam __ro_after_init = {
2466 .name = NL802154_GENL_NAME, /* have users key off the name instead */
2467 .hdrsize = 0, /* no private header */
2468 .version = 1, /* no particular meaning now */
2469 .maxattr = NL802154_ATTR_MAX,
2470 .netnsok = true,
2471 .pre_doit = nl802154_pre_doit,
2472 .post_doit = nl802154_post_doit,
2473 .module = THIS_MODULE,
2474 .ops = nl802154_ops,
2475 .n_ops = ARRAY_SIZE(nl802154_ops),
2476 .mcgrps = nl802154_mcgrps,
2477 .n_mcgrps = ARRAY_SIZE(nl802154_mcgrps),
2478};
2479
2479/* initialisation/exit functions */ 2480/* initialisation/exit functions */
2480int nl802154_init(void) 2481int __init nl802154_init(void)
2481{ 2482{
2482 return genl_register_family_with_ops_groups(&nl802154_fam, nl802154_ops, 2483 return genl_register_family(&nl802154_fam);
2483 nl802154_mcgrps);
2484} 2484}
2485 2485
2486void nl802154_exit(void) 2486void nl802154_exit(void)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index b54b3ca939db..6e7baaf814c6 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -430,6 +430,14 @@ config INET_UDP_DIAG
430 Support for UDP socket monitoring interface used by the ss tool. 430 Support for UDP socket monitoring interface used by the ss tool.
431 If unsure, say Y. 431 If unsure, say Y.
432 432
433config INET_RAW_DIAG
434 tristate "RAW: socket monitoring interface"
435 depends on INET_DIAG && (IPV6 || IPV6=n)
436 default n
437 ---help---
438 Support for RAW socket monitoring interface used by the ss tool.
439 If unsure, say Y.
440
433config INET_DIAG_DESTROY 441config INET_DIAG_DESTROY
434 bool "INET: allow privileged process to administratively close sockets" 442 bool "INET: allow privileged process to administratively close sockets"
435 depends on INET_DIAG 443 depends on INET_DIAG
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index bc6a6c8b9bcd..48af58a5686e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
40obj-$(CONFIG_INET_DIAG) += inet_diag.o 40obj-$(CONFIG_INET_DIAG) += inet_diag.o
41obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o 41obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
42obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o 42obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
43obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
43obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o 44obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
44obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o 45obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o
45obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o 46obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 215143246e4b..f75069883f2b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -90,7 +90,7 @@
90#include <linux/random.h> 90#include <linux/random.h>
91#include <linux/slab.h> 91#include <linux/slab.h>
92 92
93#include <asm/uaccess.h> 93#include <linux/uaccess.h>
94 94
95#include <linux/inet.h> 95#include <linux/inet.h>
96#include <linux/igmp.h> 96#include <linux/igmp.h>
@@ -374,8 +374,18 @@ lookup_protocol:
374 374
375 if (sk->sk_prot->init) { 375 if (sk->sk_prot->init) {
376 err = sk->sk_prot->init(sk); 376 err = sk->sk_prot->init(sk);
377 if (err) 377 if (err) {
378 sk_common_release(sk);
379 goto out;
380 }
381 }
382
383 if (!kern) {
384 err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
385 if (err) {
378 sk_common_release(sk); 386 sk_common_release(sk);
387 goto out;
388 }
379 } 389 }
380out: 390out:
381 return err; 391 return err;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 062a67ca9a21..4cd2ee8857d2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -26,7 +26,7 @@
26 */ 26 */
27 27
28 28
29#include <asm/uaccess.h> 29#include <linux/uaccess.h>
30#include <linux/bitops.h> 30#include <linux/bitops.h>
31#include <linux/capability.h> 31#include <linux/capability.h>
32#include <linux/module.h> 32#include <linux/module.h>
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 161fc0f0d752..3ff8938893ec 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -14,7 +14,7 @@
14 */ 14 */
15 15
16#include <linux/module.h> 16#include <linux/module.h>
17#include <asm/uaccess.h> 17#include <linux/uaccess.h>
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/capability.h> 19#include <linux/capability.h>
20#include <linux/types.h> 20#include <linux/types.h>
@@ -620,6 +620,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
620 [RTA_FLOW] = { .type = NLA_U32 }, 620 [RTA_FLOW] = { .type = NLA_U32 },
621 [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, 621 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
622 [RTA_ENCAP] = { .type = NLA_NESTED }, 622 [RTA_ENCAP] = { .type = NLA_NESTED },
623 [RTA_UID] = { .type = NLA_U32 },
623}; 624};
624 625
625static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, 626static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -1218,6 +1219,8 @@ static int __net_init ip_fib_net_init(struct net *net)
1218 int err; 1219 int err;
1219 size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ; 1220 size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
1220 1221
1222 net->ipv4.fib_seq = 0;
1223
1221 /* Avoid false sharing : Use at least a full cache line */ 1224 /* Avoid false sharing : Use at least a full cache line */
1222 size = max_t(size_t, size, L1_CACHE_BYTES); 1225 size = max_t(size_t, size, L1_CACHE_BYTES);
1223 1226
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 388d3e21629b..7a5b4c7d9a87 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -13,7 +13,7 @@
13 * 2 of the License, or (at your option) any later version. 13 * 2 of the License, or (at your option) any later version.
14 */ 14 */
15 15
16#include <asm/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/types.h> 18#include <linux/types.h>
19#include <linux/kernel.h> 19#include <linux/kernel.h>
@@ -234,6 +234,7 @@ void free_fib_info(struct fib_info *fi)
234#endif 234#endif
235 call_rcu(&fi->rcu, free_fib_info_rcu); 235 call_rcu(&fi->rcu, free_fib_info_rcu);
236} 236}
237EXPORT_SYMBOL_GPL(free_fib_info);
237 238
238void fib_release_info(struct fib_info *fi) 239void fib_release_info(struct fib_info *fi)
239{ 240{
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e3665bf7a7f3..2919d1a10cfd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,7 +50,7 @@
50 50
51#define VERSION "0.409" 51#define VERSION "0.409"
52 52
53#include <asm/uaccess.h> 53#include <linux/uaccess.h>
54#include <linux/bitops.h> 54#include <linux/bitops.h>
55#include <linux/types.h> 55#include <linux/types.h>
56#include <linux/kernel.h> 56#include <linux/kernel.h>
@@ -84,25 +84,114 @@
84#include <trace/events/fib.h> 84#include <trace/events/fib.h>
85#include "fib_lookup.h" 85#include "fib_lookup.h"
86 86
87static BLOCKING_NOTIFIER_HEAD(fib_chain); 87static unsigned int fib_seq_sum(void)
88{
89 unsigned int fib_seq = 0;
90 struct net *net;
91
92 rtnl_lock();
93 for_each_net(net)
94 fib_seq += net->ipv4.fib_seq;
95 rtnl_unlock();
96
97 return fib_seq;
98}
99
100static ATOMIC_NOTIFIER_HEAD(fib_chain);
88 101
89int register_fib_notifier(struct notifier_block *nb) 102static int call_fib_notifier(struct notifier_block *nb, struct net *net,
103 enum fib_event_type event_type,
104 struct fib_notifier_info *info)
90{ 105{
91 return blocking_notifier_chain_register(&fib_chain, nb); 106 info->net = net;
107 return nb->notifier_call(nb, event_type, info);
108}
109
110static void fib_rules_notify(struct net *net, struct notifier_block *nb,
111 enum fib_event_type event_type)
112{
113#ifdef CONFIG_IP_MULTIPLE_TABLES
114 struct fib_notifier_info info;
115
116 if (net->ipv4.fib_has_custom_rules)
117 call_fib_notifier(nb, net, event_type, &info);
118#endif
119}
120
121static void fib_notify(struct net *net, struct notifier_block *nb,
122 enum fib_event_type event_type);
123
124static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
125 enum fib_event_type event_type, u32 dst,
126 int dst_len, struct fib_info *fi,
127 u8 tos, u8 type, u32 tb_id, u32 nlflags)
128{
129 struct fib_entry_notifier_info info = {
130 .dst = dst,
131 .dst_len = dst_len,
132 .fi = fi,
133 .tos = tos,
134 .type = type,
135 .tb_id = tb_id,
136 .nlflags = nlflags,
137 };
138 return call_fib_notifier(nb, net, event_type, &info.info);
139}
140
141static bool fib_dump_is_consistent(struct notifier_block *nb,
142 void (*cb)(struct notifier_block *nb),
143 unsigned int fib_seq)
144{
145 atomic_notifier_chain_register(&fib_chain, nb);
146 if (fib_seq == fib_seq_sum())
147 return true;
148 atomic_notifier_chain_unregister(&fib_chain, nb);
149 if (cb)
150 cb(nb);
151 return false;
152}
153
154#define FIB_DUMP_MAX_RETRIES 5
155int register_fib_notifier(struct notifier_block *nb,
156 void (*cb)(struct notifier_block *nb))
157{
158 int retries = 0;
159
160 do {
161 unsigned int fib_seq = fib_seq_sum();
162 struct net *net;
163
164 /* Mutex semantics guarantee that every change done to
165 * FIB tries before we read the change sequence counter
166 * is now visible to us.
167 */
168 rcu_read_lock();
169 for_each_net_rcu(net) {
170 fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD);
171 fib_notify(net, nb, FIB_EVENT_ENTRY_ADD);
172 }
173 rcu_read_unlock();
174
175 if (fib_dump_is_consistent(nb, cb, fib_seq))
176 return 0;
177 } while (++retries < FIB_DUMP_MAX_RETRIES);
178
179 return -EBUSY;
92} 180}
93EXPORT_SYMBOL(register_fib_notifier); 181EXPORT_SYMBOL(register_fib_notifier);
94 182
95int unregister_fib_notifier(struct notifier_block *nb) 183int unregister_fib_notifier(struct notifier_block *nb)
96{ 184{
97 return blocking_notifier_chain_unregister(&fib_chain, nb); 185 return atomic_notifier_chain_unregister(&fib_chain, nb);
98} 186}
99EXPORT_SYMBOL(unregister_fib_notifier); 187EXPORT_SYMBOL(unregister_fib_notifier);
100 188
101int call_fib_notifiers(struct net *net, enum fib_event_type event_type, 189int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
102 struct fib_notifier_info *info) 190 struct fib_notifier_info *info)
103{ 191{
192 net->ipv4.fib_seq++;
104 info->net = net; 193 info->net = net;
105 return blocking_notifier_call_chain(&fib_chain, event_type, info); 194 return atomic_notifier_call_chain(&fib_chain, event_type, info);
106} 195}
107 196
108static int call_fib_entry_notifiers(struct net *net, 197static int call_fib_entry_notifiers(struct net *net,
@@ -1903,6 +1992,62 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
1903 return found; 1992 return found;
1904} 1993}
1905 1994
1995static void fib_leaf_notify(struct net *net, struct key_vector *l,
1996 struct fib_table *tb, struct notifier_block *nb,
1997 enum fib_event_type event_type)
1998{
1999 struct fib_alias *fa;
2000
2001 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2002 struct fib_info *fi = fa->fa_info;
2003
2004 if (!fi)
2005 continue;
2006
2007 /* local and main table can share the same trie,
2008 * so don't notify twice for the same entry.
2009 */
2010 if (tb->tb_id != fa->tb_id)
2011 continue;
2012
2013 call_fib_entry_notifier(nb, net, event_type, l->key,
2014 KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
2015 fa->fa_type, fa->tb_id, 0);
2016 }
2017}
2018
2019static void fib_table_notify(struct net *net, struct fib_table *tb,
2020 struct notifier_block *nb,
2021 enum fib_event_type event_type)
2022{
2023 struct trie *t = (struct trie *)tb->tb_data;
2024 struct key_vector *l, *tp = t->kv;
2025 t_key key = 0;
2026
2027 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2028 fib_leaf_notify(net, l, tb, nb, event_type);
2029
2030 key = l->key + 1;
2031 /* stop in case of wrap around */
2032 if (key < l->key)
2033 break;
2034 }
2035}
2036
2037static void fib_notify(struct net *net, struct notifier_block *nb,
2038 enum fib_event_type event_type)
2039{
2040 unsigned int h;
2041
2042 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2043 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2044 struct fib_table *tb;
2045
2046 hlist_for_each_entry_rcu(tb, head, tb_hlist)
2047 fib_table_notify(net, tb, nb, event_type);
2048 }
2049}
2050
1906static void __trie_free_rcu(struct rcu_head *head) 2051static void __trie_free_rcu(struct rcu_head *head)
1907{ 2052{
1908 struct fib_table *tb = container_of(head, struct fib_table, rcu); 2053 struct fib_table *tb = container_of(head, struct fib_table, rcu);
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 030d1531e897..805f6607f8d9 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -622,14 +622,7 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg)
622 return err; 622 return err;
623} 623}
624 624
625static struct genl_family fou_nl_family = { 625static struct genl_family fou_nl_family;
626 .id = GENL_ID_GENERATE,
627 .hdrsize = 0,
628 .name = FOU_GENL_NAME,
629 .version = FOU_GENL_VERSION,
630 .maxattr = FOU_ATTR_MAX,
631 .netnsok = true,
632};
633 626
634static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { 627static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
635 [FOU_ATTR_PORT] = { .type = NLA_U16, }, 628 [FOU_ATTR_PORT] = { .type = NLA_U16, },
@@ -831,6 +824,17 @@ static const struct genl_ops fou_nl_ops[] = {
831 }, 824 },
832}; 825};
833 826
827static struct genl_family fou_nl_family __ro_after_init = {
828 .hdrsize = 0,
829 .name = FOU_GENL_NAME,
830 .version = FOU_GENL_VERSION,
831 .maxattr = FOU_ATTR_MAX,
832 .netnsok = true,
833 .module = THIS_MODULE,
834 .ops = fou_nl_ops,
835 .n_ops = ARRAY_SIZE(fou_nl_ops),
836};
837
834size_t fou_encap_hlen(struct ip_tunnel_encap *e) 838size_t fou_encap_hlen(struct ip_tunnel_encap *e)
835{ 839{
836 return sizeof(struct udphdr); 840 return sizeof(struct udphdr);
@@ -1086,8 +1090,7 @@ static int __init fou_init(void)
1086 if (ret) 1090 if (ret)
1087 goto exit; 1091 goto exit;
1088 1092
1089 ret = genl_register_family_with_ops(&fou_nl_family, 1093 ret = genl_register_family(&fou_nl_family);
1090 fou_nl_ops);
1091 if (ret < 0) 1094 if (ret < 0)
1092 goto unregister; 1095 goto unregister;
1093 1096
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 48734ee6293f..0777ea949223 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -91,7 +91,7 @@
91#include <linux/errno.h> 91#include <linux/errno.h>
92#include <linux/timer.h> 92#include <linux/timer.h>
93#include <linux/init.h> 93#include <linux/init.h>
94#include <asm/uaccess.h> 94#include <linux/uaccess.h>
95#include <net/checksum.h> 95#include <net/checksum.h>
96#include <net/xfrm.h> 96#include <net/xfrm.h>
97#include <net/inet_common.h> 97#include <net/inet_common.h>
@@ -425,6 +425,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
425 fl4.daddr = daddr; 425 fl4.daddr = daddr;
426 fl4.saddr = saddr; 426 fl4.saddr = saddr;
427 fl4.flowi4_mark = mark; 427 fl4.flowi4_mark = mark;
428 fl4.flowi4_uid = sock_net_uid(net, NULL);
428 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); 429 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
429 fl4.flowi4_proto = IPPROTO_ICMP; 430 fl4.flowi4_proto = IPPROTO_ICMP;
430 fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); 431 fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
@@ -473,6 +474,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
473 param->replyopts.opt.opt.faddr : iph->saddr); 474 param->replyopts.opt.opt.faddr : iph->saddr);
474 fl4->saddr = saddr; 475 fl4->saddr = saddr;
475 fl4->flowi4_mark = mark; 476 fl4->flowi4_mark = mark;
477 fl4->flowi4_uid = sock_net_uid(net, NULL);
476 fl4->flowi4_tos = RT_TOS(tos); 478 fl4->flowi4_tos = RT_TOS(tos);
477 fl4->flowi4_proto = IPPROTO_ICMP; 479 fl4->flowi4_proto = IPPROTO_ICMP;
478 fl4->fl4_icmp_type = type; 480 fl4->fl4_icmp_type = type;
@@ -1045,12 +1047,12 @@ int icmp_rcv(struct sk_buff *skb)
1045 1047
1046 if (success) { 1048 if (success) {
1047 consume_skb(skb); 1049 consume_skb(skb);
1048 return 0; 1050 return NET_RX_SUCCESS;
1049 } 1051 }
1050 1052
1051drop: 1053drop:
1052 kfree_skb(skb); 1054 kfree_skb(skb);
1053 return 0; 1055 return NET_RX_DROP;
1054csum_error: 1056csum_error:
1055 __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS); 1057 __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
1056error: 1058error:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 15db786d50ed..68d622133f53 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -72,7 +72,7 @@
72 72
73#include <linux/module.h> 73#include <linux/module.h>
74#include <linux/slab.h> 74#include <linux/slab.h>
75#include <asm/uaccess.h> 75#include <linux/uaccess.h>
76#include <linux/types.h> 76#include <linux/types.h>
77#include <linux/kernel.h> 77#include <linux/kernel.h>
78#include <linux/jiffies.h> 78#include <linux/jiffies.h>
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 61a9deec2993..19ea045c50ed 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -45,11 +45,12 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
45EXPORT_SYMBOL(inet_get_local_port_range); 45EXPORT_SYMBOL(inet_get_local_port_range);
46 46
47int inet_csk_bind_conflict(const struct sock *sk, 47int inet_csk_bind_conflict(const struct sock *sk,
48 const struct inet_bind_bucket *tb, bool relax) 48 const struct inet_bind_bucket *tb, bool relax,
49 bool reuseport_ok)
49{ 50{
50 struct sock *sk2; 51 struct sock *sk2;
51 int reuse = sk->sk_reuse; 52 bool reuse = sk->sk_reuse;
52 int reuseport = sk->sk_reuseport; 53 bool reuseport = !!sk->sk_reuseport && reuseport_ok;
53 kuid_t uid = sock_i_uid((struct sock *)sk); 54 kuid_t uid = sock_i_uid((struct sock *)sk);
54 55
55 /* 56 /*
@@ -105,6 +106,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
105 struct inet_bind_bucket *tb; 106 struct inet_bind_bucket *tb;
106 kuid_t uid = sock_i_uid(sk); 107 kuid_t uid = sock_i_uid(sk);
107 u32 remaining, offset; 108 u32 remaining, offset;
109 bool reuseport_ok = !!snum;
108 110
109 if (port) { 111 if (port) {
110have_port: 112have_port:
@@ -165,7 +167,8 @@ other_parity_scan:
165 smallest_size = tb->num_owners; 167 smallest_size = tb->num_owners;
166 smallest_port = port; 168 smallest_port = port;
167 } 169 }
168 if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) 170 if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false,
171 reuseport_ok))
169 goto tb_found; 172 goto tb_found;
170 goto next_port; 173 goto next_port;
171 } 174 }
@@ -206,13 +209,14 @@ tb_found:
206 sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && 209 sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
207 smallest_size == -1) 210 smallest_size == -1)
208 goto success; 211 goto success;
209 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { 212 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true,
213 reuseport_ok)) {
210 if ((reuse || 214 if ((reuse ||
211 (tb->fastreuseport > 0 && 215 (tb->fastreuseport > 0 &&
212 sk->sk_reuseport && 216 sk->sk_reuseport &&
213 !rcu_access_pointer(sk->sk_reuseport_cb) && 217 !rcu_access_pointer(sk->sk_reuseport_cb) &&
214 uid_eq(tb->fastuid, uid))) && 218 uid_eq(tb->fastuid, uid))) &&
215 smallest_size != -1 && --attempts >= 0) { 219 !snum && smallest_size != -1 && --attempts >= 0) {
216 spin_unlock_bh(&head->lock); 220 spin_unlock_bh(&head->lock);
217 goto again; 221 goto again;
218 } 222 }
@@ -415,7 +419,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
415 sk->sk_protocol, inet_sk_flowi_flags(sk), 419 sk->sk_protocol, inet_sk_flowi_flags(sk),
416 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, 420 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
417 ireq->ir_loc_addr, ireq->ir_rmt_port, 421 ireq->ir_loc_addr, ireq->ir_rmt_port,
418 htons(ireq->ir_num)); 422 htons(ireq->ir_num), sk->sk_uid);
419 security_req_classify_flow(req, flowi4_to_flowi(fl4)); 423 security_req_classify_flow(req, flowi4_to_flowi(fl4));
420 rt = ip_route_output_flow(net, fl4, sk); 424 rt = ip_route_output_flow(net, fl4, sk);
421 if (IS_ERR(rt)) 425 if (IS_ERR(rt))
@@ -452,7 +456,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
452 sk->sk_protocol, inet_sk_flowi_flags(sk), 456 sk->sk_protocol, inet_sk_flowi_flags(sk),
453 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, 457 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
454 ireq->ir_loc_addr, ireq->ir_rmt_port, 458 ireq->ir_loc_addr, ireq->ir_rmt_port,
455 htons(ireq->ir_num)); 459 htons(ireq->ir_num), sk->sk_uid);
456 security_req_classify_flow(req, flowi4_to_flowi(fl4)); 460 security_req_classify_flow(req, flowi4_to_flowi(fl4));
457 rt = ip_route_output_flow(net, fl4, sk); 461 rt = ip_route_output_flow(net, fl4, sk);
458 if (IS_ERR(rt)) 462 if (IS_ERR(rt))
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e4d16fc5bbb3..4dea33e5f295 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -200,6 +200,15 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
200 if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) 200 if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
201 goto errout; 201 goto errout;
202 202
203 /*
204 * RAW sockets might have user-defined protocols assigned,
205 * so report the one supplied on socket creation.
206 */
207 if (sk->sk_type == SOCK_RAW) {
208 if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))
209 goto errout;
210 }
211
203 if (!icsk) { 212 if (!icsk) {
204 handler->idiag_get_info(sk, r, NULL); 213 handler->idiag_get_info(sk, r, NULL);
205 goto out; 214 goto out;
@@ -852,10 +861,11 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
852 struct netlink_callback *cb, 861 struct netlink_callback *cb,
853 const struct inet_diag_req_v2 *r, struct nlattr *bc) 862 const struct inet_diag_req_v2 *r, struct nlattr *bc)
854{ 863{
864 bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
855 struct net *net = sock_net(skb->sk); 865 struct net *net = sock_net(skb->sk);
856 int i, num, s_i, s_num;
857 u32 idiag_states = r->idiag_states; 866 u32 idiag_states = r->idiag_states;
858 bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); 867 int i, num, s_i, s_num;
868 struct sock *sk;
859 869
860 if (idiag_states & TCPF_SYN_RECV) 870 if (idiag_states & TCPF_SYN_RECV)
861 idiag_states |= TCPF_NEW_SYN_RECV; 871 idiag_states |= TCPF_NEW_SYN_RECV;
@@ -863,16 +873,15 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
863 s_num = num = cb->args[2]; 873 s_num = num = cb->args[2];
864 874
865 if (cb->args[0] == 0) { 875 if (cb->args[0] == 0) {
866 if (!(idiag_states & TCPF_LISTEN)) 876 if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
867 goto skip_listen_ht; 877 goto skip_listen_ht;
868 878
869 for (i = s_i; i < INET_LHTABLE_SIZE; i++) { 879 for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
870 struct inet_listen_hashbucket *ilb; 880 struct inet_listen_hashbucket *ilb;
871 struct sock *sk;
872 881
873 num = 0; 882 num = 0;
874 ilb = &hashinfo->listening_hash[i]; 883 ilb = &hashinfo->listening_hash[i];
875 spin_lock_bh(&ilb->lock); 884 spin_lock(&ilb->lock);
876 sk_for_each(sk, &ilb->head) { 885 sk_for_each(sk, &ilb->head) {
877 struct inet_sock *inet = inet_sk(sk); 886 struct inet_sock *inet = inet_sk(sk);
878 887
@@ -892,26 +901,18 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
892 r->id.idiag_sport) 901 r->id.idiag_sport)
893 goto next_listen; 902 goto next_listen;
894 903
895 if (r->id.idiag_dport ||
896 cb->args[3] > 0)
897 goto next_listen;
898
899 if (inet_csk_diag_dump(sk, skb, cb, r, 904 if (inet_csk_diag_dump(sk, skb, cb, r,
900 bc, net_admin) < 0) { 905 bc, net_admin) < 0) {
901 spin_unlock_bh(&ilb->lock); 906 spin_unlock(&ilb->lock);
902 goto done; 907 goto done;
903 } 908 }
904 909
905next_listen: 910next_listen:
906 cb->args[3] = 0;
907 cb->args[4] = 0;
908 ++num; 911 ++num;
909 } 912 }
910 spin_unlock_bh(&ilb->lock); 913 spin_unlock(&ilb->lock);
911 914
912 s_num = 0; 915 s_num = 0;
913 cb->args[3] = 0;
914 cb->args[4] = 0;
915 } 916 }
916skip_listen_ht: 917skip_listen_ht:
917 cb->args[0] = 1; 918 cb->args[0] = 1;
@@ -921,13 +922,14 @@ skip_listen_ht:
921 if (!(idiag_states & ~TCPF_LISTEN)) 922 if (!(idiag_states & ~TCPF_LISTEN))
922 goto out; 923 goto out;
923 924
925#define SKARR_SZ 16
924 for (i = s_i; i <= hashinfo->ehash_mask; i++) { 926 for (i = s_i; i <= hashinfo->ehash_mask; i++) {
925 struct inet_ehash_bucket *head = &hashinfo->ehash[i]; 927 struct inet_ehash_bucket *head = &hashinfo->ehash[i];
926 spinlock_t *lock = inet_ehash_lockp(hashinfo, i); 928 spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
927 struct hlist_nulls_node *node; 929 struct hlist_nulls_node *node;
928 struct sock *sk; 930 struct sock *sk_arr[SKARR_SZ];
929 931 int num_arr[SKARR_SZ];
930 num = 0; 932 int idx, accum, res;
931 933
932 if (hlist_nulls_empty(&head->chain)) 934 if (hlist_nulls_empty(&head->chain))
933 continue; 935 continue;
@@ -935,9 +937,12 @@ skip_listen_ht:
935 if (i > s_i) 937 if (i > s_i)
936 s_num = 0; 938 s_num = 0;
937 939
940next_chunk:
941 num = 0;
942 accum = 0;
938 spin_lock_bh(lock); 943 spin_lock_bh(lock);
939 sk_nulls_for_each(sk, node, &head->chain) { 944 sk_nulls_for_each(sk, node, &head->chain) {
940 int state, res; 945 int state;
941 946
942 if (!net_eq(sock_net(sk), net)) 947 if (!net_eq(sock_net(sk), net))
943 continue; 948 continue;
@@ -961,21 +966,35 @@ skip_listen_ht:
961 if (!inet_diag_bc_sk(bc, sk)) 966 if (!inet_diag_bc_sk(bc, sk))
962 goto next_normal; 967 goto next_normal;
963 968
964 res = sk_diag_fill(sk, skb, r, 969 sock_hold(sk);
970 num_arr[accum] = num;
971 sk_arr[accum] = sk;
972 if (++accum == SKARR_SZ)
973 break;
974next_normal:
975 ++num;
976 }
977 spin_unlock_bh(lock);
978 res = 0;
979 for (idx = 0; idx < accum; idx++) {
980 if (res >= 0) {
981 res = sk_diag_fill(sk_arr[idx], skb, r,
965 sk_user_ns(NETLINK_CB(cb->skb).sk), 982 sk_user_ns(NETLINK_CB(cb->skb).sk),
966 NETLINK_CB(cb->skb).portid, 983 NETLINK_CB(cb->skb).portid,
967 cb->nlh->nlmsg_seq, NLM_F_MULTI, 984 cb->nlh->nlmsg_seq, NLM_F_MULTI,
968 cb->nlh, net_admin); 985 cb->nlh, net_admin);
969 if (res < 0) { 986 if (res < 0)
970 spin_unlock_bh(lock); 987 num = num_arr[idx];
971 goto done;
972 } 988 }
973next_normal: 989 sock_gen_put(sk_arr[idx]);
974 ++num;
975 } 990 }
976 991 if (res < 0)
977 spin_unlock_bh(lock); 992 break;
978 cond_resched(); 993 cond_resched();
994 if (accum == SKARR_SZ) {
995 s_num = num + 1;
996 goto next_chunk;
997 }
979 } 998 }
980 999
981done: 1000done:
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 576f705d8180..c9c1cb635d9a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -17,7 +17,7 @@
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/kernel.h> 18#include <linux/kernel.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <asm/uaccess.h> 20#include <linux/uaccess.h>
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/netdevice.h> 22#include <linux/netdevice.h>
23#include <linux/in.h> 23#include <linux/in.h>
@@ -113,8 +113,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
113static struct rtnl_link_ops ipgre_link_ops __read_mostly; 113static struct rtnl_link_ops ipgre_link_ops __read_mostly;
114static int ipgre_tunnel_init(struct net_device *dev); 114static int ipgre_tunnel_init(struct net_device *dev);
115 115
116static int ipgre_net_id __read_mostly; 116static unsigned int ipgre_net_id __read_mostly;
117static int gre_tap_net_id __read_mostly; 117static unsigned int gre_tap_net_id __read_mostly;
118 118
119static void ipgre_err(struct sk_buff *skb, u32 info, 119static void ipgre_err(struct sk_buff *skb, u32 info,
120 const struct tnl_ptk_info *tpi) 120 const struct tnl_ptk_info *tpi)
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 4d158ff1def1..93157f2f4758 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -15,7 +15,7 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/types.h> 17#include <linux/types.h>
18#include <asm/uaccess.h> 18#include <linux/uaccess.h>
19#include <asm/unaligned.h> 19#include <asm/unaligned.h>
20#include <linux/skbuff.h> 20#include <linux/skbuff.h>
21#include <linux/ip.h> 21#include <linux/ip.h>
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 877bdb02e887..fac275c48108 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -42,7 +42,7 @@
42 * Hirokazu Takahashi: sendfile() on UDP works now. 42 * Hirokazu Takahashi: sendfile() on UDP works now.
43 */ 43 */
44 44
45#include <asm/uaccess.h> 45#include <linux/uaccess.h>
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/types.h> 47#include <linux/types.h>
48#include <linux/kernel.h> 48#include <linux/kernel.h>
@@ -74,6 +74,7 @@
74#include <net/checksum.h> 74#include <net/checksum.h>
75#include <net/inetpeer.h> 75#include <net/inetpeer.h>
76#include <net/lwtunnel.h> 76#include <net/lwtunnel.h>
77#include <linux/bpf-cgroup.h>
77#include <linux/igmp.h> 78#include <linux/igmp.h>
78#include <linux/netfilter_ipv4.h> 79#include <linux/netfilter_ipv4.h>
79#include <linux/netfilter_bridge.h> 80#include <linux/netfilter_bridge.h>
@@ -287,6 +288,13 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
287static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 288static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
288{ 289{
289 unsigned int mtu; 290 unsigned int mtu;
291 int ret;
292
293 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
294 if (ret) {
295 kfree_skb(skb);
296 return ret;
297 }
290 298
291#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 299#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
292 /* Policy lookup after SNAT yielded a new policy */ 300 /* Policy lookup after SNAT yielded a new policy */
@@ -305,6 +313,20 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
305 return ip_finish_output2(net, sk, skb); 313 return ip_finish_output2(net, sk, skb);
306} 314}
307 315
316static int ip_mc_finish_output(struct net *net, struct sock *sk,
317 struct sk_buff *skb)
318{
319 int ret;
320
321 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
322 if (ret) {
323 kfree_skb(skb);
324 return ret;
325 }
326
327 return dev_loopback_xmit(net, sk, skb);
328}
329
308int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) 330int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
309{ 331{
310 struct rtable *rt = skb_rtable(skb); 332 struct rtable *rt = skb_rtable(skb);
@@ -342,7 +364,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
342 if (newskb) 364 if (newskb)
343 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, 365 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
344 net, sk, newskb, NULL, newskb->dev, 366 net, sk, newskb, NULL, newskb->dev,
345 dev_loopback_xmit); 367 ip_mc_finish_output);
346 } 368 }
347 369
348 /* Multicasts with ttl 0 must not go beyond the host */ 370 /* Multicasts with ttl 0 must not go beyond the host */
@@ -358,7 +380,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
358 if (newskb) 380 if (newskb)
359 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, 381 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
360 net, sk, newskb, NULL, newskb->dev, 382 net, sk, newskb, NULL, newskb->dev,
361 dev_loopback_xmit); 383 ip_mc_finish_output);
362 } 384 }
363 385
364 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, 386 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
@@ -583,7 +605,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
583 */ 605 */
584 if (skb_has_frag_list(skb)) { 606 if (skb_has_frag_list(skb)) {
585 struct sk_buff *frag, *frag2; 607 struct sk_buff *frag, *frag2;
586 int first_len = skb_pagelen(skb); 608 unsigned int first_len = skb_pagelen(skb);
587 609
588 if (first_len - hlen > mtu || 610 if (first_len - hlen > mtu ||
589 ((first_len - hlen) & 7) || 611 ((first_len - hlen) & 7) ||
@@ -804,11 +826,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
804 struct msghdr *msg = from; 826 struct msghdr *msg = from;
805 827
806 if (skb->ip_summed == CHECKSUM_PARTIAL) { 828 if (skb->ip_summed == CHECKSUM_PARTIAL) {
807 if (copy_from_iter(to, len, &msg->msg_iter) != len) 829 if (!copy_from_iter_full(to, len, &msg->msg_iter))
808 return -EFAULT; 830 return -EFAULT;
809 } else { 831 } else {
810 __wsum csum = 0; 832 __wsum csum = 0;
811 if (csum_and_copy_from_iter(to, len, &csum, &msg->msg_iter) != len) 833 if (!csum_and_copy_from_iter_full(to, len, &csum, &msg->msg_iter))
812 return -EFAULT; 834 return -EFAULT;
813 skb->csum = csum_block_add(skb->csum, csum, odd); 835 skb->csum = csum_block_add(skb->csum, csum, odd);
814 } 836 }
@@ -936,7 +958,7 @@ static int __ip_append_data(struct sock *sk,
936 csummode = CHECKSUM_PARTIAL; 958 csummode = CHECKSUM_PARTIAL;
937 959
938 cork->length += length; 960 cork->length += length;
939 if (((length > mtu) || (skb && skb_is_gso(skb))) && 961 if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
940 (sk->sk_protocol == IPPROTO_UDP) && 962 (sk->sk_protocol == IPPROTO_UDP) &&
941 (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && 963 (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
942 (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { 964 (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
@@ -1594,7 +1616,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
1594 RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, 1616 RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
1595 ip_reply_arg_flowi_flags(arg), 1617 ip_reply_arg_flowi_flags(arg),
1596 daddr, saddr, 1618 daddr, saddr,
1597 tcp_hdr(skb)->source, tcp_hdr(skb)->dest); 1619 tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
1620 arg->uid);
1598 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); 1621 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1599 rt = ip_route_output_key(net, &fl4); 1622 rt = ip_route_output_key(net, &fl4);
1600 if (IS_ERR(rt)) 1623 if (IS_ERR(rt))
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b8a2d63d1fb8..57e1405e8282 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -44,7 +44,7 @@
44#include <net/ip_fib.h> 44#include <net/ip_fib.h>
45 45
46#include <linux/errqueue.h> 46#include <linux/errqueue.h>
47#include <asm/uaccess.h> 47#include <linux/uaccess.h>
48 48
49/* 49/*
50 * SOL_IP control messages. 50 * SOL_IP control messages.
@@ -97,6 +97,17 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
97 put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); 97 put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
98} 98}
99 99
100static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
101{
102 int val;
103
104 if (IPCB(skb)->frag_max_size == 0)
105 return;
106
107 val = IPCB(skb)->frag_max_size;
108 put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
109}
110
100static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, 111static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
101 int tlen, int offset) 112 int tlen, int offset)
102{ 113{
@@ -137,7 +148,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
137 const struct iphdr *iph = ip_hdr(skb); 148 const struct iphdr *iph = ip_hdr(skb);
138 __be16 *ports = (__be16 *)skb_transport_header(skb); 149 __be16 *ports = (__be16 *)skb_transport_header(skb);
139 150
140 if (skb_transport_offset(skb) + 4 > skb->len) 151 if (skb_transport_offset(skb) + 4 > (int)skb->len)
141 return; 152 return;
142 153
143 /* All current transport protocols have the port numbers in the 154 /* All current transport protocols have the port numbers in the
@@ -153,10 +164,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
153 put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); 164 put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
154} 165}
155 166
156void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, 167void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
157 int tlen, int offset) 168 struct sk_buff *skb, int tlen, int offset)
158{ 169{
159 struct inet_sock *inet = inet_sk(skb->sk); 170 struct inet_sock *inet = inet_sk(sk);
160 unsigned int flags = inet->cmsg_flags; 171 unsigned int flags = inet->cmsg_flags;
161 172
162 /* Ordered by supposed usage frequency */ 173 /* Ordered by supposed usage frequency */
@@ -218,6 +229,9 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,
218 229
219 if (flags & IP_CMSG_CHECKSUM) 230 if (flags & IP_CMSG_CHECKSUM)
220 ip_cmsg_recv_checksum(msg, skb, tlen, offset); 231 ip_cmsg_recv_checksum(msg, skb, tlen, offset);
232
233 if (flags & IP_CMSG_RECVFRAGSIZE)
234 ip_cmsg_recv_fragsize(msg, skb);
221} 235}
222EXPORT_SYMBOL(ip_cmsg_recv_offset); 236EXPORT_SYMBOL(ip_cmsg_recv_offset);
223 237
@@ -614,6 +628,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
614 case IP_MULTICAST_LOOP: 628 case IP_MULTICAST_LOOP:
615 case IP_RECVORIGDSTADDR: 629 case IP_RECVORIGDSTADDR:
616 case IP_CHECKSUM: 630 case IP_CHECKSUM:
631 case IP_RECVFRAGSIZE:
617 if (optlen >= sizeof(int)) { 632 if (optlen >= sizeof(int)) {
618 if (get_user(val, (int __user *) optval)) 633 if (get_user(val, (int __user *) optval))
619 return -EFAULT; 634 return -EFAULT;
@@ -726,6 +741,14 @@ static int do_ip_setsockopt(struct sock *sk, int level,
726 } 741 }
727 } 742 }
728 break; 743 break;
744 case IP_RECVFRAGSIZE:
745 if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
746 goto e_inval;
747 if (val)
748 inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
749 else
750 inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
751 break;
729 case IP_TOS: /* This sets both TOS and Precedence */ 752 case IP_TOS: /* This sets both TOS and Precedence */
730 if (sk->sk_type == SOCK_STREAM) { 753 if (sk->sk_type == SOCK_STREAM) {
731 val &= ~INET_ECN_MASK; 754 val &= ~INET_ECN_MASK;
@@ -1357,6 +1380,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1357 case IP_CHECKSUM: 1380 case IP_CHECKSUM:
1358 val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; 1381 val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
1359 break; 1382 break;
1383 case IP_RECVFRAGSIZE:
1384 val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
1385 break;
1360 case IP_TOS: 1386 case IP_TOS:
1361 val = inet->tos; 1387 val = inet->tos;
1362 break; 1388 break;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5719d6ba0824..823abaef006b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -358,6 +358,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
358{ 358{
359 struct ip_tunnel *nt; 359 struct ip_tunnel *nt;
360 struct net_device *dev; 360 struct net_device *dev;
361 int t_hlen;
361 362
362 BUG_ON(!itn->fb_tunnel_dev); 363 BUG_ON(!itn->fb_tunnel_dev);
363 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); 364 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
@@ -367,6 +368,9 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
367 dev->mtu = ip_tunnel_bind_dev(dev); 368 dev->mtu = ip_tunnel_bind_dev(dev);
368 369
369 nt = netdev_priv(dev); 370 nt = netdev_priv(dev);
371 t_hlen = nt->hlen + sizeof(struct iphdr);
372 dev->min_mtu = ETH_MIN_MTU;
373 dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
370 ip_tunnel_add(itn, nt); 374 ip_tunnel_add(itn, nt);
371 return nt; 375 return nt;
372} 376}
@@ -929,7 +933,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
929 int t_hlen = tunnel->hlen + sizeof(struct iphdr); 933 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
930 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; 934 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
931 935
932 if (new_mtu < 68) 936 if (new_mtu < ETH_MIN_MTU)
933 return -EINVAL; 937 return -EINVAL;
934 938
935 if (new_mtu > max_mtu) { 939 if (new_mtu > max_mtu) {
@@ -990,7 +994,7 @@ int ip_tunnel_get_iflink(const struct net_device *dev)
990} 994}
991EXPORT_SYMBOL(ip_tunnel_get_iflink); 995EXPORT_SYMBOL(ip_tunnel_get_iflink);
992 996
993int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, 997int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
994 struct rtnl_link_ops *ops, char *devname) 998 struct rtnl_link_ops *ops, char *devname)
995{ 999{
996 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); 1000 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
@@ -1192,7 +1196,7 @@ void ip_tunnel_uninit(struct net_device *dev)
1192EXPORT_SYMBOL_GPL(ip_tunnel_uninit); 1196EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1193 1197
1194/* Do least required initialization, rest of init is done in tunnel_init call */ 1198/* Do least required initialization, rest of init is done in tunnel_init call */
1195void ip_tunnel_setup(struct net_device *dev, int net_id) 1199void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1196{ 1200{
1197 struct ip_tunnel *tunnel = netdev_priv(dev); 1201 struct ip_tunnel *tunnel = netdev_priv(dev);
1198 tunnel->ip_tnl_net_id = net_id; 1202 tunnel->ip_tnl_net_id = net_id;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 5d7944f394d9..8b14f1404c8f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -46,7 +46,7 @@
46 46
47static struct rtnl_link_ops vti_link_ops __read_mostly; 47static struct rtnl_link_ops vti_link_ops __read_mostly;
48 48
49static int vti_net_id __read_mostly; 49static unsigned int vti_net_id __read_mostly;
50static int vti_tunnel_init(struct net_device *dev); 50static int vti_tunnel_init(struct net_device *dev);
51 51
52static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, 52static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 071a785c65eb..fd9f34bbd740 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -61,7 +61,7 @@
61#include <net/ipconfig.h> 61#include <net/ipconfig.h>
62#include <net/route.h> 62#include <net/route.h>
63 63
64#include <asm/uaccess.h> 64#include <linux/uaccess.h>
65#include <net/checksum.h> 65#include <net/checksum.h>
66#include <asm/processor.h> 66#include <asm/processor.h>
67 67
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c9392589c415..00d4229b6954 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -96,7 +96,7 @@
96#include <linux/types.h> 96#include <linux/types.h>
97#include <linux/kernel.h> 97#include <linux/kernel.h>
98#include <linux/slab.h> 98#include <linux/slab.h>
99#include <asm/uaccess.h> 99#include <linux/uaccess.h>
100#include <linux/skbuff.h> 100#include <linux/skbuff.h>
101#include <linux/netdevice.h> 101#include <linux/netdevice.h>
102#include <linux/in.h> 102#include <linux/in.h>
@@ -121,7 +121,7 @@ static bool log_ecn_error = true;
121module_param(log_ecn_error, bool, 0644); 121module_param(log_ecn_error, bool, 0644);
122MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 122MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
123 123
124static int ipip_net_id __read_mostly; 124static unsigned int ipip_net_id __read_mostly;
125 125
126static int ipip_tunnel_init(struct net_device *dev); 126static int ipip_tunnel_init(struct net_device *dev);
127static struct rtnl_link_ops ipip_link_ops __read_mostly; 127static struct rtnl_link_ops ipip_link_ops __read_mostly;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 27089f5ebbb1..efc1e76d4977 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -26,7 +26,7 @@
26 * 26 *
27 */ 27 */
28 28
29#include <asm/uaccess.h> 29#include <linux/uaccess.h>
30#include <linux/types.h> 30#include <linux/types.h>
31#include <linux/capability.h> 31#include <linux/capability.h>
32#include <linux/errno.h> 32#include <linux/errno.h>
@@ -137,6 +137,9 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
137 .flags = FIB_LOOKUP_NOREF, 137 .flags = FIB_LOOKUP_NOREF,
138 }; 138 };
139 139
140 /* update flow if oif or iif point to device enslaved to l3mdev */
141 l3mdev_update_flow(net, flowi4_to_flowi(flp4));
142
140 err = fib_rules_lookup(net->ipv4.mr_rules_ops, 143 err = fib_rules_lookup(net->ipv4.mr_rules_ops,
141 flowi4_to_flowi(flp4), 0, &arg); 144 flowi4_to_flowi(flp4), 0, &arg);
142 if (err < 0) 145 if (err < 0)
@@ -163,7 +166,9 @@ static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
163 return -EINVAL; 166 return -EINVAL;
164 } 167 }
165 168
166 mrt = ipmr_get_table(rule->fr_net, rule->table); 169 arg->table = fib_rule_get_table(rule, arg);
170
171 mrt = ipmr_get_table(rule->fr_net, arg->table);
167 if (!mrt) 172 if (!mrt)
168 return -EAGAIN; 173 return -EAGAIN;
169 res->mrt = mrt; 174 res->mrt = mrt;
@@ -1809,6 +1814,12 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
1809 1814
1810 /* Wrong interface: drop packet and (maybe) send PIM assert. */ 1815 /* Wrong interface: drop packet and (maybe) send PIM assert. */
1811 if (mrt->vif_table[vif].dev != skb->dev) { 1816 if (mrt->vif_table[vif].dev != skb->dev) {
1817 struct net_device *mdev;
1818
1819 mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
1820 if (mdev == skb->dev)
1821 goto forward;
1822
1812 if (rt_is_output_route(skb_rtable(skb))) { 1823 if (rt_is_output_route(skb_rtable(skb))) {
1813 /* It is our own packet, looped back. 1824 /* It is our own packet, looped back.
1814 * Very complicated situation... 1825 * Very complicated situation...
@@ -2053,7 +2064,7 @@ static int pim_rcv(struct sk_buff *skb)
2053 goto drop; 2064 goto drop;
2054 2065
2055 pim = (struct pimreghdr *)skb_transport_header(skb); 2066 pim = (struct pimreghdr *)skb_transport_header(skb);
2056 if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) || 2067 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) ||
2057 (pim->flags & PIM_NULL_REGISTER) || 2068 (pim->flags & PIM_NULL_REGISTER) ||
2058 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 2069 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
2059 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 2070 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index d613309e3e5d..c11eb1744ab1 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV4
25 25
26 To compile it as a module, choose M here. If unsure, say N. 26 To compile it as a module, choose M here. If unsure, say N.
27 27
28config NF_SOCKET_IPV4
29 tristate "IPv4 socket lookup support"
30 help
31 This option enables the IPv4 socket lookup infrastructure. This is
32 is required by the iptables socket match.
33
28if NF_TABLES 34if NF_TABLES
29 35
30config NF_TABLES_IPV4 36config NF_TABLES_IPV4
@@ -54,6 +60,14 @@ config NFT_DUP_IPV4
54 help 60 help
55 This module enables IPv4 packet duplication support for nf_tables. 61 This module enables IPv4 packet duplication support for nf_tables.
56 62
63config NFT_FIB_IPV4
64 select NFT_FIB
65 tristate "nf_tables fib / ip route lookup support"
66 help
67 This module enables IPv4 FIB lookups, e.g. for reverse path filtering.
68 It also allows query of the FIB for the route type, e.g. local, unicast,
69 multicast or blackhole.
70
57endif # NF_TABLES_IPV4 71endif # NF_TABLES_IPV4
58 72
59config NF_TABLES_ARP 73config NF_TABLES_ARP
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 853328f8fd05..f462fee66ac8 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -14,6 +14,8 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
14# defrag 14# defrag
15obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o 15obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
16 16
17obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o
18
17# logging 19# logging
18obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o 20obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
19obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o 21obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o
@@ -34,6 +36,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
34obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o 36obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
35obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o 37obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
36obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o 38obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
39obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
37obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o 40obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
38obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o 41obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
39obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o 42obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 697538464e6e..a467e1236c43 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -24,7 +24,7 @@
24#include <linux/err.h> 24#include <linux/err.h>
25#include <net/compat.h> 25#include <net/compat.h>
26#include <net/sock.h> 26#include <net/sock.h>
27#include <asm/uaccess.h> 27#include <linux/uaccess.h>
28 28
29#include <linux/netfilter/x_tables.h> 29#include <linux/netfilter/x_tables.h>
30#include <linux/netfilter_arp/arp_tables.h> 30#include <linux/netfilter_arp/arp_tables.h>
@@ -217,11 +217,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
217 */ 217 */
218 e = get_entry(table_base, private->hook_entry[hook]); 218 e = get_entry(table_base, private->hook_entry[hook]);
219 219
220 acpar.net = state->net; 220 acpar.state = state;
221 acpar.in = state->in;
222 acpar.out = state->out;
223 acpar.hooknum = hook;
224 acpar.family = NFPROTO_ARP;
225 acpar.hotdrop = false; 221 acpar.hotdrop = false;
226 222
227 arp = arp_hdr(skb); 223 arp = arp_hdr(skb);
@@ -415,17 +411,15 @@ static inline int check_target(struct arpt_entry *e, const char *name)
415} 411}
416 412
417static inline int 413static inline int
418find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) 414find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
415 struct xt_percpu_counter_alloc_state *alloc_state)
419{ 416{
420 struct xt_entry_target *t; 417 struct xt_entry_target *t;
421 struct xt_target *target; 418 struct xt_target *target;
422 unsigned long pcnt;
423 int ret; 419 int ret;
424 420
425 pcnt = xt_percpu_counter_alloc(); 421 if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
426 if (IS_ERR_VALUE(pcnt))
427 return -ENOMEM; 422 return -ENOMEM;
428 e->counters.pcnt = pcnt;
429 423
430 t = arpt_get_target(e); 424 t = arpt_get_target(e);
431 target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, 425 target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
@@ -443,7 +437,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
443err: 437err:
444 module_put(t->u.kernel.target->me); 438 module_put(t->u.kernel.target->me);
445out: 439out:
446 xt_percpu_counter_free(e->counters.pcnt); 440 xt_percpu_counter_free(&e->counters);
447 441
448 return ret; 442 return ret;
449} 443}
@@ -523,7 +517,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
523 if (par.target->destroy != NULL) 517 if (par.target->destroy != NULL)
524 par.target->destroy(&par); 518 par.target->destroy(&par);
525 module_put(par.target->me); 519 module_put(par.target->me);
526 xt_percpu_counter_free(e->counters.pcnt); 520 xt_percpu_counter_free(&e->counters);
527} 521}
528 522
529/* Checks and translates the user-supplied table segment (held in 523/* Checks and translates the user-supplied table segment (held in
@@ -532,6 +526,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
532static int translate_table(struct xt_table_info *newinfo, void *entry0, 526static int translate_table(struct xt_table_info *newinfo, void *entry0,
533 const struct arpt_replace *repl) 527 const struct arpt_replace *repl)
534{ 528{
529 struct xt_percpu_counter_alloc_state alloc_state = { 0 };
535 struct arpt_entry *iter; 530 struct arpt_entry *iter;
536 unsigned int *offsets; 531 unsigned int *offsets;
537 unsigned int i; 532 unsigned int i;
@@ -594,7 +589,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
594 /* Finally, each sanity check must pass */ 589 /* Finally, each sanity check must pass */
595 i = 0; 590 i = 0;
596 xt_entry_foreach(iter, entry0, newinfo->size) { 591 xt_entry_foreach(iter, entry0, newinfo->size) {
597 ret = find_check_entry(iter, repl->name, repl->size); 592 ret = find_check_entry(iter, repl->name, repl->size,
593 &alloc_state);
598 if (ret != 0) 594 if (ret != 0)
599 break; 595 break;
600 ++i; 596 ++i;
@@ -809,7 +805,7 @@ static int get_info(struct net *net, void __user *user,
809#endif 805#endif
810 t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), 806 t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
811 "arptable_%s", name); 807 "arptable_%s", name);
812 if (!IS_ERR_OR_NULL(t)) { 808 if (t) {
813 struct arpt_getinfo info; 809 struct arpt_getinfo info;
814 const struct xt_table_info *private = t->private; 810 const struct xt_table_info *private = t->private;
815#ifdef CONFIG_COMPAT 811#ifdef CONFIG_COMPAT
@@ -838,7 +834,7 @@ static int get_info(struct net *net, void __user *user,
838 xt_table_unlock(t); 834 xt_table_unlock(t);
839 module_put(t->me); 835 module_put(t->me);
840 } else 836 } else
841 ret = t ? PTR_ERR(t) : -ENOENT; 837 ret = -ENOENT;
842#ifdef CONFIG_COMPAT 838#ifdef CONFIG_COMPAT
843 if (compat) 839 if (compat)
844 xt_compat_unlock(NFPROTO_ARP); 840 xt_compat_unlock(NFPROTO_ARP);
@@ -863,7 +859,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
863 get.name[sizeof(get.name) - 1] = '\0'; 859 get.name[sizeof(get.name) - 1] = '\0';
864 860
865 t = xt_find_table_lock(net, NFPROTO_ARP, get.name); 861 t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
866 if (!IS_ERR_OR_NULL(t)) { 862 if (t) {
867 const struct xt_table_info *private = t->private; 863 const struct xt_table_info *private = t->private;
868 864
869 if (get.size == private->size) 865 if (get.size == private->size)
@@ -875,7 +871,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
875 module_put(t->me); 871 module_put(t->me);
876 xt_table_unlock(t); 872 xt_table_unlock(t);
877 } else 873 } else
878 ret = t ? PTR_ERR(t) : -ENOENT; 874 ret = -ENOENT;
879 875
880 return ret; 876 return ret;
881} 877}
@@ -902,8 +898,8 @@ static int __do_replace(struct net *net, const char *name,
902 898
903 t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), 899 t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
904 "arptable_%s", name); 900 "arptable_%s", name);
905 if (IS_ERR_OR_NULL(t)) { 901 if (!t) {
906 ret = t ? PTR_ERR(t) : -ENOENT; 902 ret = -ENOENT;
907 goto free_newinfo_counters_untrans; 903 goto free_newinfo_counters_untrans;
908 } 904 }
909 905
@@ -1018,8 +1014,8 @@ static int do_add_counters(struct net *net, const void __user *user,
1018 return PTR_ERR(paddc); 1014 return PTR_ERR(paddc);
1019 1015
1020 t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); 1016 t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name);
1021 if (IS_ERR_OR_NULL(t)) { 1017 if (!t) {
1022 ret = t ? PTR_ERR(t) : -ENOENT; 1018 ret = -ENOENT;
1023 goto free; 1019 goto free;
1024 } 1020 }
1025 1021
@@ -1408,7 +1404,7 @@ static int compat_get_entries(struct net *net,
1408 1404
1409 xt_compat_lock(NFPROTO_ARP); 1405 xt_compat_lock(NFPROTO_ARP);
1410 t = xt_find_table_lock(net, NFPROTO_ARP, get.name); 1406 t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
1411 if (!IS_ERR_OR_NULL(t)) { 1407 if (t) {
1412 const struct xt_table_info *private = t->private; 1408 const struct xt_table_info *private = t->private;
1413 struct xt_table_info info; 1409 struct xt_table_info info;
1414 1410
@@ -1423,7 +1419,7 @@ static int compat_get_entries(struct net *net,
1423 module_put(t->me); 1419 module_put(t->me);
1424 xt_table_unlock(t); 1420 xt_table_unlock(t);
1425 } else 1421 } else
1426 ret = t ? PTR_ERR(t) : -ENOENT; 1422 ret = -ENOENT;
1427 1423
1428 xt_compat_unlock(NFPROTO_ARP); 1424 xt_compat_unlock(NFPROTO_ARP);
1429 return ret; 1425 return ret;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 7c00ce90adb8..91656a1d8fbd 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -20,7 +20,7 @@
20#include <linux/icmp.h> 20#include <linux/icmp.h>
21#include <net/ip.h> 21#include <net/ip.h>
22#include <net/compat.h> 22#include <net/compat.h>
23#include <asm/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/mutex.h> 24#include <linux/mutex.h>
25#include <linux/proc_fs.h> 25#include <linux/proc_fs.h>
26#include <linux/err.h> 26#include <linux/err.h>
@@ -261,11 +261,7 @@ ipt_do_table(struct sk_buff *skb,
261 acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; 261 acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
262 acpar.thoff = ip_hdrlen(skb); 262 acpar.thoff = ip_hdrlen(skb);
263 acpar.hotdrop = false; 263 acpar.hotdrop = false;
264 acpar.net = state->net; 264 acpar.state = state;
265 acpar.in = state->in;
266 acpar.out = state->out;
267 acpar.family = NFPROTO_IPV4;
268 acpar.hooknum = hook;
269 265
270 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 266 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
271 local_bh_disable(); 267 local_bh_disable();
@@ -535,7 +531,8 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name)
535 531
536static int 532static int
537find_check_entry(struct ipt_entry *e, struct net *net, const char *name, 533find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
538 unsigned int size) 534 unsigned int size,
535 struct xt_percpu_counter_alloc_state *alloc_state)
539{ 536{
540 struct xt_entry_target *t; 537 struct xt_entry_target *t;
541 struct xt_target *target; 538 struct xt_target *target;
@@ -543,12 +540,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
543 unsigned int j; 540 unsigned int j;
544 struct xt_mtchk_param mtpar; 541 struct xt_mtchk_param mtpar;
545 struct xt_entry_match *ematch; 542 struct xt_entry_match *ematch;
546 unsigned long pcnt;
547 543
548 pcnt = xt_percpu_counter_alloc(); 544 if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
549 if (IS_ERR_VALUE(pcnt))
550 return -ENOMEM; 545 return -ENOMEM;
551 e->counters.pcnt = pcnt;
552 546
553 j = 0; 547 j = 0;
554 mtpar.net = net; 548 mtpar.net = net;
@@ -586,7 +580,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
586 cleanup_match(ematch, net); 580 cleanup_match(ematch, net);
587 } 581 }
588 582
589 xt_percpu_counter_free(e->counters.pcnt); 583 xt_percpu_counter_free(&e->counters);
590 584
591 return ret; 585 return ret;
592} 586}
@@ -674,7 +668,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
674 if (par.target->destroy != NULL) 668 if (par.target->destroy != NULL)
675 par.target->destroy(&par); 669 par.target->destroy(&par);
676 module_put(par.target->me); 670 module_put(par.target->me);
677 xt_percpu_counter_free(e->counters.pcnt); 671 xt_percpu_counter_free(&e->counters);
678} 672}
679 673
680/* Checks and translates the user-supplied table segment (held in 674/* Checks and translates the user-supplied table segment (held in
@@ -683,6 +677,7 @@ static int
683translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, 677translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
684 const struct ipt_replace *repl) 678 const struct ipt_replace *repl)
685{ 679{
680 struct xt_percpu_counter_alloc_state alloc_state = { 0 };
686 struct ipt_entry *iter; 681 struct ipt_entry *iter;
687 unsigned int *offsets; 682 unsigned int *offsets;
688 unsigned int i; 683 unsigned int i;
@@ -742,7 +737,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
742 /* Finally, each sanity check must pass */ 737 /* Finally, each sanity check must pass */
743 i = 0; 738 i = 0;
744 xt_entry_foreach(iter, entry0, newinfo->size) { 739 xt_entry_foreach(iter, entry0, newinfo->size) {
745 ret = find_check_entry(iter, net, repl->name, repl->size); 740 ret = find_check_entry(iter, net, repl->name, repl->size,
741 &alloc_state);
746 if (ret != 0) 742 if (ret != 0)
747 break; 743 break;
748 ++i; 744 ++i;
@@ -977,7 +973,7 @@ static int get_info(struct net *net, void __user *user,
977#endif 973#endif
978 t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), 974 t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
979 "iptable_%s", name); 975 "iptable_%s", name);
980 if (!IS_ERR_OR_NULL(t)) { 976 if (t) {
981 struct ipt_getinfo info; 977 struct ipt_getinfo info;
982 const struct xt_table_info *private = t->private; 978 const struct xt_table_info *private = t->private;
983#ifdef CONFIG_COMPAT 979#ifdef CONFIG_COMPAT
@@ -1007,7 +1003,7 @@ static int get_info(struct net *net, void __user *user,
1007 xt_table_unlock(t); 1003 xt_table_unlock(t);
1008 module_put(t->me); 1004 module_put(t->me);
1009 } else 1005 } else
1010 ret = t ? PTR_ERR(t) : -ENOENT; 1006 ret = -ENOENT;
1011#ifdef CONFIG_COMPAT 1007#ifdef CONFIG_COMPAT
1012 if (compat) 1008 if (compat)
1013 xt_compat_unlock(AF_INET); 1009 xt_compat_unlock(AF_INET);
@@ -1032,7 +1028,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
1032 get.name[sizeof(get.name) - 1] = '\0'; 1028 get.name[sizeof(get.name) - 1] = '\0';
1033 1029
1034 t = xt_find_table_lock(net, AF_INET, get.name); 1030 t = xt_find_table_lock(net, AF_INET, get.name);
1035 if (!IS_ERR_OR_NULL(t)) { 1031 if (t) {
1036 const struct xt_table_info *private = t->private; 1032 const struct xt_table_info *private = t->private;
1037 if (get.size == private->size) 1033 if (get.size == private->size)
1038 ret = copy_entries_to_user(private->size, 1034 ret = copy_entries_to_user(private->size,
@@ -1043,7 +1039,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
1043 module_put(t->me); 1039 module_put(t->me);
1044 xt_table_unlock(t); 1040 xt_table_unlock(t);
1045 } else 1041 } else
1046 ret = t ? PTR_ERR(t) : -ENOENT; 1042 ret = -ENOENT;
1047 1043
1048 return ret; 1044 return ret;
1049} 1045}
@@ -1068,8 +1064,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1068 1064
1069 t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), 1065 t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1070 "iptable_%s", name); 1066 "iptable_%s", name);
1071 if (IS_ERR_OR_NULL(t)) { 1067 if (!t) {
1072 ret = t ? PTR_ERR(t) : -ENOENT; 1068 ret = -ENOENT;
1073 goto free_newinfo_counters_untrans; 1069 goto free_newinfo_counters_untrans;
1074 } 1070 }
1075 1071
@@ -1184,8 +1180,8 @@ do_add_counters(struct net *net, const void __user *user,
1184 return PTR_ERR(paddc); 1180 return PTR_ERR(paddc);
1185 1181
1186 t = xt_find_table_lock(net, AF_INET, tmp.name); 1182 t = xt_find_table_lock(net, AF_INET, tmp.name);
1187 if (IS_ERR_OR_NULL(t)) { 1183 if (!t) {
1188 ret = t ? PTR_ERR(t) : -ENOENT; 1184 ret = -ENOENT;
1189 goto free; 1185 goto free;
1190 } 1186 }
1191 1187
@@ -1630,7 +1626,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1630 1626
1631 xt_compat_lock(AF_INET); 1627 xt_compat_lock(AF_INET);
1632 t = xt_find_table_lock(net, AF_INET, get.name); 1628 t = xt_find_table_lock(net, AF_INET, get.name);
1633 if (!IS_ERR_OR_NULL(t)) { 1629 if (t) {
1634 const struct xt_table_info *private = t->private; 1630 const struct xt_table_info *private = t->private;
1635 struct xt_table_info info; 1631 struct xt_table_info info;
1636 ret = compat_table_info(private, &info); 1632 ret = compat_table_info(private, &info);
@@ -1644,7 +1640,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1644 module_put(t->me); 1640 module_put(t->me);
1645 xt_table_unlock(t); 1641 xt_table_unlock(t);
1646 } else 1642 } else
1647 ret = t ? PTR_ERR(t) : -ENOENT; 1643 ret = -ENOENT;
1648 1644
1649 xt_compat_unlock(AF_INET); 1645 xt_compat_unlock(AF_INET);
1650 return ret; 1646 return ret;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4a9e6db9df8d..21db00d0362b 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -62,7 +62,7 @@ struct clusterip_config {
62static const struct file_operations clusterip_proc_fops; 62static const struct file_operations clusterip_proc_fops;
63#endif 63#endif
64 64
65static int clusterip_net_id __read_mostly; 65static unsigned int clusterip_net_id __read_mostly;
66 66
67struct clusterip_net { 67struct clusterip_net {
68 struct list_head configs; 68 struct list_head configs;
@@ -419,7 +419,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
419 } 419 }
420 cipinfo->config = config; 420 cipinfo->config = config;
421 421
422 ret = nf_ct_l3proto_try_module_get(par->family); 422 ret = nf_ct_netns_get(par->net, par->family);
423 if (ret < 0) 423 if (ret < 0)
424 pr_info("cannot load conntrack support for proto=%u\n", 424 pr_info("cannot load conntrack support for proto=%u\n",
425 par->family); 425 par->family);
@@ -444,7 +444,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
444 444
445 clusterip_config_put(cipinfo->config); 445 clusterip_config_put(cipinfo->config);
446 446
447 nf_ct_l3proto_module_put(par->family); 447 nf_ct_netns_get(par->net, par->family);
448} 448}
449 449
450#ifdef CONFIG_COMPAT 450#ifdef CONFIG_COMPAT
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index da7f02a0b868..a03e4e7ef5f9 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -41,7 +41,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
41 pr_debug("bad rangesize %u\n", mr->rangesize); 41 pr_debug("bad rangesize %u\n", mr->rangesize);
42 return -EINVAL; 42 return -EINVAL;
43 } 43 }
44 return 0; 44 return nf_ct_netns_get(par->net, par->family);
45} 45}
46 46
47static unsigned int 47static unsigned int
@@ -55,7 +55,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
55 range.min_proto = mr->range[0].min; 55 range.min_proto = mr->range[0].min;
56 range.max_proto = mr->range[0].max; 56 range.max_proto = mr->range[0].max;
57 57
58 return nf_nat_masquerade_ipv4(skb, par->hooknum, &range, par->out); 58 return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
59 xt_out(par));
60}
61
62static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
63{
64 nf_ct_netns_put(par->net, par->family);
59} 65}
60 66
61static struct xt_target masquerade_tg_reg __read_mostly = { 67static struct xt_target masquerade_tg_reg __read_mostly = {
@@ -66,6 +72,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = {
66 .table = "nat", 72 .table = "nat",
67 .hooks = 1 << NF_INET_POST_ROUTING, 73 .hooks = 1 << NF_INET_POST_ROUTING,
68 .checkentry = masquerade_tg_check, 74 .checkentry = masquerade_tg_check,
75 .destroy = masquerade_tg_destroy,
69 .me = THIS_MODULE, 76 .me = THIS_MODULE,
70}; 77};
71 78
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 1d16c0f28df0..8bd0d7b26632 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -34,7 +34,7 @@ static unsigned int
34reject_tg(struct sk_buff *skb, const struct xt_action_param *par) 34reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
35{ 35{
36 const struct ipt_reject_info *reject = par->targinfo; 36 const struct ipt_reject_info *reject = par->targinfo;
37 int hook = par->hooknum; 37 int hook = xt_hooknum(par);
38 38
39 switch (reject->with) { 39 switch (reject->with) {
40 case IPT_ICMP_NET_UNREACHABLE: 40 case IPT_ICMP_NET_UNREACHABLE:
@@ -59,7 +59,7 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
59 nf_send_unreach(skb, ICMP_PKT_FILTERED, hook); 59 nf_send_unreach(skb, ICMP_PKT_FILTERED, hook);
60 break; 60 break;
61 case IPT_TCP_RESET: 61 case IPT_TCP_RESET:
62 nf_send_reset(par->net, skb, hook); 62 nf_send_reset(xt_net(par), skb, hook);
63 case IPT_ICMP_ECHOREPLY: 63 case IPT_ICMP_ECHOREPLY:
64 /* Doesn't happen. */ 64 /* Doesn't happen. */
65 break; 65 break;
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index db5b87509446..30c0de53e254 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -263,12 +263,12 @@ static unsigned int
263synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) 263synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
264{ 264{
265 const struct xt_synproxy_info *info = par->targinfo; 265 const struct xt_synproxy_info *info = par->targinfo;
266 struct net *net = par->net; 266 struct net *net = xt_net(par);
267 struct synproxy_net *snet = synproxy_pernet(net); 267 struct synproxy_net *snet = synproxy_pernet(net);
268 struct synproxy_options opts = {}; 268 struct synproxy_options opts = {};
269 struct tcphdr *th, _th; 269 struct tcphdr *th, _th;
270 270
271 if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) 271 if (nf_ip_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP))
272 return NF_DROP; 272 return NF_DROP;
273 273
274 th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); 274 th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
@@ -418,12 +418,12 @@ static int synproxy_tg4_check(const struct xt_tgchk_param *par)
418 e->ip.invflags & XT_INV_PROTO) 418 e->ip.invflags & XT_INV_PROTO)
419 return -EINVAL; 419 return -EINVAL;
420 420
421 return nf_ct_l3proto_try_module_get(par->family); 421 return nf_ct_netns_get(par->net, par->family);
422} 422}
423 423
424static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par) 424static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
425{ 425{
426 nf_ct_l3proto_module_put(par->family); 426 nf_ct_netns_put(par->net, par->family);
427} 427}
428 428
429static struct xt_target synproxy_tg4_reg __read_mostly = { 429static struct xt_target synproxy_tg4_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 78cc64eddfc1..f273098e48fd 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -83,10 +83,12 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
83 return true ^ invert; 83 return true ^ invert;
84 84
85 iph = ip_hdr(skb); 85 iph = ip_hdr(skb);
86 if (ipv4_is_multicast(iph->daddr)) { 86 if (ipv4_is_zeronet(iph->saddr)) {
87 if (ipv4_is_zeronet(iph->saddr)) 87 if (ipv4_is_lbcast(iph->daddr) ||
88 return ipv4_is_local_multicast(iph->daddr) ^ invert; 88 ipv4_is_local_multicast(iph->daddr))
89 return true ^ invert;
89 } 90 }
91
90 flow.flowi4_iif = LOOPBACK_IFINDEX; 92 flow.flowi4_iif = LOOPBACK_IFINDEX;
91 flow.daddr = iph->saddr; 93 flow.daddr = iph->saddr;
92 flow.saddr = rpfilter_get_saddr(iph->daddr); 94 flow.saddr = rpfilter_get_saddr(iph->daddr);
@@ -95,7 +97,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
95 flow.flowi4_tos = RT_TOS(iph->tos); 97 flow.flowi4_tos = RT_TOS(iph->tos);
96 flow.flowi4_scope = RT_SCOPE_UNIVERSE; 98 flow.flowi4_scope = RT_SCOPE_UNIVERSE;
97 99
98 return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert; 100 return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert;
99} 101}
100 102
101static int rpfilter_check(const struct xt_mtchk_param *par) 103static int rpfilter_check(const struct xt_mtchk_param *par)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 713c09a74b90..fcfd071f4705 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -31,6 +31,13 @@
31#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 31#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
32#include <net/netfilter/nf_log.h> 32#include <net/netfilter/nf_log.h>
33 33
34static int conntrack4_net_id __read_mostly;
35static DEFINE_MUTEX(register_ipv4_hooks);
36
37struct conntrack4_net {
38 unsigned int users;
39};
40
34static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 41static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
35 struct nf_conntrack_tuple *tuple) 42 struct nf_conntrack_tuple *tuple)
36{ 43{
@@ -307,9 +314,42 @@ static struct nf_sockopt_ops so_getorigdst = {
307 .owner = THIS_MODULE, 314 .owner = THIS_MODULE,
308}; 315};
309 316
310static int ipv4_init_net(struct net *net) 317static int ipv4_hooks_register(struct net *net)
311{ 318{
312 return 0; 319 struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
320 int err = 0;
321
322 mutex_lock(&register_ipv4_hooks);
323
324 cnet->users++;
325 if (cnet->users > 1)
326 goto out_unlock;
327
328 err = nf_defrag_ipv4_enable(net);
329 if (err) {
330 cnet->users = 0;
331 goto out_unlock;
332 }
333
334 err = nf_register_net_hooks(net, ipv4_conntrack_ops,
335 ARRAY_SIZE(ipv4_conntrack_ops));
336
337 if (err)
338 cnet->users = 0;
339 out_unlock:
340 mutex_unlock(&register_ipv4_hooks);
341 return err;
342}
343
344static void ipv4_hooks_unregister(struct net *net)
345{
346 struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
347
348 mutex_lock(&register_ipv4_hooks);
349 if (cnet->users && (--cnet->users == 0))
350 nf_unregister_net_hooks(net, ipv4_conntrack_ops,
351 ARRAY_SIZE(ipv4_conntrack_ops));
352 mutex_unlock(&register_ipv4_hooks);
313} 353}
314 354
315struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { 355struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
@@ -325,7 +365,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
325 .nlattr_to_tuple = ipv4_nlattr_to_tuple, 365 .nlattr_to_tuple = ipv4_nlattr_to_tuple,
326 .nla_policy = ipv4_nla_policy, 366 .nla_policy = ipv4_nla_policy,
327#endif 367#endif
328 .init_net = ipv4_init_net, 368 .net_ns_get = ipv4_hooks_register,
369 .net_ns_put = ipv4_hooks_unregister,
329 .me = THIS_MODULE, 370 .me = THIS_MODULE,
330}; 371};
331 372
@@ -336,52 +377,50 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
336MODULE_ALIAS("ip_conntrack"); 377MODULE_ALIAS("ip_conntrack");
337MODULE_LICENSE("GPL"); 378MODULE_LICENSE("GPL");
338 379
380static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
381 &nf_conntrack_l4proto_tcp4,
382 &nf_conntrack_l4proto_udp4,
383 &nf_conntrack_l4proto_icmp,
384#ifdef CONFIG_NF_CT_PROTO_DCCP
385 &nf_conntrack_l4proto_dccp4,
386#endif
387#ifdef CONFIG_NF_CT_PROTO_SCTP
388 &nf_conntrack_l4proto_sctp4,
389#endif
390#ifdef CONFIG_NF_CT_PROTO_UDPLITE
391 &nf_conntrack_l4proto_udplite4,
392#endif
393};
394
339static int ipv4_net_init(struct net *net) 395static int ipv4_net_init(struct net *net)
340{ 396{
341 int ret = 0; 397 int ret = 0;
342 398
343 ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4); 399 ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
344 if (ret < 0) { 400 ARRAY_SIZE(builtin_l4proto4));
345 pr_err("nf_conntrack_tcp4: pernet registration failed\n"); 401 if (ret < 0)
346 goto out_tcp; 402 return ret;
347 }
348 ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4);
349 if (ret < 0) {
350 pr_err("nf_conntrack_udp4: pernet registration failed\n");
351 goto out_udp;
352 }
353 ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp);
354 if (ret < 0) {
355 pr_err("nf_conntrack_icmp4: pernet registration failed\n");
356 goto out_icmp;
357 }
358 ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4); 403 ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
359 if (ret < 0) { 404 if (ret < 0) {
360 pr_err("nf_conntrack_ipv4: pernet registration failed\n"); 405 pr_err("nf_conntrack_ipv4: pernet registration failed\n");
361 goto out_ipv4; 406 nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
407 ARRAY_SIZE(builtin_l4proto4));
362 } 408 }
363 return 0;
364out_ipv4:
365 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
366out_icmp:
367 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
368out_udp:
369 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
370out_tcp:
371 return ret; 409 return ret;
372} 410}
373 411
374static void ipv4_net_exit(struct net *net) 412static void ipv4_net_exit(struct net *net)
375{ 413{
376 nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4); 414 nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
377 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); 415 nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
378 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); 416 ARRAY_SIZE(builtin_l4proto4));
379 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
380} 417}
381 418
382static struct pernet_operations ipv4_net_ops = { 419static struct pernet_operations ipv4_net_ops = {
383 .init = ipv4_net_init, 420 .init = ipv4_net_init,
384 .exit = ipv4_net_exit, 421 .exit = ipv4_net_exit,
422 .id = &conntrack4_net_id,
423 .size = sizeof(struct conntrack4_net),
385}; 424};
386 425
387static int __init nf_conntrack_l3proto_ipv4_init(void) 426static int __init nf_conntrack_l3proto_ipv4_init(void)
@@ -389,7 +428,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
389 int ret = 0; 428 int ret = 0;
390 429
391 need_conntrack(); 430 need_conntrack();
392 nf_defrag_ipv4_enable();
393 431
394 ret = nf_register_sockopt(&so_getorigdst); 432 ret = nf_register_sockopt(&so_getorigdst);
395 if (ret < 0) { 433 if (ret < 0) {
@@ -403,46 +441,21 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
403 goto cleanup_sockopt; 441 goto cleanup_sockopt;
404 } 442 }
405 443
406 ret = nf_register_hooks(ipv4_conntrack_ops, 444 ret = nf_ct_l4proto_register(builtin_l4proto4,
407 ARRAY_SIZE(ipv4_conntrack_ops)); 445 ARRAY_SIZE(builtin_l4proto4));
408 if (ret < 0) { 446 if (ret < 0)
409 pr_err("nf_conntrack_ipv4: can't register hooks.\n");
410 goto cleanup_pernet; 447 goto cleanup_pernet;
411 }
412
413 ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4);
414 if (ret < 0) {
415 pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n");
416 goto cleanup_hooks;
417 }
418
419 ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4);
420 if (ret < 0) {
421 pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n");
422 goto cleanup_tcp4;
423 }
424
425 ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp);
426 if (ret < 0) {
427 pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n");
428 goto cleanup_udp4;
429 }
430 448
431 ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); 449 ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
432 if (ret < 0) { 450 if (ret < 0) {
433 pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n"); 451 pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
434 goto cleanup_icmpv4; 452 goto cleanup_l4proto;
435 } 453 }
436 454
437 return ret; 455 return ret;
438 cleanup_icmpv4: 456cleanup_l4proto:
439 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); 457 nf_ct_l4proto_unregister(builtin_l4proto4,
440 cleanup_udp4: 458 ARRAY_SIZE(builtin_l4proto4));
441 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
442 cleanup_tcp4:
443 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
444 cleanup_hooks:
445 nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
446 cleanup_pernet: 459 cleanup_pernet:
447 unregister_pernet_subsys(&ipv4_net_ops); 460 unregister_pernet_subsys(&ipv4_net_ops);
448 cleanup_sockopt: 461 cleanup_sockopt:
@@ -454,10 +467,8 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
454{ 467{
455 synchronize_net(); 468 synchronize_net();
456 nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); 469 nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
457 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); 470 nf_ct_l4proto_unregister(builtin_l4proto4,
458 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); 471 ARRAY_SIZE(builtin_l4proto4));
459 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
460 nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
461 unregister_pernet_subsys(&ipv4_net_ops); 472 unregister_pernet_subsys(&ipv4_net_ops);
462 nf_unregister_sockopt(&so_getorigdst); 473 nf_unregister_sockopt(&so_getorigdst);
463} 474}
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index d88da36b383c..49bd6a54404f 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -11,6 +11,7 @@
11#include <linux/netfilter.h> 11#include <linux/netfilter.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/skbuff.h> 13#include <linux/skbuff.h>
14#include <net/netns/generic.h>
14#include <net/route.h> 15#include <net/route.h>
15#include <net/ip.h> 16#include <net/ip.h>
16 17
@@ -22,6 +23,8 @@
22#endif 23#endif
23#include <net/netfilter/nf_conntrack_zones.h> 24#include <net/netfilter/nf_conntrack_zones.h>
24 25
26static DEFINE_MUTEX(defrag4_mutex);
27
25static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, 28static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
26 u_int32_t user) 29 u_int32_t user)
27{ 30{
@@ -102,18 +105,50 @@ static struct nf_hook_ops ipv4_defrag_ops[] = {
102 }, 105 },
103}; 106};
104 107
108static void __net_exit defrag4_net_exit(struct net *net)
109{
110 if (net->nf.defrag_ipv4) {
111 nf_unregister_net_hooks(net, ipv4_defrag_ops,
112 ARRAY_SIZE(ipv4_defrag_ops));
113 net->nf.defrag_ipv4 = false;
114 }
115}
116
117static struct pernet_operations defrag4_net_ops = {
118 .exit = defrag4_net_exit,
119};
120
105static int __init nf_defrag_init(void) 121static int __init nf_defrag_init(void)
106{ 122{
107 return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); 123 return register_pernet_subsys(&defrag4_net_ops);
108} 124}
109 125
110static void __exit nf_defrag_fini(void) 126static void __exit nf_defrag_fini(void)
111{ 127{
112 nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); 128 unregister_pernet_subsys(&defrag4_net_ops);
113} 129}
114 130
115void nf_defrag_ipv4_enable(void) 131int nf_defrag_ipv4_enable(struct net *net)
116{ 132{
133 int err = 0;
134
135 might_sleep();
136
137 if (net->nf.defrag_ipv4)
138 return 0;
139
140 mutex_lock(&defrag4_mutex);
141 if (net->nf.defrag_ipv4)
142 goto out_unlock;
143
144 err = nf_register_net_hooks(net, ipv4_defrag_ops,
145 ARRAY_SIZE(ipv4_defrag_ops));
146 if (err == 0)
147 net->nf.defrag_ipv4 = true;
148
149 out_unlock:
150 mutex_unlock(&defrag4_mutex);
151 return err;
117} 152}
118EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); 153EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable);
119 154
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
new file mode 100644
index 000000000000..a83d558e1aae
--- /dev/null
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -0,0 +1,163 @@
1/*
2 * Copyright (C) 2007-2008 BalaBit IT Ltd.
3 * Author: Krisztian Kovacs
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 */
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <net/tcp.h>
14#include <net/udp.h>
15#include <net/icmp.h>
16#include <net/sock.h>
17#include <net/inet_sock.h>
18#include <net/netfilter/nf_socket.h>
19#if IS_ENABLED(CONFIG_NF_CONNTRACK)
20#include <net/netfilter/nf_conntrack.h>
21#endif
22
23static int
24extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol,
25 __be32 *raddr, __be32 *laddr,
26 __be16 *rport, __be16 *lport)
27{
28 unsigned int outside_hdrlen = ip_hdrlen(skb);
29 struct iphdr *inside_iph, _inside_iph;
30 struct icmphdr *icmph, _icmph;
31 __be16 *ports, _ports[2];
32
33 icmph = skb_header_pointer(skb, outside_hdrlen,
34 sizeof(_icmph), &_icmph);
35 if (icmph == NULL)
36 return 1;
37
38 switch (icmph->type) {
39 case ICMP_DEST_UNREACH:
40 case ICMP_SOURCE_QUENCH:
41 case ICMP_REDIRECT:
42 case ICMP_TIME_EXCEEDED:
43 case ICMP_PARAMETERPROB:
44 break;
45 default:
46 return 1;
47 }
48
49 inside_iph = skb_header_pointer(skb, outside_hdrlen +
50 sizeof(struct icmphdr),
51 sizeof(_inside_iph), &_inside_iph);
52 if (inside_iph == NULL)
53 return 1;
54
55 if (inside_iph->protocol != IPPROTO_TCP &&
56 inside_iph->protocol != IPPROTO_UDP)
57 return 1;
58
59 ports = skb_header_pointer(skb, outside_hdrlen +
60 sizeof(struct icmphdr) +
61 (inside_iph->ihl << 2),
62 sizeof(_ports), &_ports);
63 if (ports == NULL)
64 return 1;
65
66 /* the inside IP packet is the one quoted from our side, thus
67 * its saddr is the local address */
68 *protocol = inside_iph->protocol;
69 *laddr = inside_iph->saddr;
70 *lport = ports[0];
71 *raddr = inside_iph->daddr;
72 *rport = ports[1];
73
74 return 0;
75}
76
77static struct sock *
78nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
79 const u8 protocol,
80 const __be32 saddr, const __be32 daddr,
81 const __be16 sport, const __be16 dport,
82 const struct net_device *in)
83{
84 switch (protocol) {
85 case IPPROTO_TCP:
86 return inet_lookup(net, &tcp_hashinfo, skb, doff,
87 saddr, sport, daddr, dport,
88 in->ifindex);
89 case IPPROTO_UDP:
90 return udp4_lib_lookup(net, saddr, sport, daddr, dport,
91 in->ifindex);
92 }
93 return NULL;
94}
95
96struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
97 const struct net_device *indev)
98{
99 __be32 uninitialized_var(daddr), uninitialized_var(saddr);
100 __be16 uninitialized_var(dport), uninitialized_var(sport);
101 const struct iphdr *iph = ip_hdr(skb);
102 struct sk_buff *data_skb = NULL;
103 u8 uninitialized_var(protocol);
104#if IS_ENABLED(CONFIG_NF_CONNTRACK)
105 enum ip_conntrack_info ctinfo;
106 struct nf_conn const *ct;
107#endif
108 int doff = 0;
109
110 if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
111 struct udphdr _hdr, *hp;
112
113 hp = skb_header_pointer(skb, ip_hdrlen(skb),
114 sizeof(_hdr), &_hdr);
115 if (hp == NULL)
116 return NULL;
117
118 protocol = iph->protocol;
119 saddr = iph->saddr;
120 sport = hp->source;
121 daddr = iph->daddr;
122 dport = hp->dest;
123 data_skb = (struct sk_buff *)skb;
124 doff = iph->protocol == IPPROTO_TCP ?
125 ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
126 ip_hdrlen(skb) + sizeof(*hp);
127
128 } else if (iph->protocol == IPPROTO_ICMP) {
129 if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
130 &sport, &dport))
131 return NULL;
132 } else {
133 return NULL;
134 }
135
136#if IS_ENABLED(CONFIG_NF_CONNTRACK)
137 /* Do the lookup with the original socket address in
138 * case this is a reply packet of an established
139 * SNAT-ted connection.
140 */
141 ct = nf_ct_get(skb, &ctinfo);
142 if (ct && !nf_ct_is_untracked(ct) &&
143 ((iph->protocol != IPPROTO_ICMP &&
144 ctinfo == IP_CT_ESTABLISHED_REPLY) ||
145 (iph->protocol == IPPROTO_ICMP &&
146 ctinfo == IP_CT_RELATED_REPLY)) &&
147 (ct->status & IPS_SRC_NAT_DONE)) {
148
149 daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
150 dport = (iph->protocol == IPPROTO_TCP) ?
151 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
152 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
153 }
154#endif
155
156 return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
157 daddr, sport, dport, indev);
158}
159EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4);
160
161MODULE_LICENSE("GPL");
162MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
163MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure");
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index 0c01a270bf9f..0af3d8df70dd 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -30,7 +30,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr,
30 }; 30 };
31 int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; 31 int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
32 32
33 nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif); 33 nf_dup_ipv4(nft_net(pkt), pkt->skb, nft_hook(pkt), &gw, oif);
34} 34}
35 35
36static int nft_dup_ipv4_init(const struct nft_ctx *ctx, 36static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
new file mode 100644
index 000000000000..965b1a161369
--- /dev/null
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -0,0 +1,241 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License version 2 as
4 * published by the Free Software Foundation.
5 */
6
7#include <linux/kernel.h>
8#include <linux/init.h>
9#include <linux/module.h>
10#include <linux/netlink.h>
11#include <linux/netfilter.h>
12#include <linux/netfilter/nf_tables.h>
13#include <net/netfilter/nf_tables_core.h>
14#include <net/netfilter/nf_tables.h>
15#include <net/netfilter/nft_fib.h>
16
17#include <net/ip_fib.h>
18#include <net/route.h>
19
20/* don't try to find route from mcast/bcast/zeronet */
21static __be32 get_saddr(__be32 addr)
22{
23 if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
24 ipv4_is_zeronet(addr))
25 return 0;
26 return addr;
27}
28
29static bool fib4_is_local(const struct sk_buff *skb)
30{
31 const struct rtable *rt = skb_rtable(skb);
32
33 return rt && (rt->rt_flags & RTCF_LOCAL);
34}
35
36#define DSCP_BITS 0xfc
37
38void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
39 const struct nft_pktinfo *pkt)
40{
41 const struct nft_fib *priv = nft_expr_priv(expr);
42 u32 *dst = &regs->data[priv->dreg];
43 const struct net_device *dev = NULL;
44 const struct iphdr *iph;
45 __be32 addr;
46
47 if (priv->flags & NFTA_FIB_F_IIF)
48 dev = nft_in(pkt);
49 else if (priv->flags & NFTA_FIB_F_OIF)
50 dev = nft_out(pkt);
51
52 iph = ip_hdr(pkt->skb);
53 if (priv->flags & NFTA_FIB_F_DADDR)
54 addr = iph->daddr;
55 else
56 addr = iph->saddr;
57
58 *dst = inet_dev_addr_type(nft_net(pkt), dev, addr);
59}
60EXPORT_SYMBOL_GPL(nft_fib4_eval_type);
61
62static int get_ifindex(const struct net_device *dev)
63{
64 return dev ? dev->ifindex : 0;
65}
66
67void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
68 const struct nft_pktinfo *pkt)
69{
70 const struct nft_fib *priv = nft_expr_priv(expr);
71 u32 *dest = &regs->data[priv->dreg];
72 const struct iphdr *iph;
73 struct fib_result res;
74 struct flowi4 fl4 = {
75 .flowi4_scope = RT_SCOPE_UNIVERSE,
76 .flowi4_iif = LOOPBACK_IFINDEX,
77 };
78 const struct net_device *oif;
79 struct net_device *found;
80#ifdef CONFIG_IP_ROUTE_MULTIPATH
81 int i;
82#endif
83
84 /*
85 * Do not set flowi4_oif, it restricts results (for example, asking
86 * for oif 3 will get RTN_UNICAST result even if the daddr exits
87 * on another interface.
88 *
89 * Search results for the desired outinterface instead.
90 */
91 if (priv->flags & NFTA_FIB_F_OIF)
92 oif = nft_out(pkt);
93 else if (priv->flags & NFTA_FIB_F_IIF)
94 oif = nft_in(pkt);
95 else
96 oif = NULL;
97
98 if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) {
99 nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
100 return;
101 }
102
103 iph = ip_hdr(pkt->skb);
104 if (ipv4_is_zeronet(iph->saddr)) {
105 if (ipv4_is_lbcast(iph->daddr) ||
106 ipv4_is_local_multicast(iph->daddr)) {
107 nft_fib_store_result(dest, priv->result, pkt,
108 get_ifindex(pkt->skb->dev));
109 return;
110 }
111 }
112
113 if (priv->flags & NFTA_FIB_F_MARK)
114 fl4.flowi4_mark = pkt->skb->mark;
115
116 fl4.flowi4_tos = iph->tos & DSCP_BITS;
117
118 if (priv->flags & NFTA_FIB_F_DADDR) {
119 fl4.daddr = iph->daddr;
120 fl4.saddr = get_saddr(iph->saddr);
121 } else {
122 fl4.daddr = iph->saddr;
123 fl4.saddr = get_saddr(iph->daddr);
124 }
125
126 *dest = 0;
127
128 if (fib_lookup(nft_net(pkt), &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
129 return;
130
131 switch (res.type) {
132 case RTN_UNICAST:
133 break;
134 case RTN_LOCAL: /* should not appear here, see fib4_is_local() above */
135 return;
136 default:
137 break;
138 }
139
140 if (!oif) {
141 found = FIB_RES_DEV(res);
142 goto ok;
143 }
144
145#ifdef CONFIG_IP_ROUTE_MULTIPATH
146 for (i = 0; i < res.fi->fib_nhs; i++) {
147 struct fib_nh *nh = &res.fi->fib_nh[i];
148
149 if (nh->nh_dev == oif) {
150 found = nh->nh_dev;
151 goto ok;
152 }
153 }
154 return;
155#else
156 found = FIB_RES_DEV(res);
157 if (found != oif)
158 return;
159#endif
160ok:
161 switch (priv->result) {
162 case NFT_FIB_RESULT_OIF:
163 *dest = found->ifindex;
164 break;
165 case NFT_FIB_RESULT_OIFNAME:
166 strncpy((char *)dest, found->name, IFNAMSIZ);
167 break;
168 default:
169 WARN_ON_ONCE(1);
170 break;
171 }
172}
173EXPORT_SYMBOL_GPL(nft_fib4_eval);
174
175static struct nft_expr_type nft_fib4_type;
176
177static const struct nft_expr_ops nft_fib4_type_ops = {
178 .type = &nft_fib4_type,
179 .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
180 .eval = nft_fib4_eval_type,
181 .init = nft_fib_init,
182 .dump = nft_fib_dump,
183 .validate = nft_fib_validate,
184};
185
186static const struct nft_expr_ops nft_fib4_ops = {
187 .type = &nft_fib4_type,
188 .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
189 .eval = nft_fib4_eval,
190 .init = nft_fib_init,
191 .dump = nft_fib_dump,
192 .validate = nft_fib_validate,
193};
194
195static const struct nft_expr_ops *
196nft_fib4_select_ops(const struct nft_ctx *ctx,
197 const struct nlattr * const tb[])
198{
199 enum nft_fib_result result;
200
201 if (!tb[NFTA_FIB_RESULT])
202 return ERR_PTR(-EINVAL);
203
204 result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
205
206 switch (result) {
207 case NFT_FIB_RESULT_OIF:
208 return &nft_fib4_ops;
209 case NFT_FIB_RESULT_OIFNAME:
210 return &nft_fib4_ops;
211 case NFT_FIB_RESULT_ADDRTYPE:
212 return &nft_fib4_type_ops;
213 default:
214 return ERR_PTR(-EOPNOTSUPP);
215 }
216}
217
218static struct nft_expr_type nft_fib4_type __read_mostly = {
219 .name = "fib",
220 .select_ops = &nft_fib4_select_ops,
221 .policy = nft_fib_policy,
222 .maxattr = NFTA_FIB_MAX,
223 .family = NFPROTO_IPV4,
224 .owner = THIS_MODULE,
225};
226
227static int __init nft_fib4_module_init(void)
228{
229 return nft_register_expr(&nft_fib4_type);
230}
231
232static void __exit nft_fib4_module_exit(void)
233{
234 nft_unregister_expr(&nft_fib4_type);
235}
236
237module_init(nft_fib4_module_init);
238module_exit(nft_fib4_module_exit);
239MODULE_LICENSE("GPL");
240MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
241MODULE_ALIAS_NFT_AF_EXPR(2, "fib");
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index 51ced81b616c..a0ea8aad1bf1 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> 2 * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
@@ -31,8 +31,14 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
31 range.max_proto.all = 31 range.max_proto.all =
32 *(__be16 *)&regs->data[priv->sreg_proto_max]; 32 *(__be16 *)&regs->data[priv->sreg_proto_max];
33 } 33 }
34 regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook, 34 regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
35 &range, pkt->out); 35 &range, nft_out(pkt));
36}
37
38static void
39nft_masq_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
40{
41 nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
36} 42}
37 43
38static struct nft_expr_type nft_masq_ipv4_type; 44static struct nft_expr_type nft_masq_ipv4_type;
@@ -41,6 +47,7 @@ static const struct nft_expr_ops nft_masq_ipv4_ops = {
41 .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), 47 .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
42 .eval = nft_masq_ipv4_eval, 48 .eval = nft_masq_ipv4_eval,
43 .init = nft_masq_init, 49 .init = nft_masq_init,
50 .destroy = nft_masq_ipv4_destroy,
44 .dump = nft_masq_dump, 51 .dump = nft_masq_dump,
45 .validate = nft_masq_validate, 52 .validate = nft_masq_validate,
46}; 53};
@@ -77,5 +84,5 @@ module_init(nft_masq_ipv4_module_init);
77module_exit(nft_masq_ipv4_module_exit); 84module_exit(nft_masq_ipv4_module_exit);
78 85
79MODULE_LICENSE("GPL"); 86MODULE_LICENSE("GPL");
80MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); 87MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org");
81MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq"); 88MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
index c09d4381427e..1650ed23c15d 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> 2 * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
@@ -35,8 +35,13 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr,
35 35
36 mr.range[0].flags |= priv->flags; 36 mr.range[0].flags |= priv->flags;
37 37
38 regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, 38 regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt));
39 pkt->hook); 39}
40
41static void
42nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
43{
44 nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
40} 45}
41 46
42static struct nft_expr_type nft_redir_ipv4_type; 47static struct nft_expr_type nft_redir_ipv4_type;
@@ -45,6 +50,7 @@ static const struct nft_expr_ops nft_redir_ipv4_ops = {
45 .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), 50 .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
46 .eval = nft_redir_ipv4_eval, 51 .eval = nft_redir_ipv4_eval,
47 .init = nft_redir_init, 52 .init = nft_redir_init,
53 .destroy = nft_redir_ipv4_destroy,
48 .dump = nft_redir_dump, 54 .dump = nft_redir_dump,
49 .validate = nft_redir_validate, 55 .validate = nft_redir_validate,
50}; 56};
@@ -72,5 +78,5 @@ module_init(nft_redir_ipv4_module_init);
72module_exit(nft_redir_ipv4_module_exit); 78module_exit(nft_redir_ipv4_module_exit);
73 79
74MODULE_LICENSE("GPL"); 80MODULE_LICENSE("GPL");
75MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); 81MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
76MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir"); 82MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index 2c2553b9026c..517ce93699de 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -27,10 +27,10 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr,
27 27
28 switch (priv->type) { 28 switch (priv->type) {
29 case NFT_REJECT_ICMP_UNREACH: 29 case NFT_REJECT_ICMP_UNREACH:
30 nf_send_unreach(pkt->skb, priv->icmp_code, pkt->hook); 30 nf_send_unreach(pkt->skb, priv->icmp_code, nft_hook(pkt));
31 break; 31 break;
32 case NFT_REJECT_TCP_RST: 32 case NFT_REJECT_TCP_RST:
33 nf_send_reset(pkt->net, pkt->skb, pkt->hook); 33 nf_send_reset(nft_net(pkt), pkt->skb, nft_hook(pkt));
34 break; 34 break;
35 default: 35 default:
36 break; 36 break;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 96b8e2b95731..86cca610f4c2 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -609,15 +609,15 @@ int ping_getfrag(void *from, char *to,
609 fraglen -= sizeof(struct icmphdr); 609 fraglen -= sizeof(struct icmphdr);
610 if (fraglen < 0) 610 if (fraglen < 0)
611 BUG(); 611 BUG();
612 if (csum_and_copy_from_iter(to + sizeof(struct icmphdr), 612 if (!csum_and_copy_from_iter_full(to + sizeof(struct icmphdr),
613 fraglen, &pfh->wcheck, 613 fraglen, &pfh->wcheck,
614 &pfh->msg->msg_iter) != fraglen) 614 &pfh->msg->msg_iter))
615 return -EFAULT; 615 return -EFAULT;
616 } else if (offset < sizeof(struct icmphdr)) { 616 } else if (offset < sizeof(struct icmphdr)) {
617 BUG(); 617 BUG();
618 } else { 618 } else {
619 if (csum_and_copy_from_iter(to, fraglen, &pfh->wcheck, 619 if (!csum_and_copy_from_iter_full(to, fraglen, &pfh->wcheck,
620 &pfh->msg->msg_iter) != fraglen) 620 &pfh->msg->msg_iter))
621 return -EFAULT; 621 return -EFAULT;
622 } 622 }
623 623
@@ -793,7 +793,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
793 793
794 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, 794 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
795 RT_SCOPE_UNIVERSE, sk->sk_protocol, 795 RT_SCOPE_UNIVERSE, sk->sk_protocol,
796 inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); 796 inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
797 sk->sk_uid);
797 798
798 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); 799 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
799 rt = ip_route_output_flow(net, &fl4, sk); 800 rt = ip_route_output_flow(net, &fl4, sk);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ecbe5a7c2d6d..4e49e5cb001c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -41,7 +41,7 @@
41#include <linux/atomic.h> 41#include <linux/atomic.h>
42#include <asm/byteorder.h> 42#include <asm/byteorder.h>
43#include <asm/current.h> 43#include <asm/current.h>
44#include <asm/uaccess.h> 44#include <linux/uaccess.h>
45#include <asm/ioctls.h> 45#include <asm/ioctls.h>
46#include <linux/stddef.h> 46#include <linux/stddef.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
@@ -89,9 +89,10 @@ struct raw_frag_vec {
89 int hlen; 89 int hlen;
90}; 90};
91 91
92static struct raw_hashinfo raw_v4_hashinfo = { 92struct raw_hashinfo raw_v4_hashinfo = {
93 .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), 93 .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
94}; 94};
95EXPORT_SYMBOL_GPL(raw_v4_hashinfo);
95 96
96int raw_hash_sk(struct sock *sk) 97int raw_hash_sk(struct sock *sk)
97{ 98{
@@ -120,7 +121,7 @@ void raw_unhash_sk(struct sock *sk)
120} 121}
121EXPORT_SYMBOL_GPL(raw_unhash_sk); 122EXPORT_SYMBOL_GPL(raw_unhash_sk);
122 123
123static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, 124struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
124 unsigned short num, __be32 raddr, __be32 laddr, int dif) 125 unsigned short num, __be32 raddr, __be32 laddr, int dif)
125{ 126{
126 sk_for_each_from(sk) { 127 sk_for_each_from(sk) {
@@ -136,6 +137,7 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
136found: 137found:
137 return sk; 138 return sk;
138} 139}
140EXPORT_SYMBOL_GPL(__raw_v4_lookup);
139 141
140/* 142/*
141 * 0 - deliver 143 * 0 - deliver
@@ -604,7 +606,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
604 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, 606 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
605 inet_sk_flowi_flags(sk) | 607 inet_sk_flowi_flags(sk) |
606 (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), 608 (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
607 daddr, saddr, 0, 0); 609 daddr, saddr, 0, 0, sk->sk_uid);
608 610
609 if (!inet->hdrincl) { 611 if (!inet->hdrincl) {
610 rfv.msg = msg; 612 rfv.msg = msg;
@@ -693,12 +695,20 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
693{ 695{
694 struct inet_sock *inet = inet_sk(sk); 696 struct inet_sock *inet = inet_sk(sk);
695 struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; 697 struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
698 u32 tb_id = RT_TABLE_LOCAL;
696 int ret = -EINVAL; 699 int ret = -EINVAL;
697 int chk_addr_ret; 700 int chk_addr_ret;
698 701
699 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) 702 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
700 goto out; 703 goto out;
701 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); 704
705 if (sk->sk_bound_dev_if)
706 tb_id = l3mdev_fib_table_by_index(sock_net(sk),
707 sk->sk_bound_dev_if) ? : tb_id;
708
709 chk_addr_ret = inet_addr_type_table(sock_net(sk), addr->sin_addr.s_addr,
710 tb_id);
711
702 ret = -EADDRNOTAVAIL; 712 ret = -EADDRNOTAVAIL;
703 if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && 713 if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
704 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) 714 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
@@ -912,6 +922,20 @@ static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg
912} 922}
913#endif 923#endif
914 924
925int raw_abort(struct sock *sk, int err)
926{
927 lock_sock(sk);
928
929 sk->sk_err = err;
930 sk->sk_error_report(sk);
931 __udp_disconnect(sk, 0);
932
933 release_sock(sk);
934
935 return 0;
936}
937EXPORT_SYMBOL_GPL(raw_abort);
938
915struct proto raw_prot = { 939struct proto raw_prot = {
916 .name = "RAW", 940 .name = "RAW",
917 .owner = THIS_MODULE, 941 .owner = THIS_MODULE,
@@ -937,6 +961,7 @@ struct proto raw_prot = {
937 .compat_getsockopt = compat_raw_getsockopt, 961 .compat_getsockopt = compat_raw_getsockopt,
938 .compat_ioctl = compat_raw_ioctl, 962 .compat_ioctl = compat_raw_ioctl,
939#endif 963#endif
964 .diag_destroy = raw_abort,
940}; 965};
941 966
942#ifdef CONFIG_PROC_FS 967#ifdef CONFIG_PROC_FS
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
new file mode 100644
index 000000000000..e1a51ca68d23
--- /dev/null
+++ b/net/ipv4/raw_diag.c
@@ -0,0 +1,266 @@
1#include <linux/module.h>
2
3#include <linux/inet_diag.h>
4#include <linux/sock_diag.h>
5
6#include <net/inet_sock.h>
7#include <net/raw.h>
8#include <net/rawv6.h>
9
10#ifdef pr_fmt
11# undef pr_fmt
12#endif
13
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16static struct raw_hashinfo *
17raw_get_hashinfo(const struct inet_diag_req_v2 *r)
18{
19 if (r->sdiag_family == AF_INET) {
20 return &raw_v4_hashinfo;
21#if IS_ENABLED(CONFIG_IPV6)
22 } else if (r->sdiag_family == AF_INET6) {
23 return &raw_v6_hashinfo;
24#endif
25 } else {
26 pr_warn_once("Unexpected inet family %d\n",
27 r->sdiag_family);
28 WARN_ON_ONCE(1);
29 return ERR_PTR(-EINVAL);
30 }
31}
32
33/*
34 * Due to requirement of not breaking user API we can't simply
35 * rename @pad field in inet_diag_req_v2 structure, instead
36 * use helper to figure it out.
37 */
38
39static struct sock *raw_lookup(struct net *net, struct sock *from,
40 const struct inet_diag_req_v2 *req)
41{
42 struct inet_diag_req_raw *r = (void *)req;
43 struct sock *sk = NULL;
44
45 if (r->sdiag_family == AF_INET)
46 sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol,
47 r->id.idiag_dst[0],
48 r->id.idiag_src[0],
49 r->id.idiag_if);
50#if IS_ENABLED(CONFIG_IPV6)
51 else
52 sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
53 (const struct in6_addr *)r->id.idiag_src,
54 (const struct in6_addr *)r->id.idiag_dst,
55 r->id.idiag_if);
56#endif
57 return sk;
58}
59
60static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r)
61{
62 struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
63 struct sock *sk = NULL, *s;
64 int slot;
65
66 if (IS_ERR(hashinfo))
67 return ERR_CAST(hashinfo);
68
69 read_lock(&hashinfo->lock);
70 for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) {
71 sk_for_each(s, &hashinfo->ht[slot]) {
72 sk = raw_lookup(net, s, r);
73 if (sk) {
74 /*
75 * Grab it and keep until we fill
76 * diag meaage to be reported, so
77 * caller should call sock_put then.
78 * We can do that because we're keeping
79 * hashinfo->lock here.
80 */
81 sock_hold(sk);
82 goto out_unlock;
83 }
84 }
85 }
86out_unlock:
87 read_unlock(&hashinfo->lock);
88
89 return sk ? sk : ERR_PTR(-ENOENT);
90}
91
92static int raw_diag_dump_one(struct sk_buff *in_skb,
93 const struct nlmsghdr *nlh,
94 const struct inet_diag_req_v2 *r)
95{
96 struct net *net = sock_net(in_skb->sk);
97 struct sk_buff *rep;
98 struct sock *sk;
99 int err;
100
101 sk = raw_sock_get(net, r);
102 if (IS_ERR(sk))
103 return PTR_ERR(sk);
104
105 rep = nlmsg_new(sizeof(struct inet_diag_msg) +
106 sizeof(struct inet_diag_meminfo) + 64,
107 GFP_KERNEL);
108 if (!rep) {
109 sock_put(sk);
110 return -ENOMEM;
111 }
112
113 err = inet_sk_diag_fill(sk, NULL, rep, r,
114 sk_user_ns(NETLINK_CB(in_skb).sk),
115 NETLINK_CB(in_skb).portid,
116 nlh->nlmsg_seq, 0, nlh,
117 netlink_net_capable(in_skb, CAP_NET_ADMIN));
118 sock_put(sk);
119
120 if (err < 0) {
121 kfree_skb(rep);
122 return err;
123 }
124
125 err = netlink_unicast(net->diag_nlsk, rep,
126 NETLINK_CB(in_skb).portid,
127 MSG_DONTWAIT);
128 if (err > 0)
129 err = 0;
130 return err;
131}
132
133static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
134 struct netlink_callback *cb,
135 const struct inet_diag_req_v2 *r,
136 struct nlattr *bc, bool net_admin)
137{
138 if (!inet_diag_bc_sk(bc, sk))
139 return 0;
140
141 return inet_sk_diag_fill(sk, NULL, skb, r,
142 sk_user_ns(NETLINK_CB(cb->skb).sk),
143 NETLINK_CB(cb->skb).portid,
144 cb->nlh->nlmsg_seq, NLM_F_MULTI,
145 cb->nlh, net_admin);
146}
147
148static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
149 const struct inet_diag_req_v2 *r, struct nlattr *bc)
150{
151 bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
152 struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
153 struct net *net = sock_net(skb->sk);
154 int num, s_num, slot, s_slot;
155 struct sock *sk = NULL;
156
157 if (IS_ERR(hashinfo))
158 return;
159
160 s_slot = cb->args[0];
161 num = s_num = cb->args[1];
162
163 read_lock(&hashinfo->lock);
164 for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) {
165 num = 0;
166
167 sk_for_each(sk, &hashinfo->ht[slot]) {
168 struct inet_sock *inet = inet_sk(sk);
169
170 if (!net_eq(sock_net(sk), net))
171 continue;
172 if (num < s_num)
173 goto next;
174 if (sk->sk_family != r->sdiag_family)
175 goto next;
176 if (r->id.idiag_sport != inet->inet_sport &&
177 r->id.idiag_sport)
178 goto next;
179 if (r->id.idiag_dport != inet->inet_dport &&
180 r->id.idiag_dport)
181 goto next;
182 if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0)
183 goto out_unlock;
184next:
185 num++;
186 }
187 }
188
189out_unlock:
190 read_unlock(&hashinfo->lock);
191
192 cb->args[0] = slot;
193 cb->args[1] = num;
194}
195
196static void raw_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
197 void *info)
198{
199 r->idiag_rqueue = sk_rmem_alloc_get(sk);
200 r->idiag_wqueue = sk_wmem_alloc_get(sk);
201}
202
203#ifdef CONFIG_INET_DIAG_DESTROY
204static int raw_diag_destroy(struct sk_buff *in_skb,
205 const struct inet_diag_req_v2 *r)
206{
207 struct net *net = sock_net(in_skb->sk);
208 struct sock *sk;
209 int err;
210
211 sk = raw_sock_get(net, r);
212 if (IS_ERR(sk))
213 return PTR_ERR(sk);
214 err = sock_diag_destroy(sk, ECONNABORTED);
215 sock_put(sk);
216 return err;
217}
218#endif
219
220static const struct inet_diag_handler raw_diag_handler = {
221 .dump = raw_diag_dump,
222 .dump_one = raw_diag_dump_one,
223 .idiag_get_info = raw_diag_get_info,
224 .idiag_type = IPPROTO_RAW,
225 .idiag_info_size = 0,
226#ifdef CONFIG_INET_DIAG_DESTROY
227 .destroy = raw_diag_destroy,
228#endif
229};
230
231static void __always_unused __check_inet_diag_req_raw(void)
232{
233 /*
234 * Make sure the two structures are identical,
235 * except the @pad field.
236 */
237#define __offset_mismatch(m1, m2) \
238 (offsetof(struct inet_diag_req_v2, m1) != \
239 offsetof(struct inet_diag_req_raw, m2))
240
241 BUILD_BUG_ON(sizeof(struct inet_diag_req_v2) !=
242 sizeof(struct inet_diag_req_raw));
243 BUILD_BUG_ON(__offset_mismatch(sdiag_family, sdiag_family));
244 BUILD_BUG_ON(__offset_mismatch(sdiag_protocol, sdiag_protocol));
245 BUILD_BUG_ON(__offset_mismatch(idiag_ext, idiag_ext));
246 BUILD_BUG_ON(__offset_mismatch(pad, sdiag_raw_protocol));
247 BUILD_BUG_ON(__offset_mismatch(idiag_states, idiag_states));
248 BUILD_BUG_ON(__offset_mismatch(id, id));
249#undef __offset_mismatch
250}
251
252static int __init raw_diag_init(void)
253{
254 return inet_diag_register(&raw_diag_handler);
255}
256
257static void __exit raw_diag_exit(void)
258{
259 inet_diag_unregister(&raw_diag_handler);
260}
261
262module_init(raw_diag_init);
263module_exit(raw_diag_exit);
264MODULE_LICENSE("GPL");
265MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */);
266MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2a57566e6e91..a82a11747b3f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -65,7 +65,7 @@
65#define pr_fmt(fmt) "IPv4: " fmt 65#define pr_fmt(fmt) "IPv4: " fmt
66 66
67#include <linux/module.h> 67#include <linux/module.h>
68#include <asm/uaccess.h> 68#include <linux/uaccess.h>
69#include <linux/bitops.h> 69#include <linux/bitops.h>
70#include <linux/types.h> 70#include <linux/types.h>
71#include <linux/kernel.h> 71#include <linux/kernel.h>
@@ -507,7 +507,8 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
507} 507}
508EXPORT_SYMBOL(__ip_select_ident); 508EXPORT_SYMBOL(__ip_select_ident);
509 509
510static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, 510static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
511 const struct sock *sk,
511 const struct iphdr *iph, 512 const struct iphdr *iph,
512 int oif, u8 tos, 513 int oif, u8 tos,
513 u8 prot, u32 mark, int flow_flags) 514 u8 prot, u32 mark, int flow_flags)
@@ -523,19 +524,21 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
523 flowi4_init_output(fl4, oif, mark, tos, 524 flowi4_init_output(fl4, oif, mark, tos,
524 RT_SCOPE_UNIVERSE, prot, 525 RT_SCOPE_UNIVERSE, prot,
525 flow_flags, 526 flow_flags,
526 iph->daddr, iph->saddr, 0, 0); 527 iph->daddr, iph->saddr, 0, 0,
528 sock_net_uid(net, sk));
527} 529}
528 530
529static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, 531static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
530 const struct sock *sk) 532 const struct sock *sk)
531{ 533{
534 const struct net *net = dev_net(skb->dev);
532 const struct iphdr *iph = ip_hdr(skb); 535 const struct iphdr *iph = ip_hdr(skb);
533 int oif = skb->dev->ifindex; 536 int oif = skb->dev->ifindex;
534 u8 tos = RT_TOS(iph->tos); 537 u8 tos = RT_TOS(iph->tos);
535 u8 prot = iph->protocol; 538 u8 prot = iph->protocol;
536 u32 mark = skb->mark; 539 u32 mark = skb->mark;
537 540
538 __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); 541 __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
539} 542}
540 543
541static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) 544static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
@@ -552,7 +555,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
552 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 555 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
553 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, 556 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
554 inet_sk_flowi_flags(sk), 557 inet_sk_flowi_flags(sk),
555 daddr, inet->inet_saddr, 0, 0); 558 daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
556 rcu_read_unlock(); 559 rcu_read_unlock();
557} 560}
558 561
@@ -795,6 +798,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
795 struct rtable *rt; 798 struct rtable *rt;
796 struct flowi4 fl4; 799 struct flowi4 fl4;
797 const struct iphdr *iph = (const struct iphdr *) skb->data; 800 const struct iphdr *iph = (const struct iphdr *) skb->data;
801 struct net *net = dev_net(skb->dev);
798 int oif = skb->dev->ifindex; 802 int oif = skb->dev->ifindex;
799 u8 tos = RT_TOS(iph->tos); 803 u8 tos = RT_TOS(iph->tos);
800 u8 prot = iph->protocol; 804 u8 prot = iph->protocol;
@@ -802,7 +806,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
802 806
803 rt = (struct rtable *) dst; 807 rt = (struct rtable *) dst;
804 808
805 __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0); 809 __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
806 __ip_do_redirect(rt, skb, &fl4, true); 810 __ip_do_redirect(rt, skb, &fl4, true);
807} 811}
808 812
@@ -1020,7 +1024,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
1020 if (!mark) 1024 if (!mark)
1021 mark = IP4_REPLY_MARK(net, skb->mark); 1025 mark = IP4_REPLY_MARK(net, skb->mark);
1022 1026
1023 __build_flow_key(&fl4, NULL, iph, oif, 1027 __build_flow_key(net, &fl4, NULL, iph, oif,
1024 RT_TOS(iph->tos), protocol, mark, flow_flags); 1028 RT_TOS(iph->tos), protocol, mark, flow_flags);
1025 rt = __ip_route_output_key(net, &fl4); 1029 rt = __ip_route_output_key(net, &fl4);
1026 if (!IS_ERR(rt)) { 1030 if (!IS_ERR(rt)) {
@@ -1036,7 +1040,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1036 struct flowi4 fl4; 1040 struct flowi4 fl4;
1037 struct rtable *rt; 1041 struct rtable *rt;
1038 1042
1039 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); 1043 __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
1040 1044
1041 if (!fl4.flowi4_mark) 1045 if (!fl4.flowi4_mark)
1042 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); 1046 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
@@ -1055,6 +1059,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1055 struct rtable *rt; 1059 struct rtable *rt;
1056 struct dst_entry *odst = NULL; 1060 struct dst_entry *odst = NULL;
1057 bool new = false; 1061 bool new = false;
1062 struct net *net = sock_net(sk);
1058 1063
1059 bh_lock_sock(sk); 1064 bh_lock_sock(sk);
1060 1065
@@ -1068,7 +1073,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1068 goto out; 1073 goto out;
1069 } 1074 }
1070 1075
1071 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); 1076 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1072 1077
1073 rt = (struct rtable *)odst; 1078 rt = (struct rtable *)odst;
1074 if (odst->obsolete && !odst->ops->check(odst, 0)) { 1079 if (odst->obsolete && !odst->ops->check(odst, 0)) {
@@ -1108,7 +1113,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net,
1108 struct flowi4 fl4; 1113 struct flowi4 fl4;
1109 struct rtable *rt; 1114 struct rtable *rt;
1110 1115
1111 __build_flow_key(&fl4, NULL, iph, oif, 1116 __build_flow_key(net, &fl4, NULL, iph, oif,
1112 RT_TOS(iph->tos), protocol, mark, flow_flags); 1117 RT_TOS(iph->tos), protocol, mark, flow_flags);
1113 rt = __ip_route_output_key(net, &fl4); 1118 rt = __ip_route_output_key(net, &fl4);
1114 if (!IS_ERR(rt)) { 1119 if (!IS_ERR(rt)) {
@@ -1123,9 +1128,10 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1123 const struct iphdr *iph = (const struct iphdr *) skb->data; 1128 const struct iphdr *iph = (const struct iphdr *) skb->data;
1124 struct flowi4 fl4; 1129 struct flowi4 fl4;
1125 struct rtable *rt; 1130 struct rtable *rt;
1131 struct net *net = sock_net(sk);
1126 1132
1127 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); 1133 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1128 rt = __ip_route_output_key(sock_net(sk), &fl4); 1134 rt = __ip_route_output_key(net, &fl4);
1129 if (!IS_ERR(rt)) { 1135 if (!IS_ERR(rt)) {
1130 __ip_do_redirect(rt, skb, &fl4, false); 1136 __ip_do_redirect(rt, skb, &fl4, false);
1131 ip_rt_put(rt); 1137 ip_rt_put(rt);
@@ -1598,6 +1604,19 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
1598 spin_unlock_bh(&fnhe_lock); 1604 spin_unlock_bh(&fnhe_lock);
1599} 1605}
1600 1606
1607static void set_lwt_redirect(struct rtable *rth)
1608{
1609 if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
1610 rth->dst.lwtstate->orig_output = rth->dst.output;
1611 rth->dst.output = lwtunnel_output;
1612 }
1613
1614 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
1615 rth->dst.lwtstate->orig_input = rth->dst.input;
1616 rth->dst.input = lwtunnel_input;
1617 }
1618}
1619
1601/* called in rcu_read_lock() section */ 1620/* called in rcu_read_lock() section */
1602static int __mkroute_input(struct sk_buff *skb, 1621static int __mkroute_input(struct sk_buff *skb,
1603 const struct fib_result *res, 1622 const struct fib_result *res,
@@ -1687,14 +1706,7 @@ rt_cache:
1687 rth->dst.input = ip_forward; 1706 rth->dst.input = ip_forward;
1688 1707
1689 rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); 1708 rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
1690 if (lwtunnel_output_redirect(rth->dst.lwtstate)) { 1709 set_lwt_redirect(rth);
1691 rth->dst.lwtstate->orig_output = rth->dst.output;
1692 rth->dst.output = lwtunnel_output;
1693 }
1694 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
1695 rth->dst.lwtstate->orig_input = rth->dst.input;
1696 rth->dst.input = lwtunnel_input;
1697 }
1698 skb_dst_set(skb, &rth->dst); 1710 skb_dst_set(skb, &rth->dst);
1699out: 1711out:
1700 err = 0; 1712 err = 0;
@@ -1921,8 +1933,18 @@ local_input:
1921 rth->dst.error= -err; 1933 rth->dst.error= -err;
1922 rth->rt_flags &= ~RTCF_LOCAL; 1934 rth->rt_flags &= ~RTCF_LOCAL;
1923 } 1935 }
1936
1924 if (do_cache) { 1937 if (do_cache) {
1925 if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) { 1938 struct fib_nh *nh = &FIB_RES_NH(res);
1939
1940 rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
1941 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
1942 WARN_ON(rth->dst.input == lwtunnel_input);
1943 rth->dst.lwtstate->orig_input = rth->dst.input;
1944 rth->dst.input = lwtunnel_input;
1945 }
1946
1947 if (unlikely(!rt_cache_route(nh, rth))) {
1926 rth->dst.flags |= DST_NOCACHE; 1948 rth->dst.flags |= DST_NOCACHE;
1927 rt_add_uncached_list(rth); 1949 rt_add_uncached_list(rth);
1928 } 1950 }
@@ -1982,25 +2004,35 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1982 */ 2004 */
1983 if (ipv4_is_multicast(daddr)) { 2005 if (ipv4_is_multicast(daddr)) {
1984 struct in_device *in_dev = __in_dev_get_rcu(dev); 2006 struct in_device *in_dev = __in_dev_get_rcu(dev);
2007 int our = 0;
2008
2009 if (in_dev)
2010 our = ip_check_mc_rcu(in_dev, daddr, saddr,
2011 ip_hdr(skb)->protocol);
2012
2013 /* check l3 master if no match yet */
2014 if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
2015 struct in_device *l3_in_dev;
1985 2016
1986 if (in_dev) { 2017 l3_in_dev = __in_dev_get_rcu(skb->dev);
1987 int our = ip_check_mc_rcu(in_dev, daddr, saddr, 2018 if (l3_in_dev)
1988 ip_hdr(skb)->protocol); 2019 our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
1989 if (our 2020 ip_hdr(skb)->protocol);
2021 }
2022
2023 res = -EINVAL;
2024 if (our
1990#ifdef CONFIG_IP_MROUTE 2025#ifdef CONFIG_IP_MROUTE
1991 || 2026 ||
1992 (!ipv4_is_local_multicast(daddr) && 2027 (!ipv4_is_local_multicast(daddr) &&
1993 IN_DEV_MFORWARD(in_dev)) 2028 IN_DEV_MFORWARD(in_dev))
1994#endif 2029#endif
1995 ) { 2030 ) {
1996 int res = ip_route_input_mc(skb, daddr, saddr, 2031 res = ip_route_input_mc(skb, daddr, saddr,
1997 tos, dev, our); 2032 tos, dev, our);
1998 rcu_read_unlock();
1999 return res;
2000 }
2001 } 2033 }
2002 rcu_read_unlock(); 2034 rcu_read_unlock();
2003 return -EINVAL; 2035 return res;
2004 } 2036 }
2005 res = ip_route_input_slow(skb, daddr, saddr, tos, dev); 2037 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
2006 rcu_read_unlock(); 2038 rcu_read_unlock();
@@ -2140,8 +2172,7 @@ add:
2140 } 2172 }
2141 2173
2142 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0); 2174 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
2143 if (lwtunnel_output_redirect(rth->dst.lwtstate)) 2175 set_lwt_redirect(rth);
2144 rth->dst.output = lwtunnel_output;
2145 2176
2146 return rth; 2177 return rth;
2147} 2178}
@@ -2268,7 +2299,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2268 res.fi = NULL; 2299 res.fi = NULL;
2269 res.table = NULL; 2300 res.table = NULL;
2270 if (fl4->flowi4_oif && 2301 if (fl4->flowi4_oif &&
2271 !netif_index_is_l3_master(net, fl4->flowi4_oif)) { 2302 (ipv4_is_multicast(fl4->daddr) ||
2303 !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
2272 /* Apparently, routing tables are wrong. Assume, 2304 /* Apparently, routing tables are wrong. Assume,
2273 that the destination is on link. 2305 that the destination is on link.
2274 2306
@@ -2495,6 +2527,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
2495 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) 2527 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
2496 goto nla_put_failure; 2528 goto nla_put_failure;
2497 2529
2530 if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
2531 nla_put_u32(skb, RTA_UID,
2532 from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
2533 goto nla_put_failure;
2534
2498 error = rt->dst.error; 2535 error = rt->dst.error;
2499 2536
2500 if (rt_is_input_route(rt)) { 2537 if (rt_is_input_route(rt)) {
@@ -2547,6 +2584,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2547 int mark; 2584 int mark;
2548 struct sk_buff *skb; 2585 struct sk_buff *skb;
2549 u32 table_id = RT_TABLE_MAIN; 2586 u32 table_id = RT_TABLE_MAIN;
2587 kuid_t uid;
2550 2588
2551 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); 2589 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2552 if (err < 0) 2590 if (err < 0)
@@ -2574,6 +2612,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2574 dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; 2612 dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
2575 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; 2613 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
2576 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; 2614 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
2615 if (tb[RTA_UID])
2616 uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
2617 else
2618 uid = (iif ? INVALID_UID : current_uid());
2577 2619
2578 memset(&fl4, 0, sizeof(fl4)); 2620 memset(&fl4, 0, sizeof(fl4));
2579 fl4.daddr = dst; 2621 fl4.daddr = dst;
@@ -2581,6 +2623,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2581 fl4.flowi4_tos = rtm->rtm_tos; 2623 fl4.flowi4_tos = rtm->rtm_tos;
2582 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; 2624 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2583 fl4.flowi4_mark = mark; 2625 fl4.flowi4_mark = mark;
2626 fl4.flowi4_uid = uid;
2584 2627
2585 if (iif) { 2628 if (iif) {
2586 struct net_device *dev; 2629 struct net_device *dev;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e3c4043c27de..3e88467d70ee 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -334,6 +334,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
334 treq = tcp_rsk(req); 334 treq = tcp_rsk(req);
335 treq->rcv_isn = ntohl(th->seq) - 1; 335 treq->rcv_isn = ntohl(th->seq) - 1;
336 treq->snt_isn = cookie; 336 treq->snt_isn = cookie;
337 treq->ts_off = 0;
337 req->mss = mss; 338 req->mss = mss;
338 ireq->ir_num = ntohs(th->dest); 339 ireq->ir_num = ntohs(th->dest);
339 ireq->ir_rmt_port = th->source; 340 ireq->ir_rmt_port = th->source;
@@ -372,7 +373,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
372 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, 373 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
373 inet_sk_flowi_flags(sk), 374 inet_sk_flowi_flags(sk),
374 opt->srr ? opt->faddr : ireq->ir_rmt_addr, 375 opt->srr ? opt->faddr : ireq->ir_rmt_addr,
375 ireq->ir_loc_addr, th->source, th->dest); 376 ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
376 security_req_classify_flow(req, flowi4_to_flowi(&fl4)); 377 security_req_classify_flow(req, flowi4_to_flowi(&fl4));
377 rt = ip_route_output_key(sock_net(sk), &fl4); 378 rt = ip_route_output_key(sock_net(sk), &fl4);
378 if (IS_ERR(rt)) { 379 if (IS_ERR(rt)) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 814af89c1bd3..4a044964da66 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -277,9 +277,8 @@
277#include <net/ip.h> 277#include <net/ip.h>
278#include <net/sock.h> 278#include <net/sock.h>
279 279
280#include <asm/uaccess.h> 280#include <linux/uaccess.h>
281#include <asm/ioctls.h> 281#include <asm/ioctls.h>
282#include <asm/unaligned.h>
283#include <net/busy_poll.h> 282#include <net/busy_poll.h>
284 283
285int sysctl_tcp_min_tso_segs __read_mostly = 2; 284int sysctl_tcp_min_tso_segs __read_mostly = 2;
@@ -405,7 +404,6 @@ void tcp_init_sock(struct sock *sk)
405 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 404 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
406 tp->snd_cwnd_clamp = ~0; 405 tp->snd_cwnd_clamp = ~0;
407 tp->mss_cache = TCP_MSS_DEFAULT; 406 tp->mss_cache = TCP_MSS_DEFAULT;
408 u64_stats_init(&tp->syncp);
409 407
410 tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; 408 tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
411 tcp_enable_early_retrans(tp); 409 tcp_enable_early_retrans(tp);
@@ -665,9 +663,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
665 if (tcp_should_autocork(sk, skb, size_goal)) { 663 if (tcp_should_autocork(sk, skb, size_goal)) {
666 664
667 /* avoid atomic op if TSQ_THROTTLED bit is already set */ 665 /* avoid atomic op if TSQ_THROTTLED bit is already set */
668 if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) { 666 if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
669 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); 667 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
670 set_bit(TSQ_THROTTLED, &tp->tsq_flags); 668 set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
671 } 669 }
672 /* It is possible TX completion already happened 670 /* It is possible TX completion already happened
673 * before we set TSQ_THROTTLED. 671 * before we set TSQ_THROTTLED.
@@ -998,8 +996,11 @@ do_error:
998 goto out; 996 goto out;
999out_err: 997out_err:
1000 /* make sure we wake any epoll edge trigger waiter */ 998 /* make sure we wake any epoll edge trigger waiter */
1001 if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) 999 if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
1000 err == -EAGAIN)) {
1002 sk->sk_write_space(sk); 1001 sk->sk_write_space(sk);
1002 tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
1003 }
1003 return sk_stream_error(sk, flags, err); 1004 return sk_stream_error(sk, flags, err);
1004} 1005}
1005 1006
@@ -1333,8 +1334,11 @@ do_error:
1333out_err: 1334out_err:
1334 err = sk_stream_error(sk, flags, err); 1335 err = sk_stream_error(sk, flags, err);
1335 /* make sure we wake any epoll edge trigger waiter */ 1336 /* make sure we wake any epoll edge trigger waiter */
1336 if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) 1337 if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
1338 err == -EAGAIN)) {
1337 sk->sk_write_space(sk); 1339 sk->sk_write_space(sk);
1340 tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
1341 }
1338 release_sock(sk); 1342 release_sock(sk);
1339 return err; 1343 return err;
1340} 1344}
@@ -2302,7 +2306,7 @@ EXPORT_SYMBOL(tcp_disconnect);
2302static inline bool tcp_can_repair_sock(const struct sock *sk) 2306static inline bool tcp_can_repair_sock(const struct sock *sk)
2303{ 2307{
2304 return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) && 2308 return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
2305 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); 2309 (sk->sk_state != TCP_LISTEN);
2306} 2310}
2307 2311
2308static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len) 2312static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
@@ -2704,15 +2708,33 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
2704EXPORT_SYMBOL(compat_tcp_setsockopt); 2708EXPORT_SYMBOL(compat_tcp_setsockopt);
2705#endif 2709#endif
2706 2710
2711static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
2712 struct tcp_info *info)
2713{
2714 u64 stats[__TCP_CHRONO_MAX], total = 0;
2715 enum tcp_chrono i;
2716
2717 for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) {
2718 stats[i] = tp->chrono_stat[i - 1];
2719 if (i == tp->chrono_type)
2720 stats[i] += tcp_time_stamp - tp->chrono_start;
2721 stats[i] *= USEC_PER_SEC / HZ;
2722 total += stats[i];
2723 }
2724
2725 info->tcpi_busy_time = total;
2726 info->tcpi_rwnd_limited = stats[TCP_CHRONO_RWND_LIMITED];
2727 info->tcpi_sndbuf_limited = stats[TCP_CHRONO_SNDBUF_LIMITED];
2728}
2729
2707/* Return information about state of tcp endpoint in API format. */ 2730/* Return information about state of tcp endpoint in API format. */
2708void tcp_get_info(struct sock *sk, struct tcp_info *info) 2731void tcp_get_info(struct sock *sk, struct tcp_info *info)
2709{ 2732{
2710 const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ 2733 const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
2711 const struct inet_connection_sock *icsk = inet_csk(sk); 2734 const struct inet_connection_sock *icsk = inet_csk(sk);
2712 u32 now = tcp_time_stamp, intv; 2735 u32 now = tcp_time_stamp, intv;
2713 unsigned int start;
2714 int notsent_bytes;
2715 u64 rate64; 2736 u64 rate64;
2737 bool slow;
2716 u32 rate; 2738 u32 rate;
2717 2739
2718 memset(info, 0, sizeof(*info)); 2740 memset(info, 0, sizeof(*info));
@@ -2721,6 +2743,27 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2721 2743
2722 info->tcpi_state = sk_state_load(sk); 2744 info->tcpi_state = sk_state_load(sk);
2723 2745
2746 /* Report meaningful fields for all TCP states, including listeners */
2747 rate = READ_ONCE(sk->sk_pacing_rate);
2748 rate64 = rate != ~0U ? rate : ~0ULL;
2749 info->tcpi_pacing_rate = rate64;
2750
2751 rate = READ_ONCE(sk->sk_max_pacing_rate);
2752 rate64 = rate != ~0U ? rate : ~0ULL;
2753 info->tcpi_max_pacing_rate = rate64;
2754
2755 info->tcpi_reordering = tp->reordering;
2756 info->tcpi_snd_cwnd = tp->snd_cwnd;
2757
2758 if (info->tcpi_state == TCP_LISTEN) {
2759 /* listeners aliased fields :
2760 * tcpi_unacked -> Number of children ready for accept()
2761 * tcpi_sacked -> max backlog
2762 */
2763 info->tcpi_unacked = sk->sk_ack_backlog;
2764 info->tcpi_sacked = sk->sk_max_ack_backlog;
2765 return;
2766 }
2724 info->tcpi_ca_state = icsk->icsk_ca_state; 2767 info->tcpi_ca_state = icsk->icsk_ca_state;
2725 info->tcpi_retransmits = icsk->icsk_retransmits; 2768 info->tcpi_retransmits = icsk->icsk_retransmits;
2726 info->tcpi_probes = icsk->icsk_probes_out; 2769 info->tcpi_probes = icsk->icsk_probes_out;
@@ -2748,13 +2791,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2748 info->tcpi_snd_mss = tp->mss_cache; 2791 info->tcpi_snd_mss = tp->mss_cache;
2749 info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; 2792 info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
2750 2793
2751 if (info->tcpi_state == TCP_LISTEN) { 2794 info->tcpi_unacked = tp->packets_out;
2752 info->tcpi_unacked = sk->sk_ack_backlog; 2795 info->tcpi_sacked = tp->sacked_out;
2753 info->tcpi_sacked = sk->sk_max_ack_backlog; 2796
2754 } else {
2755 info->tcpi_unacked = tp->packets_out;
2756 info->tcpi_sacked = tp->sacked_out;
2757 }
2758 info->tcpi_lost = tp->lost_out; 2797 info->tcpi_lost = tp->lost_out;
2759 info->tcpi_retrans = tp->retrans_out; 2798 info->tcpi_retrans = tp->retrans_out;
2760 info->tcpi_fackets = tp->fackets_out; 2799 info->tcpi_fackets = tp->fackets_out;
@@ -2768,34 +2807,25 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2768 info->tcpi_rtt = tp->srtt_us >> 3; 2807 info->tcpi_rtt = tp->srtt_us >> 3;
2769 info->tcpi_rttvar = tp->mdev_us >> 2; 2808 info->tcpi_rttvar = tp->mdev_us >> 2;
2770 info->tcpi_snd_ssthresh = tp->snd_ssthresh; 2809 info->tcpi_snd_ssthresh = tp->snd_ssthresh;
2771 info->tcpi_snd_cwnd = tp->snd_cwnd;
2772 info->tcpi_advmss = tp->advmss; 2810 info->tcpi_advmss = tp->advmss;
2773 info->tcpi_reordering = tp->reordering;
2774 2811
2775 info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3; 2812 info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
2776 info->tcpi_rcv_space = tp->rcvq_space.space; 2813 info->tcpi_rcv_space = tp->rcvq_space.space;
2777 2814
2778 info->tcpi_total_retrans = tp->total_retrans; 2815 info->tcpi_total_retrans = tp->total_retrans;
2779 2816
2780 rate = READ_ONCE(sk->sk_pacing_rate); 2817 slow = lock_sock_fast(sk);
2781 rate64 = rate != ~0U ? rate : ~0ULL;
2782 put_unaligned(rate64, &info->tcpi_pacing_rate);
2783 2818
2784 rate = READ_ONCE(sk->sk_max_pacing_rate); 2819 info->tcpi_bytes_acked = tp->bytes_acked;
2785 rate64 = rate != ~0U ? rate : ~0ULL; 2820 info->tcpi_bytes_received = tp->bytes_received;
2786 put_unaligned(rate64, &info->tcpi_max_pacing_rate); 2821 info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt);
2822 tcp_get_info_chrono_stats(tp, info);
2823
2824 unlock_sock_fast(sk, slow);
2787 2825
2788 do {
2789 start = u64_stats_fetch_begin_irq(&tp->syncp);
2790 put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
2791 put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
2792 } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
2793 info->tcpi_segs_out = tp->segs_out; 2826 info->tcpi_segs_out = tp->segs_out;
2794 info->tcpi_segs_in = tp->segs_in; 2827 info->tcpi_segs_in = tp->segs_in;
2795 2828
2796 notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt);
2797 info->tcpi_notsent_bytes = max(0, notsent_bytes);
2798
2799 info->tcpi_min_rtt = tcp_min_rtt(tp); 2829 info->tcpi_min_rtt = tcp_min_rtt(tp);
2800 info->tcpi_data_segs_in = tp->data_segs_in; 2830 info->tcpi_data_segs_in = tp->data_segs_in;
2801 info->tcpi_data_segs_out = tp->data_segs_out; 2831 info->tcpi_data_segs_out = tp->data_segs_out;
@@ -2806,11 +2836,31 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2806 if (rate && intv) { 2836 if (rate && intv) {
2807 rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; 2837 rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
2808 do_div(rate64, intv); 2838 do_div(rate64, intv);
2809 put_unaligned(rate64, &info->tcpi_delivery_rate); 2839 info->tcpi_delivery_rate = rate64;
2810 } 2840 }
2811} 2841}
2812EXPORT_SYMBOL_GPL(tcp_get_info); 2842EXPORT_SYMBOL_GPL(tcp_get_info);
2813 2843
2844struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
2845{
2846 const struct tcp_sock *tp = tcp_sk(sk);
2847 struct sk_buff *stats;
2848 struct tcp_info info;
2849
2850 stats = alloc_skb(3 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC);
2851 if (!stats)
2852 return NULL;
2853
2854 tcp_get_info_chrono_stats(tp, &info);
2855 nla_put_u64_64bit(stats, TCP_NLA_BUSY,
2856 info.tcpi_busy_time, TCP_NLA_PAD);
2857 nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED,
2858 info.tcpi_rwnd_limited, TCP_NLA_PAD);
2859 nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED,
2860 info.tcpi_sndbuf_limited, TCP_NLA_PAD);
2861 return stats;
2862}
2863
2814static int do_tcp_getsockopt(struct sock *sk, int level, 2864static int do_tcp_getsockopt(struct sock *sk, int level,
2815 int optname, char __user *optval, int __user *optlen) 2865 int optname, char __user *optval, int __user *optlen)
2816{ 2866{
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 0ea66c2c9344..b89bce4c721e 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -14,6 +14,36 @@
14 * observed, or adjust the sending rate if it estimates there is a 14 * observed, or adjust the sending rate if it estimates there is a
15 * traffic policer, in order to keep the drop rate reasonable. 15 * traffic policer, in order to keep the drop rate reasonable.
16 * 16 *
17 * Here is a state transition diagram for BBR:
18 *
19 * |
20 * V
21 * +---> STARTUP ----+
22 * | | |
23 * | V |
24 * | DRAIN ----+
25 * | | |
26 * | V |
27 * +---> PROBE_BW ----+
28 * | ^ | |
29 * | | | |
30 * | +----+ |
31 * | |
32 * +---- PROBE_RTT <--+
33 *
34 * A BBR flow starts in STARTUP, and ramps up its sending rate quickly.
35 * When it estimates the pipe is full, it enters DRAIN to drain the queue.
36 * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT.
37 * A long-lived BBR flow spends the vast majority of its time remaining
38 * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth
39 * in a fair manner, with a small, bounded queue. *If* a flow has been
40 * continuously sending for the entire min_rtt window, and hasn't seen an RTT
41 * sample that matches or decreases its min_rtt estimate for 10 seconds, then
42 * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe
43 * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if
44 * we estimated that we reached the full bw of the pipe then we enter PROBE_BW;
45 * otherwise we enter STARTUP to try to fill the pipe.
46 *
17 * BBR is described in detail in: 47 * BBR is described in detail in:
18 * "BBR: Congestion-Based Congestion Control", 48 * "BBR: Congestion-Based Congestion Control",
19 * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh, 49 * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
@@ -51,7 +81,7 @@ enum bbr_mode {
51 BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */ 81 BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */
52 BBR_DRAIN, /* drain any queue created during startup */ 82 BBR_DRAIN, /* drain any queue created during startup */
53 BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */ 83 BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */
54 BBR_PROBE_RTT, /* cut cwnd to min to probe min_rtt */ 84 BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */
55}; 85};
56 86
57/* BBR congestion control block */ 87/* BBR congestion control block */
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index f9038d6b109e..79c4817abc94 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -68,8 +68,9 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
68{ 68{
69 int ret = 0; 69 int ret = 0;
70 70
71 /* all algorithms must implement ssthresh and cong_avoid ops */ 71 /* all algorithms must implement these */
72 if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) { 72 if (!ca->ssthresh || !ca->undo_cwnd ||
73 !(ca->cong_avoid || ca->cong_control)) {
73 pr_err("%s does not implement required ops\n", ca->name); 74 pr_err("%s does not implement required ops\n", ca->name);
74 return -EINVAL; 75 return -EINVAL;
75 } 76 }
@@ -443,10 +444,19 @@ u32 tcp_reno_ssthresh(struct sock *sk)
443} 444}
444EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); 445EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
445 446
447u32 tcp_reno_undo_cwnd(struct sock *sk)
448{
449 const struct tcp_sock *tp = tcp_sk(sk);
450
451 return max(tp->snd_cwnd, tp->snd_ssthresh << 1);
452}
453EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
454
446struct tcp_congestion_ops tcp_reno = { 455struct tcp_congestion_ops tcp_reno = {
447 .flags = TCP_CONG_NON_RESTRICTED, 456 .flags = TCP_CONG_NON_RESTRICTED,
448 .name = "reno", 457 .name = "reno",
449 .owner = THIS_MODULE, 458 .owner = THIS_MODULE,
450 .ssthresh = tcp_reno_ssthresh, 459 .ssthresh = tcp_reno_ssthresh,
451 .cong_avoid = tcp_reno_cong_avoid, 460 .cong_avoid = tcp_reno_cong_avoid,
461 .undo_cwnd = tcp_reno_undo_cwnd,
452}; 462};
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index ab37c6775630..5f5e5936760e 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -335,6 +335,7 @@ static struct tcp_congestion_ops dctcp __read_mostly = {
335static struct tcp_congestion_ops dctcp_reno __read_mostly = { 335static struct tcp_congestion_ops dctcp_reno __read_mostly = {
336 .ssthresh = tcp_reno_ssthresh, 336 .ssthresh = tcp_reno_ssthresh,
337 .cong_avoid = tcp_reno_cong_avoid, 337 .cong_avoid = tcp_reno_cong_avoid,
338 .undo_cwnd = tcp_reno_undo_cwnd,
338 .get_info = dctcp_get_info, 339 .get_info = dctcp_get_info,
339 .owner = THIS_MODULE, 340 .owner = THIS_MODULE,
340 .name = "dctcp-reno", 341 .name = "dctcp-reno",
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index db7842495a64..6d9879e93648 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -94,6 +94,7 @@ static const struct hstcp_aimd_val {
94 94
95struct hstcp { 95struct hstcp {
96 u32 ai; 96 u32 ai;
97 u32 loss_cwnd;
97}; 98};
98 99
99static void hstcp_init(struct sock *sk) 100static void hstcp_init(struct sock *sk)
@@ -150,16 +151,24 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
150static u32 hstcp_ssthresh(struct sock *sk) 151static u32 hstcp_ssthresh(struct sock *sk)
151{ 152{
152 const struct tcp_sock *tp = tcp_sk(sk); 153 const struct tcp_sock *tp = tcp_sk(sk);
153 const struct hstcp *ca = inet_csk_ca(sk); 154 struct hstcp *ca = inet_csk_ca(sk);
154 155
156 ca->loss_cwnd = tp->snd_cwnd;
155 /* Do multiplicative decrease */ 157 /* Do multiplicative decrease */
156 return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); 158 return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
157} 159}
158 160
161static u32 hstcp_cwnd_undo(struct sock *sk)
162{
163 const struct hstcp *ca = inet_csk_ca(sk);
164
165 return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
166}
159 167
160static struct tcp_congestion_ops tcp_highspeed __read_mostly = { 168static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
161 .init = hstcp_init, 169 .init = hstcp_init,
162 .ssthresh = hstcp_ssthresh, 170 .ssthresh = hstcp_ssthresh,
171 .undo_cwnd = hstcp_cwnd_undo,
163 .cong_avoid = hstcp_cong_avoid, 172 .cong_avoid = hstcp_cong_avoid,
164 173
165 .owner = THIS_MODULE, 174 .owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 083831e359df..0f7175c3338e 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -166,6 +166,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
166static struct tcp_congestion_ops tcp_hybla __read_mostly = { 166static struct tcp_congestion_ops tcp_hybla __read_mostly = {
167 .init = hybla_init, 167 .init = hybla_init,
168 .ssthresh = tcp_reno_ssthresh, 168 .ssthresh = tcp_reno_ssthresh,
169 .undo_cwnd = tcp_reno_undo_cwnd,
169 .cong_avoid = hybla_cong_avoid, 170 .cong_avoid = hybla_cong_avoid,
170 .set_state = hybla_state, 171 .set_state = hybla_state,
171 172
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index c8e6d86be114..60352ff4f5a8 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -48,6 +48,7 @@ struct illinois {
48 u32 end_seq; /* right edge of current RTT */ 48 u32 end_seq; /* right edge of current RTT */
49 u32 alpha; /* Additive increase */ 49 u32 alpha; /* Additive increase */
50 u32 beta; /* Muliplicative decrease */ 50 u32 beta; /* Muliplicative decrease */
51 u32 loss_cwnd; /* cwnd on loss */
51 u16 acked; /* # packets acked by current ACK */ 52 u16 acked; /* # packets acked by current ACK */
52 u8 rtt_above; /* average rtt has gone above threshold */ 53 u8 rtt_above; /* average rtt has gone above threshold */
53 u8 rtt_low; /* # of rtts measurements below threshold */ 54 u8 rtt_low; /* # of rtts measurements below threshold */
@@ -296,10 +297,18 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
296 struct tcp_sock *tp = tcp_sk(sk); 297 struct tcp_sock *tp = tcp_sk(sk);
297 struct illinois *ca = inet_csk_ca(sk); 298 struct illinois *ca = inet_csk_ca(sk);
298 299
300 ca->loss_cwnd = tp->snd_cwnd;
299 /* Multiplicative decrease */ 301 /* Multiplicative decrease */
300 return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U); 302 return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
301} 303}
302 304
305static u32 tcp_illinois_cwnd_undo(struct sock *sk)
306{
307 const struct illinois *ca = inet_csk_ca(sk);
308
309 return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
310}
311
303/* Extract info for Tcp socket info provided via netlink. */ 312/* Extract info for Tcp socket info provided via netlink. */
304static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr, 313static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
305 union tcp_cc_info *info) 314 union tcp_cc_info *info)
@@ -327,6 +336,7 @@ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
327static struct tcp_congestion_ops tcp_illinois __read_mostly = { 336static struct tcp_congestion_ops tcp_illinois __read_mostly = {
328 .init = tcp_illinois_init, 337 .init = tcp_illinois_init,
329 .ssthresh = tcp_illinois_ssthresh, 338 .ssthresh = tcp_illinois_ssthresh,
339 .undo_cwnd = tcp_illinois_cwnd_undo,
330 .cong_avoid = tcp_illinois_cong_avoid, 340 .cong_avoid = tcp_illinois_cong_avoid,
331 .set_state = tcp_illinois_state, 341 .set_state = tcp_illinois_state,
332 .get_info = tcp_illinois_info, 342 .get_info = tcp_illinois_info,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c71d49ce0c93..6c790754ae3e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -85,6 +85,7 @@ int sysctl_tcp_dsack __read_mostly = 1;
85int sysctl_tcp_app_win __read_mostly = 31; 85int sysctl_tcp_app_win __read_mostly = 31;
86int sysctl_tcp_adv_win_scale __read_mostly = 1; 86int sysctl_tcp_adv_win_scale __read_mostly = 1;
87EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); 87EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
88EXPORT_SYMBOL(sysctl_tcp_timestamps);
88 89
89/* rfc5961 challenge ack rate limiting */ 90/* rfc5961 challenge ack rate limiting */
90int sysctl_tcp_challenge_ack_limit = 1000; 91int sysctl_tcp_challenge_ack_limit = 1000;
@@ -2414,10 +2415,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2414 if (tp->prior_ssthresh) { 2415 if (tp->prior_ssthresh) {
2415 const struct inet_connection_sock *icsk = inet_csk(sk); 2416 const struct inet_connection_sock *icsk = inet_csk(sk);
2416 2417
2417 if (icsk->icsk_ca_ops->undo_cwnd) 2418 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2418 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2419 else
2420 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2421 2419
2422 if (tp->prior_ssthresh > tp->snd_ssthresh) { 2420 if (tp->prior_ssthresh > tp->snd_ssthresh) {
2423 tp->snd_ssthresh = tp->prior_ssthresh; 2421 tp->snd_ssthresh = tp->prior_ssthresh;
@@ -3201,6 +3199,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3201 tp->lost_skb_hint = NULL; 3199 tp->lost_skb_hint = NULL;
3202 } 3200 }
3203 3201
3202 if (!skb)
3203 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
3204
3204 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una))) 3205 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
3205 tp->snd_up = tp->snd_una; 3206 tp->snd_up = tp->snd_una;
3206 3207
@@ -3371,9 +3372,7 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3371 u32 delta = ack - tp->snd_una; 3372 u32 delta = ack - tp->snd_una;
3372 3373
3373 sock_owned_by_me((struct sock *)tp); 3374 sock_owned_by_me((struct sock *)tp);
3374 u64_stats_update_begin_raw(&tp->syncp);
3375 tp->bytes_acked += delta; 3375 tp->bytes_acked += delta;
3376 u64_stats_update_end_raw(&tp->syncp);
3377 tp->snd_una = ack; 3376 tp->snd_una = ack;
3378} 3377}
3379 3378
@@ -3383,9 +3382,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3383 u32 delta = seq - tp->rcv_nxt; 3382 u32 delta = seq - tp->rcv_nxt;
3384 3383
3385 sock_owned_by_me((struct sock *)tp); 3384 sock_owned_by_me((struct sock *)tp);
3386 u64_stats_update_begin_raw(&tp->syncp);
3387 tp->bytes_received += delta; 3385 tp->bytes_received += delta;
3388 u64_stats_update_end_raw(&tp->syncp);
3389 tp->rcv_nxt = seq; 3386 tp->rcv_nxt = seq;
3390} 3387}
3391 3388
@@ -5083,8 +5080,11 @@ static void tcp_check_space(struct sock *sk)
5083 /* pairs with tcp_poll() */ 5080 /* pairs with tcp_poll() */
5084 smp_mb__after_atomic(); 5081 smp_mb__after_atomic();
5085 if (sk->sk_socket && 5082 if (sk->sk_socket &&
5086 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) 5083 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
5087 tcp_new_space(sk); 5084 tcp_new_space(sk);
5085 if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
5086 tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
5087 }
5088 } 5088 }
5089} 5089}
5090 5090
@@ -6318,13 +6318,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6318 goto drop; 6318 goto drop;
6319 } 6319 }
6320 6320
6321 6321 if (sk_acceptq_is_full(sk)) {
6322 /* Accept backlog is full. If we have already queued enough
6323 * of warm entries in syn queue, drop request. It is better than
6324 * clogging syn queue with openreqs with exponentially increasing
6325 * timeout.
6326 */
6327 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
6328 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 6322 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6329 goto drop; 6323 goto drop;
6330 } 6324 }
@@ -6334,6 +6328,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6334 goto drop; 6328 goto drop;
6335 6329
6336 tcp_rsk(req)->af_specific = af_ops; 6330 tcp_rsk(req)->af_specific = af_ops;
6331 tcp_rsk(req)->ts_off = 0;
6337 6332
6338 tcp_clear_options(&tmp_opt); 6333 tcp_clear_options(&tmp_opt);
6339 tmp_opt.mss_clamp = af_ops->mss_clamp; 6334 tmp_opt.mss_clamp = af_ops->mss_clamp;
@@ -6355,6 +6350,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6355 if (security_inet_conn_request(sk, skb, req)) 6350 if (security_inet_conn_request(sk, skb, req))
6356 goto drop_and_free; 6351 goto drop_and_free;
6357 6352
6353 if (isn && tmp_opt.tstamp_ok)
6354 af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
6355
6358 if (!want_cookie && !isn) { 6356 if (!want_cookie && !isn) {
6359 /* VJ's idea. We save last timestamp seen 6357 /* VJ's idea. We save last timestamp seen
6360 * from the destination in peer table, when entering 6358 * from the destination in peer table, when entering
@@ -6395,7 +6393,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6395 goto drop_and_release; 6393 goto drop_and_release;
6396 } 6394 }
6397 6395
6398 isn = af_ops->init_seq(skb); 6396 isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
6399 } 6397 }
6400 if (!dst) { 6398 if (!dst) {
6401 dst = af_ops->route_req(sk, &fl, req, NULL); 6399 dst = af_ops->route_req(sk, &fl, req, NULL);
@@ -6407,6 +6405,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6407 6405
6408 if (want_cookie) { 6406 if (want_cookie) {
6409 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); 6407 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
6408 tcp_rsk(req)->ts_off = 0;
6410 req->cookie_ts = tmp_opt.tstamp_ok; 6409 req->cookie_ts = tmp_opt.tstamp_ok;
6411 if (!tmp_opt.tstamp_ok) 6410 if (!tmp_opt.tstamp_ok)
6412 inet_rsk(req)->ecn_ok = 0; 6411 inet_rsk(req)->ecn_ok = 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2259114c7242..30d81f533ada 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -95,12 +95,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
95struct inet_hashinfo tcp_hashinfo; 95struct inet_hashinfo tcp_hashinfo;
96EXPORT_SYMBOL(tcp_hashinfo); 96EXPORT_SYMBOL(tcp_hashinfo);
97 97
98static __u32 tcp_v4_init_sequence(const struct sk_buff *skb) 98static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff)
99{ 99{
100 return secure_tcp_sequence_number(ip_hdr(skb)->daddr, 100 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
101 ip_hdr(skb)->saddr, 101 ip_hdr(skb)->saddr,
102 tcp_hdr(skb)->dest, 102 tcp_hdr(skb)->dest,
103 tcp_hdr(skb)->source); 103 tcp_hdr(skb)->source, tsoff);
104} 104}
105 105
106int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 106int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -237,7 +237,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
237 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 237 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
238 inet->inet_daddr, 238 inet->inet_daddr,
239 inet->inet_sport, 239 inet->inet_sport,
240 usin->sin_port); 240 usin->sin_port,
241 &tp->tsoffset);
241 242
242 inet->inet_id = tp->write_seq ^ jiffies; 243 inet->inet_id = tp->write_seq ^ jiffies;
243 244
@@ -442,7 +443,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
442 if (!sock_owned_by_user(sk)) { 443 if (!sock_owned_by_user(sk)) {
443 tcp_v4_mtu_reduced(sk); 444 tcp_v4_mtu_reduced(sk);
444 } else { 445 } else {
445 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) 446 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
446 sock_hold(sk); 447 sock_hold(sk);
447 } 448 }
448 goto out; 449 goto out;
@@ -691,6 +692,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
691 offsetof(struct inet_timewait_sock, tw_bound_dev_if)); 692 offsetof(struct inet_timewait_sock, tw_bound_dev_if));
692 693
693 arg.tos = ip_hdr(skb)->tos; 694 arg.tos = ip_hdr(skb)->tos;
695 arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
694 local_bh_disable(); 696 local_bh_disable();
695 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 697 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
696 skb, &TCP_SKB_CB(skb)->header.h4.opt, 698 skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -711,7 +713,7 @@ out:
711 outside socket context is ugly, certainly. What can I do? 713 outside socket context is ugly, certainly. What can I do?
712 */ 714 */
713 715
714static void tcp_v4_send_ack(struct net *net, 716static void tcp_v4_send_ack(const struct sock *sk,
715 struct sk_buff *skb, u32 seq, u32 ack, 717 struct sk_buff *skb, u32 seq, u32 ack,
716 u32 win, u32 tsval, u32 tsecr, int oif, 718 u32 win, u32 tsval, u32 tsecr, int oif,
717 struct tcp_md5sig_key *key, 719 struct tcp_md5sig_key *key,
@@ -726,6 +728,7 @@ static void tcp_v4_send_ack(struct net *net,
726#endif 728#endif
727 ]; 729 ];
728 } rep; 730 } rep;
731 struct net *net = sock_net(sk);
729 struct ip_reply_arg arg; 732 struct ip_reply_arg arg;
730 733
731 memset(&rep.th, 0, sizeof(struct tcphdr)); 734 memset(&rep.th, 0, sizeof(struct tcphdr));
@@ -775,6 +778,7 @@ static void tcp_v4_send_ack(struct net *net,
775 if (oif) 778 if (oif)
776 arg.bound_dev_if = oif; 779 arg.bound_dev_if = oif;
777 arg.tos = tos; 780 arg.tos = tos;
781 arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
778 local_bh_disable(); 782 local_bh_disable();
779 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 783 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
780 skb, &TCP_SKB_CB(skb)->header.h4.opt, 784 skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -790,7 +794,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
790 struct inet_timewait_sock *tw = inet_twsk(sk); 794 struct inet_timewait_sock *tw = inet_twsk(sk);
791 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 795 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
792 796
793 tcp_v4_send_ack(sock_net(sk), skb, 797 tcp_v4_send_ack(sk, skb,
794 tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 798 tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
795 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 799 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
796 tcp_time_stamp + tcptw->tw_ts_offset, 800 tcp_time_stamp + tcptw->tw_ts_offset,
@@ -818,10 +822,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
818 * exception of <SYN> segments, MUST be right-shifted by 822 * exception of <SYN> segments, MUST be right-shifted by
819 * Rcv.Wind.Shift bits: 823 * Rcv.Wind.Shift bits:
820 */ 824 */
821 tcp_v4_send_ack(sock_net(sk), skb, seq, 825 tcp_v4_send_ack(sk, skb, seq,
822 tcp_rsk(req)->rcv_nxt, 826 tcp_rsk(req)->rcv_nxt,
823 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 827 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
824 tcp_time_stamp, 828 tcp_time_stamp + tcp_rsk(req)->ts_off,
825 req->ts_recent, 829 req->ts_recent,
826 0, 830 0,
827 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, 831 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -1908,7 +1912,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
1908 if (!sk) { 1912 if (!sk) {
1909get_head: 1913get_head:
1910 ilb = &tcp_hashinfo.listening_hash[st->bucket]; 1914 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1911 spin_lock_bh(&ilb->lock); 1915 spin_lock(&ilb->lock);
1912 sk = sk_head(&ilb->head); 1916 sk = sk_head(&ilb->head);
1913 st->offset = 0; 1917 st->offset = 0;
1914 goto get_sk; 1918 goto get_sk;
@@ -1925,7 +1929,7 @@ get_sk:
1925 if (sk->sk_family == st->family) 1929 if (sk->sk_family == st->family)
1926 return sk; 1930 return sk;
1927 } 1931 }
1928 spin_unlock_bh(&ilb->lock); 1932 spin_unlock(&ilb->lock);
1929 st->offset = 0; 1933 st->offset = 0;
1930 if (++st->bucket < INET_LHTABLE_SIZE) 1934 if (++st->bucket < INET_LHTABLE_SIZE)
1931 goto get_head; 1935 goto get_head;
@@ -2133,7 +2137,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2133 switch (st->state) { 2137 switch (st->state) {
2134 case TCP_SEQ_STATE_LISTENING: 2138 case TCP_SEQ_STATE_LISTENING:
2135 if (v != SEQ_START_TOKEN) 2139 if (v != SEQ_START_TOKEN)
2136 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); 2140 spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
2137 break; 2141 break;
2138 case TCP_SEQ_STATE_ESTABLISHED: 2142 case TCP_SEQ_STATE_ESTABLISHED:
2139 if (v) 2143 if (v)
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index c67ece1390c2..046fd3910873 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -316,6 +316,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
316static struct tcp_congestion_ops tcp_lp __read_mostly = { 316static struct tcp_congestion_ops tcp_lp __read_mostly = {
317 .init = tcp_lp_init, 317 .init = tcp_lp_init,
318 .ssthresh = tcp_reno_ssthresh, 318 .ssthresh = tcp_reno_ssthresh,
319 .undo_cwnd = tcp_reno_undo_cwnd,
319 .cong_avoid = tcp_lp_cong_avoid, 320 .cong_avoid = tcp_lp_cong_avoid,
320 .pkts_acked = tcp_lp_pkts_acked, 321 .pkts_acked = tcp_lp_pkts_acked,
321 322
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index bf1f3b2b29d1..d46f4d5b1c62 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -742,14 +742,7 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
742 rcu_read_unlock(); 742 rcu_read_unlock();
743} 743}
744 744
745static struct genl_family tcp_metrics_nl_family = { 745static struct genl_family tcp_metrics_nl_family;
746 .id = GENL_ID_GENERATE,
747 .hdrsize = 0,
748 .name = TCP_METRICS_GENL_NAME,
749 .version = TCP_METRICS_GENL_VERSION,
750 .maxattr = TCP_METRICS_ATTR_MAX,
751 .netnsok = true,
752};
753 746
754static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { 747static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
755 [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, 748 [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
@@ -1116,6 +1109,17 @@ static const struct genl_ops tcp_metrics_nl_ops[] = {
1116 }, 1109 },
1117}; 1110};
1118 1111
1112static struct genl_family tcp_metrics_nl_family __ro_after_init = {
1113 .hdrsize = 0,
1114 .name = TCP_METRICS_GENL_NAME,
1115 .version = TCP_METRICS_GENL_VERSION,
1116 .maxattr = TCP_METRICS_ATTR_MAX,
1117 .netnsok = true,
1118 .module = THIS_MODULE,
1119 .ops = tcp_metrics_nl_ops,
1120 .n_ops = ARRAY_SIZE(tcp_metrics_nl_ops),
1121};
1122
1119static unsigned int tcpmhash_entries; 1123static unsigned int tcpmhash_entries;
1120static int __init set_tcpmhash_entries(char *str) 1124static int __init set_tcpmhash_entries(char *str)
1121{ 1125{
@@ -1179,8 +1183,7 @@ void __init tcp_metrics_init(void)
1179 if (ret < 0) 1183 if (ret < 0)
1180 panic("Could not allocate the tcp_metrics hash table\n"); 1184 panic("Could not allocate the tcp_metrics hash table\n");
1181 1185
1182 ret = genl_register_family_with_ops(&tcp_metrics_nl_family, 1186 ret = genl_register_family(&tcp_metrics_nl_family);
1183 tcp_metrics_nl_ops);
1184 if (ret < 0) 1187 if (ret < 0)
1185 panic("Could not register tcp_metrics generic netlink\n"); 1188 panic("Could not register tcp_metrics generic netlink\n");
1186} 1189}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6234ebaa7db1..28ce5ee831f5 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -532,7 +532,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
532 newtp->rx_opt.ts_recent_stamp = 0; 532 newtp->rx_opt.ts_recent_stamp = 0;
533 newtp->tcp_header_len = sizeof(struct tcphdr); 533 newtp->tcp_header_len = sizeof(struct tcphdr);
534 } 534 }
535 newtp->tsoffset = 0; 535 newtp->tsoffset = treq->ts_off;
536#ifdef CONFIG_TCP_MD5SIG 536#ifdef CONFIG_TCP_MD5SIG
537 newtp->md5sig_info = NULL; /*XXX*/ 537 newtp->md5sig_info = NULL; /*XXX*/
538 if (newtp->af_specific->md5_lookup(sk, newsk)) 538 if (newtp->af_specific->md5_lookup(sk, newsk))
@@ -581,6 +581,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
581 581
582 if (tmp_opt.saw_tstamp) { 582 if (tmp_opt.saw_tstamp) {
583 tmp_opt.ts_recent = req->ts_recent; 583 tmp_opt.ts_recent = req->ts_recent;
584 if (tmp_opt.rcv_tsecr)
585 tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
584 /* We do not store true stamp, but it is not required, 586 /* We do not store true stamp, but it is not required,
585 * it can be estimated (approximately) 587 * it can be estimated (approximately)
586 * from another data. 588 * from another data.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 896e9dfbdb5c..1d5331a1b1dc 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -640,7 +640,7 @@ static unsigned int tcp_synack_options(struct request_sock *req,
640 } 640 }
641 if (likely(ireq->tstamp_ok)) { 641 if (likely(ireq->tstamp_ok)) {
642 opts->options |= OPTION_TS; 642 opts->options |= OPTION_TS;
643 opts->tsval = tcp_skb_timestamp(skb); 643 opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
644 opts->tsecr = req->ts_recent; 644 opts->tsecr = req->ts_recent;
645 remaining -= TCPOLEN_TSTAMP_ALIGNED; 645 remaining -= TCPOLEN_TSTAMP_ALIGNED;
646 } 646 }
@@ -769,25 +769,27 @@ static void tcp_tasklet_func(unsigned long data)
769 list_del(&tp->tsq_node); 769 list_del(&tp->tsq_node);
770 770
771 sk = (struct sock *)tp; 771 sk = (struct sock *)tp;
772 bh_lock_sock(sk); 772 smp_mb__before_atomic();
773 773 clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
774 if (!sock_owned_by_user(sk)) { 774
775 tcp_tsq_handler(sk); 775 if (!sk->sk_lock.owned &&
776 } else { 776 test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
777 /* defer the work to tcp_release_cb() */ 777 bh_lock_sock(sk);
778 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags); 778 if (!sock_owned_by_user(sk)) {
779 clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
780 tcp_tsq_handler(sk);
781 }
782 bh_unlock_sock(sk);
779 } 783 }
780 bh_unlock_sock(sk);
781 784
782 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
783 sk_free(sk); 785 sk_free(sk);
784 } 786 }
785} 787}
786 788
787#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ 789#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
788 (1UL << TCP_WRITE_TIMER_DEFERRED) | \ 790 TCPF_WRITE_TIMER_DEFERRED | \
789 (1UL << TCP_DELACK_TIMER_DEFERRED) | \ 791 TCPF_DELACK_TIMER_DEFERRED | \
790 (1UL << TCP_MTU_REDUCED_DEFERRED)) 792 TCPF_MTU_REDUCED_DEFERRED)
791/** 793/**
792 * tcp_release_cb - tcp release_sock() callback 794 * tcp_release_cb - tcp release_sock() callback
793 * @sk: socket 795 * @sk: socket
@@ -797,18 +799,17 @@ static void tcp_tasklet_func(unsigned long data)
797 */ 799 */
798void tcp_release_cb(struct sock *sk) 800void tcp_release_cb(struct sock *sk)
799{ 801{
800 struct tcp_sock *tp = tcp_sk(sk);
801 unsigned long flags, nflags; 802 unsigned long flags, nflags;
802 803
803 /* perform an atomic operation only if at least one flag is set */ 804 /* perform an atomic operation only if at least one flag is set */
804 do { 805 do {
805 flags = tp->tsq_flags; 806 flags = sk->sk_tsq_flags;
806 if (!(flags & TCP_DEFERRED_ALL)) 807 if (!(flags & TCP_DEFERRED_ALL))
807 return; 808 return;
808 nflags = flags & ~TCP_DEFERRED_ALL; 809 nflags = flags & ~TCP_DEFERRED_ALL;
809 } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags); 810 } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
810 811
811 if (flags & (1UL << TCP_TSQ_DEFERRED)) 812 if (flags & TCPF_TSQ_DEFERRED)
812 tcp_tsq_handler(sk); 813 tcp_tsq_handler(sk);
813 814
814 /* Here begins the tricky part : 815 /* Here begins the tricky part :
@@ -822,15 +823,15 @@ void tcp_release_cb(struct sock *sk)
822 */ 823 */
823 sock_release_ownership(sk); 824 sock_release_ownership(sk);
824 825
825 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { 826 if (flags & TCPF_WRITE_TIMER_DEFERRED) {
826 tcp_write_timer_handler(sk); 827 tcp_write_timer_handler(sk);
827 __sock_put(sk); 828 __sock_put(sk);
828 } 829 }
829 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) { 830 if (flags & TCPF_DELACK_TIMER_DEFERRED) {
830 tcp_delack_timer_handler(sk); 831 tcp_delack_timer_handler(sk);
831 __sock_put(sk); 832 __sock_put(sk);
832 } 833 }
833 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { 834 if (flags & TCPF_MTU_REDUCED_DEFERRED) {
834 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); 835 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
835 __sock_put(sk); 836 __sock_put(sk);
836 } 837 }
@@ -860,6 +861,7 @@ void tcp_wfree(struct sk_buff *skb)
860{ 861{
861 struct sock *sk = skb->sk; 862 struct sock *sk = skb->sk;
862 struct tcp_sock *tp = tcp_sk(sk); 863 struct tcp_sock *tp = tcp_sk(sk);
864 unsigned long flags, nval, oval;
863 int wmem; 865 int wmem;
864 866
865 /* Keep one reference on sk_wmem_alloc. 867 /* Keep one reference on sk_wmem_alloc.
@@ -877,16 +879,25 @@ void tcp_wfree(struct sk_buff *skb)
877 if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) 879 if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
878 goto out; 880 goto out;
879 881
880 if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && 882 for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
881 !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
882 unsigned long flags;
883 struct tsq_tasklet *tsq; 883 struct tsq_tasklet *tsq;
884 bool empty;
885
886 if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
887 goto out;
888
889 nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
890 nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
891 if (nval != oval)
892 continue;
884 893
885 /* queue this socket to tasklet queue */ 894 /* queue this socket to tasklet queue */
886 local_irq_save(flags); 895 local_irq_save(flags);
887 tsq = this_cpu_ptr(&tsq_tasklet); 896 tsq = this_cpu_ptr(&tsq_tasklet);
897 empty = list_empty(&tsq->head);
888 list_add(&tp->tsq_node, &tsq->head); 898 list_add(&tp->tsq_node, &tsq->head);
889 tasklet_schedule(&tsq->tasklet); 899 if (empty)
900 tasklet_schedule(&tsq->tasklet);
890 local_irq_restore(flags); 901 local_irq_restore(flags);
891 return; 902 return;
892 } 903 }
@@ -1027,7 +1038,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1027 skb_shinfo(skb)->gso_size = tcp_skb_mss(skb); 1038 skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
1028 1039
1029 /* Our usage of tstamp should remain private */ 1040 /* Our usage of tstamp should remain private */
1030 skb->tstamp.tv64 = 0; 1041 skb->tstamp = 0;
1031 1042
1032 /* Cleanup our debris for IP stacks */ 1043 /* Cleanup our debris for IP stacks */
1033 memset(skb->cb, 0, max(sizeof(struct inet_skb_parm), 1044 memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
@@ -1514,6 +1525,18 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1514 if (sysctl_tcp_slow_start_after_idle && 1525 if (sysctl_tcp_slow_start_after_idle &&
1515 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto) 1526 (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
1516 tcp_cwnd_application_limited(sk); 1527 tcp_cwnd_application_limited(sk);
1528
1529 /* The following conditions together indicate the starvation
1530 * is caused by insufficient sender buffer:
1531 * 1) just sent some data (see tcp_write_xmit)
1532 * 2) not cwnd limited (this else condition)
1533 * 3) no more data to send (null tcp_send_head )
1534 * 4) application is hitting buffer limit (SOCK_NOSPACE)
1535 */
1536 if (!tcp_send_head(sk) && sk->sk_socket &&
1537 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
1538 (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
1539 tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
1517 } 1540 }
1518} 1541}
1519 1542
@@ -1910,26 +1933,26 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
1910 */ 1933 */
1911static int tcp_mtu_probe(struct sock *sk) 1934static int tcp_mtu_probe(struct sock *sk)
1912{ 1935{
1913 struct tcp_sock *tp = tcp_sk(sk);
1914 struct inet_connection_sock *icsk = inet_csk(sk); 1936 struct inet_connection_sock *icsk = inet_csk(sk);
1937 struct tcp_sock *tp = tcp_sk(sk);
1915 struct sk_buff *skb, *nskb, *next; 1938 struct sk_buff *skb, *nskb, *next;
1916 struct net *net = sock_net(sk); 1939 struct net *net = sock_net(sk);
1917 int len;
1918 int probe_size; 1940 int probe_size;
1919 int size_needed; 1941 int size_needed;
1920 int copy; 1942 int copy, len;
1921 int mss_now; 1943 int mss_now;
1922 int interval; 1944 int interval;
1923 1945
1924 /* Not currently probing/verifying, 1946 /* Not currently probing/verifying,
1925 * not in recovery, 1947 * not in recovery,
1926 * have enough cwnd, and 1948 * have enough cwnd, and
1927 * not SACKing (the variable headers throw things off) */ 1949 * not SACKing (the variable headers throw things off)
1928 if (!icsk->icsk_mtup.enabled || 1950 */
1929 icsk->icsk_mtup.probe_size || 1951 if (likely(!icsk->icsk_mtup.enabled ||
1930 inet_csk(sk)->icsk_ca_state != TCP_CA_Open || 1952 icsk->icsk_mtup.probe_size ||
1931 tp->snd_cwnd < 11 || 1953 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1932 tp->rx_opt.num_sacks || tp->rx_opt.dsack) 1954 tp->snd_cwnd < 11 ||
1955 tp->rx_opt.num_sacks || tp->rx_opt.dsack))
1933 return -1; 1956 return -1;
1934 1957
1935 /* Use binary search for probe_size between tcp_mss_base, 1958 /* Use binary search for probe_size between tcp_mss_base,
@@ -2069,7 +2092,16 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
2069 limit <<= factor; 2092 limit <<= factor;
2070 2093
2071 if (atomic_read(&sk->sk_wmem_alloc) > limit) { 2094 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
2072 set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags); 2095 /* Always send the 1st or 2nd skb in write queue.
2096 * No need to wait for TX completion to call us back,
2097 * after softirq/tasklet schedule.
2098 * This helps when TX completions are delayed too much.
2099 */
2100 if (skb == sk->sk_write_queue.next ||
2101 skb->prev == sk->sk_write_queue.next)
2102 return false;
2103
2104 set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
2073 /* It is possible TX completion already happened 2105 /* It is possible TX completion already happened
2074 * before we set TSQ_THROTTLED, so we must 2106 * before we set TSQ_THROTTLED, so we must
2075 * test again the condition. 2107 * test again the condition.
@@ -2081,6 +2113,47 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
2081 return false; 2113 return false;
2082} 2114}
2083 2115
2116static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
2117{
2118 const u32 now = tcp_time_stamp;
2119
2120 if (tp->chrono_type > TCP_CHRONO_UNSPEC)
2121 tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
2122 tp->chrono_start = now;
2123 tp->chrono_type = new;
2124}
2125
2126void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type)
2127{
2128 struct tcp_sock *tp = tcp_sk(sk);
2129
2130 /* If there are multiple conditions worthy of tracking in a
2131 * chronograph then the highest priority enum takes precedence
2132 * over the other conditions. So that if something "more interesting"
2133 * starts happening, stop the previous chrono and start a new one.
2134 */
2135 if (type > tp->chrono_type)
2136 tcp_chrono_set(tp, type);
2137}
2138
2139void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
2140{
2141 struct tcp_sock *tp = tcp_sk(sk);
2142
2143
2144 /* There are multiple conditions worthy of tracking in a
2145 * chronograph, so that the highest priority enum takes
2146 * precedence over the other conditions (see tcp_chrono_start).
2147 * If a condition stops, we only stop chrono tracking if
2148 * it's the "most interesting" or current chrono we are
2149 * tracking and starts busy chrono if we have pending data.
2150 */
2151 if (tcp_write_queue_empty(sk))
2152 tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
2153 else if (type == tp->chrono_type)
2154 tcp_chrono_set(tp, TCP_CHRONO_BUSY);
2155}
2156
2084/* This routine writes packets to the network. It advances the 2157/* This routine writes packets to the network. It advances the
2085 * send_head. This happens as incoming acks open up the remote 2158 * send_head. This happens as incoming acks open up the remote
2086 * window for us. 2159 * window for us.
@@ -2103,7 +2176,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2103 unsigned int tso_segs, sent_pkts; 2176 unsigned int tso_segs, sent_pkts;
2104 int cwnd_quota; 2177 int cwnd_quota;
2105 int result; 2178 int result;
2106 bool is_cwnd_limited = false; 2179 bool is_cwnd_limited = false, is_rwnd_limited = false;
2107 u32 max_segs; 2180 u32 max_segs;
2108 2181
2109 sent_pkts = 0; 2182 sent_pkts = 0;
@@ -2140,8 +2213,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2140 break; 2213 break;
2141 } 2214 }
2142 2215
2143 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) 2216 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) {
2217 is_rwnd_limited = true;
2144 break; 2218 break;
2219 }
2145 2220
2146 if (tso_segs == 1) { 2221 if (tso_segs == 1) {
2147 if (unlikely(!tcp_nagle_test(tp, skb, mss_now, 2222 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
@@ -2167,6 +2242,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2167 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) 2242 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
2168 break; 2243 break;
2169 2244
2245 if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
2246 clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
2170 if (tcp_small_queue_check(sk, skb, 0)) 2247 if (tcp_small_queue_check(sk, skb, 0))
2171 break; 2248 break;
2172 2249
@@ -2186,6 +2263,11 @@ repair:
2186 break; 2263 break;
2187 } 2264 }
2188 2265
2266 if (is_rwnd_limited)
2267 tcp_chrono_start(sk, TCP_CHRONO_RWND_LIMITED);
2268 else
2269 tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
2270
2189 if (likely(sent_pkts)) { 2271 if (likely(sent_pkts)) {
2190 if (tcp_in_cwnd_reduction(sk)) 2272 if (tcp_in_cwnd_reduction(sk))
2191 tp->prr_out += sent_pkts; 2273 tp->prr_out += sent_pkts;
@@ -2514,7 +2596,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
2514} 2596}
2515 2597
2516/* Collapses two adjacent SKB's during retransmission. */ 2598/* Collapses two adjacent SKB's during retransmission. */
2517static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) 2599static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2518{ 2600{
2519 struct tcp_sock *tp = tcp_sk(sk); 2601 struct tcp_sock *tp = tcp_sk(sk);
2520 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); 2602 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
@@ -2525,13 +2607,17 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2525 2607
2526 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); 2608 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
2527 2609
2610 if (next_skb_size) {
2611 if (next_skb_size <= skb_availroom(skb))
2612 skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
2613 next_skb_size);
2614 else if (!skb_shift(skb, next_skb, next_skb_size))
2615 return false;
2616 }
2528 tcp_highest_sack_combine(sk, next_skb, skb); 2617 tcp_highest_sack_combine(sk, next_skb, skb);
2529 2618
2530 tcp_unlink_write_queue(next_skb, sk); 2619 tcp_unlink_write_queue(next_skb, sk);
2531 2620
2532 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
2533 next_skb_size);
2534
2535 if (next_skb->ip_summed == CHECKSUM_PARTIAL) 2621 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2536 skb->ip_summed = CHECKSUM_PARTIAL; 2622 skb->ip_summed = CHECKSUM_PARTIAL;
2537 2623
@@ -2560,6 +2646,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2560 tcp_skb_collapse_tstamp(skb, next_skb); 2646 tcp_skb_collapse_tstamp(skb, next_skb);
2561 2647
2562 sk_wmem_free_skb(sk, next_skb); 2648 sk_wmem_free_skb(sk, next_skb);
2649 return true;
2563} 2650}
2564 2651
2565/* Check if coalescing SKBs is legal. */ 2652/* Check if coalescing SKBs is legal. */
@@ -2567,14 +2654,11 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2567{ 2654{
2568 if (tcp_skb_pcount(skb) > 1) 2655 if (tcp_skb_pcount(skb) > 1)
2569 return false; 2656 return false;
2570 /* TODO: SACK collapsing could be used to remove this condition */
2571 if (skb_shinfo(skb)->nr_frags != 0)
2572 return false;
2573 if (skb_cloned(skb)) 2657 if (skb_cloned(skb))
2574 return false; 2658 return false;
2575 if (skb == tcp_send_head(sk)) 2659 if (skb == tcp_send_head(sk))
2576 return false; 2660 return false;
2577 /* Some heurestics for collapsing over SACK'd could be invented */ 2661 /* Some heuristics for collapsing over SACK'd could be invented */
2578 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) 2662 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2579 return false; 2663 return false;
2580 2664
@@ -2612,16 +2696,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2612 2696
2613 if (space < 0) 2697 if (space < 0)
2614 break; 2698 break;
2615 /* Punt if not enough space exists in the first SKB for
2616 * the data in the second
2617 */
2618 if (skb->len > skb_availroom(to))
2619 break;
2620 2699
2621 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) 2700 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
2622 break; 2701 break;
2623 2702
2624 tcp_collapse_retrans(sk, to); 2703 if (!tcp_collapse_retrans(sk, to))
2704 break;
2625 } 2705 }
2626} 2706}
2627 2707
@@ -3123,7 +3203,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
3123#endif 3203#endif
3124 3204
3125 /* Do not fool tcpdump (if any), clean our debris */ 3205 /* Do not fool tcpdump (if any), clean our debris */
3126 skb->tstamp.tv64 = 0; 3206 skb->tstamp = 0;
3127 return skb; 3207 return skb;
3128} 3208}
3129EXPORT_SYMBOL(tcp_make_synack); 3209EXPORT_SYMBOL(tcp_make_synack);
@@ -3300,6 +3380,8 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3300 fo->copied = space; 3380 fo->copied = space;
3301 3381
3302 tcp_connect_queue_skb(sk, syn_data); 3382 tcp_connect_queue_skb(sk, syn_data);
3383 if (syn_data->len)
3384 tcp_chrono_start(sk, TCP_CHRONO_BUSY);
3303 3385
3304 err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); 3386 err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
3305 3387
@@ -3464,8 +3546,6 @@ void tcp_send_ack(struct sock *sk)
3464 /* We do not want pure acks influencing TCP Small Queues or fq/pacing 3546 /* We do not want pure acks influencing TCP Small Queues or fq/pacing
3465 * too much. 3547 * too much.
3466 * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784 3548 * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
3467 * We also avoid tcp_wfree() overhead (cache line miss accessing
3468 * tp->tsq_flags) by using regular sock_wfree()
3469 */ 3549 */
3470 skb_set_tcp_pure_ack(buff); 3550 skb_set_tcp_pure_ack(buff);
3471 3551
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index bf5ea9e9bbc1..f2123075ce6e 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,6 +15,10 @@
15#define TCP_SCALABLE_AI_CNT 50U 15#define TCP_SCALABLE_AI_CNT 50U
16#define TCP_SCALABLE_MD_SCALE 3 16#define TCP_SCALABLE_MD_SCALE 3
17 17
18struct scalable {
19 u32 loss_cwnd;
20};
21
18static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) 22static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
19{ 23{
20 struct tcp_sock *tp = tcp_sk(sk); 24 struct tcp_sock *tp = tcp_sk(sk);
@@ -32,12 +36,23 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
32static u32 tcp_scalable_ssthresh(struct sock *sk) 36static u32 tcp_scalable_ssthresh(struct sock *sk)
33{ 37{
34 const struct tcp_sock *tp = tcp_sk(sk); 38 const struct tcp_sock *tp = tcp_sk(sk);
39 struct scalable *ca = inet_csk_ca(sk);
40
41 ca->loss_cwnd = tp->snd_cwnd;
35 42
36 return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); 43 return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
37} 44}
38 45
46static u32 tcp_scalable_cwnd_undo(struct sock *sk)
47{
48 const struct scalable *ca = inet_csk_ca(sk);
49
50 return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
51}
52
39static struct tcp_congestion_ops tcp_scalable __read_mostly = { 53static struct tcp_congestion_ops tcp_scalable __read_mostly = {
40 .ssthresh = tcp_scalable_ssthresh, 54 .ssthresh = tcp_scalable_ssthresh,
55 .undo_cwnd = tcp_scalable_cwnd_undo,
41 .cong_avoid = tcp_scalable_cong_avoid, 56 .cong_avoid = tcp_scalable_cong_avoid,
42 57
43 .owner = THIS_MODULE, 58 .owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3ea1cf804748..3705075f42c3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -310,7 +310,7 @@ static void tcp_delack_timer(unsigned long data)
310 inet_csk(sk)->icsk_ack.blocked = 1; 310 inet_csk(sk)->icsk_ack.blocked = 1;
311 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); 311 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
312 /* deleguate our work to tcp_release_cb() */ 312 /* deleguate our work to tcp_release_cb() */
313 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) 313 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
314 sock_hold(sk); 314 sock_hold(sk);
315 } 315 }
316 bh_unlock_sock(sk); 316 bh_unlock_sock(sk);
@@ -592,7 +592,7 @@ static void tcp_write_timer(unsigned long data)
592 tcp_write_timer_handler(sk); 592 tcp_write_timer_handler(sk);
593 } else { 593 } else {
594 /* delegate our work to tcp_release_cb() */ 594 /* delegate our work to tcp_release_cb() */
595 if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) 595 if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
596 sock_hold(sk); 596 sock_hold(sk);
597 } 597 }
598 bh_unlock_sock(sk); 598 bh_unlock_sock(sk);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 4c4bac1b5eab..218cfcc77650 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -307,6 +307,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
307static struct tcp_congestion_ops tcp_vegas __read_mostly = { 307static struct tcp_congestion_ops tcp_vegas __read_mostly = {
308 .init = tcp_vegas_init, 308 .init = tcp_vegas_init,
309 .ssthresh = tcp_reno_ssthresh, 309 .ssthresh = tcp_reno_ssthresh,
310 .undo_cwnd = tcp_reno_undo_cwnd,
310 .cong_avoid = tcp_vegas_cong_avoid, 311 .cong_avoid = tcp_vegas_cong_avoid,
311 .pkts_acked = tcp_vegas_pkts_acked, 312 .pkts_acked = tcp_vegas_pkts_acked,
312 .set_state = tcp_vegas_state, 313 .set_state = tcp_vegas_state,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 40171e163cff..76005d4b8dfc 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -30,6 +30,7 @@ struct veno {
30 u32 basertt; /* the min of all Veno rtt measurements seen (in usec) */ 30 u32 basertt; /* the min of all Veno rtt measurements seen (in usec) */
31 u32 inc; /* decide whether to increase cwnd */ 31 u32 inc; /* decide whether to increase cwnd */
32 u32 diff; /* calculate the diff rate */ 32 u32 diff; /* calculate the diff rate */
33 u32 loss_cwnd; /* cwnd when loss occured */
33}; 34};
34 35
35/* There are several situations when we must "re-start" Veno: 36/* There are several situations when we must "re-start" Veno:
@@ -193,6 +194,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
193 const struct tcp_sock *tp = tcp_sk(sk); 194 const struct tcp_sock *tp = tcp_sk(sk);
194 struct veno *veno = inet_csk_ca(sk); 195 struct veno *veno = inet_csk_ca(sk);
195 196
197 veno->loss_cwnd = tp->snd_cwnd;
196 if (veno->diff < beta) 198 if (veno->diff < beta)
197 /* in "non-congestive state", cut cwnd by 1/5 */ 199 /* in "non-congestive state", cut cwnd by 1/5 */
198 return max(tp->snd_cwnd * 4 / 5, 2U); 200 return max(tp->snd_cwnd * 4 / 5, 2U);
@@ -201,9 +203,17 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
201 return max(tp->snd_cwnd >> 1U, 2U); 203 return max(tp->snd_cwnd >> 1U, 2U);
202} 204}
203 205
206static u32 tcp_veno_cwnd_undo(struct sock *sk)
207{
208 const struct veno *veno = inet_csk_ca(sk);
209
210 return max(tcp_sk(sk)->snd_cwnd, veno->loss_cwnd);
211}
212
204static struct tcp_congestion_ops tcp_veno __read_mostly = { 213static struct tcp_congestion_ops tcp_veno __read_mostly = {
205 .init = tcp_veno_init, 214 .init = tcp_veno_init,
206 .ssthresh = tcp_veno_ssthresh, 215 .ssthresh = tcp_veno_ssthresh,
216 .undo_cwnd = tcp_veno_cwnd_undo,
207 .cong_avoid = tcp_veno_cong_avoid, 217 .cong_avoid = tcp_veno_cong_avoid,
208 .pkts_acked = tcp_veno_pkts_acked, 218 .pkts_acked = tcp_veno_pkts_acked,
209 .set_state = tcp_veno_state, 219 .set_state = tcp_veno_state,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 4b03a2e2a050..fed66dc0e0f5 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -278,6 +278,7 @@ static struct tcp_congestion_ops tcp_westwood __read_mostly = {
278 .init = tcp_westwood_init, 278 .init = tcp_westwood_init,
279 .ssthresh = tcp_reno_ssthresh, 279 .ssthresh = tcp_reno_ssthresh,
280 .cong_avoid = tcp_reno_cong_avoid, 280 .cong_avoid = tcp_reno_cong_avoid,
281 .undo_cwnd = tcp_reno_undo_cwnd,
281 .cwnd_event = tcp_westwood_event, 282 .cwnd_event = tcp_westwood_event,
282 .in_ack_event = tcp_westwood_ack, 283 .in_ack_event = tcp_westwood_ack,
283 .get_info = tcp_westwood_info, 284 .get_info = tcp_westwood_info,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 9c5fc973267f..e6ff99c4bd3b 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -37,6 +37,7 @@ struct yeah {
37 u32 fast_count; 37 u32 fast_count;
38 38
39 u32 pkts_acked; 39 u32 pkts_acked;
40 u32 loss_cwnd;
40}; 41};
41 42
42static void tcp_yeah_init(struct sock *sk) 43static void tcp_yeah_init(struct sock *sk)
@@ -219,13 +220,22 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
219 220
220 yeah->fast_count = 0; 221 yeah->fast_count = 0;
221 yeah->reno_count = max(yeah->reno_count>>1, 2U); 222 yeah->reno_count = max(yeah->reno_count>>1, 2U);
223 yeah->loss_cwnd = tp->snd_cwnd;
222 224
223 return max_t(int, tp->snd_cwnd - reduction, 2); 225 return max_t(int, tp->snd_cwnd - reduction, 2);
224} 226}
225 227
228static u32 tcp_yeah_cwnd_undo(struct sock *sk)
229{
230 const struct yeah *yeah = inet_csk_ca(sk);
231
232 return max(tcp_sk(sk)->snd_cwnd, yeah->loss_cwnd);
233}
234
226static struct tcp_congestion_ops tcp_yeah __read_mostly = { 235static struct tcp_congestion_ops tcp_yeah __read_mostly = {
227 .init = tcp_yeah_init, 236 .init = tcp_yeah_init,
228 .ssthresh = tcp_yeah_ssthresh, 237 .ssthresh = tcp_yeah_ssthresh,
238 .undo_cwnd = tcp_yeah_cwnd_undo,
229 .cong_avoid = tcp_yeah_cong_avoid, 239 .cong_avoid = tcp_yeah_cong_avoid,
230 .set_state = tcp_vegas_state, 240 .set_state = tcp_vegas_state,
231 .cwnd_event = tcp_vegas_cwnd_event, 241 .cwnd_event = tcp_vegas_cwnd_event,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5bab6c3f7a2f..1307a7c2e544 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -79,7 +79,7 @@
79 79
80#define pr_fmt(fmt) "UDP: " fmt 80#define pr_fmt(fmt) "UDP: " fmt
81 81
82#include <asm/uaccess.h> 82#include <linux/uaccess.h>
83#include <asm/ioctls.h> 83#include <asm/ioctls.h>
84#include <linux/bootmem.h> 84#include <linux/bootmem.h>
85#include <linux/highmem.h> 85#include <linux/highmem.h>
@@ -580,7 +580,8 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);
580 * Does increment socket refcount. 580 * Does increment socket refcount.
581 */ 581 */
582#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \ 582#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
583 IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) 583 IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \
584 IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
584struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, 585struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
585 __be32 daddr, __be16 dport, int dif) 586 __be32 daddr, __be16 dport, int dif)
586{ 587{
@@ -1019,7 +1020,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
1019 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, 1020 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
1020 RT_SCOPE_UNIVERSE, sk->sk_protocol, 1021 RT_SCOPE_UNIVERSE, sk->sk_protocol,
1021 flow_flags, 1022 flow_flags,
1022 faddr, saddr, dport, inet->inet_sport); 1023 faddr, saddr, dport, inet->inet_sport,
1024 sk->sk_uid);
1023 1025
1024 security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); 1026 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
1025 rt = ip_route_output_flow(net, fl4, sk); 1027 rt = ip_route_output_flow(net, fl4, sk);
@@ -1172,6 +1174,181 @@ out:
1172 return ret; 1174 return ret;
1173} 1175}
1174 1176
1177/* fully reclaim rmem/fwd memory allocated for skb */
1178static void udp_rmem_release(struct sock *sk, int size, int partial)
1179{
1180 struct udp_sock *up = udp_sk(sk);
1181 int amt;
1182
1183 if (likely(partial)) {
1184 up->forward_deficit += size;
1185 size = up->forward_deficit;
1186 if (size < (sk->sk_rcvbuf >> 2) &&
1187 !skb_queue_empty(&sk->sk_receive_queue))
1188 return;
1189 } else {
1190 size += up->forward_deficit;
1191 }
1192 up->forward_deficit = 0;
1193
1194 sk->sk_forward_alloc += size;
1195 amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
1196 sk->sk_forward_alloc -= amt;
1197
1198 if (amt)
1199 __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
1200
1201 atomic_sub(size, &sk->sk_rmem_alloc);
1202}
1203
1204/* Note: called with sk_receive_queue.lock held.
1205 * Instead of using skb->truesize here, find a copy of it in skb->dev_scratch
1206 * This avoids a cache line miss while receive_queue lock is held.
1207 * Look at __udp_enqueue_schedule_skb() to find where this copy is done.
1208 */
1209void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
1210{
1211 udp_rmem_release(sk, skb->dev_scratch, 1);
1212}
1213EXPORT_SYMBOL(udp_skb_destructor);
1214
1215/* Idea of busylocks is to let producers grab an extra spinlock
1216 * to relieve pressure on the receive_queue spinlock shared by consumer.
1217 * Under flood, this means that only one producer can be in line
1218 * trying to acquire the receive_queue spinlock.
1219 * These busylock can be allocated on a per cpu manner, instead of a
1220 * per socket one (that would consume a cache line per socket)
1221 */
1222static int udp_busylocks_log __read_mostly;
1223static spinlock_t *udp_busylocks __read_mostly;
1224
1225static spinlock_t *busylock_acquire(void *ptr)
1226{
1227 spinlock_t *busy;
1228
1229 busy = udp_busylocks + hash_ptr(ptr, udp_busylocks_log);
1230 spin_lock(busy);
1231 return busy;
1232}
1233
1234static void busylock_release(spinlock_t *busy)
1235{
1236 if (busy)
1237 spin_unlock(busy);
1238}
1239
1240int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
1241{
1242 struct sk_buff_head *list = &sk->sk_receive_queue;
1243 int rmem, delta, amt, err = -ENOMEM;
1244 spinlock_t *busy = NULL;
1245 int size;
1246
1247 /* try to avoid the costly atomic add/sub pair when the receive
1248 * queue is full; always allow at least a packet
1249 */
1250 rmem = atomic_read(&sk->sk_rmem_alloc);
1251 if (rmem > sk->sk_rcvbuf)
1252 goto drop;
1253
1254 /* Under mem pressure, it might be helpful to help udp_recvmsg()
1255 * having linear skbs :
1256 * - Reduce memory overhead and thus increase receive queue capacity
1257 * - Less cache line misses at copyout() time
1258 * - Less work at consume_skb() (less alien page frag freeing)
1259 */
1260 if (rmem > (sk->sk_rcvbuf >> 1)) {
1261 skb_condense(skb);
1262
1263 busy = busylock_acquire(sk);
1264 }
1265 size = skb->truesize;
1266 /* Copy skb->truesize into skb->dev_scratch to avoid a cache line miss
1267 * in udp_skb_destructor()
1268 */
1269 skb->dev_scratch = size;
1270
1271 /* we drop only if the receive buf is full and the receive
1272 * queue contains some other skb
1273 */
1274 rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
1275 if (rmem > (size + sk->sk_rcvbuf))
1276 goto uncharge_drop;
1277
1278 spin_lock(&list->lock);
1279 if (size >= sk->sk_forward_alloc) {
1280 amt = sk_mem_pages(size);
1281 delta = amt << SK_MEM_QUANTUM_SHIFT;
1282 if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) {
1283 err = -ENOBUFS;
1284 spin_unlock(&list->lock);
1285 goto uncharge_drop;
1286 }
1287
1288 sk->sk_forward_alloc += delta;
1289 }
1290
1291 sk->sk_forward_alloc -= size;
1292
1293 /* no need to setup a destructor, we will explicitly release the
1294 * forward allocated memory on dequeue
1295 */
1296 sock_skb_set_dropcount(sk, skb);
1297
1298 __skb_queue_tail(list, skb);
1299 spin_unlock(&list->lock);
1300
1301 if (!sock_flag(sk, SOCK_DEAD))
1302 sk->sk_data_ready(sk);
1303
1304 busylock_release(busy);
1305 return 0;
1306
1307uncharge_drop:
1308 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1309
1310drop:
1311 atomic_inc(&sk->sk_drops);
1312 busylock_release(busy);
1313 return err;
1314}
1315EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
1316
1317void udp_destruct_sock(struct sock *sk)
1318{
1319 /* reclaim completely the forward allocated memory */
1320 unsigned int total = 0;
1321 struct sk_buff *skb;
1322
1323 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1324 total += skb->truesize;
1325 kfree_skb(skb);
1326 }
1327 udp_rmem_release(sk, total, 0);
1328
1329 inet_sock_destruct(sk);
1330}
1331EXPORT_SYMBOL_GPL(udp_destruct_sock);
1332
1333int udp_init_sock(struct sock *sk)
1334{
1335 sk->sk_destruct = udp_destruct_sock;
1336 return 0;
1337}
1338EXPORT_SYMBOL_GPL(udp_init_sock);
1339
1340void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
1341{
1342 if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
1343 bool slow = lock_sock_fast(sk);
1344
1345 sk_peek_offset_bwd(sk, len);
1346 unlock_sock_fast(sk, slow);
1347 }
1348 consume_skb(skb);
1349}
1350EXPORT_SYMBOL_GPL(skb_consume_udp);
1351
1175/** 1352/**
1176 * first_packet_length - return length of first packet in receive queue 1353 * first_packet_length - return length of first packet in receive queue
1177 * @sk: socket 1354 * @sk: socket
@@ -1181,12 +1358,11 @@ out:
1181 */ 1358 */
1182static int first_packet_length(struct sock *sk) 1359static int first_packet_length(struct sock *sk)
1183{ 1360{
1184 struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; 1361 struct sk_buff_head *rcvq = &sk->sk_receive_queue;
1185 struct sk_buff *skb; 1362 struct sk_buff *skb;
1363 int total = 0;
1186 int res; 1364 int res;
1187 1365
1188 __skb_queue_head_init(&list_kill);
1189
1190 spin_lock_bh(&rcvq->lock); 1366 spin_lock_bh(&rcvq->lock);
1191 while ((skb = skb_peek(rcvq)) != NULL && 1367 while ((skb = skb_peek(rcvq)) != NULL &&
1192 udp_lib_checksum_complete(skb)) { 1368 udp_lib_checksum_complete(skb)) {
@@ -1196,18 +1372,13 @@ static int first_packet_length(struct sock *sk)
1196 IS_UDPLITE(sk)); 1372 IS_UDPLITE(sk));
1197 atomic_inc(&sk->sk_drops); 1373 atomic_inc(&sk->sk_drops);
1198 __skb_unlink(skb, rcvq); 1374 __skb_unlink(skb, rcvq);
1199 __skb_queue_tail(&list_kill, skb); 1375 total += skb->truesize;
1376 kfree_skb(skb);
1200 } 1377 }
1201 res = skb ? skb->len : -1; 1378 res = skb ? skb->len : -1;
1379 if (total)
1380 udp_rmem_release(sk, total, 1);
1202 spin_unlock_bh(&rcvq->lock); 1381 spin_unlock_bh(&rcvq->lock);
1203
1204 if (!skb_queue_empty(&list_kill)) {
1205 bool slow = lock_sock_fast(sk);
1206
1207 __skb_queue_purge(&list_kill);
1208 sk_mem_reclaim_partial(sk);
1209 unlock_sock_fast(sk, slow);
1210 }
1211 return res; 1382 return res;
1212} 1383}
1213 1384
@@ -1256,15 +1427,13 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
1256 int err; 1427 int err;
1257 int is_udplite = IS_UDPLITE(sk); 1428 int is_udplite = IS_UDPLITE(sk);
1258 bool checksum_valid = false; 1429 bool checksum_valid = false;
1259 bool slow;
1260 1430
1261 if (flags & MSG_ERRQUEUE) 1431 if (flags & MSG_ERRQUEUE)
1262 return ip_recv_error(sk, msg, len, addr_len); 1432 return ip_recv_error(sk, msg, len, addr_len);
1263 1433
1264try_again: 1434try_again:
1265 peeking = off = sk_peek_offset(sk, flags); 1435 peeking = off = sk_peek_offset(sk, flags);
1266 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), 1436 skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
1267 &peeked, &off, &err);
1268 if (!skb) 1437 if (!skb)
1269 return err; 1438 return err;
1270 1439
@@ -1281,7 +1450,8 @@ try_again:
1281 * coverage checksum (UDP-Lite), do it before the copy. 1450 * coverage checksum (UDP-Lite), do it before the copy.
1282 */ 1451 */
1283 1452
1284 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) { 1453 if (copied < ulen || peeking ||
1454 (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
1285 checksum_valid = !udp_lib_checksum_complete(skb); 1455 checksum_valid = !udp_lib_checksum_complete(skb);
1286 if (!checksum_valid) 1456 if (!checksum_valid)
1287 goto csum_copy_err; 1457 goto csum_copy_err;
@@ -1297,13 +1467,12 @@ try_again:
1297 } 1467 }
1298 1468
1299 if (unlikely(err)) { 1469 if (unlikely(err)) {
1300 trace_kfree_skb(skb, udp_recvmsg);
1301 if (!peeked) { 1470 if (!peeked) {
1302 atomic_inc(&sk->sk_drops); 1471 atomic_inc(&sk->sk_drops);
1303 UDP_INC_STATS(sock_net(sk), 1472 UDP_INC_STATS(sock_net(sk),
1304 UDP_MIB_INERRORS, is_udplite); 1473 UDP_MIB_INERRORS, is_udplite);
1305 } 1474 }
1306 skb_free_datagram_locked(sk, skb); 1475 kfree_skb(skb);
1307 return err; 1476 return err;
1308 } 1477 }
1309 1478
@@ -1322,22 +1491,21 @@ try_again:
1322 *addr_len = sizeof(*sin); 1491 *addr_len = sizeof(*sin);
1323 } 1492 }
1324 if (inet->cmsg_flags) 1493 if (inet->cmsg_flags)
1325 ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr), off); 1494 ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
1326 1495
1327 err = copied; 1496 err = copied;
1328 if (flags & MSG_TRUNC) 1497 if (flags & MSG_TRUNC)
1329 err = ulen; 1498 err = ulen;
1330 1499
1331 __skb_free_datagram_locked(sk, skb, peeking ? -err : err); 1500 skb_consume_udp(sk, skb, peeking ? -err : err);
1332 return err; 1501 return err;
1333 1502
1334csum_copy_err: 1503csum_copy_err:
1335 slow = lock_sock_fast(sk); 1504 if (!__sk_queue_drop_skb(sk, skb, flags)) {
1336 if (!skb_kill_datagram(sk, skb, flags)) {
1337 UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); 1505 UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
1338 UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1506 UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1339 } 1507 }
1340 unlock_sock_fast(sk, slow); 1508 kfree_skb(skb);
1341 1509
1342 /* starting over for a new packet, but check if we need to yield */ 1510 /* starting over for a new packet, but check if we need to yield */
1343 cond_resched(); 1511 cond_resched();
@@ -1463,9 +1631,11 @@ int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1463 sock_rps_save_rxhash(sk, skb); 1631 sock_rps_save_rxhash(sk, skb);
1464 sk_mark_napi_id(sk, skb); 1632 sk_mark_napi_id(sk, skb);
1465 sk_incoming_cpu_update(sk); 1633 sk_incoming_cpu_update(sk);
1634 } else {
1635 sk_mark_napi_id_once(sk, skb);
1466 } 1636 }
1467 1637
1468 rc = __sock_queue_rcv_skb(sk, skb); 1638 rc = __udp_enqueue_schedule_skb(sk, skb);
1469 if (rc < 0) { 1639 if (rc < 0) {
1470 int is_udplite = IS_UDPLITE(sk); 1640 int is_udplite = IS_UDPLITE(sk);
1471 1641
@@ -1480,7 +1650,6 @@ int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1480 } 1650 }
1481 1651
1482 return 0; 1652 return 0;
1483
1484} 1653}
1485 1654
1486static struct static_key udp_encap_needed __read_mostly; 1655static struct static_key udp_encap_needed __read_mostly;
@@ -1502,7 +1671,6 @@ EXPORT_SYMBOL(udp_encap_enable);
1502int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 1671int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1503{ 1672{
1504 struct udp_sock *up = udp_sk(sk); 1673 struct udp_sock *up = udp_sk(sk);
1505 int rc;
1506 int is_udplite = IS_UDPLITE(sk); 1674 int is_udplite = IS_UDPLITE(sk);
1507 1675
1508 /* 1676 /*
@@ -1589,25 +1757,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1589 goto drop; 1757 goto drop;
1590 1758
1591 udp_csum_pull_header(skb); 1759 udp_csum_pull_header(skb);
1592 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
1593 __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
1594 is_udplite);
1595 goto drop;
1596 }
1597
1598 rc = 0;
1599 1760
1600 ipv4_pktinfo_prepare(sk, skb); 1761 ipv4_pktinfo_prepare(sk, skb);
1601 bh_lock_sock(sk); 1762 return __udp_queue_rcv_skb(sk, skb);
1602 if (!sock_owned_by_user(sk))
1603 rc = __udp_queue_rcv_skb(sk, skb);
1604 else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
1605 bh_unlock_sock(sk);
1606 goto drop;
1607 }
1608 bh_unlock_sock(sk);
1609
1610 return rc;
1611 1763
1612csum_error: 1764csum_error:
1613 __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); 1765 __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
@@ -2217,13 +2369,13 @@ struct proto udp_prot = {
2217 .connect = ip4_datagram_connect, 2369 .connect = ip4_datagram_connect,
2218 .disconnect = udp_disconnect, 2370 .disconnect = udp_disconnect,
2219 .ioctl = udp_ioctl, 2371 .ioctl = udp_ioctl,
2372 .init = udp_init_sock,
2220 .destroy = udp_destroy_sock, 2373 .destroy = udp_destroy_sock,
2221 .setsockopt = udp_setsockopt, 2374 .setsockopt = udp_setsockopt,
2222 .getsockopt = udp_getsockopt, 2375 .getsockopt = udp_getsockopt,
2223 .sendmsg = udp_sendmsg, 2376 .sendmsg = udp_sendmsg,
2224 .recvmsg = udp_recvmsg, 2377 .recvmsg = udp_recvmsg,
2225 .sendpage = udp_sendpage, 2378 .sendpage = udp_sendpage,
2226 .backlog_rcv = __udp_queue_rcv_skb,
2227 .release_cb = ip4_datagram_release_cb, 2379 .release_cb = ip4_datagram_release_cb,
2228 .hash = udp_lib_hash, 2380 .hash = udp_lib_hash,
2229 .unhash = udp_lib_unhash, 2381 .unhash = udp_lib_unhash,
@@ -2512,6 +2664,7 @@ EXPORT_SYMBOL(udp_flow_hashrnd);
2512void __init udp_init(void) 2664void __init udp_init(void)
2513{ 2665{
2514 unsigned long limit; 2666 unsigned long limit;
2667 unsigned int i;
2515 2668
2516 udp_table_init(&udp_table, "UDP"); 2669 udp_table_init(&udp_table, "UDP");
2517 limit = nr_free_buffer_pages() / 8; 2670 limit = nr_free_buffer_pages() / 8;
@@ -2522,4 +2675,13 @@ void __init udp_init(void)
2522 2675
2523 sysctl_udp_rmem_min = SK_MEM_QUANTUM; 2676 sysctl_udp_rmem_min = SK_MEM_QUANTUM;
2524 sysctl_udp_wmem_min = SK_MEM_QUANTUM; 2677 sysctl_udp_wmem_min = SK_MEM_QUANTUM;
2678
2679 /* 16 spinlocks per cpu */
2680 udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
2681 udp_busylocks = kmalloc(sizeof(spinlock_t) << udp_busylocks_log,
2682 GFP_KERNEL);
2683 if (!udp_busylocks)
2684 panic("UDP: failed to alloc udp_busylocks\n");
2685 for (i = 0; i < (1U << udp_busylocks_log); i++)
2686 spin_lock_init(udp_busylocks + i);
2525} 2687}
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index ff450c2aad9b..59f10fe9782e 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -50,10 +50,11 @@ struct proto udplite_prot = {
50 .sendmsg = udp_sendmsg, 50 .sendmsg = udp_sendmsg,
51 .recvmsg = udp_recvmsg, 51 .recvmsg = udp_recvmsg,
52 .sendpage = udp_sendpage, 52 .sendpage = udp_sendpage,
53 .backlog_rcv = __udp_queue_rcv_skb,
54 .hash = udp_lib_hash, 53 .hash = udp_lib_hash,
55 .unhash = udp_lib_unhash, 54 .unhash = udp_lib_unhash,
56 .get_port = udp_v4_get_port, 55 .get_port = udp_v4_get_port,
56 .memory_allocated = &udp_memory_allocated,
57 .sysctl_mem = sysctl_udp_mem,
57 .obj_size = sizeof(struct udp_sock), 58 .obj_size = sizeof(struct udp_sock),
58 .h.udp_table = &udplite_table, 59 .h.udp_table = &udplite_table,
59#ifdef CONFIG_COMPAT 60#ifdef CONFIG_COMPAT
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 2343e4f2e0bf..ec1267e2bd1f 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -289,4 +289,39 @@ config IPV6_PIMSM_V2
289 Support for IPv6 PIM multicast routing protocol PIM-SMv2. 289 Support for IPv6 PIM multicast routing protocol PIM-SMv2.
290 If unsure, say N. 290 If unsure, say N.
291 291
292config IPV6_SEG6_LWTUNNEL
293 bool "IPv6: Segment Routing Header encapsulation support"
294 depends on IPV6
295 select LWTUNNEL
296 ---help---
297 Support for encapsulation of packets within an outer IPv6
298 header and a Segment Routing Header using the lightweight
299 tunnels mechanism.
300
301 If unsure, say N.
302
303config IPV6_SEG6_INLINE
304 bool "IPv6: direct Segment Routing Header insertion "
305 depends on IPV6_SEG6_LWTUNNEL
306 ---help---
307 Support for direct insertion of the Segment Routing Header,
308 also known as inline mode. Be aware that direct insertion of
309 extension headers (as opposed to encapsulation) may break
310 multiple mechanisms such as PMTUD or IPSec AH. Use this feature
311 only if you know exactly what you are doing.
312
313 If unsure, say N.
314
315config IPV6_SEG6_HMAC
316 bool "IPv6: Segment Routing HMAC support"
317 depends on IPV6
318 select CRYPTO_HMAC
319 select CRYPTO_SHA1
320 select CRYPTO_SHA256
321 ---help---
322 Support for HMAC signature generation and verification
323 of SR-enabled packets.
324
325 If unsure, say N.
326
292endif # IPV6 327endif # IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index c174ccb340a1..a9e9fec387ce 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
9 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ 9 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
10 raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ 10 raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
11 exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ 11 exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
12 udp_offload.o 12 udp_offload.o seg6.o
13 13
14ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o 14ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
15 15
@@ -23,6 +23,8 @@ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
23ipv6-$(CONFIG_PROC_FS) += proc.o 23ipv6-$(CONFIG_PROC_FS) += proc.o
24ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o 24ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
25ipv6-$(CONFIG_NETLABEL) += calipso.o 25ipv6-$(CONFIG_NETLABEL) += calipso.o
26ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o
27ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
26 28
27ipv6-objs += $(ipv6-y) 29ipv6-objs += $(ipv6-y)
28 30
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4bc5ba3ae452..c1e124bc8e1e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -238,6 +238,11 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
238 .use_oif_addrs_only = 0, 238 .use_oif_addrs_only = 0,
239 .ignore_routes_with_linkdown = 0, 239 .ignore_routes_with_linkdown = 0,
240 .keep_addr_on_down = 0, 240 .keep_addr_on_down = 0,
241 .seg6_enabled = 0,
242#ifdef CONFIG_IPV6_SEG6_HMAC
243 .seg6_require_hmac = 0,
244#endif
245 .enhanced_dad = 1,
241}; 246};
242 247
243static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { 248static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -284,6 +289,11 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
284 .use_oif_addrs_only = 0, 289 .use_oif_addrs_only = 0,
285 .ignore_routes_with_linkdown = 0, 290 .ignore_routes_with_linkdown = 0,
286 .keep_addr_on_down = 0, 291 .keep_addr_on_down = 0,
292 .seg6_enabled = 0,
293#ifdef CONFIG_IPV6_SEG6_HMAC
294 .seg6_require_hmac = 0,
295#endif
296 .enhanced_dad = 1,
287}; 297};
288 298
289/* Check if a valid qdisc is available */ 299/* Check if a valid qdisc is available */
@@ -3727,12 +3737,21 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
3727{ 3737{
3728 unsigned long rand_num; 3738 unsigned long rand_num;
3729 struct inet6_dev *idev = ifp->idev; 3739 struct inet6_dev *idev = ifp->idev;
3740 u64 nonce;
3730 3741
3731 if (ifp->flags & IFA_F_OPTIMISTIC) 3742 if (ifp->flags & IFA_F_OPTIMISTIC)
3732 rand_num = 0; 3743 rand_num = 0;
3733 else 3744 else
3734 rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1); 3745 rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
3735 3746
3747 nonce = 0;
3748 if (idev->cnf.enhanced_dad ||
3749 dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad) {
3750 do
3751 get_random_bytes(&nonce, 6);
3752 while (nonce == 0);
3753 }
3754 ifp->dad_nonce = nonce;
3736 ifp->dad_probes = idev->cnf.dad_transmits; 3755 ifp->dad_probes = idev->cnf.dad_transmits;
3737 addrconf_mod_dad_work(ifp, rand_num); 3756 addrconf_mod_dad_work(ifp, rand_num);
3738} 3757}
@@ -3910,7 +3929,8 @@ static void addrconf_dad_work(struct work_struct *w)
3910 3929
3911 /* send a neighbour solicitation for our addr */ 3930 /* send a neighbour solicitation for our addr */
3912 addrconf_addr_solict_mult(&ifp->addr, &mcaddr); 3931 addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
3913 ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any); 3932 ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any,
3933 ifp->dad_nonce);
3914out: 3934out:
3915 in6_ifa_put(ifp); 3935 in6_ifa_put(ifp);
3916 rtnl_unlock(); 3936 rtnl_unlock();
@@ -4950,6 +4970,11 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
4950 array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; 4970 array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
4951 array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; 4971 array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
4952 array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; 4972 array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
4973 array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
4974#ifdef CONFIG_IPV6_SEG6_HMAC
4975 array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac;
4976#endif
4977 array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
4953} 4978}
4954 4979
4955static inline size_t inet6_ifla6_size(void) 4980static inline size_t inet6_ifla6_size(void)
@@ -6042,6 +6067,29 @@ static const struct ctl_table addrconf_sysctl[] = {
6042 6067
6043 }, 6068 },
6044 { 6069 {
6070 .procname = "seg6_enabled",
6071 .data = &ipv6_devconf.seg6_enabled,
6072 .maxlen = sizeof(int),
6073 .mode = 0644,
6074 .proc_handler = proc_dointvec,
6075 },
6076#ifdef CONFIG_IPV6_SEG6_HMAC
6077 {
6078 .procname = "seg6_require_hmac",
6079 .data = &ipv6_devconf.seg6_require_hmac,
6080 .maxlen = sizeof(int),
6081 .mode = 0644,
6082 .proc_handler = proc_dointvec,
6083 },
6084#endif
6085 {
6086 .procname = "enhanced_dad",
6087 .data = &ipv6_devconf.enhanced_dad,
6088 .maxlen = sizeof(int),
6089 .mode = 0644,
6090 .proc_handler = proc_dointvec,
6091 },
6092 {
6045 /* sentinel */ 6093 /* sentinel */
6046 } 6094 }
6047}; 6095};
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 46ad699937fd..aa42123bc301 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -61,8 +61,9 @@
61#include <net/ip6_tunnel.h> 61#include <net/ip6_tunnel.h>
62#endif 62#endif
63#include <net/calipso.h> 63#include <net/calipso.h>
64#include <net/seg6.h>
64 65
65#include <asm/uaccess.h> 66#include <linux/uaccess.h>
66#include <linux/mroute6.h> 67#include <linux/mroute6.h>
67 68
68#include "ip6_offload.h" 69#include "ip6_offload.h"
@@ -257,6 +258,14 @@ lookup_protocol:
257 goto out; 258 goto out;
258 } 259 }
259 } 260 }
261
262 if (!kern) {
263 err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
264 if (err) {
265 sk_common_release(sk);
266 goto out;
267 }
268 }
260out: 269out:
261 return err; 270 return err;
262out_rcu_unlock: 271out_rcu_unlock:
@@ -678,6 +687,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
678 fl6.flowi6_mark = sk->sk_mark; 687 fl6.flowi6_mark = sk->sk_mark;
679 fl6.fl6_dport = inet->inet_dport; 688 fl6.fl6_dport = inet->inet_dport;
680 fl6.fl6_sport = inet->inet_sport; 689 fl6.fl6_sport = inet->inet_sport;
690 fl6.flowi6_uid = sk->sk_uid;
681 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); 691 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
682 692
683 rcu_read_lock(); 693 rcu_read_lock();
@@ -990,6 +1000,10 @@ static int __init inet6_init(void)
990 if (err) 1000 if (err)
991 goto calipso_fail; 1001 goto calipso_fail;
992 1002
1003 err = seg6_init();
1004 if (err)
1005 goto seg6_fail;
1006
993#ifdef CONFIG_SYSCTL 1007#ifdef CONFIG_SYSCTL
994 err = ipv6_sysctl_register(); 1008 err = ipv6_sysctl_register();
995 if (err) 1009 if (err)
@@ -1000,8 +1014,10 @@ out:
1000 1014
1001#ifdef CONFIG_SYSCTL 1015#ifdef CONFIG_SYSCTL
1002sysctl_fail: 1016sysctl_fail:
1003 calipso_exit(); 1017 seg6_exit();
1004#endif 1018#endif
1019seg6_fail:
1020 calipso_exit();
1005calipso_fail: 1021calipso_fail:
1006 pingv6_exit(); 1022 pingv6_exit();
1007pingv6_fail: 1023pingv6_fail:
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 0630a4d5daaa..189eb10b742d 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -662,9 +662,10 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
662 return 0; 662 return 0;
663 663
664 if (type == NDISC_REDIRECT) 664 if (type == NDISC_REDIRECT)
665 ip6_redirect(skb, net, skb->dev->ifindex, 0); 665 ip6_redirect(skb, net, skb->dev->ifindex, 0,
666 sock_net_uid(net, NULL));
666 else 667 else
667 ip6_update_pmtu(skb, net, info, 0, 0); 668 ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
668 xfrm_state_put(x); 669 xfrm_state_put(x);
669 670
670 return 0; 671 return 0;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index ccf40550c475..a3eaafd87100 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -33,7 +33,7 @@
33#include <net/dsfield.h> 33#include <net/dsfield.h>
34 34
35#include <linux/errqueue.h> 35#include <linux/errqueue.h>
36#include <asm/uaccess.h> 36#include <linux/uaccess.h>
37 37
38static bool ipv6_mapped_addr_any(const struct in6_addr *a) 38static bool ipv6_mapped_addr_any(const struct in6_addr *a)
39{ 39{
@@ -54,6 +54,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
54 fl6->fl6_dport = inet->inet_dport; 54 fl6->fl6_dport = inet->inet_dport;
55 fl6->fl6_sport = inet->inet_sport; 55 fl6->fl6_sport = inet->inet_sport;
56 fl6->flowlabel = np->flow_label; 56 fl6->flowlabel = np->flow_label;
57 fl6->flowi6_uid = sk->sk_uid;
57 58
58 if (!fl6->flowi6_oif) 59 if (!fl6->flowi6_oif)
59 fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; 60 fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
@@ -700,7 +701,7 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
700 struct sockaddr_in6 sin6; 701 struct sockaddr_in6 sin6;
701 __be16 *ports = (__be16 *) skb_transport_header(skb); 702 __be16 *ports = (__be16 *) skb_transport_header(skb);
702 703
703 if (skb_transport_offset(skb) + 4 <= skb->len) { 704 if (skb_transport_offset(skb) + 4 <= (int)skb->len) {
704 /* All current transport protocols have the port numbers in the 705 /* All current transport protocols have the port numbers in the
705 * first four bytes of the transport header and this function is 706 * first four bytes of the transport header and this function is
706 * written with this assumption in mind. 707 * written with this assumption in mind.
@@ -717,6 +718,11 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
717 put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); 718 put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
718 } 719 }
719 } 720 }
721 if (np->rxopt.bits.recvfragsize && opt->frag_max_size) {
722 int val = opt->frag_max_size;
723
724 put_cmsg(msg, SOL_IPV6, IPV6_RECVFRAGSIZE, sizeof(val), &val);
725 }
720} 726}
721 727
722void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, 728void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 111ba55fd512..cbcdd5db31f4 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -474,9 +474,10 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
474 return 0; 474 return 0;
475 475
476 if (type == NDISC_REDIRECT) 476 if (type == NDISC_REDIRECT)
477 ip6_redirect(skb, net, skb->dev->ifindex, 0); 477 ip6_redirect(skb, net, skb->dev->ifindex, 0,
478 sock_net_uid(net, NULL));
478 else 479 else
479 ip6_update_pmtu(skb, net, info, 0, 0); 480 ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
480 xfrm_state_put(x); 481 xfrm_state_put(x);
481 482
482 return 0; 483 return 0;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 139ceb68bd37..e4198502fd98 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -47,6 +47,11 @@
47#if IS_ENABLED(CONFIG_IPV6_MIP6) 47#if IS_ENABLED(CONFIG_IPV6_MIP6)
48#include <net/xfrm.h> 48#include <net/xfrm.h>
49#endif 49#endif
50#include <linux/seg6.h>
51#include <net/seg6.h>
52#ifdef CONFIG_IPV6_SEG6_HMAC
53#include <net/seg6_hmac.h>
54#endif
50 55
51#include <linux/uaccess.h> 56#include <linux/uaccess.h>
52 57
@@ -227,7 +232,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
227 ipv6h->saddr = hao->addr; 232 ipv6h->saddr = hao->addr;
228 hao->addr = tmp_addr; 233 hao->addr = tmp_addr;
229 234
230 if (skb->tstamp.tv64 == 0) 235 if (skb->tstamp == 0)
231 __net_timestamp(skb); 236 __net_timestamp(skb);
232 237
233 return true; 238 return true;
@@ -286,6 +291,182 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
286 return -1; 291 return -1;
287} 292}
288 293
294static void seg6_update_csum(struct sk_buff *skb)
295{
296 struct ipv6_sr_hdr *hdr;
297 struct in6_addr *addr;
298 __be32 from, to;
299
300 /* srh is at transport offset and seg_left is already decremented
301 * but daddr is not yet updated with next segment
302 */
303
304 hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
305 addr = hdr->segments + hdr->segments_left;
306
307 hdr->segments_left++;
308 from = *(__be32 *)hdr;
309
310 hdr->segments_left--;
311 to = *(__be32 *)hdr;
312
313 /* update skb csum with diff resulting from seg_left decrement */
314
315 update_csum_diff4(skb, from, to);
316
317 /* compute csum diff between current and next segment and update */
318
319 update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr),
320 (__be32 *)addr);
321}
322
323static int ipv6_srh_rcv(struct sk_buff *skb)
324{
325 struct inet6_skb_parm *opt = IP6CB(skb);
326 struct net *net = dev_net(skb->dev);
327 struct ipv6_sr_hdr *hdr;
328 struct inet6_dev *idev;
329 struct in6_addr *addr;
330 bool cleanup = false;
331 int accept_seg6;
332
333 hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
334
335 idev = __in6_dev_get(skb->dev);
336
337 accept_seg6 = net->ipv6.devconf_all->seg6_enabled;
338 if (accept_seg6 > idev->cnf.seg6_enabled)
339 accept_seg6 = idev->cnf.seg6_enabled;
340
341 if (!accept_seg6) {
342 kfree_skb(skb);
343 return -1;
344 }
345
346#ifdef CONFIG_IPV6_SEG6_HMAC
347 if (!seg6_hmac_validate_skb(skb)) {
348 kfree_skb(skb);
349 return -1;
350 }
351#endif
352
353looped_back:
354 if (hdr->segments_left > 0) {
355 if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 &&
356 sr_has_cleanup(hdr))
357 cleanup = true;
358 } else {
359 if (hdr->nexthdr == NEXTHDR_IPV6) {
360 int offset = (hdr->hdrlen + 1) << 3;
361
362 skb_postpull_rcsum(skb, skb_network_header(skb),
363 skb_network_header_len(skb));
364
365 if (!pskb_pull(skb, offset)) {
366 kfree_skb(skb);
367 return -1;
368 }
369 skb_postpull_rcsum(skb, skb_transport_header(skb),
370 offset);
371
372 skb_reset_network_header(skb);
373 skb_reset_transport_header(skb);
374 skb->encapsulation = 0;
375
376 __skb_tunnel_rx(skb, skb->dev, net);
377
378 netif_rx(skb);
379 return -1;
380 }
381
382 opt->srcrt = skb_network_header_len(skb);
383 opt->lastopt = opt->srcrt;
384 skb->transport_header += (hdr->hdrlen + 1) << 3;
385 opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
386
387 return 1;
388 }
389
390 if (hdr->segments_left >= (hdr->hdrlen >> 1)) {
391 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
392 IPSTATS_MIB_INHDRERRORS);
393 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
394 ((&hdr->segments_left) -
395 skb_network_header(skb)));
396 kfree_skb(skb);
397 return -1;
398 }
399
400 if (skb_cloned(skb)) {
401 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
402 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
403 IPSTATS_MIB_OUTDISCARDS);
404 kfree_skb(skb);
405 return -1;
406 }
407 }
408
409 hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
410
411 hdr->segments_left--;
412 addr = hdr->segments + hdr->segments_left;
413
414 skb_push(skb, sizeof(struct ipv6hdr));
415
416 if (skb->ip_summed == CHECKSUM_COMPLETE)
417 seg6_update_csum(skb);
418
419 ipv6_hdr(skb)->daddr = *addr;
420
421 if (cleanup) {
422 int srhlen = (hdr->hdrlen + 1) << 3;
423 int nh = hdr->nexthdr;
424
425 skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen);
426 memmove(skb_network_header(skb) + srhlen,
427 skb_network_header(skb),
428 (unsigned char *)hdr - skb_network_header(skb));
429 skb->network_header += srhlen;
430 ipv6_hdr(skb)->nexthdr = nh;
431 ipv6_hdr(skb)->payload_len = htons(skb->len -
432 sizeof(struct ipv6hdr));
433 skb_push_rcsum(skb, sizeof(struct ipv6hdr));
434 }
435
436 skb_dst_drop(skb);
437
438 ip6_route_input(skb);
439
440 if (skb_dst(skb)->error) {
441 dst_input(skb);
442 return -1;
443 }
444
445 if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
446 if (ipv6_hdr(skb)->hop_limit <= 1) {
447 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
448 IPSTATS_MIB_INHDRERRORS);
449 icmpv6_send(skb, ICMPV6_TIME_EXCEED,
450 ICMPV6_EXC_HOPLIMIT, 0);
451 kfree_skb(skb);
452 return -1;
453 }
454 ipv6_hdr(skb)->hop_limit--;
455
456 /* be sure that srh is still present before reinjecting */
457 if (!cleanup) {
458 skb_pull(skb, sizeof(struct ipv6hdr));
459 goto looped_back;
460 }
461 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
462 IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
463 }
464
465 dst_input(skb);
466
467 return -1;
468}
469
289/******************************** 470/********************************
290 Routing header. 471 Routing header.
291 ********************************/ 472 ********************************/
@@ -326,6 +507,10 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
326 return -1; 507 return -1;
327 } 508 }
328 509
510 /* segment routing */
511 if (hdr->type == IPV6_SRCRT_TYPE_4)
512 return ipv6_srh_rcv(skb);
513
329looped_back: 514looped_back:
330 if (hdr->segments_left == 0) { 515 if (hdr->segments_left == 0) {
331 switch (hdr->type) { 516 switch (hdr->type) {
@@ -679,9 +864,9 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
679 * for headers. 864 * for headers.
680 */ 865 */
681 866
682static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, 867static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto,
683 struct ipv6_rt_hdr *opt, 868 struct ipv6_rt_hdr *opt,
684 struct in6_addr **addr_p) 869 struct in6_addr **addr_p, struct in6_addr *saddr)
685{ 870{
686 struct rt0_hdr *phdr, *ihdr; 871 struct rt0_hdr *phdr, *ihdr;
687 int hops; 872 int hops;
@@ -704,6 +889,62 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
704 *proto = NEXTHDR_ROUTING; 889 *proto = NEXTHDR_ROUTING;
705} 890}
706 891
892static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
893 struct ipv6_rt_hdr *opt,
894 struct in6_addr **addr_p, struct in6_addr *saddr)
895{
896 struct ipv6_sr_hdr *sr_phdr, *sr_ihdr;
897 int plen, hops;
898
899 sr_ihdr = (struct ipv6_sr_hdr *)opt;
900 plen = (sr_ihdr->hdrlen + 1) << 3;
901
902 sr_phdr = (struct ipv6_sr_hdr *)skb_push(skb, plen);
903 memcpy(sr_phdr, sr_ihdr, sizeof(struct ipv6_sr_hdr));
904
905 hops = sr_ihdr->first_segment + 1;
906 memcpy(sr_phdr->segments + 1, sr_ihdr->segments + 1,
907 (hops - 1) * sizeof(struct in6_addr));
908
909 sr_phdr->segments[0] = **addr_p;
910 *addr_p = &sr_ihdr->segments[hops - 1];
911
912#ifdef CONFIG_IPV6_SEG6_HMAC
913 if (sr_has_hmac(sr_phdr)) {
914 struct net *net = NULL;
915
916 if (skb->dev)
917 net = dev_net(skb->dev);
918 else if (skb->sk)
919 net = sock_net(skb->sk);
920
921 WARN_ON(!net);
922
923 if (net)
924 seg6_push_hmac(net, saddr, sr_phdr);
925 }
926#endif
927
928 sr_phdr->nexthdr = *proto;
929 *proto = NEXTHDR_ROUTING;
930}
931
932static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
933 struct ipv6_rt_hdr *opt,
934 struct in6_addr **addr_p, struct in6_addr *saddr)
935{
936 switch (opt->type) {
937 case IPV6_SRCRT_TYPE_0:
938 ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr);
939 break;
940 case IPV6_SRCRT_TYPE_4:
941 ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr);
942 break;
943 default:
944 break;
945 }
946}
947
707static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt) 948static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
708{ 949{
709 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt)); 950 struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt));
@@ -715,10 +956,10 @@ static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv
715 956
716void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, 957void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
717 u8 *proto, 958 u8 *proto,
718 struct in6_addr **daddr) 959 struct in6_addr **daddr, struct in6_addr *saddr)
719{ 960{
720 if (opt->srcrt) { 961 if (opt->srcrt) {
721 ipv6_push_rthdr(skb, proto, opt->srcrt, daddr); 962 ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr);
722 /* 963 /*
723 * IPV6_RTHDRDSTOPTS is ignored 964 * IPV6_RTHDRDSTOPTS is ignored
724 * unless IPV6_RTHDR is set (RFC3542). 965 * unless IPV6_RTHDR is set (RFC3542).
@@ -945,7 +1186,22 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
945 return NULL; 1186 return NULL;
946 1187
947 *orig = fl6->daddr; 1188 *orig = fl6->daddr;
948 fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; 1189
1190 switch (opt->srcrt->type) {
1191 case IPV6_SRCRT_TYPE_0:
1192 fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
1193 break;
1194 case IPV6_SRCRT_TYPE_4:
1195 {
1196 struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt;
1197
1198 fl6->daddr = srh->segments[srh->first_segment];
1199 break;
1200 }
1201 default:
1202 return NULL;
1203 }
1204
949 return orig; 1205 return orig;
950} 1206}
951EXPORT_SYMBOL_GPL(fl6_update_dst); 1207EXPORT_SYMBOL_GPL(fl6_update_dst);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 2772004ba5a1..3036f665e6c8 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -70,7 +70,7 @@
70#include <net/dsfield.h> 70#include <net/dsfield.h>
71#include <net/l3mdev.h> 71#include <net/l3mdev.h>
72 72
73#include <asm/uaccess.h> 73#include <linux/uaccess.h>
74 74
75/* 75/*
76 * The ICMP socket(s). This is the most convenient way to flow control 76 * The ICMP socket(s). This is the most convenient way to flow control
@@ -92,9 +92,10 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
92 struct net *net = dev_net(skb->dev); 92 struct net *net = dev_net(skb->dev);
93 93
94 if (type == ICMPV6_PKT_TOOBIG) 94 if (type == ICMPV6_PKT_TOOBIG)
95 ip6_update_pmtu(skb, net, info, 0, 0); 95 ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
96 else if (type == NDISC_REDIRECT) 96 else if (type == NDISC_REDIRECT)
97 ip6_redirect(skb, net, skb->dev->ifindex, 0); 97 ip6_redirect(skb, net, skb->dev->ifindex, 0,
98 sock_net_uid(net, NULL));
98 99
99 if (!(type & ICMPV6_INFOMSG_MASK)) 100 if (!(type & ICMPV6_INFOMSG_MASK))
100 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) 101 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
@@ -486,6 +487,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
486 fl6.flowi6_oif = iif; 487 fl6.flowi6_oif = iif;
487 fl6.fl6_icmp_type = type; 488 fl6.fl6_icmp_type = type;
488 fl6.fl6_icmp_code = code; 489 fl6.fl6_icmp_code = code;
490 fl6.flowi6_uid = sock_net_uid(net, NULL);
489 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); 491 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
490 492
491 sk = icmpv6_xmit_lock(net); 493 sk = icmpv6_xmit_lock(net);
@@ -660,6 +662,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
660 fl6.flowi6_oif = skb->dev->ifindex; 662 fl6.flowi6_oif = skb->dev->ifindex;
661 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; 663 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
662 fl6.flowi6_mark = mark; 664 fl6.flowi6_mark = mark;
665 fl6.flowi6_uid = sock_net_uid(net, NULL);
663 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); 666 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
664 667
665 sk = icmpv6_xmit_lock(net); 668 sk = icmpv6_xmit_lock(net);
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index e50c27a93e17..a7bc54ab46e2 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -6,29 +6,88 @@
6#include <linux/socket.h> 6#include <linux/socket.h>
7#include <linux/types.h> 7#include <linux/types.h>
8#include <net/checksum.h> 8#include <net/checksum.h>
9#include <net/dst_cache.h>
9#include <net/ip.h> 10#include <net/ip.h>
10#include <net/ip6_fib.h> 11#include <net/ip6_fib.h>
12#include <net/ip6_route.h>
11#include <net/lwtunnel.h> 13#include <net/lwtunnel.h>
12#include <net/protocol.h> 14#include <net/protocol.h>
13#include <uapi/linux/ila.h> 15#include <uapi/linux/ila.h>
14#include "ila.h" 16#include "ila.h"
15 17
18struct ila_lwt {
19 struct ila_params p;
20 struct dst_cache dst_cache;
21 u32 connected : 1;
22};
23
24static inline struct ila_lwt *ila_lwt_lwtunnel(
25 struct lwtunnel_state *lwt)
26{
27 return (struct ila_lwt *)lwt->data;
28}
29
16static inline struct ila_params *ila_params_lwtunnel( 30static inline struct ila_params *ila_params_lwtunnel(
17 struct lwtunnel_state *lwstate) 31 struct lwtunnel_state *lwt)
18{ 32{
19 return (struct ila_params *)lwstate->data; 33 return &ila_lwt_lwtunnel(lwt)->p;
20} 34}
21 35
22static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) 36static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
23{ 37{
24 struct dst_entry *dst = skb_dst(skb); 38 struct dst_entry *orig_dst = skb_dst(skb);
39 struct rt6_info *rt = (struct rt6_info *)orig_dst;
40 struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate);
41 struct dst_entry *dst;
42 int err = -EINVAL;
25 43
26 if (skb->protocol != htons(ETH_P_IPV6)) 44 if (skb->protocol != htons(ETH_P_IPV6))
27 goto drop; 45 goto drop;
28 46
29 ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), true); 47 ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate),
48 true);
30 49
31 return dst->lwtstate->orig_output(net, sk, skb); 50 if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) {
51 /* Already have a next hop address in route, no need for
52 * dest cache route.
53 */
54 return orig_dst->lwtstate->orig_output(net, sk, skb);
55 }
56
57 dst = dst_cache_get(&ilwt->dst_cache);
58 if (unlikely(!dst)) {
59 struct ipv6hdr *ip6h = ipv6_hdr(skb);
60 struct flowi6 fl6;
61
62 /* Lookup a route for the new destination. Take into
63 * account that the base route may already have a gateway.
64 */
65
66 memset(&fl6, 0, sizeof(fl6));
67 fl6.flowi6_oif = orig_dst->dev->ifindex;
68 fl6.flowi6_iif = LOOPBACK_IFINDEX;
69 fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst,
70 &ip6h->daddr);
71
72 dst = ip6_route_output(net, NULL, &fl6);
73 if (dst->error) {
74 err = -EHOSTUNREACH;
75 dst_release(dst);
76 goto drop;
77 }
78
79 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
80 if (IS_ERR(dst)) {
81 err = PTR_ERR(dst);
82 goto drop;
83 }
84
85 if (ilwt->connected)
86 dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr);
87 }
88
89 skb_dst_set(skb, dst);
90 return dst_output(net, sk, skb);
32 91
33drop: 92drop:
34 kfree_skb(skb); 93 kfree_skb(skb);
@@ -60,9 +119,9 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
60 unsigned int family, const void *cfg, 119 unsigned int family, const void *cfg,
61 struct lwtunnel_state **ts) 120 struct lwtunnel_state **ts)
62{ 121{
122 struct ila_lwt *ilwt;
63 struct ila_params *p; 123 struct ila_params *p;
64 struct nlattr *tb[ILA_ATTR_MAX + 1]; 124 struct nlattr *tb[ILA_ATTR_MAX + 1];
65 size_t encap_len = sizeof(*p);
66 struct lwtunnel_state *newts; 125 struct lwtunnel_state *newts;
67 const struct fib6_config *cfg6 = cfg; 126 const struct fib6_config *cfg6 = cfg;
68 struct ila_addr *iaddr; 127 struct ila_addr *iaddr;
@@ -71,7 +130,7 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
71 if (family != AF_INET6) 130 if (family != AF_INET6)
72 return -EINVAL; 131 return -EINVAL;
73 132
74 if (cfg6->fc_dst_len < sizeof(struct ila_locator) + 1) { 133 if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
75 /* Need to have full locator and at least type field 134 /* Need to have full locator and at least type field
76 * included in destination 135 * included in destination
77 */ 136 */
@@ -95,11 +154,17 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
95 if (!tb[ILA_ATTR_LOCATOR]) 154 if (!tb[ILA_ATTR_LOCATOR])
96 return -EINVAL; 155 return -EINVAL;
97 156
98 newts = lwtunnel_state_alloc(encap_len); 157 newts = lwtunnel_state_alloc(sizeof(*ilwt));
99 if (!newts) 158 if (!newts)
100 return -ENOMEM; 159 return -ENOMEM;
101 160
102 newts->len = encap_len; 161 ilwt = ila_lwt_lwtunnel(newts);
162 ret = dst_cache_init(&ilwt->dst_cache, GFP_ATOMIC);
163 if (ret) {
164 kfree(newts);
165 return ret;
166 }
167
103 p = ila_params_lwtunnel(newts); 168 p = ila_params_lwtunnel(newts);
104 169
105 p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); 170 p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
@@ -120,11 +185,19 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
120 newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | 185 newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
121 LWTUNNEL_STATE_INPUT_REDIRECT; 186 LWTUNNEL_STATE_INPUT_REDIRECT;
122 187
188 if (cfg6->fc_dst_len == 8 * sizeof(struct in6_addr))
189 ilwt->connected = 1;
190
123 *ts = newts; 191 *ts = newts;
124 192
125 return 0; 193 return 0;
126} 194}
127 195
196static void ila_destroy_state(struct lwtunnel_state *lwt)
197{
198 dst_cache_destroy(&ila_lwt_lwtunnel(lwt)->dst_cache);
199}
200
128static int ila_fill_encap_info(struct sk_buff *skb, 201static int ila_fill_encap_info(struct sk_buff *skb,
129 struct lwtunnel_state *lwtstate) 202 struct lwtunnel_state *lwtstate)
130{ 203{
@@ -159,6 +232,7 @@ static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
159 232
160static const struct lwtunnel_encap_ops ila_encap_ops = { 233static const struct lwtunnel_encap_ops ila_encap_ops = {
161 .build_state = ila_build_state, 234 .build_state = ila_build_state,
235 .destroy_state = ila_destroy_state,
162 .output = ila_output, 236 .output = ila_output,
163 .input = ila_input, 237 .input = ila_input,
164 .fill_encap = ila_fill_encap_info, 238 .fill_encap = ila_fill_encap_info,
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index e604013dd814..af8f52ee7180 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -118,15 +118,7 @@ static const struct rhashtable_params rht_params = {
118 .obj_cmpfn = ila_cmpfn, 118 .obj_cmpfn = ila_cmpfn,
119}; 119};
120 120
121static struct genl_family ila_nl_family = { 121static struct genl_family ila_nl_family;
122 .id = GENL_ID_GENERATE,
123 .hdrsize = 0,
124 .name = ILA_GENL_NAME,
125 .version = ILA_GENL_VERSION,
126 .maxattr = ILA_ATTR_MAX,
127 .netnsok = true,
128 .parallel_ops = true,
129};
130 122
131static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { 123static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
132 [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, 124 [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
@@ -482,7 +474,15 @@ static int ila_nl_dump_start(struct netlink_callback *cb)
482{ 474{
483 struct net *net = sock_net(cb->skb->sk); 475 struct net *net = sock_net(cb->skb->sk);
484 struct ila_net *ilan = net_generic(net, ila_net_id); 476 struct ila_net *ilan = net_generic(net, ila_net_id);
485 struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; 477 struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
478
479 if (!iter) {
480 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
481 if (!iter)
482 return -ENOMEM;
483
484 cb->args[0] = (long)iter;
485 }
486 486
487 return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter, 487 return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter,
488 GFP_KERNEL); 488 GFP_KERNEL);
@@ -490,16 +490,18 @@ static int ila_nl_dump_start(struct netlink_callback *cb)
490 490
491static int ila_nl_dump_done(struct netlink_callback *cb) 491static int ila_nl_dump_done(struct netlink_callback *cb)
492{ 492{
493 struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; 493 struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
494 494
495 rhashtable_walk_exit(&iter->rhiter); 495 rhashtable_walk_exit(&iter->rhiter);
496 496
497 kfree(iter);
498
497 return 0; 499 return 0;
498} 500}
499 501
500static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) 502static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
501{ 503{
502 struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; 504 struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
503 struct rhashtable_iter *rhiter = &iter->rhiter; 505 struct rhashtable_iter *rhiter = &iter->rhiter;
504 struct ila_map *ila; 506 struct ila_map *ila;
505 int ret; 507 int ret;
@@ -561,6 +563,18 @@ static const struct genl_ops ila_nl_ops[] = {
561 }, 563 },
562}; 564};
563 565
566static struct genl_family ila_nl_family __ro_after_init = {
567 .hdrsize = 0,
568 .name = ILA_GENL_NAME,
569 .version = ILA_GENL_VERSION,
570 .maxattr = ILA_ATTR_MAX,
571 .netnsok = true,
572 .parallel_ops = true,
573 .module = THIS_MODULE,
574 .ops = ila_nl_ops,
575 .n_ops = ARRAY_SIZE(ila_nl_ops),
576};
577
564#define ILA_HASH_TABLE_SIZE 1024 578#define ILA_HASH_TABLE_SIZE 1024
565 579
566static __net_init int ila_init_net(struct net *net) 580static __net_init int ila_init_net(struct net *net)
@@ -623,7 +637,7 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
623 return 0; 637 return 0;
624} 638}
625 639
626int ila_xlat_init(void) 640int __init ila_xlat_init(void)
627{ 641{
628 int ret; 642 int ret;
629 643
@@ -631,8 +645,7 @@ int ila_xlat_init(void)
631 if (ret) 645 if (ret)
632 goto exit; 646 goto exit;
633 647
634 ret = genl_register_family_with_ops(&ila_nl_family, 648 ret = genl_register_family(&ila_nl_family);
635 ila_nl_ops);
636 if (ret < 0) 649 if (ret < 0)
637 goto unregister; 650 goto unregister;
638 651
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 532c3ef282c5..7396e75e161b 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -29,11 +29,12 @@
29#include <net/sock_reuseport.h> 29#include <net/sock_reuseport.h>
30 30
31int inet6_csk_bind_conflict(const struct sock *sk, 31int inet6_csk_bind_conflict(const struct sock *sk,
32 const struct inet_bind_bucket *tb, bool relax) 32 const struct inet_bind_bucket *tb, bool relax,
33 bool reuseport_ok)
33{ 34{
34 const struct sock *sk2; 35 const struct sock *sk2;
35 int reuse = sk->sk_reuse; 36 bool reuse = !!sk->sk_reuse;
36 int reuseport = sk->sk_reuseport; 37 bool reuseport = !!sk->sk_reuseport && reuseport_ok;
37 kuid_t uid = sock_i_uid((struct sock *)sk); 38 kuid_t uid = sock_i_uid((struct sock *)sk);
38 39
39 /* We must walk the whole port owner list in this case. -DaveM */ 40 /* We must walk the whole port owner list in this case. -DaveM */
@@ -88,6 +89,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
88 fl6->flowi6_mark = ireq->ir_mark; 89 fl6->flowi6_mark = ireq->ir_mark;
89 fl6->fl6_dport = ireq->ir_rmt_port; 90 fl6->fl6_dport = ireq->ir_rmt_port;
90 fl6->fl6_sport = htons(ireq->ir_num); 91 fl6->fl6_sport = htons(ireq->ir_num);
92 fl6->flowi6_uid = sk->sk_uid;
91 security_req_classify_flow(req, flowi6_to_flowi(fl6)); 93 security_req_classify_flow(req, flowi6_to_flowi(fl6));
92 94
93 dst = ip6_dst_lookup_flow(sk, fl6, final_p); 95 dst = ip6_dst_lookup_flow(sk, fl6, final_p);
@@ -136,6 +138,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
136 fl6->flowi6_mark = sk->sk_mark; 138 fl6->flowi6_mark = sk->sk_mark;
137 fl6->fl6_sport = inet->inet_sport; 139 fl6->fl6_sport = inet->inet_sport;
138 fl6->fl6_dport = inet->inet_dport; 140 fl6->fl6_dport = inet->inet_dport;
141 fl6->flowi6_uid = sk->sk_uid;
139 security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); 142 security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
140 143
141 rcu_read_lock(); 144 rcu_read_lock();
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b912f0dbaf72..8081bafe441b 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -29,7 +29,7 @@
29#include <net/rawv6.h> 29#include <net/rawv6.h>
30#include <net/transp_v6.h> 30#include <net/transp_v6.h>
31 31
32#include <asm/uaccess.h> 32#include <linux/uaccess.h>
33 33
34#define FL_MIN_LINGER 6 /* Minimal linger. It is set to 6sec specified 34#define FL_MIN_LINGER 6 /* Minimal linger. It is set to 6sec specified
35 in old IPv6 RFC. Well, it was reasonable value. 35 in old IPv6 RFC. Well, it was reasonable value.
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index d7d6d3ae0b3b..75b6108234dd 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -64,7 +64,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
64#define IP6_GRE_HASH_SIZE_SHIFT 5 64#define IP6_GRE_HASH_SIZE_SHIFT 5
65#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT) 65#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
66 66
67static int ip6gre_net_id __read_mostly; 67static unsigned int ip6gre_net_id __read_mostly;
68struct ip6gre_net { 68struct ip6gre_net {
69 struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; 69 struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
70 70
@@ -548,6 +548,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
548 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) 548 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
549 fl6.flowi6_mark = skb->mark; 549 fl6.flowi6_mark = skb->mark;
550 550
551 fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
552
551 err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); 553 err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
552 if (err) 554 if (err)
553 return -1; 555 return -1;
@@ -602,6 +604,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
602 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) 604 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
603 fl6.flowi6_mark = skb->mark; 605 fl6.flowi6_mark = skb->mark;
604 606
607 fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
608
605 if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) 609 if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
606 return -1; 610 return -1;
607 611
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 59eb4ed99ce8..70d0de404197 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -39,6 +39,7 @@
39#include <linux/module.h> 39#include <linux/module.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41 41
42#include <linux/bpf-cgroup.h>
42#include <linux/netfilter.h> 43#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h> 44#include <linux/netfilter_ipv6.h>
44 45
@@ -131,6 +132,14 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
131 132
132static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 133static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
133{ 134{
135 int ret;
136
137 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
138 if (ret) {
139 kfree_skb(skb);
140 return ret;
141 }
142
134 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 143 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
135 dst_allfrag(skb_dst(skb)) || 144 dst_allfrag(skb_dst(skb)) ||
136 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) 145 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
@@ -203,7 +212,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
203 if (opt->opt_flen) 212 if (opt->opt_flen)
204 ipv6_push_frag_opts(skb, opt, &proto); 213 ipv6_push_frag_opts(skb, opt, &proto);
205 if (opt->opt_nflen) 214 if (opt->opt_nflen)
206 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 215 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
216 &fl6->saddr);
207 } 217 }
208 218
209 skb_push(skb, sizeof(struct ipv6hdr)); 219 skb_push(skb, sizeof(struct ipv6hdr));
@@ -624,7 +634,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
624 634
625 hroom = LL_RESERVED_SPACE(rt->dst.dev); 635 hroom = LL_RESERVED_SPACE(rt->dst.dev);
626 if (skb_has_frag_list(skb)) { 636 if (skb_has_frag_list(skb)) {
627 int first_len = skb_pagelen(skb); 637 unsigned int first_len = skb_pagelen(skb);
628 struct sk_buff *frag2; 638 struct sk_buff *frag2;
629 639
630 if (first_len - hlen > mtu || 640 if (first_len - hlen > mtu ||
@@ -1672,7 +1682,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
1672 if (opt && opt->opt_flen) 1682 if (opt && opt->opt_flen)
1673 ipv6_push_frag_opts(skb, opt, &proto); 1683 ipv6_push_frag_opts(skb, opt, &proto);
1674 if (opt && opt->opt_nflen) 1684 if (opt && opt->opt_nflen)
1675 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 1685 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1676 1686
1677 skb_push(skb, sizeof(struct ipv6hdr)); 1687 skb_push(skb, sizeof(struct ipv6hdr));
1678 skb_reset_network_header(skb); 1688 skb_reset_network_header(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index d76674efe523..36d292180942 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -42,7 +42,7 @@
42#include <linux/hash.h> 42#include <linux/hash.h>
43#include <linux/etherdevice.h> 43#include <linux/etherdevice.h>
44 44
45#include <asm/uaccess.h> 45#include <linux/uaccess.h>
46#include <linux/atomic.h> 46#include <linux/atomic.h>
47 47
48#include <net/icmp.h> 48#include <net/icmp.h>
@@ -83,7 +83,7 @@ static int ip6_tnl_dev_init(struct net_device *dev);
83static void ip6_tnl_dev_setup(struct net_device *dev); 83static void ip6_tnl_dev_setup(struct net_device *dev);
84static struct rtnl_link_ops ip6_link_ops __read_mostly; 84static struct rtnl_link_ops ip6_link_ops __read_mostly;
85 85
86static int ip6_tnl_net_id __read_mostly; 86static unsigned int ip6_tnl_net_id __read_mostly;
87struct ip6_tnl_net { 87struct ip6_tnl_net {
88 /* the IPv6 tunnel fallback device */ 88 /* the IPv6 tunnel fallback device */
89 struct net_device *fb_tnl_dev; 89 struct net_device *fb_tnl_dev;
@@ -1166,7 +1166,7 @@ route_lookup:
1166 1166
1167 if (encap_limit >= 0) { 1167 if (encap_limit >= 0) {
1168 init_tel_txopt(&opt, encap_limit); 1168 init_tel_txopt(&opt, encap_limit);
1169 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); 1169 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL, NULL);
1170 } 1170 }
1171 1171
1172 /* Calculate max headroom for all the headers and adjust 1172 /* Calculate max headroom for all the headers and adjust
@@ -1248,6 +1248,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1248 fl6.flowi6_mark = skb->mark; 1248 fl6.flowi6_mark = skb->mark;
1249 } 1249 }
1250 1250
1251 fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
1252
1251 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) 1253 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
1252 return -1; 1254 return -1;
1253 1255
@@ -1326,6 +1328,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1326 fl6.flowi6_mark = skb->mark; 1328 fl6.flowi6_mark = skb->mark;
1327 } 1329 }
1328 1330
1331 fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
1332
1329 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) 1333 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
1330 return -1; 1334 return -1;
1331 1335
@@ -1645,7 +1649,7 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1645 struct ip6_tnl *tnl = netdev_priv(dev); 1649 struct ip6_tnl *tnl = netdev_priv(dev);
1646 1650
1647 if (tnl->parms.proto == IPPROTO_IPIP) { 1651 if (tnl->parms.proto == IPPROTO_IPIP) {
1648 if (new_mtu < 68) 1652 if (new_mtu < ETH_MIN_MTU)
1649 return -EINVAL; 1653 return -EINVAL;
1650 } else { 1654 } else {
1651 if (new_mtu < IPV6_MIN_MTU) 1655 if (new_mtu < IPV6_MIN_MTU)
@@ -1798,6 +1802,8 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
1798 dev->mtu = ETH_DATA_LEN - t_hlen; 1802 dev->mtu = ETH_DATA_LEN - t_hlen;
1799 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1803 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1800 dev->mtu -= 8; 1804 dev->mtu -= 8;
1805 dev->min_mtu = ETH_MIN_MTU;
1806 dev->max_mtu = 0xFFF8 - dev->hard_header_len;
1801 1807
1802 return 0; 1808 return 0;
1803 1809
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index c299c1e2bbf0..f4b4a4a5f4ba 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -64,7 +64,7 @@ static int vti6_dev_init(struct net_device *dev);
64static void vti6_dev_setup(struct net_device *dev); 64static void vti6_dev_setup(struct net_device *dev);
65static struct rtnl_link_ops vti6_link_ops __read_mostly; 65static struct rtnl_link_ops vti6_link_ops __read_mostly;
66 66
67static int vti6_net_id __read_mostly; 67static unsigned int vti6_net_id __read_mostly;
68struct vti6_net { 68struct vti6_net {
69 /* the vti6 tunnel fallback device */ 69 /* the vti6 tunnel fallback device */
70 struct net_device *fb_tnl_dev; 70 struct net_device *fb_tnl_dev;
@@ -608,9 +608,10 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
608 return 0; 608 return 0;
609 609
610 if (type == NDISC_REDIRECT) 610 if (type == NDISC_REDIRECT)
611 ip6_redirect(skb, net, skb->dev->ifindex, 0); 611 ip6_redirect(skb, net, skb->dev->ifindex, 0,
612 sock_net_uid(net, NULL));
612 else 613 else
613 ip6_update_pmtu(skb, net, info, 0, 0); 614 ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
614 xfrm_state_put(x); 615 xfrm_state_put(x);
615 616
616 return 0; 617 return 0;
@@ -812,30 +813,11 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
812 return err; 813 return err;
813} 814}
814 815
815/**
816 * vti6_tnl_change_mtu - change mtu manually for tunnel device
817 * @dev: virtual device associated with tunnel
818 * @new_mtu: the new mtu
819 *
820 * Return:
821 * 0 on success,
822 * %-EINVAL if mtu too small
823 **/
824static int vti6_change_mtu(struct net_device *dev, int new_mtu)
825{
826 if (new_mtu < IPV6_MIN_MTU)
827 return -EINVAL;
828
829 dev->mtu = new_mtu;
830 return 0;
831}
832
833static const struct net_device_ops vti6_netdev_ops = { 816static const struct net_device_ops vti6_netdev_ops = {
834 .ndo_init = vti6_dev_init, 817 .ndo_init = vti6_dev_init,
835 .ndo_uninit = vti6_dev_uninit, 818 .ndo_uninit = vti6_dev_uninit,
836 .ndo_start_xmit = vti6_tnl_xmit, 819 .ndo_start_xmit = vti6_tnl_xmit,
837 .ndo_do_ioctl = vti6_ioctl, 820 .ndo_do_ioctl = vti6_ioctl,
838 .ndo_change_mtu = vti6_change_mtu,
839 .ndo_get_stats64 = ip_tunnel_get_stats64, 821 .ndo_get_stats64 = ip_tunnel_get_stats64,
840 .ndo_get_iflink = ip6_tnl_get_iflink, 822 .ndo_get_iflink = ip6_tnl_get_iflink,
841}; 823};
@@ -855,6 +837,8 @@ static void vti6_dev_setup(struct net_device *dev)
855 dev->type = ARPHRD_TUNNEL6; 837 dev->type = ARPHRD_TUNNEL6;
856 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); 838 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
857 dev->mtu = ETH_DATA_LEN; 839 dev->mtu = ETH_DATA_LEN;
840 dev->min_mtu = IPV6_MIN_MTU;
841 dev->max_mtu = IP_MAX_MTU;
858 dev->flags |= IFF_NOARP; 842 dev->flags |= IFF_NOARP;
859 dev->addr_len = sizeof(struct in6_addr); 843 dev->addr_len = sizeof(struct in6_addr);
860 netif_keep_dst(dev); 844 netif_keep_dst(dev);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7f4265b1649b..604d8953c775 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -16,7 +16,7 @@
16 * 16 *
17 */ 17 */
18 18
19#include <asm/uaccess.h> 19#include <linux/uaccess.h>
20#include <linux/types.h> 20#include <linux/types.h>
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/errno.h> 22#include <linux/errno.h>
@@ -636,7 +636,7 @@ static int pim6_rcv(struct sk_buff *skb)
636 goto drop; 636 goto drop;
637 637
638 pim = (struct pimreghdr *)skb_transport_header(skb); 638 pim = (struct pimreghdr *)skb_transport_header(skb);
639 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || 639 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
640 (pim->flags & PIM_NULL_REGISTER) || 640 (pim->flags & PIM_NULL_REGISTER) ||
641 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 641 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
642 sizeof(*pim), IPPROTO_PIM, 642 sizeof(*pim), IPPROTO_PIM,
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 1b9316e1386a..54d165b9845a 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -74,9 +74,10 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
74 return 0; 74 return 0;
75 75
76 if (type == NDISC_REDIRECT) 76 if (type == NDISC_REDIRECT)
77 ip6_redirect(skb, net, skb->dev->ifindex, 0); 77 ip6_redirect(skb, net, skb->dev->ifindex, 0,
78 sock_net_uid(net, NULL));
78 else 79 else
79 ip6_update_pmtu(skb, net, info, 0, 0); 80 ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
80 xfrm_state_put(x); 81 xfrm_state_put(x);
81 82
82 return 0; 83 return 0;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 636ec56f5f50..ee97c44e2aa0 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -52,8 +52,9 @@
52#include <net/udplite.h> 52#include <net/udplite.h>
53#include <net/xfrm.h> 53#include <net/xfrm.h>
54#include <net/compat.h> 54#include <net/compat.h>
55#include <net/seg6.h>
55 56
56#include <asm/uaccess.h> 57#include <linux/uaccess.h>
57 58
58struct ip6_ra_chain *ip6_ra_chain; 59struct ip6_ra_chain *ip6_ra_chain;
59DEFINE_RWLOCK(ip6_ra_lock); 60DEFINE_RWLOCK(ip6_ra_lock);
@@ -430,6 +431,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
430 431
431 break; 432 break;
432#endif 433#endif
434 case IPV6_SRCRT_TYPE_4:
435 {
436 struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)
437 opt->srcrt;
438
439 if (!seg6_validate_srh(srh, optlen))
440 goto sticky_done;
441 break;
442 }
433 default: 443 default:
434 goto sticky_done; 444 goto sticky_done;
435 } 445 }
@@ -868,6 +878,10 @@ pref_skip_coa:
868 np->autoflowlabel = valbool; 878 np->autoflowlabel = valbool;
869 retv = 0; 879 retv = 0;
870 break; 880 break;
881 case IPV6_RECVFRAGSIZE:
882 np->rxopt.bits.recvfragsize = valbool;
883 retv = 0;
884 break;
871 } 885 }
872 886
873 release_sock(sk); 887 release_sock(sk);
@@ -1310,6 +1324,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1310 val = np->autoflowlabel; 1324 val = np->autoflowlabel;
1311 break; 1325 break;
1312 1326
1327 case IPV6_RECVFRAGSIZE:
1328 val = np->rxopt.bits.recvfragsize;
1329 break;
1330
1313 default: 1331 default:
1314 return -ENOPROTOOPT; 1332 return -ENOPROTOOPT;
1315 } 1333 }
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 60c79a08e14a..64f0f7be9e5e 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -191,7 +191,7 @@ static inline int mip6_report_rl_allow(ktime_t stamp,
191 int allow = 0; 191 int allow = 0;
192 192
193 spin_lock_bh(&mip6_report_rl.lock); 193 spin_lock_bh(&mip6_report_rl.lock);
194 if (!ktime_equal(mip6_report_rl.stamp, stamp) || 194 if (mip6_report_rl.stamp != stamp ||
195 mip6_report_rl.iif != iif || 195 mip6_report_rl.iif != iif ||
196 !ipv6_addr_equal(&mip6_report_rl.src, src) || 196 !ipv6_addr_equal(&mip6_report_rl.src, src) ||
197 !ipv6_addr_equal(&mip6_report_rl.dst, dst)) { 197 !ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index d8e671457d10..7ebac630d3c6 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -233,6 +233,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
233 case ND_OPT_SOURCE_LL_ADDR: 233 case ND_OPT_SOURCE_LL_ADDR:
234 case ND_OPT_TARGET_LL_ADDR: 234 case ND_OPT_TARGET_LL_ADDR:
235 case ND_OPT_MTU: 235 case ND_OPT_MTU:
236 case ND_OPT_NONCE:
236 case ND_OPT_REDIRECT_HDR: 237 case ND_OPT_REDIRECT_HDR:
237 if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { 238 if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
238 ND_PRINTK(2, warn, 239 ND_PRINTK(2, warn,
@@ -568,7 +569,8 @@ static void ndisc_send_unsol_na(struct net_device *dev)
568} 569}
569 570
570void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, 571void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
571 const struct in6_addr *daddr, const struct in6_addr *saddr) 572 const struct in6_addr *daddr, const struct in6_addr *saddr,
573 u64 nonce)
572{ 574{
573 struct sk_buff *skb; 575 struct sk_buff *skb;
574 struct in6_addr addr_buf; 576 struct in6_addr addr_buf;
@@ -588,6 +590,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
588 if (inc_opt) 590 if (inc_opt)
589 optlen += ndisc_opt_addr_space(dev, 591 optlen += ndisc_opt_addr_space(dev,
590 NDISC_NEIGHBOUR_SOLICITATION); 592 NDISC_NEIGHBOUR_SOLICITATION);
593 if (nonce != 0)
594 optlen += 8;
591 595
592 skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); 596 skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
593 if (!skb) 597 if (!skb)
@@ -605,6 +609,13 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
605 ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, 609 ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
606 dev->dev_addr, 610 dev->dev_addr,
607 NDISC_NEIGHBOUR_SOLICITATION); 611 NDISC_NEIGHBOUR_SOLICITATION);
612 if (nonce != 0) {
613 u8 *opt = skb_put(skb, 8);
614
615 opt[0] = ND_OPT_NONCE;
616 opt[1] = 8 >> 3;
617 memcpy(opt + 2, &nonce, 6);
618 }
608 619
609 ndisc_send_skb(skb, daddr, saddr); 620 ndisc_send_skb(skb, daddr, saddr);
610} 621}
@@ -693,12 +704,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
693 "%s: trying to ucast probe in NUD_INVALID: %pI6\n", 704 "%s: trying to ucast probe in NUD_INVALID: %pI6\n",
694 __func__, target); 705 __func__, target);
695 } 706 }
696 ndisc_send_ns(dev, target, target, saddr); 707 ndisc_send_ns(dev, target, target, saddr, 0);
697 } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { 708 } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
698 neigh_app_ns(neigh); 709 neigh_app_ns(neigh);
699 } else { 710 } else {
700 addrconf_addr_solict_mult(target, &mcaddr); 711 addrconf_addr_solict_mult(target, &mcaddr);
701 ndisc_send_ns(dev, target, &mcaddr, saddr); 712 ndisc_send_ns(dev, target, &mcaddr, saddr, 0);
702 } 713 }
703} 714}
704 715
@@ -742,6 +753,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
742 int dad = ipv6_addr_any(saddr); 753 int dad = ipv6_addr_any(saddr);
743 bool inc; 754 bool inc;
744 int is_router = -1; 755 int is_router = -1;
756 u64 nonce = 0;
745 757
746 if (skb->len < sizeof(struct nd_msg)) { 758 if (skb->len < sizeof(struct nd_msg)) {
747 ND_PRINTK(2, warn, "NS: packet too short\n"); 759 ND_PRINTK(2, warn, "NS: packet too short\n");
@@ -786,6 +798,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
786 return; 798 return;
787 } 799 }
788 } 800 }
801 if (ndopts.nd_opts_nonce)
802 memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6);
789 803
790 inc = ipv6_addr_is_multicast(daddr); 804 inc = ipv6_addr_is_multicast(daddr);
791 805
@@ -794,6 +808,15 @@ static void ndisc_recv_ns(struct sk_buff *skb)
794have_ifp: 808have_ifp:
795 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { 809 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
796 if (dad) { 810 if (dad) {
811 if (nonce != 0 && ifp->dad_nonce == nonce) {
812 u8 *np = (u8 *)&nonce;
813 /* Matching nonce if looped back */
814 ND_PRINTK(2, notice,
815 "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n",
816 ifp->idev->dev->name,
817 &ifp->addr, np);
818 goto out;
819 }
797 /* 820 /*
798 * We are colliding with another node 821 * We are colliding with another node
799 * who is doing DAD 822 * who is doing DAD
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d11c46833d61..39970e212ad5 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -26,6 +26,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
26 struct flowi6 fl6 = { 26 struct flowi6 fl6 = {
27 .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, 27 .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
28 .flowi6_mark = skb->mark, 28 .flowi6_mark = skb->mark,
29 .flowi6_uid = sock_net_uid(net, skb->sk),
29 .daddr = iph->daddr, 30 .daddr = iph->daddr,
30 .saddr = iph->saddr, 31 .saddr = iph->saddr,
31 }; 32 };
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index e10a04c9cdc7..6acb2eecd986 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV6
25 25
26 To compile it as a module, choose M here. If unsure, say N. 26 To compile it as a module, choose M here. If unsure, say N.
27 27
28config NF_SOCKET_IPV6
29 tristate "IPv6 socket lookup support"
30 help
31 This option enables the IPv6 socket lookup infrastructure. This
32 is used by the ip6tables socket match.
33
28if NF_TABLES 34if NF_TABLES
29 35
30config NF_TABLES_IPV6 36config NF_TABLES_IPV6
@@ -54,6 +60,14 @@ config NFT_DUP_IPV6
54 help 60 help
55 This module enables IPv6 packet duplication support for nf_tables. 61 This module enables IPv6 packet duplication support for nf_tables.
56 62
63config NFT_FIB_IPV6
64 tristate "nf_tables fib / ipv6 route lookup support"
65 select NFT_FIB
66 help
67 This module enables IPv6 FIB lookups, e.g. for reverse path filtering.
68 It also allows query of the FIB for the route type, e.g. local, unicast,
69 multicast or blackhole.
70
57endif # NF_TABLES_IPV6 71endif # NF_TABLES_IPV6
58endif # NF_TABLES 72endif # NF_TABLES
59 73
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index b4f7d0b4e2af..fe180c96040e 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -24,6 +24,8 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
24nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o 24nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
25obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o 25obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
26 26
27obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o
28
27# logging 29# logging
28obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o 30obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
29 31
@@ -40,6 +42,7 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
40obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o 42obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
41obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o 43obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
42obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o 44obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
45obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
43 46
44# matches 47# matches
45obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o 48obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 55aacea24396..25a022d41a70 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -24,7 +24,7 @@
24#include <linux/icmpv6.h> 24#include <linux/icmpv6.h>
25#include <net/ipv6.h> 25#include <net/ipv6.h>
26#include <net/compat.h> 26#include <net/compat.h>
27#include <asm/uaccess.h> 27#include <linux/uaccess.h>
28#include <linux/mutex.h> 28#include <linux/mutex.h>
29#include <linux/proc_fs.h> 29#include <linux/proc_fs.h>
30#include <linux/err.h> 30#include <linux/err.h>
@@ -291,11 +291,7 @@ ip6t_do_table(struct sk_buff *skb,
291 * rule is also a fragment-specific rule, non-fragments won't 291 * rule is also a fragment-specific rule, non-fragments won't
292 * match it. */ 292 * match it. */
293 acpar.hotdrop = false; 293 acpar.hotdrop = false;
294 acpar.net = state->net; 294 acpar.state = state;
295 acpar.in = state->in;
296 acpar.out = state->out;
297 acpar.family = NFPROTO_IPV6;
298 acpar.hooknum = hook;
299 295
300 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 296 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
301 297
@@ -566,7 +562,8 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
566 562
567static int 563static int
568find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, 564find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
569 unsigned int size) 565 unsigned int size,
566 struct xt_percpu_counter_alloc_state *alloc_state)
570{ 567{
571 struct xt_entry_target *t; 568 struct xt_entry_target *t;
572 struct xt_target *target; 569 struct xt_target *target;
@@ -574,12 +571,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
574 unsigned int j; 571 unsigned int j;
575 struct xt_mtchk_param mtpar; 572 struct xt_mtchk_param mtpar;
576 struct xt_entry_match *ematch; 573 struct xt_entry_match *ematch;
577 unsigned long pcnt;
578 574
579 pcnt = xt_percpu_counter_alloc(); 575 if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
580 if (IS_ERR_VALUE(pcnt))
581 return -ENOMEM; 576 return -ENOMEM;
582 e->counters.pcnt = pcnt;
583 577
584 j = 0; 578 j = 0;
585 mtpar.net = net; 579 mtpar.net = net;
@@ -616,7 +610,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
616 cleanup_match(ematch, net); 610 cleanup_match(ematch, net);
617 } 611 }
618 612
619 xt_percpu_counter_free(e->counters.pcnt); 613 xt_percpu_counter_free(&e->counters);
620 614
621 return ret; 615 return ret;
622} 616}
@@ -703,8 +697,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
703 if (par.target->destroy != NULL) 697 if (par.target->destroy != NULL)
704 par.target->destroy(&par); 698 par.target->destroy(&par);
705 module_put(par.target->me); 699 module_put(par.target->me);
706 700 xt_percpu_counter_free(&e->counters);
707 xt_percpu_counter_free(e->counters.pcnt);
708} 701}
709 702
710/* Checks and translates the user-supplied table segment (held in 703/* Checks and translates the user-supplied table segment (held in
@@ -713,6 +706,7 @@ static int
713translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, 706translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
714 const struct ip6t_replace *repl) 707 const struct ip6t_replace *repl)
715{ 708{
709 struct xt_percpu_counter_alloc_state alloc_state = { 0 };
716 struct ip6t_entry *iter; 710 struct ip6t_entry *iter;
717 unsigned int *offsets; 711 unsigned int *offsets;
718 unsigned int i; 712 unsigned int i;
@@ -772,7 +766,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
772 /* Finally, each sanity check must pass */ 766 /* Finally, each sanity check must pass */
773 i = 0; 767 i = 0;
774 xt_entry_foreach(iter, entry0, newinfo->size) { 768 xt_entry_foreach(iter, entry0, newinfo->size) {
775 ret = find_check_entry(iter, net, repl->name, repl->size); 769 ret = find_check_entry(iter, net, repl->name, repl->size,
770 &alloc_state);
776 if (ret != 0) 771 if (ret != 0)
777 break; 772 break;
778 ++i; 773 ++i;
@@ -1007,7 +1002,7 @@ static int get_info(struct net *net, void __user *user,
1007#endif 1002#endif
1008 t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), 1003 t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
1009 "ip6table_%s", name); 1004 "ip6table_%s", name);
1010 if (!IS_ERR_OR_NULL(t)) { 1005 if (t) {
1011 struct ip6t_getinfo info; 1006 struct ip6t_getinfo info;
1012 const struct xt_table_info *private = t->private; 1007 const struct xt_table_info *private = t->private;
1013#ifdef CONFIG_COMPAT 1008#ifdef CONFIG_COMPAT
@@ -1037,7 +1032,7 @@ static int get_info(struct net *net, void __user *user,
1037 xt_table_unlock(t); 1032 xt_table_unlock(t);
1038 module_put(t->me); 1033 module_put(t->me);
1039 } else 1034 } else
1040 ret = t ? PTR_ERR(t) : -ENOENT; 1035 ret = -ENOENT;
1041#ifdef CONFIG_COMPAT 1036#ifdef CONFIG_COMPAT
1042 if (compat) 1037 if (compat)
1043 xt_compat_unlock(AF_INET6); 1038 xt_compat_unlock(AF_INET6);
@@ -1063,7 +1058,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
1063 get.name[sizeof(get.name) - 1] = '\0'; 1058 get.name[sizeof(get.name) - 1] = '\0';
1064 1059
1065 t = xt_find_table_lock(net, AF_INET6, get.name); 1060 t = xt_find_table_lock(net, AF_INET6, get.name);
1066 if (!IS_ERR_OR_NULL(t)) { 1061 if (t) {
1067 struct xt_table_info *private = t->private; 1062 struct xt_table_info *private = t->private;
1068 if (get.size == private->size) 1063 if (get.size == private->size)
1069 ret = copy_entries_to_user(private->size, 1064 ret = copy_entries_to_user(private->size,
@@ -1074,7 +1069,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
1074 module_put(t->me); 1069 module_put(t->me);
1075 xt_table_unlock(t); 1070 xt_table_unlock(t);
1076 } else 1071 } else
1077 ret = t ? PTR_ERR(t) : -ENOENT; 1072 ret = -ENOENT;
1078 1073
1079 return ret; 1074 return ret;
1080} 1075}
@@ -1099,8 +1094,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1099 1094
1100 t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), 1095 t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
1101 "ip6table_%s", name); 1096 "ip6table_%s", name);
1102 if (IS_ERR_OR_NULL(t)) { 1097 if (!t) {
1103 ret = t ? PTR_ERR(t) : -ENOENT; 1098 ret = -ENOENT;
1104 goto free_newinfo_counters_untrans; 1099 goto free_newinfo_counters_untrans;
1105 } 1100 }
1106 1101
@@ -1214,8 +1209,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
1214 if (IS_ERR(paddc)) 1209 if (IS_ERR(paddc))
1215 return PTR_ERR(paddc); 1210 return PTR_ERR(paddc);
1216 t = xt_find_table_lock(net, AF_INET6, tmp.name); 1211 t = xt_find_table_lock(net, AF_INET6, tmp.name);
1217 if (IS_ERR_OR_NULL(t)) { 1212 if (!t) {
1218 ret = t ? PTR_ERR(t) : -ENOENT; 1213 ret = -ENOENT;
1219 goto free; 1214 goto free;
1220 } 1215 }
1221 1216
@@ -1651,7 +1646,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
1651 1646
1652 xt_compat_lock(AF_INET6); 1647 xt_compat_lock(AF_INET6);
1653 t = xt_find_table_lock(net, AF_INET6, get.name); 1648 t = xt_find_table_lock(net, AF_INET6, get.name);
1654 if (!IS_ERR_OR_NULL(t)) { 1649 if (t) {
1655 const struct xt_table_info *private = t->private; 1650 const struct xt_table_info *private = t->private;
1656 struct xt_table_info info; 1651 struct xt_table_info info;
1657 ret = compat_table_info(private, &info); 1652 ret = compat_table_info(private, &info);
@@ -1665,7 +1660,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
1665 module_put(t->me); 1660 module_put(t->me);
1666 xt_table_unlock(t); 1661 xt_table_unlock(t);
1667 } else 1662 } else
1668 ret = t ? PTR_ERR(t) : -ENOENT; 1663 ret = -ENOENT;
1669 1664
1670 xt_compat_unlock(AF_INET6); 1665 xt_compat_unlock(AF_INET6);
1671 return ret; 1666 return ret;
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 7f9f45d829d2..2b1a15846f9a 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -24,7 +24,7 @@
24static unsigned int 24static unsigned int
25masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) 25masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
26{ 26{
27 return nf_nat_masquerade_ipv6(skb, par->targinfo, par->out); 27 return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
28} 28}
29 29
30static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) 30static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index db29bbf41b59..fa51a205918d 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -39,35 +39,40 @@ static unsigned int
39reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) 39reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
40{ 40{
41 const struct ip6t_reject_info *reject = par->targinfo; 41 const struct ip6t_reject_info *reject = par->targinfo;
42 struct net *net = par->net; 42 struct net *net = xt_net(par);
43 43
44 switch (reject->with) { 44 switch (reject->with) {
45 case IP6T_ICMP6_NO_ROUTE: 45 case IP6T_ICMP6_NO_ROUTE:
46 nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum); 46 nf_send_unreach6(net, skb, ICMPV6_NOROUTE, xt_hooknum(par));
47 break; 47 break;
48 case IP6T_ICMP6_ADM_PROHIBITED: 48 case IP6T_ICMP6_ADM_PROHIBITED:
49 nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); 49 nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED,
50 xt_hooknum(par));
50 break; 51 break;
51 case IP6T_ICMP6_NOT_NEIGHBOUR: 52 case IP6T_ICMP6_NOT_NEIGHBOUR:
52 nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); 53 nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR,
54 xt_hooknum(par));
53 break; 55 break;
54 case IP6T_ICMP6_ADDR_UNREACH: 56 case IP6T_ICMP6_ADDR_UNREACH:
55 nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); 57 nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH,
58 xt_hooknum(par));
56 break; 59 break;
57 case IP6T_ICMP6_PORT_UNREACH: 60 case IP6T_ICMP6_PORT_UNREACH:
58 nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); 61 nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH,
62 xt_hooknum(par));
59 break; 63 break;
60 case IP6T_ICMP6_ECHOREPLY: 64 case IP6T_ICMP6_ECHOREPLY:
61 /* Do nothing */ 65 /* Do nothing */
62 break; 66 break;
63 case IP6T_TCP_RESET: 67 case IP6T_TCP_RESET:
64 nf_send_reset6(net, skb, par->hooknum); 68 nf_send_reset6(net, skb, xt_hooknum(par));
65 break; 69 break;
66 case IP6T_ICMP6_POLICY_FAIL: 70 case IP6T_ICMP6_POLICY_FAIL:
67 nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, par->hooknum); 71 nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, xt_hooknum(par));
68 break; 72 break;
69 case IP6T_ICMP6_REJECT_ROUTE: 73 case IP6T_ICMP6_REJECT_ROUTE:
70 nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, par->hooknum); 74 nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE,
75 xt_hooknum(par));
71 break; 76 break;
72 } 77 }
73 78
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 06bed74cf5ee..98c8dd38575a 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -277,12 +277,12 @@ static unsigned int
277synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) 277synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
278{ 278{
279 const struct xt_synproxy_info *info = par->targinfo; 279 const struct xt_synproxy_info *info = par->targinfo;
280 struct net *net = par->net; 280 struct net *net = xt_net(par);
281 struct synproxy_net *snet = synproxy_pernet(net); 281 struct synproxy_net *snet = synproxy_pernet(net);
282 struct synproxy_options opts = {}; 282 struct synproxy_options opts = {};
283 struct tcphdr *th, _th; 283 struct tcphdr *th, _th;
284 284
285 if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) 285 if (nf_ip6_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP))
286 return NF_DROP; 286 return NF_DROP;
287 287
288 th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); 288 th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
@@ -440,12 +440,12 @@ static int synproxy_tg6_check(const struct xt_tgchk_param *par)
440 e->ipv6.invflags & XT_INV_PROTO) 440 e->ipv6.invflags & XT_INV_PROTO)
441 return -EINVAL; 441 return -EINVAL;
442 442
443 return nf_ct_l3proto_try_module_get(par->family); 443 return nf_ct_netns_get(par->net, par->family);
444} 444}
445 445
446static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par) 446static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
447{ 447{
448 nf_ct_l3proto_module_put(par->family); 448 nf_ct_netns_put(par->net, par->family);
449} 449}
450 450
451static struct xt_target synproxy_tg6_reg __read_mostly = { 451static struct xt_target synproxy_tg6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 1ee1b25df096..d5263dc364a9 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -93,7 +93,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
93 if (unlikely(saddrtype == IPV6_ADDR_ANY)) 93 if (unlikely(saddrtype == IPV6_ADDR_ANY))
94 return true ^ invert; /* not routable: forward path will drop it */ 94 return true ^ invert; /* not routable: forward path will drop it */
95 95
96 return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert; 96 return rpfilter_lookup_reverse6(xt_net(par), skb, xt_in(par),
97 info->flags) ^ invert;
97} 98}
98 99
99static int rpfilter_check(const struct xt_mtchk_param *par) 100static int rpfilter_check(const struct xt_mtchk_param *par)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 963ee3848675..4e3402486833 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -34,6 +34,13 @@
34#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 34#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
35#include <net/netfilter/nf_log.h> 35#include <net/netfilter/nf_log.h>
36 36
37static int conntrack6_net_id;
38static DEFINE_MUTEX(register_ipv6_hooks);
39
40struct conntrack6_net {
41 unsigned int users;
42};
43
37static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 44static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
38 struct nf_conntrack_tuple *tuple) 45 struct nf_conntrack_tuple *tuple)
39{ 46{
@@ -308,6 +315,42 @@ static int ipv6_nlattr_tuple_size(void)
308} 315}
309#endif 316#endif
310 317
318static int ipv6_hooks_register(struct net *net)
319{
320 struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);
321 int err = 0;
322
323 mutex_lock(&register_ipv6_hooks);
324 cnet->users++;
325 if (cnet->users > 1)
326 goto out_unlock;
327
328 err = nf_defrag_ipv6_enable(net);
329 if (err < 0) {
330 cnet->users = 0;
331 goto out_unlock;
332 }
333
334 err = nf_register_net_hooks(net, ipv6_conntrack_ops,
335 ARRAY_SIZE(ipv6_conntrack_ops));
336 if (err)
337 cnet->users = 0;
338 out_unlock:
339 mutex_unlock(&register_ipv6_hooks);
340 return err;
341}
342
343static void ipv6_hooks_unregister(struct net *net)
344{
345 struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);
346
347 mutex_lock(&register_ipv6_hooks);
348 if (cnet->users && (--cnet->users == 0))
349 nf_unregister_net_hooks(net, ipv6_conntrack_ops,
350 ARRAY_SIZE(ipv6_conntrack_ops));
351 mutex_unlock(&register_ipv6_hooks);
352}
353
311struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { 354struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
312 .l3proto = PF_INET6, 355 .l3proto = PF_INET6,
313 .name = "ipv6", 356 .name = "ipv6",
@@ -321,6 +364,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
321 .nlattr_to_tuple = ipv6_nlattr_to_tuple, 364 .nlattr_to_tuple = ipv6_nlattr_to_tuple,
322 .nla_policy = ipv6_nla_policy, 365 .nla_policy = ipv6_nla_policy,
323#endif 366#endif
367 .net_ns_get = ipv6_hooks_register,
368 .net_ns_put = ipv6_hooks_unregister,
324 .me = THIS_MODULE, 369 .me = THIS_MODULE,
325}; 370};
326 371
@@ -336,52 +381,51 @@ static struct nf_sockopt_ops so_getorigdst6 = {
336 .owner = THIS_MODULE, 381 .owner = THIS_MODULE,
337}; 382};
338 383
384static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
385 &nf_conntrack_l4proto_tcp6,
386 &nf_conntrack_l4proto_udp6,
387 &nf_conntrack_l4proto_icmpv6,
388#ifdef CONFIG_NF_CT_PROTO_DCCP
389 &nf_conntrack_l4proto_dccp6,
390#endif
391#ifdef CONFIG_NF_CT_PROTO_SCTP
392 &nf_conntrack_l4proto_sctp6,
393#endif
394#ifdef CONFIG_NF_CT_PROTO_UDPLITE
395 &nf_conntrack_l4proto_udplite6,
396#endif
397};
398
339static int ipv6_net_init(struct net *net) 399static int ipv6_net_init(struct net *net)
340{ 400{
341 int ret = 0; 401 int ret = 0;
342 402
343 ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6); 403 ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
344 if (ret < 0) { 404 ARRAY_SIZE(builtin_l4proto6));
345 pr_err("nf_conntrack_tcp6: pernet registration failed\n"); 405 if (ret < 0)
346 goto out; 406 return ret;
347 } 407
348 ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6);
349 if (ret < 0) {
350 pr_err("nf_conntrack_udp6: pernet registration failed\n");
351 goto cleanup_tcp6;
352 }
353 ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6);
354 if (ret < 0) {
355 pr_err("nf_conntrack_icmp6: pernet registration failed\n");
356 goto cleanup_udp6;
357 }
358 ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6); 408 ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
359 if (ret < 0) { 409 if (ret < 0) {
360 pr_err("nf_conntrack_ipv6: pernet registration failed.\n"); 410 pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
361 goto cleanup_icmpv6; 411 nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
412 ARRAY_SIZE(builtin_l4proto6));
362 } 413 }
363 return 0;
364 cleanup_icmpv6:
365 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
366 cleanup_udp6:
367 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
368 cleanup_tcp6:
369 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
370 out:
371 return ret; 414 return ret;
372} 415}
373 416
374static void ipv6_net_exit(struct net *net) 417static void ipv6_net_exit(struct net *net)
375{ 418{
376 nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6); 419 nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
377 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); 420 nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
378 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); 421 ARRAY_SIZE(builtin_l4proto6));
379 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
380} 422}
381 423
382static struct pernet_operations ipv6_net_ops = { 424static struct pernet_operations ipv6_net_ops = {
383 .init = ipv6_net_init, 425 .init = ipv6_net_init,
384 .exit = ipv6_net_exit, 426 .exit = ipv6_net_exit,
427 .id = &conntrack6_net_id,
428 .size = sizeof(struct conntrack6_net),
385}; 429};
386 430
387static int __init nf_conntrack_l3proto_ipv6_init(void) 431static int __init nf_conntrack_l3proto_ipv6_init(void)
@@ -389,7 +433,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
389 int ret = 0; 433 int ret = 0;
390 434
391 need_conntrack(); 435 need_conntrack();
392 nf_defrag_ipv6_enable();
393 436
394 ret = nf_register_sockopt(&so_getorigdst6); 437 ret = nf_register_sockopt(&so_getorigdst6);
395 if (ret < 0) { 438 if (ret < 0) {
@@ -401,47 +444,20 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
401 if (ret < 0) 444 if (ret < 0)
402 goto cleanup_sockopt; 445 goto cleanup_sockopt;
403 446
404 ret = nf_register_hooks(ipv6_conntrack_ops, 447 ret = nf_ct_l4proto_register(builtin_l4proto6,
405 ARRAY_SIZE(ipv6_conntrack_ops)); 448 ARRAY_SIZE(builtin_l4proto6));
406 if (ret < 0) { 449 if (ret < 0)
407 pr_err("nf_conntrack_ipv6: can't register pre-routing defrag "
408 "hook.\n");
409 goto cleanup_pernet; 450 goto cleanup_pernet;
410 }
411
412 ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6);
413 if (ret < 0) {
414 pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n");
415 goto cleanup_hooks;
416 }
417
418 ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6);
419 if (ret < 0) {
420 pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n");
421 goto cleanup_tcp6;
422 }
423
424 ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6);
425 if (ret < 0) {
426 pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n");
427 goto cleanup_udp6;
428 }
429 451
430 ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); 452 ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6);
431 if (ret < 0) { 453 if (ret < 0) {
432 pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n"); 454 pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n");
433 goto cleanup_icmpv6; 455 goto cleanup_l4proto;
434 } 456 }
435 return ret; 457 return ret;
436 458cleanup_l4proto:
437 cleanup_icmpv6: 459 nf_ct_l4proto_unregister(builtin_l4proto6,
438 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); 460 ARRAY_SIZE(builtin_l4proto6));
439 cleanup_udp6:
440 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
441 cleanup_tcp6:
442 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
443 cleanup_hooks:
444 nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
445 cleanup_pernet: 461 cleanup_pernet:
446 unregister_pernet_subsys(&ipv6_net_ops); 462 unregister_pernet_subsys(&ipv6_net_ops);
447 cleanup_sockopt: 463 cleanup_sockopt:
@@ -453,10 +469,8 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
453{ 469{
454 synchronize_net(); 470 synchronize_net();
455 nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); 471 nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
456 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); 472 nf_ct_l4proto_unregister(builtin_l4proto6,
457 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); 473 ARRAY_SIZE(builtin_l4proto6));
458 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
459 nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
460 unregister_pernet_subsys(&ipv6_net_ops); 474 unregister_pernet_subsys(&ipv6_net_ops);
461 nf_unregister_sockopt(&so_getorigdst6); 475 nf_unregister_sockopt(&so_getorigdst6);
462} 476}
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index f06b0471f39f..8e0bdd058787 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -30,6 +30,8 @@
30#include <net/netfilter/nf_conntrack_zones.h> 30#include <net/netfilter/nf_conntrack_zones.h>
31#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 31#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
32 32
33static DEFINE_MUTEX(defrag6_mutex);
34
33static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, 35static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
34 struct sk_buff *skb) 36 struct sk_buff *skb)
35{ 37{
@@ -87,6 +89,19 @@ static struct nf_hook_ops ipv6_defrag_ops[] = {
87 }, 89 },
88}; 90};
89 91
92static void __net_exit defrag6_net_exit(struct net *net)
93{
94 if (net->nf.defrag_ipv6) {
95 nf_unregister_net_hooks(net, ipv6_defrag_ops,
96 ARRAY_SIZE(ipv6_defrag_ops));
97 net->nf.defrag_ipv6 = false;
98 }
99}
100
101static struct pernet_operations defrag6_net_ops = {
102 .exit = defrag6_net_exit,
103};
104
90static int __init nf_defrag_init(void) 105static int __init nf_defrag_init(void)
91{ 106{
92 int ret = 0; 107 int ret = 0;
@@ -96,9 +111,9 @@ static int __init nf_defrag_init(void)
96 pr_err("nf_defrag_ipv6: can't initialize frag6.\n"); 111 pr_err("nf_defrag_ipv6: can't initialize frag6.\n");
97 return ret; 112 return ret;
98 } 113 }
99 ret = nf_register_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); 114 ret = register_pernet_subsys(&defrag6_net_ops);
100 if (ret < 0) { 115 if (ret < 0) {
101 pr_err("nf_defrag_ipv6: can't register hooks\n"); 116 pr_err("nf_defrag_ipv6: can't register pernet ops\n");
102 goto cleanup_frag6; 117 goto cleanup_frag6;
103 } 118 }
104 return ret; 119 return ret;
@@ -111,12 +126,31 @@ cleanup_frag6:
111 126
112static void __exit nf_defrag_fini(void) 127static void __exit nf_defrag_fini(void)
113{ 128{
114 nf_unregister_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); 129 unregister_pernet_subsys(&defrag6_net_ops);
115 nf_ct_frag6_cleanup(); 130 nf_ct_frag6_cleanup();
116} 131}
117 132
118void nf_defrag_ipv6_enable(void) 133int nf_defrag_ipv6_enable(struct net *net)
119{ 134{
135 int err = 0;
136
137 might_sleep();
138
139 if (net->nf.defrag_ipv6)
140 return 0;
141
142 mutex_lock(&defrag6_mutex);
143 if (net->nf.defrag_ipv6)
144 goto out_unlock;
145
146 err = nf_register_net_hooks(net, ipv6_defrag_ops,
147 ARRAY_SIZE(ipv6_defrag_ops));
148 if (err == 0)
149 net->nf.defrag_ipv6 = true;
150
151 out_unlock:
152 mutex_unlock(&defrag6_mutex);
153 return err;
120} 154}
121EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable); 155EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
122 156
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
new file mode 100644
index 000000000000..ebb2bf84232a
--- /dev/null
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -0,0 +1,151 @@
1/*
2 * Copyright (C) 2007-2008 BalaBit IT Ltd.
3 * Author: Krisztian Kovacs
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 */
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <net/tcp.h>
14#include <net/udp.h>
15#include <net/icmp.h>
16#include <net/sock.h>
17#include <net/inet_sock.h>
18#include <net/inet6_hashtables.h>
19#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
20#include <net/netfilter/nf_socket.h>
21#if IS_ENABLED(CONFIG_NF_CONNTRACK)
22#include <net/netfilter/nf_conntrack.h>
23#endif
24
25static int
26extract_icmp6_fields(const struct sk_buff *skb,
27 unsigned int outside_hdrlen,
28 int *protocol,
29 const struct in6_addr **raddr,
30 const struct in6_addr **laddr,
31 __be16 *rport,
32 __be16 *lport,
33 struct ipv6hdr *ipv6_var)
34{
35 const struct ipv6hdr *inside_iph;
36 struct icmp6hdr *icmph, _icmph;
37 __be16 *ports, _ports[2];
38 u8 inside_nexthdr;
39 __be16 inside_fragoff;
40 int inside_hdrlen;
41
42 icmph = skb_header_pointer(skb, outside_hdrlen,
43 sizeof(_icmph), &_icmph);
44 if (icmph == NULL)
45 return 1;
46
47 if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
48 return 1;
49
50 inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph),
51 sizeof(*ipv6_var), ipv6_var);
52 if (inside_iph == NULL)
53 return 1;
54 inside_nexthdr = inside_iph->nexthdr;
55
56 inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) +
57 sizeof(*ipv6_var),
58 &inside_nexthdr, &inside_fragoff);
59 if (inside_hdrlen < 0)
60 return 1; /* hjm: Packet has no/incomplete transport layer headers. */
61
62 if (inside_nexthdr != IPPROTO_TCP &&
63 inside_nexthdr != IPPROTO_UDP)
64 return 1;
65
66 ports = skb_header_pointer(skb, inside_hdrlen,
67 sizeof(_ports), &_ports);
68 if (ports == NULL)
69 return 1;
70
71 /* the inside IP packet is the one quoted from our side, thus
72 * its saddr is the local address */
73 *protocol = inside_nexthdr;
74 *laddr = &inside_iph->saddr;
75 *lport = ports[0];
76 *raddr = &inside_iph->daddr;
77 *rport = ports[1];
78
79 return 0;
80}
81
82static struct sock *
83nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
84 const u8 protocol,
85 const struct in6_addr *saddr, const struct in6_addr *daddr,
86 const __be16 sport, const __be16 dport,
87 const struct net_device *in)
88{
89 switch (protocol) {
90 case IPPROTO_TCP:
91 return inet6_lookup(net, &tcp_hashinfo, skb, doff,
92 saddr, sport, daddr, dport,
93 in->ifindex);
94 case IPPROTO_UDP:
95 return udp6_lib_lookup(net, saddr, sport, daddr, dport,
96 in->ifindex);
97 }
98
99 return NULL;
100}
101
102struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
103 const struct net_device *indev)
104{
105 __be16 uninitialized_var(dport), uninitialized_var(sport);
106 const struct in6_addr *daddr = NULL, *saddr = NULL;
107 struct ipv6hdr *iph = ipv6_hdr(skb);
108 struct sk_buff *data_skb = NULL;
109 int doff = 0;
110 int thoff = 0, tproto;
111
112 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
113 if (tproto < 0) {
114 pr_debug("unable to find transport header in IPv6 packet, dropping\n");
115 return NULL;
116 }
117
118 if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
119 struct udphdr _hdr, *hp;
120
121 hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
122 if (hp == NULL)
123 return NULL;
124
125 saddr = &iph->saddr;
126 sport = hp->source;
127 daddr = &iph->daddr;
128 dport = hp->dest;
129 data_skb = (struct sk_buff *)skb;
130 doff = tproto == IPPROTO_TCP ?
131 thoff + __tcp_hdrlen((struct tcphdr *)hp) :
132 thoff + sizeof(*hp);
133
134 } else if (tproto == IPPROTO_ICMPV6) {
135 struct ipv6hdr ipv6_var;
136
137 if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
138 &sport, &dport, &ipv6_var))
139 return NULL;
140 } else {
141 return NULL;
142 }
143
144 return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
145 sport, dport, indev);
146}
147EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v6);
148
149MODULE_LICENSE("GPL");
150MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
151MODULE_DESCRIPTION("Netfilter IPv6 socket lookup infrastructure");
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 831f86e1ec08..d8b5b60b7d53 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -28,7 +28,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr,
28 struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr]; 28 struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
29 int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; 29 int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
30 30
31 nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif); 31 nf_dup_ipv6(nft_net(pkt), pkt->skb, nft_hook(pkt), gw, oif);
32} 32}
33 33
34static int nft_dup_ipv6_init(const struct nft_ctx *ctx, 34static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
new file mode 100644
index 000000000000..c947aad8bcc6
--- /dev/null
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -0,0 +1,275 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License version 2 as
4 * published by the Free Software Foundation.
5 */
6
7#include <linux/kernel.h>
8#include <linux/init.h>
9#include <linux/module.h>
10#include <linux/netlink.h>
11#include <linux/netfilter.h>
12#include <linux/netfilter/nf_tables.h>
13#include <linux/netfilter_ipv6.h>
14#include <net/netfilter/nf_tables_core.h>
15#include <net/netfilter/nf_tables.h>
16#include <net/netfilter/nft_fib.h>
17
18#include <net/ip6_fib.h>
19#include <net/ip6_route.h>
20
21static bool fib6_is_local(const struct sk_buff *skb)
22{
23 const struct rt6_info *rt = (const void *)skb_dst(skb);
24
25 return rt && (rt->rt6i_flags & RTF_LOCAL);
26}
27
28static int get_ifindex(const struct net_device *dev)
29{
30 return dev ? dev->ifindex : 0;
31}
32
33static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
34 const struct nft_pktinfo *pkt,
35 const struct net_device *dev)
36{
37 const struct ipv6hdr *iph = ipv6_hdr(pkt->skb);
38 int lookup_flags = 0;
39
40 if (priv->flags & NFTA_FIB_F_DADDR) {
41 fl6->daddr = iph->daddr;
42 fl6->saddr = iph->saddr;
43 } else {
44 fl6->daddr = iph->saddr;
45 fl6->saddr = iph->daddr;
46 }
47
48 if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
49 lookup_flags |= RT6_LOOKUP_F_IFACE;
50 fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
51 }
52
53 if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
54 lookup_flags |= RT6_LOOKUP_F_HAS_SADDR;
55
56 if (priv->flags & NFTA_FIB_F_MARK)
57 fl6->flowi6_mark = pkt->skb->mark;
58
59 fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK;
60
61 return lookup_flags;
62}
63
64static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
65 const struct nft_pktinfo *pkt)
66{
67 const struct net_device *dev = NULL;
68 const struct nf_ipv6_ops *v6ops;
69 const struct nf_afinfo *afinfo;
70 int route_err, addrtype;
71 struct rt6_info *rt;
72 struct flowi6 fl6 = {
73 .flowi6_iif = LOOPBACK_IFINDEX,
74 .flowi6_proto = pkt->tprot,
75 };
76 u32 ret = 0;
77
78 afinfo = nf_get_afinfo(NFPROTO_IPV6);
79 if (!afinfo)
80 return RTN_UNREACHABLE;
81
82 if (priv->flags & NFTA_FIB_F_IIF)
83 dev = nft_in(pkt);
84 else if (priv->flags & NFTA_FIB_F_OIF)
85 dev = nft_out(pkt);
86
87 nft_fib6_flowi_init(&fl6, priv, pkt, dev);
88
89 v6ops = nf_get_ipv6_ops();
90 if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
91 ret = RTN_LOCAL;
92
93 route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt,
94 flowi6_to_flowi(&fl6), false);
95 if (route_err)
96 goto err;
97
98 if (rt->rt6i_flags & RTF_REJECT) {
99 route_err = rt->dst.error;
100 dst_release(&rt->dst);
101 goto err;
102 }
103
104 if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr))
105 ret = RTN_ANYCAST;
106 else if (!dev && rt->rt6i_flags & RTF_LOCAL)
107 ret = RTN_LOCAL;
108
109 dst_release(&rt->dst);
110
111 if (ret)
112 return ret;
113
114 addrtype = ipv6_addr_type(&fl6.daddr);
115
116 if (addrtype & IPV6_ADDR_MULTICAST)
117 return RTN_MULTICAST;
118 if (addrtype & IPV6_ADDR_UNICAST)
119 return RTN_UNICAST;
120
121 return RTN_UNSPEC;
122 err:
123 switch (route_err) {
124 case -EINVAL:
125 return RTN_BLACKHOLE;
126 case -EACCES:
127 return RTN_PROHIBIT;
128 case -EAGAIN:
129 return RTN_THROW;
130 default:
131 break;
132 }
133
134 return RTN_UNREACHABLE;
135}
136
137void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
138 const struct nft_pktinfo *pkt)
139{
140 const struct nft_fib *priv = nft_expr_priv(expr);
141 u32 *dest = &regs->data[priv->dreg];
142
143 *dest = __nft_fib6_eval_type(priv, pkt);
144}
145EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
146
147void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
148 const struct nft_pktinfo *pkt)
149{
150 const struct nft_fib *priv = nft_expr_priv(expr);
151 const struct net_device *oif = NULL;
152 u32 *dest = &regs->data[priv->dreg];
153 struct flowi6 fl6 = {
154 .flowi6_iif = LOOPBACK_IFINDEX,
155 .flowi6_proto = pkt->tprot,
156 };
157 struct rt6_info *rt;
158 int lookup_flags;
159
160 if (priv->flags & NFTA_FIB_F_IIF)
161 oif = nft_in(pkt);
162 else if (priv->flags & NFTA_FIB_F_OIF)
163 oif = nft_out(pkt);
164
165 lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);
166
167 if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) {
168 nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
169 return;
170 }
171
172 *dest = 0;
173 again:
174 rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
175 if (rt->dst.error)
176 goto put_rt_err;
177
178 /* Should not see RTF_LOCAL here */
179 if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
180 goto put_rt_err;
181
182 if (oif && oif != rt->rt6i_idev->dev) {
183 /* multipath route? Try again with F_IFACE */
184 if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) {
185 lookup_flags |= RT6_LOOKUP_F_IFACE;
186 fl6.flowi6_oif = oif->ifindex;
187 ip6_rt_put(rt);
188 goto again;
189 }
190 }
191
192 switch (priv->result) {
193 case NFT_FIB_RESULT_OIF:
194 *dest = rt->rt6i_idev->dev->ifindex;
195 break;
196 case NFT_FIB_RESULT_OIFNAME:
197 strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ);
198 break;
199 default:
200 WARN_ON_ONCE(1);
201 break;
202 }
203
204 put_rt_err:
205 ip6_rt_put(rt);
206}
207EXPORT_SYMBOL_GPL(nft_fib6_eval);
208
209static struct nft_expr_type nft_fib6_type;
210
211static const struct nft_expr_ops nft_fib6_type_ops = {
212 .type = &nft_fib6_type,
213 .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
214 .eval = nft_fib6_eval_type,
215 .init = nft_fib_init,
216 .dump = nft_fib_dump,
217 .validate = nft_fib_validate,
218};
219
220static const struct nft_expr_ops nft_fib6_ops = {
221 .type = &nft_fib6_type,
222 .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
223 .eval = nft_fib6_eval,
224 .init = nft_fib_init,
225 .dump = nft_fib_dump,
226 .validate = nft_fib_validate,
227};
228
229static const struct nft_expr_ops *
230nft_fib6_select_ops(const struct nft_ctx *ctx,
231 const struct nlattr * const tb[])
232{
233 enum nft_fib_result result;
234
235 if (!tb[NFTA_FIB_RESULT])
236 return ERR_PTR(-EINVAL);
237
238 result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
239
240 switch (result) {
241 case NFT_FIB_RESULT_OIF:
242 return &nft_fib6_ops;
243 case NFT_FIB_RESULT_OIFNAME:
244 return &nft_fib6_ops;
245 case NFT_FIB_RESULT_ADDRTYPE:
246 return &nft_fib6_type_ops;
247 default:
248 return ERR_PTR(-EOPNOTSUPP);
249 }
250}
251
252static struct nft_expr_type nft_fib6_type __read_mostly = {
253 .name = "fib",
254 .select_ops = &nft_fib6_select_ops,
255 .policy = nft_fib_policy,
256 .maxattr = NFTA_FIB_MAX,
257 .family = NFPROTO_IPV6,
258 .owner = THIS_MODULE,
259};
260
261static int __init nft_fib6_module_init(void)
262{
263 return nft_register_expr(&nft_fib6_type);
264}
265
266static void __exit nft_fib6_module_exit(void)
267{
268 nft_unregister_expr(&nft_fib6_type);
269}
270module_init(nft_fib6_module_init);
271module_exit(nft_fib6_module_exit);
272
273MODULE_LICENSE("GPL");
274MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
275MODULE_ALIAS_NFT_AF_EXPR(10, "fib");
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index 9597ffb74077..6c5b5b1830a7 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> 2 * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
@@ -32,7 +32,14 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
32 range.max_proto.all = 32 range.max_proto.all =
33 *(__be16 *)&regs->data[priv->sreg_proto_max]; 33 *(__be16 *)&regs->data[priv->sreg_proto_max];
34 } 34 }
35 regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); 35 regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
36 nft_out(pkt));
37}
38
39static void
40nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
41{
42 nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
36} 43}
37 44
38static struct nft_expr_type nft_masq_ipv6_type; 45static struct nft_expr_type nft_masq_ipv6_type;
@@ -41,6 +48,7 @@ static const struct nft_expr_ops nft_masq_ipv6_ops = {
41 .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), 48 .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
42 .eval = nft_masq_ipv6_eval, 49 .eval = nft_masq_ipv6_eval,
43 .init = nft_masq_init, 50 .init = nft_masq_init,
51 .destroy = nft_masq_ipv6_destroy,
44 .dump = nft_masq_dump, 52 .dump = nft_masq_dump,
45 .validate = nft_masq_validate, 53 .validate = nft_masq_validate,
46}; 54};
@@ -77,5 +85,5 @@ module_init(nft_masq_ipv6_module_init);
77module_exit(nft_masq_ipv6_module_exit); 85module_exit(nft_masq_ipv6_module_exit);
78 86
79MODULE_LICENSE("GPL"); 87MODULE_LICENSE("GPL");
80MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); 88MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
81MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq"); 89MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
index aca44e89a881..f5ac080fc084 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> 2 * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
@@ -35,7 +35,14 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
35 35
36 range.flags |= priv->flags; 36 range.flags |= priv->flags;
37 37
38 regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->hook); 38 regs->verdict.code =
39 nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt));
40}
41
42static void
43nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
44{
45 nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
39} 46}
40 47
41static struct nft_expr_type nft_redir_ipv6_type; 48static struct nft_expr_type nft_redir_ipv6_type;
@@ -44,6 +51,7 @@ static const struct nft_expr_ops nft_redir_ipv6_ops = {
44 .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), 51 .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
45 .eval = nft_redir_ipv6_eval, 52 .eval = nft_redir_ipv6_eval,
46 .init = nft_redir_init, 53 .init = nft_redir_init,
54 .destroy = nft_redir_ipv6_destroy,
47 .dump = nft_redir_dump, 55 .dump = nft_redir_dump,
48 .validate = nft_redir_validate, 56 .validate = nft_redir_validate,
49}; 57};
@@ -71,5 +79,5 @@ module_init(nft_redir_ipv6_module_init);
71module_exit(nft_redir_ipv6_module_exit); 79module_exit(nft_redir_ipv6_module_exit);
72 80
73MODULE_LICENSE("GPL"); 81MODULE_LICENSE("GPL");
74MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); 82MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
75MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); 83MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index 92bda9908bb9..057deeaff1cb 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -27,11 +27,11 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr,
27 27
28 switch (priv->type) { 28 switch (priv->type) {
29 case NFT_REJECT_ICMP_UNREACH: 29 case NFT_REJECT_ICMP_UNREACH:
30 nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, 30 nf_send_unreach6(nft_net(pkt), pkt->skb, priv->icmp_code,
31 pkt->hook); 31 nft_hook(pkt));
32 break; 32 break;
33 case NFT_REJECT_TCP_RST: 33 case NFT_REJECT_TCP_RST:
34 nf_send_reset6(pkt->net, pkt->skb, pkt->hook); 34 nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt));
35 break; 35 break;
36 default: 36 default:
37 break; 37 break;
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 66e2d9dfc43a..e1f8b34d7a2e 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -113,6 +113,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
113 fl6.daddr = *daddr; 113 fl6.daddr = *daddr;
114 fl6.flowi6_oif = oif; 114 fl6.flowi6_oif = oif;
115 fl6.flowi6_mark = sk->sk_mark; 115 fl6.flowi6_mark = sk->sk_mark;
116 fl6.flowi6_uid = sk->sk_uid;
116 fl6.fl6_icmp_type = user_icmph.icmp6_type; 117 fl6.fl6_icmp_type = user_icmph.icmp6_type;
117 fl6.fl6_icmp_code = user_icmph.icmp6_code; 118 fl6.fl6_icmp_code = user_icmph.icmp6_code;
118 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); 119 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 054a1d84fc5e..ea89073c8247 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -65,11 +65,12 @@
65 65
66#define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */ 66#define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */
67 67
68static struct raw_hashinfo raw_v6_hashinfo = { 68struct raw_hashinfo raw_v6_hashinfo = {
69 .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), 69 .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
70}; 70};
71EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
71 72
72static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, 73struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
73 unsigned short num, const struct in6_addr *loc_addr, 74 unsigned short num, const struct in6_addr *loc_addr,
74 const struct in6_addr *rmt_addr, int dif) 75 const struct in6_addr *rmt_addr, int dif)
75{ 76{
@@ -102,6 +103,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
102found: 103found:
103 return sk; 104 return sk;
104} 105}
106EXPORT_SYMBOL_GPL(__raw_v6_lookup);
105 107
106/* 108/*
107 * 0 - deliver 109 * 0 - deliver
@@ -589,7 +591,11 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
589 } 591 }
590 592
591 offset += skb_transport_offset(skb); 593 offset += skb_transport_offset(skb);
592 BUG_ON(skb_copy_bits(skb, offset, &csum, 2)); 594 err = skb_copy_bits(skb, offset, &csum, 2);
595 if (err < 0) {
596 ip6_flush_pending_frames(sk);
597 goto out;
598 }
593 599
594 /* in case cksum was not initialized */ 600 /* in case cksum was not initialized */
595 if (unlikely(csum)) 601 if (unlikely(csum))
@@ -774,6 +780,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
774 memset(&fl6, 0, sizeof(fl6)); 780 memset(&fl6, 0, sizeof(fl6));
775 781
776 fl6.flowi6_mark = sk->sk_mark; 782 fl6.flowi6_mark = sk->sk_mark;
783 fl6.flowi6_uid = sk->sk_uid;
777 784
778 ipc6.hlimit = -1; 785 ipc6.hlimit = -1;
779 ipc6.tclass = -1; 786 ipc6.tclass = -1;
@@ -1259,6 +1266,7 @@ struct proto rawv6_prot = {
1259 .compat_getsockopt = compat_rawv6_getsockopt, 1266 .compat_getsockopt = compat_rawv6_getsockopt,
1260 .compat_ioctl = compat_rawv6_ioctl, 1267 .compat_ioctl = compat_rawv6_ioctl,
1261#endif 1268#endif
1269 .diag_destroy = raw_abort,
1262}; 1270};
1263 1271
1264#ifdef CONFIG_PROC_FS 1272#ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 3815e8505ed2..e1da5b888cc4 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -211,7 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
211{ 211{
212 struct sk_buff *prev, *next; 212 struct sk_buff *prev, *next;
213 struct net_device *dev; 213 struct net_device *dev;
214 int offset, end; 214 int offset, end, fragsize;
215 struct net *net = dev_net(skb_dst(skb)->dev); 215 struct net *net = dev_net(skb_dst(skb)->dev);
216 u8 ecn; 216 u8 ecn;
217 217
@@ -336,6 +336,10 @@ found:
336 fq->ecn |= ecn; 336 fq->ecn |= ecn;
337 add_frag_mem_limit(fq->q.net, skb->truesize); 337 add_frag_mem_limit(fq->q.net, skb->truesize);
338 338
339 fragsize = -skb_network_offset(skb) + skb->len;
340 if (fragsize > fq->q.max_size)
341 fq->q.max_size = fragsize;
342
339 /* The first fragment. 343 /* The first fragment.
340 * nhoffset is obtained from the first fragment, of course. 344 * nhoffset is obtained from the first fragment, of course.
341 */ 345 */
@@ -495,6 +499,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
495 ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); 499 ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
496 IP6CB(head)->nhoff = nhoff; 500 IP6CB(head)->nhoff = nhoff;
497 IP6CB(head)->flags |= IP6SKB_FRAGMENTED; 501 IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
502 IP6CB(head)->frag_max_size = fq->q.max_size;
498 503
499 /* Yes, and fold redundant checksum back. 8) */ 504 /* Yes, and fold redundant checksum back. 8) */
500 skb_postpush_rcsum(head, skb_network_header(head), 505 skb_postpush_rcsum(head, skb_network_header(head),
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1b57e11e6e0d..8417c41d8ec8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -64,7 +64,7 @@
64#include <net/l3mdev.h> 64#include <net/l3mdev.h>
65#include <trace/events/fib6.h> 65#include <trace/events/fib6.h>
66 66
67#include <asm/uaccess.h> 67#include <linux/uaccess.h>
68 68
69#ifdef CONFIG_SYSCTL 69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h> 70#include <linux/sysctl.h>
@@ -527,7 +527,7 @@ static void rt6_probe_deferred(struct work_struct *w)
527 container_of(w, struct __rt6_probe_work, work); 527 container_of(w, struct __rt6_probe_work, work);
528 528
529 addrconf_addr_solict_mult(&work->target, &mcaddr); 529 addrconf_addr_solict_mult(&work->target, &mcaddr);
530 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL); 530 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
531 dev_put(work->dev); 531 dev_put(work->dev);
532 kfree(work); 532 kfree(work);
533} 533}
@@ -1408,7 +1408,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1408} 1408}
1409 1409
1410void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, 1410void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1411 int oif, u32 mark) 1411 int oif, u32 mark, kuid_t uid)
1412{ 1412{
1413 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 1413 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1414 struct dst_entry *dst; 1414 struct dst_entry *dst;
@@ -1420,6 +1420,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1420 fl6.daddr = iph->daddr; 1420 fl6.daddr = iph->daddr;
1421 fl6.saddr = iph->saddr; 1421 fl6.saddr = iph->saddr;
1422 fl6.flowlabel = ip6_flowinfo(iph); 1422 fl6.flowlabel = ip6_flowinfo(iph);
1423 fl6.flowi6_uid = uid;
1423 1424
1424 dst = ip6_route_output(net, NULL, &fl6); 1425 dst = ip6_route_output(net, NULL, &fl6);
1425 if (!dst->error) 1426 if (!dst->error)
@@ -1433,7 +1434,7 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1433 struct dst_entry *dst; 1434 struct dst_entry *dst;
1434 1435
1435 ip6_update_pmtu(skb, sock_net(sk), mtu, 1436 ip6_update_pmtu(skb, sock_net(sk), mtu,
1436 sk->sk_bound_dev_if, sk->sk_mark); 1437 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
1437 1438
1438 dst = __sk_dst_get(sk); 1439 dst = __sk_dst_get(sk);
1439 if (!dst || !dst->obsolete || 1440 if (!dst || !dst->obsolete ||
@@ -1525,7 +1526,8 @@ static struct dst_entry *ip6_route_redirect(struct net *net,
1525 flags, __ip6_route_redirect); 1526 flags, __ip6_route_redirect);
1526} 1527}
1527 1528
1528void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) 1529void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1530 kuid_t uid)
1529{ 1531{
1530 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 1532 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1531 struct dst_entry *dst; 1533 struct dst_entry *dst;
@@ -1538,6 +1540,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1538 fl6.daddr = iph->daddr; 1540 fl6.daddr = iph->daddr;
1539 fl6.saddr = iph->saddr; 1541 fl6.saddr = iph->saddr;
1540 fl6.flowlabel = ip6_flowinfo(iph); 1542 fl6.flowlabel = ip6_flowinfo(iph);
1543 fl6.flowi6_uid = uid;
1541 1544
1542 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); 1545 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1543 rt6_do_redirect(dst, NULL, skb); 1546 rt6_do_redirect(dst, NULL, skb);
@@ -1559,6 +1562,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1559 fl6.flowi6_mark = mark; 1562 fl6.flowi6_mark = mark;
1560 fl6.daddr = msg->dest; 1563 fl6.daddr = msg->dest;
1561 fl6.saddr = iph->daddr; 1564 fl6.saddr = iph->daddr;
1565 fl6.flowi6_uid = sock_net_uid(net, NULL);
1562 1566
1563 dst = ip6_route_redirect(net, &fl6, &iph->saddr); 1567 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1564 rt6_do_redirect(dst, NULL, skb); 1568 rt6_do_redirect(dst, NULL, skb);
@@ -1567,7 +1571,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1567 1571
1568void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) 1572void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1569{ 1573{
1570 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); 1574 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1575 sk->sk_uid);
1571} 1576}
1572EXPORT_SYMBOL_GPL(ip6_sk_redirect); 1577EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1573 1578
@@ -1995,8 +2000,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1995 It is very good, but in some (rare!) circumstances 2000 It is very good, but in some (rare!) circumstances
1996 (SIT, PtP, NBMA NOARP links) it is handy to allow 2001 (SIT, PtP, NBMA NOARP links) it is handy to allow
1997 some exceptions. --ANK 2002 some exceptions. --ANK
2003 We allow IPv4-mapped nexthops to support RFC4798-type
2004 addressing
1998 */ 2005 */
1999 if (!(gwa_type & IPV6_ADDR_UNICAST)) 2006 if (!(gwa_type & (IPV6_ADDR_UNICAST |
2007 IPV6_ADDR_MAPPED)))
2000 goto out; 2008 goto out;
2001 2009
2002 if (cfg->fc_table) { 2010 if (cfg->fc_table) {
@@ -2166,6 +2174,8 @@ static int ip6_route_del(struct fib6_config *cfg)
2166 continue; 2174 continue;
2167 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 2175 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2168 continue; 2176 continue;
2177 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2178 continue;
2169 dst_hold(&rt->dst); 2179 dst_hold(&rt->dst);
2170 read_unlock_bh(&table->tb6_lock); 2180 read_unlock_bh(&table->tb6_lock);
2171 2181
@@ -2801,6 +2811,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2801 [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, 2811 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2802 [RTA_ENCAP] = { .type = NLA_NESTED }, 2812 [RTA_ENCAP] = { .type = NLA_NESTED },
2803 [RTA_EXPIRES] = { .type = NLA_U32 }, 2813 [RTA_EXPIRES] = { .type = NLA_U32 },
2814 [RTA_UID] = { .type = NLA_U32 },
2804}; 2815};
2805 2816
2806static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2817static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -3375,6 +3386,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3375 if (tb[RTA_MARK]) 3386 if (tb[RTA_MARK])
3376 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); 3387 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3377 3388
3389 if (tb[RTA_UID])
3390 fl6.flowi6_uid = make_kuid(current_user_ns(),
3391 nla_get_u32(tb[RTA_UID]));
3392 else
3393 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3394
3378 if (iif) { 3395 if (iif) {
3379 struct net_device *dev; 3396 struct net_device *dev;
3380 int flags = 0; 3397 int flags = 0;
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
new file mode 100644
index 000000000000..b172d85c650a
--- /dev/null
+++ b/net/ipv6/seg6.c
@@ -0,0 +1,495 @@
1/*
2 * SR-IPv6 implementation
3 *
4 * Author:
5 * David Lebrun <david.lebrun@uclouvain.be>
6 *
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <linux/errno.h>
15#include <linux/types.h>
16#include <linux/socket.h>
17#include <linux/net.h>
18#include <linux/in6.h>
19#include <linux/slab.h>
20
21#include <net/ipv6.h>
22#include <net/protocol.h>
23
24#include <net/seg6.h>
25#include <net/genetlink.h>
26#include <linux/seg6.h>
27#include <linux/seg6_genl.h>
28#ifdef CONFIG_IPV6_SEG6_HMAC
29#include <net/seg6_hmac.h>
30#endif
31
32bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
33{
34 int trailing;
35 unsigned int tlv_offset;
36
37 if (srh->type != IPV6_SRCRT_TYPE_4)
38 return false;
39
40 if (((srh->hdrlen + 1) << 3) != len)
41 return false;
42
43 if (srh->segments_left != srh->first_segment)
44 return false;
45
46 tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
47
48 trailing = len - tlv_offset;
49 if (trailing < 0)
50 return false;
51
52 while (trailing) {
53 struct sr6_tlv *tlv;
54 unsigned int tlv_len;
55
56 tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset);
57 tlv_len = sizeof(*tlv) + tlv->len;
58
59 trailing -= tlv_len;
60 if (trailing < 0)
61 return false;
62
63 tlv_offset += tlv_len;
64 }
65
66 return true;
67}
68
69static struct genl_family seg6_genl_family;
70
71static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = {
72 [SEG6_ATTR_DST] = { .type = NLA_BINARY,
73 .len = sizeof(struct in6_addr) },
74 [SEG6_ATTR_DSTLEN] = { .type = NLA_S32, },
75 [SEG6_ATTR_HMACKEYID] = { .type = NLA_U32, },
76 [SEG6_ATTR_SECRET] = { .type = NLA_BINARY, },
77 [SEG6_ATTR_SECRETLEN] = { .type = NLA_U8, },
78 [SEG6_ATTR_ALGID] = { .type = NLA_U8, },
79 [SEG6_ATTR_HMACINFO] = { .type = NLA_NESTED, },
80};
81
82#ifdef CONFIG_IPV6_SEG6_HMAC
83
84static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
85{
86 struct net *net = genl_info_net(info);
87 struct seg6_pernet_data *sdata;
88 struct seg6_hmac_info *hinfo;
89 u32 hmackeyid;
90 char *secret;
91 int err = 0;
92 u8 algid;
93 u8 slen;
94
95 sdata = seg6_pernet(net);
96
97 if (!info->attrs[SEG6_ATTR_HMACKEYID] ||
98 !info->attrs[SEG6_ATTR_SECRETLEN] ||
99 !info->attrs[SEG6_ATTR_ALGID])
100 return -EINVAL;
101
102 hmackeyid = nla_get_u32(info->attrs[SEG6_ATTR_HMACKEYID]);
103 slen = nla_get_u8(info->attrs[SEG6_ATTR_SECRETLEN]);
104 algid = nla_get_u8(info->attrs[SEG6_ATTR_ALGID]);
105
106 if (hmackeyid == 0)
107 return -EINVAL;
108
109 if (slen > SEG6_HMAC_SECRET_LEN)
110 return -EINVAL;
111
112 mutex_lock(&sdata->lock);
113 hinfo = seg6_hmac_info_lookup(net, hmackeyid);
114
115 if (!slen) {
116 if (!hinfo)
117 err = -ENOENT;
118
119 err = seg6_hmac_info_del(net, hmackeyid);
120
121 goto out_unlock;
122 }
123
124 if (!info->attrs[SEG6_ATTR_SECRET]) {
125 err = -EINVAL;
126 goto out_unlock;
127 }
128
129 if (hinfo) {
130 err = seg6_hmac_info_del(net, hmackeyid);
131 if (err)
132 goto out_unlock;
133 }
134
135 secret = (char *)nla_data(info->attrs[SEG6_ATTR_SECRET]);
136
137 hinfo = kzalloc(sizeof(*hinfo), GFP_KERNEL);
138 if (!hinfo) {
139 err = -ENOMEM;
140 goto out_unlock;
141 }
142
143 memcpy(hinfo->secret, secret, slen);
144 hinfo->slen = slen;
145 hinfo->alg_id = algid;
146 hinfo->hmackeyid = hmackeyid;
147
148 err = seg6_hmac_info_add(net, hmackeyid, hinfo);
149 if (err)
150 kfree(hinfo);
151
152out_unlock:
153 mutex_unlock(&sdata->lock);
154 return err;
155}
156
157#else
158
/* CONFIG_IPV6_SEG6_HMAC disabled: reject HMAC configuration requests */
static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
{
	return -ENOTSUPP;
}
163
164#endif
165
166static int seg6_genl_set_tunsrc(struct sk_buff *skb, struct genl_info *info)
167{
168 struct net *net = genl_info_net(info);
169 struct in6_addr *val, *t_old, *t_new;
170 struct seg6_pernet_data *sdata;
171
172 sdata = seg6_pernet(net);
173
174 if (!info->attrs[SEG6_ATTR_DST])
175 return -EINVAL;
176
177 val = nla_data(info->attrs[SEG6_ATTR_DST]);
178 t_new = kmemdup(val, sizeof(*val), GFP_KERNEL);
179
180 mutex_lock(&sdata->lock);
181
182 t_old = sdata->tun_src;
183 rcu_assign_pointer(sdata->tun_src, t_new);
184
185 mutex_unlock(&sdata->lock);
186
187 synchronize_net();
188 kfree(t_old);
189
190 return 0;
191}
192
/*
 * seg6_genl_get_tunsrc - SEG6_CMD_GET_TUNSRC handler
 *
 * Replies with the per-netns tunnel source address (SEG6_ATTR_DST).
 * tun_src is RCU-protected: the read-side section covers both the
 * dereference and the copy into the reply message.
 */
static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = genl_info_net(info);
	struct in6_addr *tun_src;
	struct sk_buff *msg;
	void *hdr;

	msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
			  &seg6_genl_family, 0, SEG6_CMD_GET_TUNSRC);
	if (!hdr)
		goto free_msg;

	rcu_read_lock();
	tun_src = rcu_dereference(seg6_pernet(net)->tun_src);

	if (nla_put(msg, SEG6_ATTR_DST, sizeof(struct in6_addr), tun_src))
		goto nla_put_failure;

	rcu_read_unlock();

	genlmsg_end(msg, hdr);
	genlmsg_reply(msg, info);

	return 0;

nla_put_failure:
	rcu_read_unlock();
	genlmsg_cancel(msg, hdr);
free_msg:
	nlmsg_free(msg);
	/* message construction failed for lack of room */
	return -ENOMEM;
}
229
230#ifdef CONFIG_IPV6_SEG6_HMAC
231
/* Put one HMAC entry's attributes into @msg; returns -1 if the message
 * ran out of room. */
static int __seg6_hmac_fill_info(struct seg6_hmac_info *hinfo,
				 struct sk_buff *msg)
{
	if (nla_put_u32(msg, SEG6_ATTR_HMACKEYID, hinfo->hmackeyid) ||
	    nla_put_u8(msg, SEG6_ATTR_SECRETLEN, hinfo->slen) ||
	    nla_put(msg, SEG6_ATTR_SECRET, hinfo->slen, hinfo->secret) ||
	    nla_put_u8(msg, SEG6_ATTR_ALGID, hinfo->alg_id))
		return -1;

	return 0;
}

/* Emit one SEG6_CMD_DUMPHMAC multipart message describing @hinfo. */
static int __seg6_genl_dumphmac_element(struct seg6_hmac_info *hinfo,
					u32 portid, u32 seq, u32 flags,
					struct sk_buff *skb, u8 cmd)
{
	void *hdr;

	hdr = genlmsg_put(skb, portid, seq, &seg6_genl_family, flags, cmd);
	if (!hdr)
		return -ENOMEM;

	if (__seg6_hmac_fill_info(hinfo, skb) < 0)
		goto nla_put_failure;

	genlmsg_end(skb, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}
264
/* Dump setup: allocate (once) an rhashtable iterator, stash it in
 * cb->args[0] for the duration of the dump, and enter the walk. */
static int seg6_genl_dumphmac_start(struct netlink_callback *cb)
{
	struct net *net = sock_net(cb->skb->sk);
	struct seg6_pernet_data *sdata;
	struct rhashtable_iter *iter;

	sdata = seg6_pernet(net);
	iter = (struct rhashtable_iter *)cb->args[0];

	if (!iter) {
		iter = kmalloc(sizeof(*iter), GFP_KERNEL);
		if (!iter)
			return -ENOMEM;

		cb->args[0] = (long)iter;
	}

	/* NOTE(review): walk_enter runs even when iter was found already
	 * populated in cb->args[0] — presumably ->start is invoked once
	 * per dump by the genetlink core; confirm against callers. */
	rhashtable_walk_enter(&sdata->hmac_infos, iter);

	return 0;
}

/* Dump teardown: leave the walk and free the iterator. */
static int seg6_genl_dumphmac_done(struct netlink_callback *cb)
{
	struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];

	rhashtable_walk_exit(iter);

	kfree(iter);

	return 0;
}
297
/* SEG6_CMD_DUMPHMAC dump callback: emit every configured HMAC entry.
 * -EAGAIN from the walker means the table was resized mid-walk and the
 * current position should simply be retried. */
static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
	struct net *net = sock_net(skb->sk);
	struct seg6_pernet_data *sdata;
	struct seg6_hmac_info *hinfo;
	int ret;

	sdata = seg6_pernet(net);

	ret = rhashtable_walk_start(iter);
	if (ret && ret != -EAGAIN)
		goto done;

	for (;;) {
		hinfo = rhashtable_walk_next(iter);

		if (IS_ERR(hinfo)) {
			if (PTR_ERR(hinfo) == -EAGAIN)
				continue;
			ret = PTR_ERR(hinfo);
			goto done;
		} else if (!hinfo) {
			/* end of table */
			break;
		}

		ret = __seg6_genl_dumphmac_element(hinfo,
						   NETLINK_CB(cb->skb).portid,
						   cb->nlh->nlmsg_seq,
						   NLM_F_MULTI,
						   skb, SEG6_CMD_DUMPHMAC);
		if (ret)
			goto done;
	}

	/* returning the consumed length tells netlink to call us again */
	ret = skb->len;

done:
	rhashtable_walk_stop(iter);
	return ret;
}
339
340#else
341
/* CONFIG_IPV6_SEG6_HMAC disabled: dump start/done are no-ops and the
 * dump itself reports the feature as unsupported. */
static int seg6_genl_dumphmac_start(struct netlink_callback *cb)
{
	return 0;
}

static int seg6_genl_dumphmac_done(struct netlink_callback *cb)
{
	return 0;
}

static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
{
	return -ENOTSUPP;
}
356
357#endif
358
359static int __net_init seg6_net_init(struct net *net)
360{
361 struct seg6_pernet_data *sdata;
362
363 sdata = kzalloc(sizeof(*sdata), GFP_KERNEL);
364 if (!sdata)
365 return -ENOMEM;
366
367 mutex_init(&sdata->lock);
368
369 sdata->tun_src = kzalloc(sizeof(*sdata->tun_src), GFP_KERNEL);
370 if (!sdata->tun_src) {
371 kfree(sdata);
372 return -ENOMEM;
373 }
374
375 net->ipv6.seg6_data = sdata;
376
377#ifdef CONFIG_IPV6_SEG6_HMAC
378 seg6_hmac_net_init(net);
379#endif
380
381 return 0;
382}
383
/* Per-netns teardown: mirror of seg6_net_init. */
static void __net_exit seg6_net_exit(struct net *net)
{
	struct seg6_pernet_data *sdata = seg6_pernet(net);

#ifdef CONFIG_IPV6_SEG6_HMAC
	seg6_hmac_net_exit(net);
#endif

	kfree(sdata->tun_src);
	kfree(sdata);
}

static struct pernet_operations ip6_segments_ops = {
	.init = seg6_net_init,
	.exit = seg6_net_exit,
};
400
/* Generic netlink command table; every command requires CAP_NET_ADMIN
 * (GENL_ADMIN_PERM). */
static const struct genl_ops seg6_genl_ops[] = {
	{
		.cmd	= SEG6_CMD_SETHMAC,
		.doit	= seg6_genl_sethmac,
		.policy	= seg6_genl_policy,
		.flags	= GENL_ADMIN_PERM,
	},
	{
		.cmd	= SEG6_CMD_DUMPHMAC,
		.start	= seg6_genl_dumphmac_start,
		.dumpit	= seg6_genl_dumphmac,
		.done	= seg6_genl_dumphmac_done,
		.policy	= seg6_genl_policy,
		.flags	= GENL_ADMIN_PERM,
	},
	{
		.cmd	= SEG6_CMD_SET_TUNSRC,
		.doit	= seg6_genl_set_tunsrc,
		.policy	= seg6_genl_policy,
		.flags	= GENL_ADMIN_PERM,
	},
	{
		.cmd	= SEG6_CMD_GET_TUNSRC,
		.doit	= seg6_genl_get_tunsrc,
		.policy	= seg6_genl_policy,
		.flags	= GENL_ADMIN_PERM,
	},
};

/* The "SEG6" genetlink family; netns-aware and safe for parallel
 * operation since all writers serialize on sdata->lock. */
static struct genl_family seg6_genl_family __ro_after_init = {
	.hdrsize	= 0,
	.name		= SEG6_GENL_NAME,
	.version	= SEG6_GENL_VERSION,
	.maxattr	= SEG6_ATTR_MAX,
	.netnsok	= true,
	.parallel_ops	= true,
	.ops		= seg6_genl_ops,
	.n_ops		= ARRAY_SIZE(seg6_genl_ops),
	.module		= THIS_MODULE,
};
441
/* Subsystem init: register the genetlink family, the per-netns ops and
 * the optional lightweight-tunnel and HMAC components.  The #ifdef
 * ladder at the bottom unwinds in reverse order on failure, with labels
 * conditionally compiled to match the components actually built. */
int __init seg6_init(void)
{
	int err = -ENOMEM;

	err = genl_register_family(&seg6_genl_family);
	if (err)
		goto out;

	err = register_pernet_subsys(&ip6_segments_ops);
	if (err)
		goto out_unregister_genl;

#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
	err = seg6_iptunnel_init();
	if (err)
		goto out_unregister_pernet;
#endif

#ifdef CONFIG_IPV6_SEG6_HMAC
	err = seg6_hmac_init();
	if (err)
		goto out_unregister_iptun;
#endif

	pr_info("Segment Routing with IPv6\n");

out:
	return err;
#ifdef CONFIG_IPV6_SEG6_HMAC
out_unregister_iptun:
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
	seg6_iptunnel_exit();
#endif
#endif
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
out_unregister_pernet:
	unregister_pernet_subsys(&ip6_segments_ops);
#endif
out_unregister_genl:
	genl_unregister_family(&seg6_genl_family);
	goto out;
}
484
/* Tear down everything seg6_init registered, in reverse order. */
void seg6_exit(void)
{
#ifdef CONFIG_IPV6_SEG6_HMAC
	seg6_hmac_exit();
#endif
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
	seg6_iptunnel_exit();
#endif
	unregister_pernet_subsys(&ip6_segments_ops);
	genl_unregister_family(&seg6_genl_family);
}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
new file mode 100644
index 000000000000..ef1c8a46e7ac
--- /dev/null
+++ b/net/ipv6/seg6_hmac.c
@@ -0,0 +1,484 @@
1/*
2 * SR-IPv6 implementation -- HMAC functions
3 *
4 * Author:
5 * David Lebrun <david.lebrun@uclouvain.be>
6 *
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <linux/errno.h>
15#include <linux/types.h>
16#include <linux/socket.h>
17#include <linux/sockios.h>
18#include <linux/net.h>
19#include <linux/netdevice.h>
20#include <linux/in6.h>
21#include <linux/icmpv6.h>
22#include <linux/mroute6.h>
23#include <linux/slab.h>
24
25#include <linux/netfilter.h>
26#include <linux/netfilter_ipv6.h>
27
28#include <net/sock.h>
29#include <net/snmp.h>
30
31#include <net/ipv6.h>
32#include <net/protocol.h>
33#include <net/transp_v6.h>
34#include <net/rawv6.h>
35#include <net/ndisc.h>
36#include <net/ip6_route.h>
37#include <net/addrconf.h>
38#include <net/xfrm.h>
39
40#include <linux/cryptohash.h>
41#include <crypto/hash.h>
42#include <crypto/sha.h>
43#include <net/seg6.h>
44#include <net/genetlink.h>
45#include <net/seg6_hmac.h>
46#include <linux/random.h>
47
/* Per-CPU scratch buffer on which the HMAC input text is assembled
 * (see seg6_hmac_compute). */
static char * __percpu *hmac_ring;

/* rhashtable compare callback: match entries on the 32-bit key id. */
static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
{
	const struct seg6_hmac_info *hinfo = obj;

	return (hinfo->hmackeyid != *(__u32 *)arg->key);
}

/* Free an entry after the current RCU grace period. */
static inline void seg6_hinfo_release(struct seg6_hmac_info *hinfo)
{
	kfree_rcu(hinfo, rcu);
}

/* rhashtable free_fn callback used when destroying the table. */
static void seg6_free_hi(void *ptr, void *arg)
{
	struct seg6_hmac_info *hinfo = (struct seg6_hmac_info *)ptr;

	if (hinfo)
		seg6_hinfo_release(hinfo);
}

static const struct rhashtable_params rht_params = {
	.head_offset		= offsetof(struct seg6_hmac_info, node),
	.key_offset		= offsetof(struct seg6_hmac_info, hmackeyid),
	.key_len		= sizeof(u32),
	.automatic_shrinking	= true,
	.obj_cmpfn		= seg6_hmac_cmpfn,
};

/* Supported HMAC algorithms; the tfms/shashs per-cpu arrays are
 * populated at init time. */
static struct seg6_hmac_algo hmac_algos[] = {
	{
		.alg_id = SEG6_HMAC_ALGO_SHA1,
		.name = "hmac(sha1)",
	},
	{
		.alg_id = SEG6_HMAC_ALGO_SHA256,
		.name = "hmac(sha256)",
	},
};
88
/* Locate the HMAC TLV inside @srh, or return NULL if absent/malformed.
 * The HMAC TLV occupies the last 40 bytes of the SRH: a 2-byte TLV
 * header plus 38 payload bytes (key id + HMAC field), hence the fixed
 * "- 40" and "len != 38" checks below. */
static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh)
{
	struct sr6_tlv_hmac *tlv;

	/* hdrlen is in 8-octet units: the segment list (2 units per
	 * segment) plus the 5-unit (40-byte) HMAC TLV must fit */
	if (srh->hdrlen < (srh->first_segment + 1) * 2 + 5)
		return NULL;

	if (!sr_has_hmac(srh))
		return NULL;

	tlv = (struct sr6_tlv_hmac *)
	      ((char *)srh + ((srh->hdrlen + 1) << 3) - 40);

	if (tlv->tlvhdr.type != SR6_TLV_HMAC || tlv->tlvhdr.len != 38)
		return NULL;

	return tlv;
}
107
108static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id)
109{
110 struct seg6_hmac_algo *algo;
111 int i, alg_count;
112
113 alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
114 for (i = 0; i < alg_count; i++) {
115 algo = &hmac_algos[i];
116 if (algo->alg_id == alg_id)
117 return algo;
118 }
119
120 return NULL;
121}
122
/* Compute the keyed digest of @text (@psize bytes) into @output using
 * the algorithm and secret described by @hinfo.
 *
 * Returns the digest size on success, or a negative errno:
 *   -ENOENT  hinfo->alg_id is not a supported algorithm
 *   -ENOMEM  the digest would not fit in @outlen bytes
 *   other    errors propagated from the crypto layer
 *
 * Uses this cpu's pre-allocated tfm/shash, so the caller must prevent
 * migration (seg6_hmac_compute() calls us under local_bh_disable()).
 */
static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize,
		     u8 *output, int outlen)
{
	struct seg6_hmac_algo *algo;
	struct crypto_shash *tfm;
	struct shash_desc *shash;
	int ret, dgsize;

	algo = __hmac_get_algo(hinfo->alg_id);
	if (!algo)
		return -ENOENT;

	/* per-cpu transform, preallocated by seg6_hmac_init_algo() */
	tfm = *this_cpu_ptr(algo->tfms);

	dgsize = crypto_shash_digestsize(tfm);
	if (dgsize > outlen) {
		pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n",
			 dgsize, outlen);
		return -ENOMEM;
	}

	ret = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen);
	if (ret < 0) {
		pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", ret);
		goto failed;
	}

	/* per-cpu descriptor; bind it to this cpu's transform */
	shash = *this_cpu_ptr(algo->shashs);
	shash->tfm = tfm;

	ret = crypto_shash_digest(shash, text, psize, output);
	if (ret < 0) {
		pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", ret);
		goto failed;
	}

	return dgsize;

failed:
	return ret;
}
164
/* Compute the HMAC signature of SRH @hdr as seen from source @saddr,
 * using the key/algorithm in @hinfo, and write SEG6_HMAC_FIELD_LEN
 * bytes into @output (zero-padded if the digest is shorter).
 *
 * Returns 0 on success, -EMSGSIZE if the input text does not fit in
 * the per-cpu ring buffer, or a negative __do_hmac() error.
 */
int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
		      struct in6_addr *saddr, u8 *output)
{
	__be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid);
	u8 tmp_out[SEG6_HMAC_MAX_DIGESTSIZE];
	int plen, i, dgsize, wrsize;
	char *ring, *off;

	/* a 160-byte buffer for digest output allows to store highest known
	 * hash function (RadioGatun) with up to 1216 bits
	 */

	/* saddr(16) + first_seg(1) + cleanup(1) + keyid(4) + seglist(16n) */
	plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16;

	/* this limit allows for 14 segments */
	if (plen >= SEG6_HMAC_RING_SIZE)
		return -EMSGSIZE;

	/* Let's build the HMAC text on the ring buffer. The text is composed
	 * as follows, in order:
	 *
	 * 1. Source IPv6 address (128 bits)
	 * 2. first_segment value (8 bits)
	 * 3. cleanup flag (8 bits: highest bit is cleanup value, others are 0)
	 * 4. HMAC Key ID (32 bits)
	 * 5. All segments in the segments list (n * 128 bits)
	 */

	/* bh disabled so the per-cpu ring and tfm cannot be preempted
	 * from under us while we use them
	 */
	local_bh_disable();
	ring = *this_cpu_ptr(hmac_ring);
	off = ring;

	/* source address */
	memcpy(off, saddr, 16);
	off += 16;

	/* first_segment value */
	*off++ = hdr->first_segment;

	/* cleanup flag */
	*off++ = !!(sr_has_cleanup(hdr)) << 7;

	/* HMAC Key ID */
	memcpy(off, &hmackeyid, 4);
	off += 4;

	/* all segments in the list */
	for (i = 0; i < hdr->first_segment + 1; i++) {
		memcpy(off, hdr->segments + i, 16);
		off += 16;
	}

	dgsize = __do_hmac(hinfo, ring, plen, tmp_out,
			   SEG6_HMAC_MAX_DIGESTSIZE);
	local_bh_enable();

	if (dgsize < 0)
		return dgsize;

	/* truncate/zero-pad the digest to the fixed on-wire field size */
	wrsize = SEG6_HMAC_FIELD_LEN;
	if (wrsize > dgsize)
		wrsize = dgsize;

	memset(output, 0, SEG6_HMAC_FIELD_LEN);
	memcpy(output, tmp_out, wrsize);

	return 0;
}
EXPORT_SYMBOL(seg6_hmac_compute);
235
/* checks if an incoming SR-enabled packet's HMAC status matches
 * the incoming policy, driven by the per-device seg6_require_hmac
 * setting:
 *   < 0  never check
 *   == 0 check only when an HMAC TLV is present
 *   > 0  HMAC TLV mandatory and must verify
 *
 * called with rcu_read_lock()
 */
bool seg6_hmac_validate_skb(struct sk_buff *skb)
{
	u8 hmac_output[SEG6_HMAC_FIELD_LEN];
	struct net *net = dev_net(skb->dev);
	struct seg6_hmac_info *hinfo;
	struct sr6_tlv_hmac *tlv;
	struct ipv6_sr_hdr *srh;
	struct inet6_dev *idev;

	idev = __in6_dev_get(skb->dev);

	srh = (struct ipv6_sr_hdr *)skb_transport_header(skb);

	tlv = seg6_get_tlv_hmac(srh);

	/* mandatory check but no tlv */
	if (idev->cnf.seg6_require_hmac > 0 && !tlv)
		return false;

	/* no check */
	if (idev->cnf.seg6_require_hmac < 0)
		return true;

	/* check only if present */
	if (idev->cnf.seg6_require_hmac == 0 && !tlv)
		return true;

	/* now, seg6_require_hmac >= 0 && tlv */

	/* unknown key ID: fail closed */
	hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid));
	if (!hinfo)
		return false;

	if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output))
		return false;

	/* NOTE(review): memcmp() is not constant-time; a timing-safe
	 * compare (e.g. crypto_memneq) would be preferable here -- confirm.
	 */
	if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0)
		return false;

	return true;
}
EXPORT_SYMBOL(seg6_hmac_validate_skb);
283
284/* called with rcu_read_lock() */
285struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key)
286{
287 struct seg6_pernet_data *sdata = seg6_pernet(net);
288 struct seg6_hmac_info *hinfo;
289
290 hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params);
291
292 return hinfo;
293}
294EXPORT_SYMBOL(seg6_hmac_info_lookup);
295
296int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo)
297{
298 struct seg6_pernet_data *sdata = seg6_pernet(net);
299 int err;
300
301 err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node,
302 rht_params);
303
304 return err;
305}
306EXPORT_SYMBOL(seg6_hmac_info_add);
307
308int seg6_hmac_info_del(struct net *net, u32 key)
309{
310 struct seg6_pernet_data *sdata = seg6_pernet(net);
311 struct seg6_hmac_info *hinfo;
312 int err = -ENOENT;
313
314 hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params);
315 if (!hinfo)
316 goto out;
317
318 err = rhashtable_remove_fast(&sdata->hmac_infos, &hinfo->node,
319 rht_params);
320 if (err)
321 goto out;
322
323 seg6_hinfo_release(hinfo);
324
325out:
326 return err;
327}
328EXPORT_SYMBOL(seg6_hmac_info_del);
329
/* Recompute and write the HMAC field of the HMAC TLV inside @srh,
 * signing with the key referenced by the TLV's key ID, from source
 * address @saddr.
 *
 * Returns 0 on success, -EINVAL if @srh has no valid HMAC TLV,
 * -ENOENT if no HMAC info is registered for the key ID, or a
 * seg6_hmac_compute() error.
 */
int seg6_push_hmac(struct net *net, struct in6_addr *saddr,
		   struct ipv6_sr_hdr *srh)
{
	struct seg6_hmac_info *hinfo;
	struct sr6_tlv_hmac *tlv;
	int err = -ENOENT;

	tlv = seg6_get_tlv_hmac(srh);
	if (!tlv)
		return -EINVAL;

	/* rcu protects hinfo across the lookup + compute */
	rcu_read_lock();

	hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid));
	if (!hinfo)
		goto out;

	/* the HMAC field is part of the signed text, so it must be
	 * zeroed before the digest is computed over the SRH
	 */
	memset(tlv->hmac, 0, SEG6_HMAC_FIELD_LEN);
	err = seg6_hmac_compute(hinfo, srh, saddr, tlv->hmac);

out:
	rcu_read_unlock();
	return err;
}
EXPORT_SYMBOL(seg6_push_hmac);
355
/* Allocate one SEG6_HMAC_RING_SIZE scratch buffer per possible cpu;
 * seg6_hmac_compute() assembles the HMAC input text in it.
 *
 * NOTE(review): on a mid-loop kzalloc failure, rings allocated for
 * earlier cpus and the percpu pointer array itself are not freed here
 * -- confirm the caller's error path reaches seg6_hmac_exit().
 */
static int seg6_hmac_init_ring(void)
{
	int i;

	hmac_ring = alloc_percpu(char *);

	if (!hmac_ring)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		char *ring = kzalloc(SEG6_HMAC_RING_SIZE, GFP_KERNEL);

		if (!ring)
			return -ENOMEM;

		*per_cpu_ptr(hmac_ring, i) = ring;
	}

	return 0;
}
376
/* For each supported algorithm, allocate a per-cpu crypto transform
 * and a per-cpu shash descriptor sized for that transform.
 *
 * NOTE(review): on any failure in the middle, resources allocated so
 * far are not freed here -- confirm the caller's error path reaches
 * seg6_hmac_exit().
 */
static int seg6_hmac_init_algo(void)
{
	struct seg6_hmac_algo *algo;
	struct crypto_shash *tfm;
	struct shash_desc *shash;
	int i, alg_count, cpu;

	alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);

	for (i = 0; i < alg_count; i++) {
		struct crypto_shash **p_tfm;
		int shsize;

		algo = &hmac_algos[i];
		algo->tfms = alloc_percpu(struct crypto_shash *);
		if (!algo->tfms)
			return -ENOMEM;

		for_each_possible_cpu(cpu) {
			tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL);
			if (IS_ERR(tfm))
				return PTR_ERR(tfm);
			p_tfm = per_cpu_ptr(algo->tfms, cpu);
			*p_tfm = tfm;
		}

		/* all per-cpu tfms of one algorithm share a descsize;
		 * size the descriptors from this cpu's instance
		 */
		p_tfm = this_cpu_ptr(algo->tfms);
		tfm = *p_tfm;

		shsize = sizeof(*shash) + crypto_shash_descsize(tfm);

		algo->shashs = alloc_percpu(struct shash_desc *);
		if (!algo->shashs)
			return -ENOMEM;

		for_each_possible_cpu(cpu) {
			shash = kzalloc(shsize, GFP_KERNEL);
			if (!shash)
				return -ENOMEM;
			*per_cpu_ptr(algo->shashs, cpu) = shash;
		}
	}

	return 0;
}
422
423int __init seg6_hmac_init(void)
424{
425 int ret;
426
427 ret = seg6_hmac_init_ring();
428 if (ret < 0)
429 goto out;
430
431 ret = seg6_hmac_init_algo();
432
433out:
434 return ret;
435}
436EXPORT_SYMBOL(seg6_hmac_init);
437
438int __net_init seg6_hmac_net_init(struct net *net)
439{
440 struct seg6_pernet_data *sdata = seg6_pernet(net);
441
442 rhashtable_init(&sdata->hmac_infos, &rht_params);
443
444 return 0;
445}
446EXPORT_SYMBOL(seg6_hmac_net_init);
447
/* Free all global HMAC state: the per-cpu scratch rings and, for each
 * supported algorithm, every per-cpu shash descriptor and crypto
 * transform allocated by seg6_hmac_init().
 */
void seg6_hmac_exit(void)
{
	struct seg6_hmac_algo *algo = NULL;
	int i, alg_count, cpu;

	for_each_possible_cpu(i) {
		char *ring = *per_cpu_ptr(hmac_ring, i);

		kfree(ring);
	}
	free_percpu(hmac_ring);

	alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
	for (i = 0; i < alg_count; i++) {
		algo = &hmac_algos[i];
		for_each_possible_cpu(cpu) {
			struct crypto_shash *tfm;
			struct shash_desc *shash;

			shash = *per_cpu_ptr(algo->shashs, cpu);
			kfree(shash);
			tfm = *per_cpu_ptr(algo->tfms, cpu);
			crypto_free_shash(tfm);
		}
		free_percpu(algo->tfms);
		free_percpu(algo->shashs);
	}
}
EXPORT_SYMBOL(seg6_hmac_exit);
477
/* Per-netns teardown: destroy the HMAC info table, releasing every
 * registered entry through seg6_free_hi().
 */
void __net_exit seg6_hmac_net_exit(struct net *net)
{
	struct seg6_pernet_data *sdata = seg6_pernet(net);

	rhashtable_free_and_destroy(&sdata->hmac_infos, seg6_free_hi, NULL);
}
EXPORT_SYMBOL(seg6_hmac_net_exit);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
new file mode 100644
index 000000000000..bbfca22c34ae
--- /dev/null
+++ b/net/ipv6/seg6_iptunnel.c
@@ -0,0 +1,431 @@
1/*
2 * SR-IPv6 implementation
3 *
4 * Author:
5 * David Lebrun <david.lebrun@uclouvain.be>
6 *
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/net.h>
#include <linux/module.h>
#include <linux/string.h>
#include <net/ip.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/netns/generic.h>
#include <net/ip6_fib.h>
#include <net/route.h>
#include <net/seg6.h>
#include <linux/seg6.h>
#include <linux/seg6_iptunnel.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#ifdef CONFIG_DST_CACHE
#include <net/dst_cache.h>
#endif
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
35
36struct seg6_lwt {
37#ifdef CONFIG_DST_CACHE
38 struct dst_cache cache;
39#endif
40 struct seg6_iptunnel_encap tuninfo[0];
41};
42
43static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
44{
45 return (struct seg6_lwt *)lwt->data;
46}
47
48static inline struct seg6_iptunnel_encap *
49seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
50{
51 return seg6_lwt_lwtunnel(lwt)->tuninfo;
52}
53
/* Netlink policy: SEG6_IPTUNNEL_SRH is a variable-length blob holding
 * a struct seg6_iptunnel_encap; its size and embedded SRH are
 * validated in seg6_build_state().
 */
static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
	[SEG6_IPTUNNEL_SRH]	= { .type = NLA_BINARY },
};
57
58int nla_put_srh(struct sk_buff *skb, int attrtype,
59 struct seg6_iptunnel_encap *tuninfo)
60{
61 struct seg6_iptunnel_encap *data;
62 struct nlattr *nla;
63 int len;
64
65 len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
66
67 nla = nla_reserve(skb, attrtype, len);
68 if (!nla)
69 return -EMSGSIZE;
70
71 data = nla_data(nla);
72 memcpy(data, tuninfo, len);
73
74 return 0;
75}
76
/* Choose the outer IPv6 source address for an encapsulated packet:
 * the per-netns configured tunnel source if one is set, otherwise a
 * source address selected for @daddr on @dev.
 */
static void set_tun_src(struct net *net, struct net_device *dev,
			struct in6_addr *daddr, struct in6_addr *saddr)
{
	struct seg6_pernet_data *sdata = seg6_pernet(net);
	struct in6_addr *tun_src;

	/* tun_src is RCU-managed (may be replaced via netlink) */
	rcu_read_lock();

	tun_src = rcu_dereference(sdata->tun_src);

	if (!ipv6_addr_any(tun_src)) {
		memcpy(saddr, tun_src, sizeof(struct in6_addr));
	} else {
		ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
				   saddr);
	}

	rcu_read_unlock();
}
96
/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH
 *
 * Returns 0 on success or a negative errno (head expansion or HMAC
 * signing failure). On success the skb's network header points at the
 * new outer IPv6 header.
 */
static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct ipv6hdr *hdr, *inner_hdr;
	struct ipv6_sr_hdr *isrh;
	int hdrlen, tot_len, err;

	/* SRH hdrlen is in 8-octet units past the first 8 octets */
	hdrlen = (osrh->hdrlen + 1) << 3;
	tot_len = hdrlen + sizeof(*hdr);

	/* make room in front for outer IPv6 header + SRH */
	err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC);
	if (unlikely(err))
		return err;

	inner_hdr = ipv6_hdr(skb);

	skb_push(skb, tot_len);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);
	hdr = ipv6_hdr(skb);

	/* inherit tc, flowlabel and hlim
	 * hlim will be decremented in ip6_forward() afterwards and
	 * decapsulation will overwrite inner hlim with outer hlim
	 */
	ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
		     ip6_flowlabel(inner_hdr));
	hdr->hop_limit = inner_hdr->hop_limit;
	hdr->nexthdr = NEXTHDR_ROUTING;

	isrh = (void *)hdr + sizeof(*hdr);
	memcpy(isrh, osrh, hdrlen);

	isrh->nexthdr = NEXTHDR_IPV6;

	/* outer DA is the first segment to visit (last in the list) */
	hdr->daddr = isrh->segments[isrh->first_segment];
	set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);

#ifdef CONFIG_IPV6_SEG6_HMAC
	/* sign the SRH once the outer source address is known */
	if (sr_has_hmac(isrh)) {
		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

	skb_postpush_rcsum(skb, hdr, tot_len);

	return 0;
}
148
/* insert an SRH within an IPv6 packet, just after the IPv6 header
 *
 * The original destination address is saved in segments[0] and the
 * packet's DA becomes the first segment to visit.
 */
#ifdef CONFIG_IPV6_SEG6_INLINE
static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
{
	struct ipv6hdr *hdr, *oldhdr;
	struct ipv6_sr_hdr *isrh;
	int hdrlen, err;

	/* SRH hdrlen is in 8-octet units past the first 8 octets */
	hdrlen = (osrh->hdrlen + 1) << 3;

	err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC);
	if (unlikely(err))
		return err;

	oldhdr = ipv6_hdr(skb);

	/* temporarily remove the IPv6 header from the checksum... */
	skb_pull(skb, sizeof(struct ipv6hdr));
	skb_postpull_rcsum(skb, skb_network_header(skb),
			   sizeof(struct ipv6hdr));

	/* ...then open room for IPv6 header + SRH in front */
	skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);

	hdr = ipv6_hdr(skb);

	/* relocate the original IPv6 header to the new front position */
	memmove(hdr, oldhdr, sizeof(*hdr));

	isrh = (void *)hdr + sizeof(*hdr);
	memcpy(isrh, osrh, hdrlen);

	/* splice the SRH into the extension header chain */
	isrh->nexthdr = hdr->nexthdr;
	hdr->nexthdr = NEXTHDR_ROUTING;

	/* segment 0 keeps the original DA; DA becomes the first hop */
	isrh->segments[0] = hdr->daddr;
	hdr->daddr = isrh->segments[isrh->first_segment];

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (sr_has_hmac(isrh)) {
		struct net *net = dev_net(skb_dst(skb)->dev);

		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

	skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);

	return 0;
}
#endif
201
202static int seg6_do_srh(struct sk_buff *skb)
203{
204 struct dst_entry *dst = skb_dst(skb);
205 struct seg6_iptunnel_encap *tinfo;
206 int err = 0;
207
208 tinfo = seg6_encap_lwtunnel(dst->lwtstate);
209
210 if (likely(!skb->encapsulation)) {
211 skb_reset_inner_headers(skb);
212 skb->encapsulation = 1;
213 }
214
215 switch (tinfo->mode) {
216#ifdef CONFIG_IPV6_SEG6_INLINE
217 case SEG6_IPTUN_MODE_INLINE:
218 err = seg6_do_srh_inline(skb, tinfo->srh);
219 skb_reset_inner_headers(skb);
220 break;
221#endif
222 case SEG6_IPTUN_MODE_ENCAP:
223 err = seg6_do_srh_encap(skb, tinfo->srh);
224 break;
225 }
226
227 if (err)
228 return err;
229
230 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
231 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
232
233 skb_set_inner_protocol(skb, skb->protocol);
234
235 return 0;
236}
237
/* lwtunnel input handler: apply the SR transformation, then re-run the
 * IPv6 input route lookup since the destination address may have
 * changed. Consumes the skb on error.
 */
int seg6_input(struct sk_buff *skb)
{
	int err;

	err = seg6_do_srh(skb);
	if (unlikely(err)) {
		kfree_skb(skb);
		return err;
	}

	/* the DA may have changed: drop the cached dst and re-route */
	skb_dst_drop(skb);
	ip6_route_input(skb);

	return dst_input(skb);
}
253
254int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
255{
256 struct dst_entry *orig_dst = skb_dst(skb);
257 struct dst_entry *dst = NULL;
258 struct seg6_lwt *slwt;
259 int err = -EINVAL;
260
261 err = seg6_do_srh(skb);
262 if (unlikely(err))
263 goto drop;
264
265 slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
266
267#ifdef CONFIG_DST_CACHE
268 dst = dst_cache_get(&slwt->cache);
269#endif
270
271 if (unlikely(!dst)) {
272 struct ipv6hdr *hdr = ipv6_hdr(skb);
273 struct flowi6 fl6;
274
275 fl6.daddr = hdr->daddr;
276 fl6.saddr = hdr->saddr;
277 fl6.flowlabel = ip6_flowinfo(hdr);
278 fl6.flowi6_mark = skb->mark;
279 fl6.flowi6_proto = hdr->nexthdr;
280
281 dst = ip6_route_output(net, NULL, &fl6);
282 if (dst->error) {
283 err = dst->error;
284 dst_release(dst);
285 goto drop;
286 }
287
288#ifdef CONFIG_DST_CACHE
289 dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
290#endif
291 }
292
293 skb_dst_drop(skb);
294 skb_dst_set(skb, dst);
295
296 return dst_output(net, sk, skb);
297drop:
298 kfree_skb(skb);
299 return err;
300}
301
/* Parse the SEG6_IPTUNNEL_SRH netlink attribute, validate the embedded
 * mode and SRH, and build the lwtunnel state (tunnel info copied
 * inline, plus an optional dst cache).
 *
 * Returns 0 and sets *ts on success, or a negative errno.
 */
static int seg6_build_state(struct net_device *dev, struct nlattr *nla,
			    unsigned int family, const void *cfg,
			    struct lwtunnel_state **ts)
{
	struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
	struct seg6_iptunnel_encap *tuninfo;
	struct lwtunnel_state *newts;
	int tuninfo_len, min_size;
	struct seg6_lwt *slwt;
	int err;

	err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
			       seg6_iptunnel_policy);

	if (err < 0)
		return err;

	if (!tb[SEG6_IPTUNNEL_SRH])
		return -EINVAL;

	tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
	tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);

	/* tuninfo must contain at least the iptunnel encap structure,
	 * the SRH and one segment
	 */
	min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
		   sizeof(struct in6_addr);
	if (tuninfo_len < min_size)
		return -EINVAL;

	/* only modes handled by seg6_do_srh() are accepted */
	switch (tuninfo->mode) {
#ifdef CONFIG_IPV6_SEG6_INLINE
	case SEG6_IPTUN_MODE_INLINE:
		break;
#endif
	case SEG6_IPTUN_MODE_ENCAP:
		break;
	default:
		return -EINVAL;
	}

	/* verify that SRH is consistent */
	if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo)))
		return -EINVAL;

	/* allocate with room for the variable-size tuninfo trailer */
	newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
	if (!newts)
		return -ENOMEM;

	slwt = seg6_lwt_lwtunnel(newts);

#ifdef CONFIG_DST_CACHE
	err = dst_cache_init(&slwt->cache, GFP_KERNEL);
	if (err) {
		kfree(newts);
		return err;
	}
#endif

	memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);

	newts->type = LWTUNNEL_ENCAP_SEG6;
	newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
			LWTUNNEL_STATE_INPUT_REDIRECT;
	newts->headroom = seg6_lwt_headroom(tuninfo);

	*ts = newts;

	return 0;
}
373
#ifdef CONFIG_DST_CACHE
/* Release the per-state dst cache allocated in seg6_build_state(). */
static void seg6_destroy_state(struct lwtunnel_state *lwt)
{
	dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
}
#endif
380
381static int seg6_fill_encap_info(struct sk_buff *skb,
382 struct lwtunnel_state *lwtstate)
383{
384 struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
385
386 if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
387 return -EMSGSIZE;
388
389 return 0;
390}
391
/* Netlink space needed to dump this state's tunnel info. */
static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	return nla_total_size(
		SEG6_IPTUN_ENCAP_SIZE(seg6_encap_lwtunnel(lwtstate)));
}
398
/* Compare two SR lwtunnel states; returns 0 when their tunnel info is
 * byte-identical, non-zero otherwise.
 */
static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct seg6_iptunnel_encap *ea = seg6_encap_lwtunnel(a);
	struct seg6_iptunnel_encap *eb = seg6_encap_lwtunnel(b);
	int alen = SEG6_IPTUN_ENCAP_SIZE(ea);

	if (SEG6_IPTUN_ENCAP_SIZE(eb) != alen)
		return 1;

	return memcmp(ea, eb, alen);
}
410
/* lwtunnel hooks for LWTUNNEL_ENCAP_SEG6: netlink state build/dump,
 * packet redirection on input/output, and state comparison.
 */
static const struct lwtunnel_encap_ops seg6_iptun_ops = {
	.build_state = seg6_build_state,
#ifdef CONFIG_DST_CACHE
	.destroy_state = seg6_destroy_state,
#endif
	.output = seg6_output,
	.input = seg6_input,
	.fill_encap = seg6_fill_encap_info,
	.get_encap_size = seg6_encap_nlsize,
	.cmp_encap = seg6_encap_cmp,
};
422
/* Register the SR-IPv6 encap ops with the lwtunnel framework. */
int __init seg6_iptunnel_init(void)
{
	return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
}
427
/* Unregister the SR-IPv6 encap ops from the lwtunnel framework. */
void seg6_iptunnel_exit(void)
{
	lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b1cdf8009d29..fad992ad4bc8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -31,7 +31,7 @@
31#include <linux/if_arp.h> 31#include <linux/if_arp.h>
32#include <linux/icmp.h> 32#include <linux/icmp.h>
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include <asm/uaccess.h> 34#include <linux/uaccess.h>
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/netfilter_ipv4.h> 36#include <linux/netfilter_ipv4.h>
37#include <linux/if_ether.h> 37#include <linux/if_ether.h>
@@ -76,7 +76,7 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
76 __be32 *v4dst); 76 __be32 *v4dst);
77static struct rtnl_link_ops sit_link_ops __read_mostly; 77static struct rtnl_link_ops sit_link_ops __read_mostly;
78 78
79static int sit_net_id __read_mostly; 79static unsigned int sit_net_id __read_mostly;
80struct sit_net { 80struct sit_net {
81 struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE]; 81 struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE];
82 struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE]; 82 struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE];
@@ -1318,23 +1318,11 @@ done:
1318 return err; 1318 return err;
1319} 1319}
1320 1320
1321static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1322{
1323 struct ip_tunnel *tunnel = netdev_priv(dev);
1324 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1325
1326 if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - t_hlen)
1327 return -EINVAL;
1328 dev->mtu = new_mtu;
1329 return 0;
1330}
1331
1332static const struct net_device_ops ipip6_netdev_ops = { 1321static const struct net_device_ops ipip6_netdev_ops = {
1333 .ndo_init = ipip6_tunnel_init, 1322 .ndo_init = ipip6_tunnel_init,
1334 .ndo_uninit = ipip6_tunnel_uninit, 1323 .ndo_uninit = ipip6_tunnel_uninit,
1335 .ndo_start_xmit = sit_tunnel_xmit, 1324 .ndo_start_xmit = sit_tunnel_xmit,
1336 .ndo_do_ioctl = ipip6_tunnel_ioctl, 1325 .ndo_do_ioctl = ipip6_tunnel_ioctl,
1337 .ndo_change_mtu = ipip6_tunnel_change_mtu,
1338 .ndo_get_stats64 = ip_tunnel_get_stats64, 1326 .ndo_get_stats64 = ip_tunnel_get_stats64,
1339 .ndo_get_iflink = ip_tunnel_get_iflink, 1327 .ndo_get_iflink = ip_tunnel_get_iflink,
1340}; 1328};
@@ -1365,6 +1353,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
1365 dev->type = ARPHRD_SIT; 1353 dev->type = ARPHRD_SIT;
1366 dev->hard_header_len = LL_MAX_HEADER + t_hlen; 1354 dev->hard_header_len = LL_MAX_HEADER + t_hlen;
1367 dev->mtu = ETH_DATA_LEN - t_hlen; 1355 dev->mtu = ETH_DATA_LEN - t_hlen;
1356 dev->min_mtu = IPV6_MIN_MTU;
1357 dev->max_mtu = 0xFFF8 - t_hlen;
1368 dev->flags = IFF_NOARP; 1358 dev->flags = IFF_NOARP;
1369 netif_keep_dst(dev); 1359 netif_keep_dst(dev);
1370 dev->addr_len = 4; 1360 dev->addr_len = 4;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 59c483937aec..a4d49760bf43 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -209,6 +209,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
209 treq->snt_synack.v64 = 0; 209 treq->snt_synack.v64 = 0;
210 treq->rcv_isn = ntohl(th->seq) - 1; 210 treq->rcv_isn = ntohl(th->seq) - 1;
211 treq->snt_isn = cookie; 211 treq->snt_isn = cookie;
212 treq->ts_off = 0;
212 213
213 /* 214 /*
214 * We need to lookup the dst_entry to get the correct window size. 215 * We need to lookup the dst_entry to get the correct window size.
@@ -227,6 +228,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
227 fl6.flowi6_mark = ireq->ir_mark; 228 fl6.flowi6_mark = ireq->ir_mark;
228 fl6.fl6_dport = ireq->ir_rmt_port; 229 fl6.fl6_dport = ireq->ir_rmt_port;
229 fl6.fl6_sport = inet_sk(sk)->inet_sport; 230 fl6.fl6_sport = inet_sk(sk)->inet_sport;
231 fl6.flowi6_uid = sk->sk_uid;
230 security_req_classify_flow(req, flowi6_to_flowi(&fl6)); 232 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
231 233
232 dst = ip6_dst_lookup_flow(sk, &fl6, final_p); 234 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b9f1fee9a886..73bc8fc68acd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -101,12 +101,12 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
101 } 101 }
102} 102}
103 103
104static __u32 tcp_v6_init_sequence(const struct sk_buff *skb) 104static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff)
105{ 105{
106 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, 106 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
107 ipv6_hdr(skb)->saddr.s6_addr32, 107 ipv6_hdr(skb)->saddr.s6_addr32,
108 tcp_hdr(skb)->dest, 108 tcp_hdr(skb)->dest,
109 tcp_hdr(skb)->source); 109 tcp_hdr(skb)->source, tsoff);
110} 110}
111 111
112static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 112static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -233,6 +233,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
233 fl6.flowi6_mark = sk->sk_mark; 233 fl6.flowi6_mark = sk->sk_mark;
234 fl6.fl6_dport = usin->sin6_port; 234 fl6.fl6_dport = usin->sin6_port;
235 fl6.fl6_sport = inet->inet_sport; 235 fl6.fl6_sport = inet->inet_sport;
236 fl6.flowi6_uid = sk->sk_uid;
236 237
237 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); 238 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
238 final_p = fl6_update_dst(&fl6, opt, &final); 239 final_p = fl6_update_dst(&fl6, opt, &final);
@@ -282,7 +283,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
282 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, 283 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
283 sk->sk_v6_daddr.s6_addr32, 284 sk->sk_v6_daddr.s6_addr32,
284 inet->inet_sport, 285 inet->inet_sport,
285 inet->inet_dport); 286 inet->inet_dport,
287 &tp->tsoffset);
286 288
287 err = tcp_connect(sk); 289 err = tcp_connect(sk);
288 if (err) 290 if (err)
@@ -397,7 +399,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
397 if (!sock_owned_by_user(sk)) 399 if (!sock_owned_by_user(sk))
398 tcp_v6_mtu_reduced(sk); 400 tcp_v6_mtu_reduced(sk);
399 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, 401 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
400 &tp->tsq_flags)) 402 &sk->sk_tsq_flags))
401 sock_hold(sk); 403 sock_hold(sk);
402 goto out; 404 goto out;
403 } 405 }
@@ -828,6 +830,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
828 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); 830 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
829 fl6.fl6_dport = t1->dest; 831 fl6.fl6_dport = t1->dest;
830 fl6.fl6_sport = t1->source; 832 fl6.fl6_sport = t1->source;
833 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
831 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); 834 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
832 835
833 /* Pass a socket to ip6_dst_lookup either it is for RST 836 /* Pass a socket to ip6_dst_lookup either it is for RST
@@ -954,7 +957,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
954 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 957 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
955 tcp_rsk(req)->rcv_nxt, 958 tcp_rsk(req)->rcv_nxt,
956 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 959 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
957 tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, 960 tcp_time_stamp + tcp_rsk(req)->ts_off,
961 req->ts_recent, sk->sk_bound_dev_if,
958 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 962 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
959 0, 0); 963 0, 0);
960} 964}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e4a8000d59ad..4d5c4eee4b3f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -35,7 +35,7 @@
35#include <linux/module.h> 35#include <linux/module.h>
36#include <linux/skbuff.h> 36#include <linux/skbuff.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <asm/uaccess.h> 38#include <linux/uaccess.h>
39 39
40#include <net/addrconf.h> 40#include <net/addrconf.h>
41#include <net/ndisc.h> 41#include <net/ndisc.h>
@@ -302,7 +302,8 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
302 * Does increment socket refcount. 302 * Does increment socket refcount.
303 */ 303 */
304#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \ 304#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
305 IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) 305 IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \
306 IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
306struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, 307struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
307 const struct in6_addr *daddr, __be16 dport, int dif) 308 const struct in6_addr *daddr, __be16 dport, int dif)
308{ 309{
@@ -334,7 +335,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
334 int is_udplite = IS_UDPLITE(sk); 335 int is_udplite = IS_UDPLITE(sk);
335 bool checksum_valid = false; 336 bool checksum_valid = false;
336 int is_udp4; 337 int is_udp4;
337 bool slow;
338 338
339 if (flags & MSG_ERRQUEUE) 339 if (flags & MSG_ERRQUEUE)
340 return ipv6_recv_error(sk, msg, len, addr_len); 340 return ipv6_recv_error(sk, msg, len, addr_len);
@@ -344,8 +344,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
344 344
345try_again: 345try_again:
346 peeking = off = sk_peek_offset(sk, flags); 346 peeking = off = sk_peek_offset(sk, flags);
347 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), 347 skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
348 &peeked, &off, &err);
349 if (!skb) 348 if (!skb)
350 return err; 349 return err;
351 350
@@ -364,7 +363,8 @@ try_again:
364 * coverage checksum (UDP-Lite), do it before the copy. 363 * coverage checksum (UDP-Lite), do it before the copy.
365 */ 364 */
366 365
367 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) { 366 if (copied < ulen || peeking ||
367 (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
368 checksum_valid = !udp_lib_checksum_complete(skb); 368 checksum_valid = !udp_lib_checksum_complete(skb);
369 if (!checksum_valid) 369 if (!checksum_valid)
370 goto csum_copy_err; 370 goto csum_copy_err;
@@ -378,7 +378,6 @@ try_again:
378 goto csum_copy_err; 378 goto csum_copy_err;
379 } 379 }
380 if (unlikely(err)) { 380 if (unlikely(err)) {
381 trace_kfree_skb(skb, udpv6_recvmsg);
382 if (!peeked) { 381 if (!peeked) {
383 atomic_inc(&sk->sk_drops); 382 atomic_inc(&sk->sk_drops);
384 if (is_udp4) 383 if (is_udp4)
@@ -388,7 +387,7 @@ try_again:
388 UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, 387 UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
389 is_udplite); 388 is_udplite);
390 } 389 }
391 skb_free_datagram_locked(sk, skb); 390 kfree_skb(skb);
392 return err; 391 return err;
393 } 392 }
394 if (!peeked) { 393 if (!peeked) {
@@ -427,7 +426,7 @@ try_again:
427 426
428 if (is_udp4) { 427 if (is_udp4) {
429 if (inet->cmsg_flags) 428 if (inet->cmsg_flags)
430 ip_cmsg_recv_offset(msg, skb, 429 ip_cmsg_recv_offset(msg, sk, skb,
431 sizeof(struct udphdr), off); 430 sizeof(struct udphdr), off);
432 } else { 431 } else {
433 if (np->rxopt.all) 432 if (np->rxopt.all)
@@ -438,12 +437,11 @@ try_again:
438 if (flags & MSG_TRUNC) 437 if (flags & MSG_TRUNC)
439 err = ulen; 438 err = ulen;
440 439
441 __skb_free_datagram_locked(sk, skb, peeking ? -err : err); 440 skb_consume_udp(sk, skb, peeking ? -err : err);
442 return err; 441 return err;
443 442
444csum_copy_err: 443csum_copy_err:
445 slow = lock_sock_fast(sk); 444 if (!__sk_queue_drop_skb(sk, skb, flags)) {
446 if (!skb_kill_datagram(sk, skb, flags)) {
447 if (is_udp4) { 445 if (is_udp4) {
448 UDP_INC_STATS(sock_net(sk), 446 UDP_INC_STATS(sock_net(sk),
449 UDP_MIB_CSUMERRORS, is_udplite); 447 UDP_MIB_CSUMERRORS, is_udplite);
@@ -456,7 +454,7 @@ csum_copy_err:
456 UDP_MIB_INERRORS, is_udplite); 454 UDP_MIB_INERRORS, is_udplite);
457 } 455 }
458 } 456 }
459 unlock_sock_fast(sk, slow); 457 kfree_skb(skb);
460 458
461 /* starting over for a new packet, but check if we need to yield */ 459 /* starting over for a new packet, but check if we need to yield */
462 cond_resched(); 460 cond_resched();
@@ -522,9 +520,11 @@ int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
522 sock_rps_save_rxhash(sk, skb); 520 sock_rps_save_rxhash(sk, skb);
523 sk_mark_napi_id(sk, skb); 521 sk_mark_napi_id(sk, skb);
524 sk_incoming_cpu_update(sk); 522 sk_incoming_cpu_update(sk);
523 } else {
524 sk_mark_napi_id_once(sk, skb);
525 } 525 }
526 526
527 rc = __sock_queue_rcv_skb(sk, skb); 527 rc = __udp_enqueue_schedule_skb(sk, skb);
528 if (rc < 0) { 528 if (rc < 0) {
529 int is_udplite = IS_UDPLITE(sk); 529 int is_udplite = IS_UDPLITE(sk);
530 530
@@ -536,6 +536,7 @@ int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
536 kfree_skb(skb); 536 kfree_skb(skb);
537 return -1; 537 return -1;
538 } 538 }
539
539 return 0; 540 return 0;
540} 541}
541 542
@@ -557,7 +558,6 @@ EXPORT_SYMBOL(udpv6_encap_enable);
557int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 558int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
558{ 559{
559 struct udp_sock *up = udp_sk(sk); 560 struct udp_sock *up = udp_sk(sk);
560 int rc;
561 int is_udplite = IS_UDPLITE(sk); 561 int is_udplite = IS_UDPLITE(sk);
562 562
563 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 563 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
@@ -623,25 +623,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
623 goto drop; 623 goto drop;
624 624
625 udp_csum_pull_header(skb); 625 udp_csum_pull_header(skb);
626 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
627 __UDP6_INC_STATS(sock_net(sk),
628 UDP_MIB_RCVBUFERRORS, is_udplite);
629 goto drop;
630 }
631 626
632 skb_dst_drop(skb); 627 skb_dst_drop(skb);
633 628
634 bh_lock_sock(sk); 629 return __udpv6_queue_rcv_skb(sk, skb);
635 rc = 0;
636 if (!sock_owned_by_user(sk))
637 rc = __udpv6_queue_rcv_skb(sk, skb);
638 else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
639 bh_unlock_sock(sk);
640 goto drop;
641 }
642 bh_unlock_sock(sk);
643
644 return rc;
645 630
646csum_error: 631csum_error:
647 __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); 632 __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
@@ -1156,6 +1141,7 @@ do_udp_sendmsg:
1156 fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; 1141 fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
1157 1142
1158 fl6.flowi6_mark = sk->sk_mark; 1143 fl6.flowi6_mark = sk->sk_mark;
1144 fl6.flowi6_uid = sk->sk_uid;
1159 sockc.tsflags = sk->sk_tsflags; 1145 sockc.tsflags = sk->sk_tsflags;
1160 1146
1161 if (msg->msg_controllen) { 1147 if (msg->msg_controllen) {
@@ -1434,12 +1420,12 @@ struct proto udpv6_prot = {
1434 .connect = ip6_datagram_connect, 1420 .connect = ip6_datagram_connect,
1435 .disconnect = udp_disconnect, 1421 .disconnect = udp_disconnect,
1436 .ioctl = udp_ioctl, 1422 .ioctl = udp_ioctl,
1423 .init = udp_init_sock,
1437 .destroy = udpv6_destroy_sock, 1424 .destroy = udpv6_destroy_sock,
1438 .setsockopt = udpv6_setsockopt, 1425 .setsockopt = udpv6_setsockopt,
1439 .getsockopt = udpv6_getsockopt, 1426 .getsockopt = udpv6_getsockopt,
1440 .sendmsg = udpv6_sendmsg, 1427 .sendmsg = udpv6_sendmsg,
1441 .recvmsg = udpv6_recvmsg, 1428 .recvmsg = udpv6_recvmsg,
1442 .backlog_rcv = __udpv6_queue_rcv_skb,
1443 .release_cb = ip6_datagram_release_cb, 1429 .release_cb = ip6_datagram_release_cb,
1444 .hash = udp_lib_hash, 1430 .hash = udp_lib_hash,
1445 .unhash = udp_lib_unhash, 1431 .unhash = udp_lib_unhash,
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 2f5101a12283..2784cc363f2b 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -45,10 +45,11 @@ struct proto udplitev6_prot = {
45 .getsockopt = udpv6_getsockopt, 45 .getsockopt = udpv6_getsockopt,
46 .sendmsg = udpv6_sendmsg, 46 .sendmsg = udpv6_sendmsg,
47 .recvmsg = udpv6_recvmsg, 47 .recvmsg = udpv6_recvmsg,
48 .backlog_rcv = __udpv6_queue_rcv_skb,
49 .hash = udp_lib_hash, 48 .hash = udp_lib_hash,
50 .unhash = udp_lib_unhash, 49 .unhash = udp_lib_unhash,
51 .get_port = udp_v6_get_port, 50 .get_port = udp_v6_get_port,
51 .memory_allocated = &udp_memory_allocated,
52 .sysctl_mem = sysctl_udp_mem,
52 .obj_size = sizeof(struct udp6_sock), 53 .obj_size = sizeof(struct udp6_sock),
53 .h.udp_table = &udplite_table, 54 .h.udp_table = &udplite_table,
54#ifdef CONFIG_COMPAT 55#ifdef CONFIG_COMPAT
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index e1c0bbe7996c..d7b731a78d09 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -44,7 +44,7 @@ struct xfrm6_tunnel_net {
44 u32 spi; 44 u32 spi;
45}; 45};
46 46
47static int xfrm6_tunnel_net_id __read_mostly; 47static unsigned int xfrm6_tunnel_net_id __read_mostly;
48static inline struct xfrm6_tunnel_net *xfrm6_tunnel_pernet(struct net *net) 48static inline struct xfrm6_tunnel_net *xfrm6_tunnel_pernet(struct net *net)
49{ 49{
50 return net_generic(net, xfrm6_tunnel_net_id); 50 return net_generic(net, xfrm6_tunnel_net_id);
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 48d0dc89b58d..8a9219ff2e77 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -56,7 +56,7 @@
56#include <net/tcp_states.h> 56#include <net/tcp_states.h>
57#include <net/net_namespace.h> 57#include <net/net_namespace.h>
58 58
59#include <asm/uaccess.h> 59#include <linux/uaccess.h>
60 60
61/* Configuration Variables */ 61/* Configuration Variables */
62static unsigned char ipxcfg_max_hops = 16; 62static unsigned char ipxcfg_max_hops = 16;
@@ -1809,7 +1809,7 @@ static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
1809 rc = skb_copy_datagram_msg(skb, sizeof(struct ipxhdr), msg, copied); 1809 rc = skb_copy_datagram_msg(skb, sizeof(struct ipxhdr), msg, copied);
1810 if (rc) 1810 if (rc)
1811 goto out_free; 1811 goto out_free;
1812 if (skb->tstamp.tv64) 1812 if (skb->tstamp)
1813 sk->sk_stamp = skb->tstamp; 1813 sk->sk_stamp = skb->tstamp;
1814 1814
1815 if (sipx) { 1815 if (sipx) {
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 391c3cbd2eed..ab254041dab7 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -52,7 +52,7 @@
52#include <linux/poll.h> 52#include <linux/poll.h>
53 53
54#include <asm/ioctls.h> /* TIOCOUTQ, TIOCINQ */ 54#include <asm/ioctls.h> /* TIOCOUTQ, TIOCINQ */
55#include <asm/uaccess.h> 55#include <linux/uaccess.h>
56 56
57#include <net/sock.h> 57#include <net/sock.h>
58#include <net/tcp_states.h> 58#include <net/tcp_states.h>
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 873c4b707d6a..817b1b186aff 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -40,7 +40,7 @@
40#include <linux/interrupt.h> 40#include <linux/interrupt.h>
41#include <linux/device.h> /* for MODULE_ALIAS_CHARDEV_MAJOR */ 41#include <linux/device.h> /* for MODULE_ALIAS_CHARDEV_MAJOR */
42 42
43#include <asm/uaccess.h> 43#include <linux/uaccess.h>
44 44
45#include <net/irda/irda.h> 45#include <net/irda/irda.h>
46#include <net/irda/irmod.h> 46#include <net/irda/irmod.h>
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
index 8f5678cb6263..f18070118d05 100644
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ b/net/irda/ircomm/ircomm_tty_ioctl.c
@@ -32,7 +32,7 @@
32#include <linux/tty.h> 32#include <linux/tty.h>
33#include <linux/serial.h> 33#include <linux/serial.h>
34 34
35#include <asm/uaccess.h> 35#include <linux/uaccess.h>
36 36
37#include <net/irda/irda.h> 37#include <net/irda/irda.h>
38#include <net/irda/irmod.h> 38#include <net/irda/irmod.h>
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index 856736656a30..890b90d055d5 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -43,7 +43,7 @@
43#include <linux/export.h> 43#include <linux/export.h>
44 44
45#include <asm/ioctls.h> 45#include <asm/ioctls.h>
46#include <asm/uaccess.h> 46#include <linux/uaccess.h>
47#include <asm/dma.h> 47#include <asm/dma.h>
48#include <asm/io.h> 48#include <asm/io.h>
49 49
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index d8b7267280c3..74d09f91709e 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -51,7 +51,6 @@ static const struct net_device_ops irlan_eth_netdev_ops = {
51 .ndo_stop = irlan_eth_close, 51 .ndo_stop = irlan_eth_close,
52 .ndo_start_xmit = irlan_eth_xmit, 52 .ndo_start_xmit = irlan_eth_xmit,
53 .ndo_set_rx_mode = irlan_eth_set_multicast_list, 53 .ndo_set_rx_mode = irlan_eth_set_multicast_list,
54 .ndo_change_mtu = eth_change_mtu,
55 .ndo_validate_addr = eth_validate_addr, 54 .ndo_validate_addr = eth_validate_addr,
56}; 55};
57 56
@@ -67,7 +66,8 @@ static void irlan_eth_setup(struct net_device *dev)
67 66
68 dev->netdev_ops = &irlan_eth_netdev_ops; 67 dev->netdev_ops = &irlan_eth_netdev_ops;
69 dev->destructor = free_netdev; 68 dev->destructor = free_netdev;
70 69 dev->min_mtu = 0;
70 dev->max_mtu = ETH_MAX_MTU;
71 71
72 /* 72 /*
73 * Lets do all queueing in IrTTP instead of this device driver. 73 * Lets do all queueing in IrTTP instead of this device driver.
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index 8d65bb9477fc..9d451f8ed47a 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -245,12 +245,11 @@
245#include <linux/tty.h> 245#include <linux/tty.h>
246#include <linux/proc_fs.h> 246#include <linux/proc_fs.h>
247#include <linux/netdevice.h> 247#include <linux/netdevice.h>
248#include <linux/miscdevice.h>
249#include <linux/poll.h> 248#include <linux/poll.h>
250#include <linux/capability.h> 249#include <linux/capability.h>
251#include <linux/ctype.h> /* isspace() */ 250#include <linux/ctype.h> /* isspace() */
252#include <linux/string.h> /* skip_spaces() */ 251#include <linux/string.h> /* skip_spaces() */
253#include <asm/uaccess.h> 252#include <linux/uaccess.h>
254#include <linux/init.h> 253#include <linux/init.h>
255 254
256#include <linux/ppp_defs.h> 255#include <linux/ppp_defs.h>
diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h
index 940225866da0..32061442cc8e 100644
--- a/net/irda/irnet/irnet_ppp.h
+++ b/net/irda/irnet/irnet_ppp.h
@@ -15,13 +15,10 @@
15/***************************** INCLUDES *****************************/ 15/***************************** INCLUDES *****************************/
16 16
17#include "irnet.h" /* Module global include */ 17#include "irnet.h" /* Module global include */
18#include <linux/miscdevice.h>
18 19
19/************************ CONSTANTS & MACROS ************************/ 20/************************ CONSTANTS & MACROS ************************/
20 21
21/* /dev/irnet file constants */
22#define IRNET_MAJOR 10 /* Misc range */
23#define IRNET_MINOR 187 /* Official allocation */
24
25/* IrNET control channel stuff */ 22/* IrNET control channel stuff */
26#define IRNET_MAX_COMMAND 256 /* Max length of a command line */ 23#define IRNET_MAX_COMMAND 256 /* Max length of a command line */
27 24
@@ -111,9 +108,9 @@ static const struct file_operations irnet_device_fops =
111/* Structure so that the misc major (drivers/char/misc.c) take care of us... */ 108/* Structure so that the misc major (drivers/char/misc.c) take care of us... */
112static struct miscdevice irnet_misc_device = 109static struct miscdevice irnet_misc_device =
113{ 110{
114 IRNET_MINOR, 111 .minor = IRNET_MINOR,
115 "irnet", 112 .name = "irnet",
116 &irnet_device_fops 113 .fops = &irnet_device_fops
117}; 114};
118 115
119#endif /* IRNET_PPP_H */ 116#endif /* IRNET_PPP_H */
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
index e15c40e86660..7fc340e574cf 100644
--- a/net/irda/irnetlink.c
+++ b/net/irda/irnetlink.c
@@ -24,13 +24,7 @@
24 24
25 25
26 26
27static struct genl_family irda_nl_family = { 27static struct genl_family irda_nl_family;
28 .id = GENL_ID_GENERATE,
29 .name = IRDA_NL_NAME,
30 .hdrsize = 0,
31 .version = IRDA_NL_VERSION,
32 .maxattr = IRDA_NL_CMD_MAX,
33};
34 28
35static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info) 29static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info)
36{ 30{
@@ -147,9 +141,19 @@ static const struct genl_ops irda_nl_ops[] = {
147 141
148}; 142};
149 143
150int irda_nl_register(void) 144static struct genl_family irda_nl_family __ro_after_init = {
145 .name = IRDA_NL_NAME,
146 .hdrsize = 0,
147 .version = IRDA_NL_VERSION,
148 .maxattr = IRDA_NL_CMD_MAX,
149 .module = THIS_MODULE,
150 .ops = irda_nl_ops,
151 .n_ops = ARRAY_SIZE(irda_nl_ops),
152};
153
154int __init irda_nl_register(void)
151{ 155{
152 return genl_register_family_with_ops(&irda_nl_family, irda_nl_ops); 156 return genl_register_family(&irda_nl_family);
153} 157}
154 158
155void irda_nl_unregister(void) 159void irda_nl_unregister(void)
diff --git a/net/irda/irproc.c b/net/irda/irproc.c
index b9ac598e2116..77cfdde9d82f 100644
--- a/net/irda/irproc.c
+++ b/net/irda/irproc.c
@@ -23,7 +23,6 @@
23 * 23 *
24 ********************************************************************/ 24 ********************************************************************/
25 25
26#include <linux/miscdevice.h>
27#include <linux/proc_fs.h> 26#include <linux/proc_fs.h>
28#include <linux/seq_file.h> 27#include <linux/seq_file.h>
29#include <linux/module.h> 28#include <linux/module.h>
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 02b45a8e8b35..cfb9e5f4e28f 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -453,19 +453,27 @@ static void iucv_sever_path(struct sock *sk, int with_user_data)
453 } 453 }
454} 454}
455 455
456/* Send FIN through an IUCV socket for HIPER transport */ 456/* Send controlling flags through an IUCV socket for HIPER transport */
457static int iucv_send_ctrl(struct sock *sk, u8 flags) 457static int iucv_send_ctrl(struct sock *sk, u8 flags)
458{ 458{
459 int err = 0; 459 int err = 0;
460 int blen; 460 int blen;
461 struct sk_buff *skb; 461 struct sk_buff *skb;
462 u8 shutdown = 0;
462 463
463 blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; 464 blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
465 if (sk->sk_shutdown & SEND_SHUTDOWN) {
466 /* controlling flags should be sent anyway */
467 shutdown = sk->sk_shutdown;
468 sk->sk_shutdown &= RCV_SHUTDOWN;
469 }
464 skb = sock_alloc_send_skb(sk, blen, 1, &err); 470 skb = sock_alloc_send_skb(sk, blen, 1, &err);
465 if (skb) { 471 if (skb) {
466 skb_reserve(skb, blen); 472 skb_reserve(skb, blen);
467 err = afiucv_hs_send(NULL, sk, skb, flags); 473 err = afiucv_hs_send(NULL, sk, skb, flags);
468 } 474 }
475 if (shutdown)
476 sk->sk_shutdown = shutdown;
469 return err; 477 return err;
470} 478}
471 479
@@ -1315,8 +1323,13 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
1315 } 1323 }
1316 1324
1317 IUCV_SKB_CB(skb)->offset = 0; 1325 IUCV_SKB_CB(skb)->offset = 0;
1318 if (sock_queue_rcv_skb(sk, skb)) 1326 if (sk_filter(sk, skb)) {
1319 skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb); 1327 atomic_inc(&sk->sk_drops); /* skb rejected by filter */
1328 kfree_skb(skb);
1329 return;
1330 }
1331 if (__sock_queue_rcv_skb(sk, skb)) /* handle rcv queue full */
1332 skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
1320} 1333}
1321 1334
1322/* iucv_process_message_q() - Process outstanding IUCV messages 1335/* iucv_process_message_q() - Process outstanding IUCV messages
@@ -1430,13 +1443,13 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
1430 rskb = skb_dequeue(&iucv->backlog_skb_q); 1443 rskb = skb_dequeue(&iucv->backlog_skb_q);
1431 while (rskb) { 1444 while (rskb) {
1432 IUCV_SKB_CB(rskb)->offset = 0; 1445 IUCV_SKB_CB(rskb)->offset = 0;
1433 if (sock_queue_rcv_skb(sk, rskb)) { 1446 if (__sock_queue_rcv_skb(sk, rskb)) {
1447 /* handle rcv queue full */
1434 skb_queue_head(&iucv->backlog_skb_q, 1448 skb_queue_head(&iucv->backlog_skb_q,
1435 rskb); 1449 rskb);
1436 break; 1450 break;
1437 } else {
1438 rskb = skb_dequeue(&iucv->backlog_skb_q);
1439 } 1451 }
1452 rskb = skb_dequeue(&iucv->backlog_skb_q);
1440 } 1453 }
1441 if (skb_queue_empty(&iucv->backlog_skb_q)) { 1454 if (skb_queue_empty(&iucv->backlog_skb_q)) {
1442 if (!list_empty(&iucv->message_q.list)) 1455 if (!list_empty(&iucv->message_q.list))
@@ -2116,12 +2129,17 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb)
2116 skb_reset_transport_header(skb); 2129 skb_reset_transport_header(skb);
2117 skb_reset_network_header(skb); 2130 skb_reset_network_header(skb);
2118 IUCV_SKB_CB(skb)->offset = 0; 2131 IUCV_SKB_CB(skb)->offset = 0;
2132 if (sk_filter(sk, skb)) {
2133 atomic_inc(&sk->sk_drops); /* skb rejected by filter */
2134 kfree_skb(skb);
2135 return NET_RX_SUCCESS;
2136 }
2137
2119 spin_lock(&iucv->message_q.lock); 2138 spin_lock(&iucv->message_q.lock);
2120 if (skb_queue_empty(&iucv->backlog_skb_q)) { 2139 if (skb_queue_empty(&iucv->backlog_skb_q)) {
2121 if (sock_queue_rcv_skb(sk, skb)) { 2140 if (__sock_queue_rcv_skb(sk, skb))
2122 /* handle rcv queue full */ 2141 /* handle rcv queue full */
2123 skb_queue_tail(&iucv->backlog_skb_q, skb); 2142 skb_queue_tail(&iucv->backlog_skb_q, skb);
2124 }
2125 } else 2143 } else
2126 skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); 2144 skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
2127 spin_unlock(&iucv->message_q.lock); 2145 spin_unlock(&iucv->message_q.lock);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 88a2a3ba4212..8f7ef167c45a 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -639,7 +639,7 @@ static void iucv_disable(void)
639 put_online_cpus(); 639 put_online_cpus();
640} 640}
641 641
642static void free_iucv_data(int cpu) 642static int iucv_cpu_dead(unsigned int cpu)
643{ 643{
644 kfree(iucv_param_irq[cpu]); 644 kfree(iucv_param_irq[cpu]);
645 iucv_param_irq[cpu] = NULL; 645 iucv_param_irq[cpu] = NULL;
@@ -647,9 +647,10 @@ static void free_iucv_data(int cpu)
647 iucv_param[cpu] = NULL; 647 iucv_param[cpu] = NULL;
648 kfree(iucv_irq_data[cpu]); 648 kfree(iucv_irq_data[cpu]);
649 iucv_irq_data[cpu] = NULL; 649 iucv_irq_data[cpu] = NULL;
650 return 0;
650} 651}
651 652
652static int alloc_iucv_data(int cpu) 653static int iucv_cpu_prepare(unsigned int cpu)
653{ 654{
654 /* Note: GFP_DMA used to get memory below 2G */ 655 /* Note: GFP_DMA used to get memory below 2G */
655 iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), 656 iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
@@ -671,58 +672,38 @@ static int alloc_iucv_data(int cpu)
671 return 0; 672 return 0;
672 673
673out_free: 674out_free:
674 free_iucv_data(cpu); 675 iucv_cpu_dead(cpu);
675 return -ENOMEM; 676 return -ENOMEM;
676} 677}
677 678
678static int iucv_cpu_notify(struct notifier_block *self, 679static int iucv_cpu_online(unsigned int cpu)
679 unsigned long action, void *hcpu)
680{ 680{
681 cpumask_t cpumask; 681 if (!iucv_path_table)
682 long cpu = (long) hcpu; 682 return 0;
683 683 iucv_declare_cpu(NULL);
684 switch (action) { 684 return 0;
685 case CPU_UP_PREPARE:
686 case CPU_UP_PREPARE_FROZEN:
687 if (alloc_iucv_data(cpu))
688 return notifier_from_errno(-ENOMEM);
689 break;
690 case CPU_UP_CANCELED:
691 case CPU_UP_CANCELED_FROZEN:
692 case CPU_DEAD:
693 case CPU_DEAD_FROZEN:
694 free_iucv_data(cpu);
695 break;
696 case CPU_ONLINE:
697 case CPU_ONLINE_FROZEN:
698 case CPU_DOWN_FAILED:
699 case CPU_DOWN_FAILED_FROZEN:
700 if (!iucv_path_table)
701 break;
702 smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
703 break;
704 case CPU_DOWN_PREPARE:
705 case CPU_DOWN_PREPARE_FROZEN:
706 if (!iucv_path_table)
707 break;
708 cpumask_copy(&cpumask, &iucv_buffer_cpumask);
709 cpumask_clear_cpu(cpu, &cpumask);
710 if (cpumask_empty(&cpumask))
711 /* Can't offline last IUCV enabled cpu. */
712 return notifier_from_errno(-EINVAL);
713 smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1);
714 if (cpumask_empty(&iucv_irq_cpumask))
715 smp_call_function_single(
716 cpumask_first(&iucv_buffer_cpumask),
717 iucv_allow_cpu, NULL, 1);
718 break;
719 }
720 return NOTIFY_OK;
721} 685}
722 686
723static struct notifier_block __refdata iucv_cpu_notifier = { 687static int iucv_cpu_down_prep(unsigned int cpu)
724 .notifier_call = iucv_cpu_notify, 688{
725}; 689 cpumask_t cpumask;
690
691 if (!iucv_path_table)
692 return 0;
693
694 cpumask_copy(&cpumask, &iucv_buffer_cpumask);
695 cpumask_clear_cpu(cpu, &cpumask);
696 if (cpumask_empty(&cpumask))
697 /* Can't offline last IUCV enabled cpu. */
698 return -EINVAL;
699
700 iucv_retrieve_cpu(NULL);
701 if (!cpumask_empty(&iucv_irq_cpumask))
702 return 0;
703 smp_call_function_single(cpumask_first(&iucv_buffer_cpumask),
704 iucv_allow_cpu, NULL, 1);
705 return 0;
706}
726 707
727/** 708/**
728 * iucv_sever_pathid 709 * iucv_sever_pathid
@@ -2027,6 +2008,7 @@ struct iucv_interface iucv_if = {
2027}; 2008};
2028EXPORT_SYMBOL(iucv_if); 2009EXPORT_SYMBOL(iucv_if);
2029 2010
2011static enum cpuhp_state iucv_online;
2030/** 2012/**
2031 * iucv_init 2013 * iucv_init
2032 * 2014 *
@@ -2035,7 +2017,6 @@ EXPORT_SYMBOL(iucv_if);
2035static int __init iucv_init(void) 2017static int __init iucv_init(void)
2036{ 2018{
2037 int rc; 2019 int rc;
2038 int cpu;
2039 2020
2040 if (!MACHINE_IS_VM) { 2021 if (!MACHINE_IS_VM) {
2041 rc = -EPROTONOSUPPORT; 2022 rc = -EPROTONOSUPPORT;
@@ -2054,23 +2035,19 @@ static int __init iucv_init(void)
2054 goto out_int; 2035 goto out_int;
2055 } 2036 }
2056 2037
2057 cpu_notifier_register_begin(); 2038 rc = cpuhp_setup_state(CPUHP_NET_IUCV_PREPARE, "net/iucv:prepare",
2058 2039 iucv_cpu_prepare, iucv_cpu_dead);
2059 for_each_online_cpu(cpu) {
2060 if (alloc_iucv_data(cpu)) {
2061 rc = -ENOMEM;
2062 goto out_free;
2063 }
2064 }
2065 rc = __register_hotcpu_notifier(&iucv_cpu_notifier);
2066 if (rc) 2040 if (rc)
2067 goto out_free; 2041 goto out_dev;
2068 2042 rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "net/iucv:online",
2069 cpu_notifier_register_done(); 2043 iucv_cpu_online, iucv_cpu_down_prep);
2044 if (rc < 0)
2045 goto out_prep;
2046 iucv_online = rc;
2070 2047
2071 rc = register_reboot_notifier(&iucv_reboot_notifier); 2048 rc = register_reboot_notifier(&iucv_reboot_notifier);
2072 if (rc) 2049 if (rc)
2073 goto out_cpu; 2050 goto out_remove_hp;
2074 ASCEBC(iucv_error_no_listener, 16); 2051 ASCEBC(iucv_error_no_listener, 16);
2075 ASCEBC(iucv_error_no_memory, 16); 2052 ASCEBC(iucv_error_no_memory, 16);
2076 ASCEBC(iucv_error_pathid, 16); 2053 ASCEBC(iucv_error_pathid, 16);
@@ -2084,15 +2061,11 @@ static int __init iucv_init(void)
2084 2061
2085out_reboot: 2062out_reboot:
2086 unregister_reboot_notifier(&iucv_reboot_notifier); 2063 unregister_reboot_notifier(&iucv_reboot_notifier);
2087out_cpu: 2064out_remove_hp:
2088 cpu_notifier_register_begin(); 2065 cpuhp_remove_state(iucv_online);
2089 __unregister_hotcpu_notifier(&iucv_cpu_notifier); 2066out_prep:
2090out_free: 2067 cpuhp_remove_state(CPUHP_NET_IUCV_PREPARE);
2091 for_each_possible_cpu(cpu) 2068out_dev:
2092 free_iucv_data(cpu);
2093
2094 cpu_notifier_register_done();
2095
2096 root_device_unregister(iucv_root); 2069 root_device_unregister(iucv_root);
2097out_int: 2070out_int:
2098 unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt); 2071 unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
@@ -2110,7 +2083,6 @@ out:
2110static void __exit iucv_exit(void) 2083static void __exit iucv_exit(void)
2111{ 2084{
2112 struct iucv_irq_list *p, *n; 2085 struct iucv_irq_list *p, *n;
2113 int cpu;
2114 2086
2115 spin_lock_irq(&iucv_queue_lock); 2087 spin_lock_irq(&iucv_queue_lock);
2116 list_for_each_entry_safe(p, n, &iucv_task_queue, list) 2088 list_for_each_entry_safe(p, n, &iucv_task_queue, list)
@@ -2119,11 +2091,9 @@ static void __exit iucv_exit(void)
2119 kfree(p); 2091 kfree(p);
2120 spin_unlock_irq(&iucv_queue_lock); 2092 spin_unlock_irq(&iucv_queue_lock);
2121 unregister_reboot_notifier(&iucv_reboot_notifier); 2093 unregister_reboot_notifier(&iucv_reboot_notifier);
2122 cpu_notifier_register_begin(); 2094
2123 __unregister_hotcpu_notifier(&iucv_cpu_notifier); 2095 cpuhp_remove_state_nocalls(iucv_online);
2124 for_each_possible_cpu(cpu) 2096 cpuhp_remove_state(CPUHP_NET_IUCV_PREPARE);
2125 free_iucv_data(cpu);
2126 cpu_notifier_register_done();
2127 root_device_unregister(iucv_root); 2097 root_device_unregister(iucv_root);
2128 bus_unregister(&iucv_bus); 2098 bus_unregister(&iucv_bus);
2129 unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt); 2099 unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f9c9ecb0cdd3..c6252ed42c1d 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -36,7 +36,7 @@
36#define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x)) 36#define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x))
37#define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x)) 37#define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x))
38 38
39static int pfkey_net_id __read_mostly; 39static unsigned int pfkey_net_id __read_mostly;
40struct netns_pfkey { 40struct netns_pfkey {
41 /* List of all pfkey sockets. */ 41 /* List of all pfkey sockets. */
42 struct hlist_head table; 42 struct hlist_head table;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index a2ed3bda4ddc..85948c69b236 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -715,7 +715,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
715 l2tp_info(session, L2TP_MSG_SEQ, 715 l2tp_info(session, L2TP_MSG_SEQ,
716 "%s: requested to enable seq numbers by LNS\n", 716 "%s: requested to enable seq numbers by LNS\n",
717 session->name); 717 session->name);
718 session->send_seq = -1; 718 session->send_seq = 1;
719 l2tp_session_set_header_len(session, tunnel->version); 719 l2tp_session_set_header_len(session, tunnel->version);
720 } 720 }
721 } else { 721 } else {
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 2599af6378e4..8f560f7140a0 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -23,16 +23,6 @@
23#define L2TP_HASH_BITS_2 8 23#define L2TP_HASH_BITS_2 8
24#define L2TP_HASH_SIZE_2 (1 << L2TP_HASH_BITS_2) 24#define L2TP_HASH_SIZE_2 (1 << L2TP_HASH_BITS_2)
25 25
26/* Debug message categories for the DEBUG socket option */
27enum {
28 L2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if
29 * compiled in) */
30 L2TP_MSG_CONTROL = (1 << 1), /* userspace - kernel
31 * interface */
32 L2TP_MSG_SEQ = (1 << 2), /* sequence numbers */
33 L2TP_MSG_DATA = (1 << 3), /* data packets */
34};
35
36struct sk_buff; 26struct sk_buff;
37 27
38struct l2tp_stats { 28struct l2tp_stats {
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 965f7e344cef..e2c6ae024565 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -259,6 +259,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
259 session->mtu = dev->mtu - session->hdr_len; 259 session->mtu = dev->mtu - session->hdr_len;
260 dev->mtu = session->mtu; 260 dev->mtu = session->mtu;
261 dev->needed_headroom += session->hdr_len; 261 dev->needed_headroom += session->hdr_len;
262 dev->min_mtu = 0;
263 dev->max_mtu = ETH_MAX_MTU;
262 264
263 priv = netdev_priv(dev); 265 priv = netdev_priv(dev);
264 priv->dev = dev; 266 priv->dev = dev;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index aa821cb639e5..f092ac441fdd 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -525,6 +525,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
525 memset(&fl6, 0, sizeof(fl6)); 525 memset(&fl6, 0, sizeof(fl6));
526 526
527 fl6.flowi6_mark = sk->sk_mark; 527 fl6.flowi6_mark = sk->sk_mark;
528 fl6.flowi6_uid = sk->sk_uid;
528 529
529 ipc6.hlimit = -1; 530 ipc6.hlimit = -1;
530 ipc6.tclass = -1; 531 ipc6.tclass = -1;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index bf3117771822..3620fba31786 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -31,14 +31,7 @@
31#include "l2tp_core.h" 31#include "l2tp_core.h"
32 32
33 33
34static struct genl_family l2tp_nl_family = { 34static struct genl_family l2tp_nl_family;
35 .id = GENL_ID_GENERATE,
36 .name = L2TP_GENL_NAME,
37 .version = L2TP_GENL_VERSION,
38 .hdrsize = 0,
39 .maxattr = L2TP_ATTR_MAX,
40 .netnsok = true,
41};
42 35
43static const struct genl_multicast_group l2tp_multicast_group[] = { 36static const struct genl_multicast_group l2tp_multicast_group[] = {
44 { 37 {
@@ -227,14 +220,14 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
227 cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]); 220 cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]);
228 if (info->attrs[L2TP_ATTR_UDP_DPORT]) 221 if (info->attrs[L2TP_ATTR_UDP_DPORT])
229 cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]); 222 cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
230 if (info->attrs[L2TP_ATTR_UDP_CSUM]) 223 cfg.use_udp_checksums = nla_get_flag(
231 cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]); 224 info->attrs[L2TP_ATTR_UDP_CSUM]);
232 225
233#if IS_ENABLED(CONFIG_IPV6) 226#if IS_ENABLED(CONFIG_IPV6)
234 if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]) 227 cfg.udp6_zero_tx_checksums = nla_get_flag(
235 cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); 228 info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
236 if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]) 229 cfg.udp6_zero_rx_checksums = nla_get_flag(
237 cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); 230 info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
238#endif 231#endif
239 } 232 }
240 233
@@ -386,9 +379,24 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
386 379
387 switch (tunnel->encap) { 380 switch (tunnel->encap) {
388 case L2TP_ENCAPTYPE_UDP: 381 case L2TP_ENCAPTYPE_UDP:
382 switch (sk->sk_family) {
383 case AF_INET:
384 if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx))
385 goto nla_put_failure;
386 break;
387#if IS_ENABLED(CONFIG_IPV6)
388 case AF_INET6:
389 if (udp_get_no_check6_tx(sk) &&
390 nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX))
391 goto nla_put_failure;
392 if (udp_get_no_check6_rx(sk) &&
393 nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX))
394 goto nla_put_failure;
395 break;
396#endif
397 }
389 if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || 398 if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
390 nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) || 399 nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
391 nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx))
392 goto nla_put_failure; 400 goto nla_put_failure;
393 /* NOBREAK */ 401 /* NOBREAK */
394 case L2TP_ENCAPTYPE_IP: 402 case L2TP_ENCAPTYPE_IP:
@@ -977,6 +985,19 @@ static const struct genl_ops l2tp_nl_ops[] = {
977 }, 985 },
978}; 986};
979 987
988static struct genl_family l2tp_nl_family __ro_after_init = {
989 .name = L2TP_GENL_NAME,
990 .version = L2TP_GENL_VERSION,
991 .hdrsize = 0,
992 .maxattr = L2TP_ATTR_MAX,
993 .netnsok = true,
994 .module = THIS_MODULE,
995 .ops = l2tp_nl_ops,
996 .n_ops = ARRAY_SIZE(l2tp_nl_ops),
997 .mcgrps = l2tp_multicast_group,
998 .n_mcgrps = ARRAY_SIZE(l2tp_multicast_group),
999};
1000
980int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops) 1001int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops)
981{ 1002{
982 int ret; 1003 int ret;
@@ -1010,12 +1031,10 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type)
1010} 1031}
1011EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); 1032EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
1012 1033
1013static int l2tp_nl_init(void) 1034static int __init l2tp_nl_init(void)
1014{ 1035{
1015 pr_info("L2TP netlink interface\n"); 1036 pr_info("L2TP netlink interface\n");
1016 return genl_register_family_with_ops_groups(&l2tp_nl_family, 1037 return genl_register_family(&l2tp_nl_family);
1017 l2tp_nl_ops,
1018 l2tp_multicast_group);
1019} 1038}
1020 1039
1021static void l2tp_nl_cleanup(void) 1040static void l2tp_nl_cleanup(void)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 41d47bfda15c..36cc56fd0418 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -231,14 +231,14 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
231 if (sk->sk_state & PPPOX_BOUND) { 231 if (sk->sk_state & PPPOX_BOUND) {
232 struct pppox_sock *po; 232 struct pppox_sock *po;
233 233
234 l2tp_dbg(session, PPPOL2TP_MSG_DATA, 234 l2tp_dbg(session, L2TP_MSG_DATA,
235 "%s: recv %d byte data frame, passing to ppp\n", 235 "%s: recv %d byte data frame, passing to ppp\n",
236 session->name, data_len); 236 session->name, data_len);
237 237
238 po = pppox_sk(sk); 238 po = pppox_sk(sk);
239 ppp_input(&po->chan, skb); 239 ppp_input(&po->chan, skb);
240 } else { 240 } else {
241 l2tp_dbg(session, PPPOL2TP_MSG_DATA, 241 l2tp_dbg(session, L2TP_MSG_DATA,
242 "%s: recv %d byte data frame, passing to L2TP socket\n", 242 "%s: recv %d byte data frame, passing to L2TP socket\n",
243 session->name, data_len); 243 session->name, data_len);
244 244
@@ -251,7 +251,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
251 return; 251 return;
252 252
253no_sock: 253no_sock:
254 l2tp_info(session, PPPOL2TP_MSG_DATA, "%s: no socket\n", session->name); 254 l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name);
255 kfree_skb(skb); 255 kfree_skb(skb);
256} 256}
257 257
@@ -773,7 +773,7 @@ out_no_ppp:
773 /* This is how we get the session context from the socket. */ 773 /* This is how we get the session context from the socket. */
774 sk->sk_user_data = session; 774 sk->sk_user_data = session;
775 sk->sk_state = PPPOX_CONNECTED; 775 sk->sk_state = PPPOX_CONNECTED;
776 l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: created\n", 776 l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
777 session->name); 777 session->name);
778 778
779end: 779end:
@@ -827,7 +827,7 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
827 ps = l2tp_session_priv(session); 827 ps = l2tp_session_priv(session);
828 ps->tunnel_sock = tunnel->sock; 828 ps->tunnel_sock = tunnel->sock;
829 829
830 l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: created\n", 830 l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
831 session->name); 831 session->name);
832 832
833 error = 0; 833 error = 0;
@@ -989,7 +989,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
989 struct l2tp_tunnel *tunnel = session->tunnel; 989 struct l2tp_tunnel *tunnel = session->tunnel;
990 struct pppol2tp_ioc_stats stats; 990 struct pppol2tp_ioc_stats stats;
991 991
992 l2tp_dbg(session, PPPOL2TP_MSG_CONTROL, 992 l2tp_dbg(session, L2TP_MSG_CONTROL,
993 "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n", 993 "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
994 session->name, cmd, arg); 994 session->name, cmd, arg);
995 995
@@ -1009,7 +1009,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
1009 if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq))) 1009 if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
1010 break; 1010 break;
1011 1011
1012 l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get mtu=%d\n", 1012 l2tp_info(session, L2TP_MSG_CONTROL, "%s: get mtu=%d\n",
1013 session->name, session->mtu); 1013 session->name, session->mtu);
1014 err = 0; 1014 err = 0;
1015 break; 1015 break;
@@ -1025,7 +1025,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
1025 1025
1026 session->mtu = ifr.ifr_mtu; 1026 session->mtu = ifr.ifr_mtu;
1027 1027
1028 l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set mtu=%d\n", 1028 l2tp_info(session, L2TP_MSG_CONTROL, "%s: set mtu=%d\n",
1029 session->name, session->mtu); 1029 session->name, session->mtu);
1030 err = 0; 1030 err = 0;
1031 break; 1031 break;
@@ -1039,7 +1039,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
1039 if (put_user(session->mru, (int __user *) arg)) 1039 if (put_user(session->mru, (int __user *) arg))
1040 break; 1040 break;
1041 1041
1042 l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get mru=%d\n", 1042 l2tp_info(session, L2TP_MSG_CONTROL, "%s: get mru=%d\n",
1043 session->name, session->mru); 1043 session->name, session->mru);
1044 err = 0; 1044 err = 0;
1045 break; 1045 break;
@@ -1054,7 +1054,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
1054 break; 1054 break;
1055 1055
1056 session->mru = val; 1056 session->mru = val;
1057 l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set mru=%d\n", 1057 l2tp_info(session, L2TP_MSG_CONTROL, "%s: set mru=%d\n",
1058 session->name, session->mru); 1058 session->name, session->mru);
1059 err = 0; 1059 err = 0;
1060 break; 1060 break;
@@ -1064,7 +1064,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
1064 if (put_user(ps->flags, (int __user *) arg)) 1064 if (put_user(ps->flags, (int __user *) arg))
1065 break; 1065 break;
1066 1066
1067 l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get flags=%d\n", 1067 l2tp_info(session, L2TP_MSG_CONTROL, "%s: get flags=%d\n",
1068 session->name, ps->flags); 1068 session->name, ps->flags);
1069 err = 0; 1069 err = 0;
1070 break; 1070 break;
@@ -1074,7 +1074,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
1074 if (get_user(val, (int __user *) arg)) 1074 if (get_user(val, (int __user *) arg))
1075 break; 1075 break;
1076 ps->flags = val; 1076 ps->flags = val;
1077 l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set flags=%d\n", 1077 l2tp_info(session, L2TP_MSG_CONTROL, "%s: set flags=%d\n",
1078 session->name, ps->flags); 1078 session->name, ps->flags);
1079 err = 0; 1079 err = 0;
1080 break; 1080 break;
@@ -1091,7 +1091,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
1091 if (copy_to_user((void __user *) arg, &stats, 1091 if (copy_to_user((void __user *) arg, &stats,
1092 sizeof(stats))) 1092 sizeof(stats)))
1093 break; 1093 break;
1094 l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get L2TP stats\n", 1094 l2tp_info(session, L2TP_MSG_CONTROL, "%s: get L2TP stats\n",
1095 session->name); 1095 session->name);
1096 err = 0; 1096 err = 0;
1097 break; 1097 break;
@@ -1119,7 +1119,7 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
1119 struct sock *sk; 1119 struct sock *sk;
1120 struct pppol2tp_ioc_stats stats; 1120 struct pppol2tp_ioc_stats stats;
1121 1121
1122 l2tp_dbg(tunnel, PPPOL2TP_MSG_CONTROL, 1122 l2tp_dbg(tunnel, L2TP_MSG_CONTROL,
1123 "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n", 1123 "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n",
1124 tunnel->name, cmd, arg); 1124 tunnel->name, cmd, arg);
1125 1125
@@ -1155,7 +1155,7 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
1155 err = -EFAULT; 1155 err = -EFAULT;
1156 break; 1156 break;
1157 } 1157 }
1158 l2tp_info(tunnel, PPPOL2TP_MSG_CONTROL, "%s: get L2TP stats\n", 1158 l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: get L2TP stats\n",
1159 tunnel->name); 1159 tunnel->name);
1160 err = 0; 1160 err = 0;
1161 break; 1161 break;
@@ -1245,7 +1245,7 @@ static int pppol2tp_tunnel_setsockopt(struct sock *sk,
1245 switch (optname) { 1245 switch (optname) {
1246 case PPPOL2TP_SO_DEBUG: 1246 case PPPOL2TP_SO_DEBUG:
1247 tunnel->debug = val; 1247 tunnel->debug = val;
1248 l2tp_info(tunnel, PPPOL2TP_MSG_CONTROL, "%s: set debug=%x\n", 1248 l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: set debug=%x\n",
1249 tunnel->name, tunnel->debug); 1249 tunnel->name, tunnel->debug);
1250 break; 1250 break;
1251 1251
@@ -1272,8 +1272,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
1272 err = -EINVAL; 1272 err = -EINVAL;
1273 break; 1273 break;
1274 } 1274 }
1275 session->recv_seq = val ? -1 : 0; 1275 session->recv_seq = !!val;
1276 l2tp_info(session, PPPOL2TP_MSG_CONTROL, 1276 l2tp_info(session, L2TP_MSG_CONTROL,
1277 "%s: set recv_seq=%d\n", 1277 "%s: set recv_seq=%d\n",
1278 session->name, session->recv_seq); 1278 session->name, session->recv_seq);
1279 break; 1279 break;
@@ -1283,7 +1283,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
1283 err = -EINVAL; 1283 err = -EINVAL;
1284 break; 1284 break;
1285 } 1285 }
1286 session->send_seq = val ? -1 : 0; 1286 session->send_seq = !!val;
1287 { 1287 {
1288 struct sock *ssk = ps->sock; 1288 struct sock *ssk = ps->sock;
1289 struct pppox_sock *po = pppox_sk(ssk); 1289 struct pppox_sock *po = pppox_sk(ssk);
@@ -1291,7 +1291,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
1291 PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; 1291 PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
1292 } 1292 }
1293 l2tp_session_set_header_len(session, session->tunnel->version); 1293 l2tp_session_set_header_len(session, session->tunnel->version);
1294 l2tp_info(session, PPPOL2TP_MSG_CONTROL, 1294 l2tp_info(session, L2TP_MSG_CONTROL,
1295 "%s: set send_seq=%d\n", 1295 "%s: set send_seq=%d\n",
1296 session->name, session->send_seq); 1296 session->name, session->send_seq);
1297 break; 1297 break;
@@ -1301,21 +1301,21 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
1301 err = -EINVAL; 1301 err = -EINVAL;
1302 break; 1302 break;
1303 } 1303 }
1304 session->lns_mode = val ? -1 : 0; 1304 session->lns_mode = !!val;
1305 l2tp_info(session, PPPOL2TP_MSG_CONTROL, 1305 l2tp_info(session, L2TP_MSG_CONTROL,
1306 "%s: set lns_mode=%d\n", 1306 "%s: set lns_mode=%d\n",
1307 session->name, session->lns_mode); 1307 session->name, session->lns_mode);
1308 break; 1308 break;
1309 1309
1310 case PPPOL2TP_SO_DEBUG: 1310 case PPPOL2TP_SO_DEBUG:
1311 session->debug = val; 1311 session->debug = val;
1312 l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set debug=%x\n", 1312 l2tp_info(session, L2TP_MSG_CONTROL, "%s: set debug=%x\n",
1313 session->name, session->debug); 1313 session->name, session->debug);
1314 break; 1314 break;
1315 1315
1316 case PPPOL2TP_SO_REORDERTO: 1316 case PPPOL2TP_SO_REORDERTO:
1317 session->reorder_timeout = msecs_to_jiffies(val); 1317 session->reorder_timeout = msecs_to_jiffies(val);
1318 l2tp_info(session, PPPOL2TP_MSG_CONTROL, 1318 l2tp_info(session, L2TP_MSG_CONTROL,
1319 "%s: set reorder_timeout=%d\n", 1319 "%s: set reorder_timeout=%d\n",
1320 session->name, session->reorder_timeout); 1320 session->name, session->reorder_timeout);
1321 break; 1321 break;
@@ -1396,7 +1396,7 @@ static int pppol2tp_tunnel_getsockopt(struct sock *sk,
1396 switch (optname) { 1396 switch (optname) {
1397 case PPPOL2TP_SO_DEBUG: 1397 case PPPOL2TP_SO_DEBUG:
1398 *val = tunnel->debug; 1398 *val = tunnel->debug;
1399 l2tp_info(tunnel, PPPOL2TP_MSG_CONTROL, "%s: get debug=%x\n", 1399 l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: get debug=%x\n",
1400 tunnel->name, tunnel->debug); 1400 tunnel->name, tunnel->debug);
1401 break; 1401 break;
1402 1402
@@ -1419,31 +1419,31 @@ static int pppol2tp_session_getsockopt(struct sock *sk,
1419 switch (optname) { 1419 switch (optname) {
1420 case PPPOL2TP_SO_RECVSEQ: 1420 case PPPOL2TP_SO_RECVSEQ:
1421 *val = session->recv_seq; 1421 *val = session->recv_seq;
1422 l2tp_info(session, PPPOL2TP_MSG_CONTROL, 1422 l2tp_info(session, L2TP_MSG_CONTROL,
1423 "%s: get recv_seq=%d\n", session->name, *val); 1423 "%s: get recv_seq=%d\n", session->name, *val);
1424 break; 1424 break;
1425 1425
1426 case PPPOL2TP_SO_SENDSEQ: 1426 case PPPOL2TP_SO_SENDSEQ:
1427 *val = session->send_seq; 1427 *val = session->send_seq;
1428 l2tp_info(session, PPPOL2TP_MSG_CONTROL, 1428 l2tp_info(session, L2TP_MSG_CONTROL,
1429 "%s: get send_seq=%d\n", session->name, *val); 1429 "%s: get send_seq=%d\n", session->name, *val);
1430 break; 1430 break;
1431 1431
1432 case PPPOL2TP_SO_LNSMODE: 1432 case PPPOL2TP_SO_LNSMODE:
1433 *val = session->lns_mode; 1433 *val = session->lns_mode;
1434 l2tp_info(session, PPPOL2TP_MSG_CONTROL, 1434 l2tp_info(session, L2TP_MSG_CONTROL,
1435 "%s: get lns_mode=%d\n", session->name, *val); 1435 "%s: get lns_mode=%d\n", session->name, *val);
1436 break; 1436 break;
1437 1437
1438 case PPPOL2TP_SO_DEBUG: 1438 case PPPOL2TP_SO_DEBUG:
1439 *val = session->debug; 1439 *val = session->debug;
1440 l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: get debug=%d\n", 1440 l2tp_info(session, L2TP_MSG_CONTROL, "%s: get debug=%d\n",
1441 session->name, *val); 1441 session->name, *val);
1442 break; 1442 break;
1443 1443
1444 case PPPOL2TP_SO_REORDERTO: 1444 case PPPOL2TP_SO_REORDERTO:
1445 *val = (int) jiffies_to_msecs(session->reorder_timeout); 1445 *val = (int) jiffies_to_msecs(session->reorder_timeout);
1446 l2tp_info(session, PPPOL2TP_MSG_CONTROL, 1446 l2tp_info(session, L2TP_MSG_CONTROL,
1447 "%s: get reorder_timeout=%d\n", session->name, *val); 1447 "%s: get reorder_timeout=%d\n", session->name, *val);
1448 break; 1448 break;
1449 1449
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index fc60d9d738b5..b50b64ac8815 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -33,7 +33,7 @@
33#include <linux/skbuff.h> 33#include <linux/skbuff.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <net/sock.h> 35#include <net/sock.h>
36#include <asm/uaccess.h> 36#include <linux/uaccess.h>
37#include <linux/fcntl.h> 37#include <linux/fcntl.h>
38#include <linux/mm.h> 38#include <linux/mm.h>
39#include <linux/interrupt.h> 39#include <linux/interrupt.h>
diff --git a/net/lapb/lapb_in.c b/net/lapb/lapb_in.c
index 182470847fcf..d5d2110eb717 100644
--- a/net/lapb/lapb_in.c
+++ b/net/lapb/lapb_in.c
@@ -31,7 +31,7 @@
31#include <linux/skbuff.h> 31#include <linux/skbuff.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <net/sock.h> 33#include <net/sock.h>
34#include <asm/uaccess.h> 34#include <linux/uaccess.h>
35#include <linux/fcntl.h> 35#include <linux/fcntl.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37#include <linux/interrupt.h> 37#include <linux/interrupt.h>
diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c
index 482c94d9d958..eda726e22f64 100644
--- a/net/lapb/lapb_out.c
+++ b/net/lapb/lapb_out.c
@@ -29,7 +29,7 @@
29#include <linux/skbuff.h> 29#include <linux/skbuff.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <net/sock.h> 31#include <net/sock.h>
32#include <asm/uaccess.h> 32#include <linux/uaccess.h>
33#include <linux/fcntl.h> 33#include <linux/fcntl.h>
34#include <linux/mm.h> 34#include <linux/mm.h>
35#include <linux/interrupt.h> 35#include <linux/interrupt.h>
diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c
index 3c1914df641f..75efde3e616c 100644
--- a/net/lapb/lapb_subr.c
+++ b/net/lapb/lapb_subr.c
@@ -28,7 +28,7 @@
28#include <linux/skbuff.h> 28#include <linux/skbuff.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <net/sock.h> 30#include <net/sock.h>
31#include <asm/uaccess.h> 31#include <linux/uaccess.h>
32#include <linux/fcntl.h> 32#include <linux/fcntl.h>
33#include <linux/mm.h> 33#include <linux/mm.h>
34#include <linux/interrupt.h> 34#include <linux/interrupt.h>
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 355cc3b6fa4d..1a5535bc3b8d 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -29,7 +29,7 @@
29#include <linux/inet.h> 29#include <linux/inet.h>
30#include <linux/skbuff.h> 30#include <linux/skbuff.h>
31#include <net/sock.h> 31#include <net/sock.h>
32#include <asm/uaccess.h> 32#include <linux/uaccess.h>
33#include <linux/fcntl.h> 33#include <linux/fcntl.h>
34#include <linux/mm.h> 34#include <linux/mm.h>
35#include <linux/interrupt.h> 35#include <linux/interrupt.h>
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index db916cf51ffe..5e9296382420 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -532,12 +532,12 @@ out:
532 532
533static int llc_ui_wait_for_disc(struct sock *sk, long timeout) 533static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
534{ 534{
535 DEFINE_WAIT(wait); 535 DEFINE_WAIT_FUNC(wait, woken_wake_function);
536 int rc = 0; 536 int rc = 0;
537 537
538 add_wait_queue(sk_sleep(sk), &wait);
538 while (1) { 539 while (1) {
539 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 540 if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait))
540 if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE))
541 break; 541 break;
542 rc = -ERESTARTSYS; 542 rc = -ERESTARTSYS;
543 if (signal_pending(current)) 543 if (signal_pending(current))
@@ -547,39 +547,39 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
547 break; 547 break;
548 rc = 0; 548 rc = 0;
549 } 549 }
550 finish_wait(sk_sleep(sk), &wait); 550 remove_wait_queue(sk_sleep(sk), &wait);
551 return rc; 551 return rc;
552} 552}
553 553
554static bool llc_ui_wait_for_conn(struct sock *sk, long timeout) 554static bool llc_ui_wait_for_conn(struct sock *sk, long timeout)
555{ 555{
556 DEFINE_WAIT(wait); 556 DEFINE_WAIT_FUNC(wait, woken_wake_function);
557 557
558 add_wait_queue(sk_sleep(sk), &wait);
558 while (1) { 559 while (1) {
559 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 560 if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait))
560 if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT))
561 break; 561 break;
562 if (signal_pending(current) || !timeout) 562 if (signal_pending(current) || !timeout)
563 break; 563 break;
564 } 564 }
565 finish_wait(sk_sleep(sk), &wait); 565 remove_wait_queue(sk_sleep(sk), &wait);
566 return timeout; 566 return timeout;
567} 567}
568 568
569static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) 569static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
570{ 570{
571 DEFINE_WAIT(wait); 571 DEFINE_WAIT_FUNC(wait, woken_wake_function);
572 struct llc_sock *llc = llc_sk(sk); 572 struct llc_sock *llc = llc_sk(sk);
573 int rc; 573 int rc;
574 574
575 add_wait_queue(sk_sleep(sk), &wait);
575 while (1) { 576 while (1) {
576 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
577 rc = 0; 577 rc = 0;
578 if (sk_wait_event(sk, &timeout, 578 if (sk_wait_event(sk, &timeout,
579 (sk->sk_shutdown & RCV_SHUTDOWN) || 579 (sk->sk_shutdown & RCV_SHUTDOWN) ||
580 (!llc_data_accept_state(llc->state) && 580 (!llc_data_accept_state(llc->state) &&
581 !llc->remote_busy_flag && 581 !llc->remote_busy_flag &&
582 !llc->p_flag))) 582 !llc->p_flag), &wait))
583 break; 583 break;
584 rc = -ERESTARTSYS; 584 rc = -ERESTARTSYS;
585 if (signal_pending(current)) 585 if (signal_pending(current))
@@ -588,7 +588,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
588 if (!timeout) 588 if (!timeout)
589 break; 589 break;
590 } 590 }
591 finish_wait(sk_sleep(sk), &wait); 591 remove_wait_queue(sk_sleep(sk), &wait);
592 return rc; 592 return rc;
593} 593}
594 594
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index f9137a8341f4..282912245938 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -19,6 +19,7 @@ mac80211-y := \
19 aes_gcm.o \ 19 aes_gcm.o \
20 aes_cmac.o \ 20 aes_cmac.o \
21 aes_gmac.o \ 21 aes_gmac.o \
22 fils_aead.o \
22 cfg.o \ 23 cfg.o \
23 ethtool.o \ 24 ethtool.o \
24 rx.o \ 25 rx.o \
@@ -60,4 +61,4 @@ rc80211_minstrel_ht-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_ht_debugfs.o
60mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) 61mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
61mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y) 62mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y)
62 63
63ccflags-y += -D__CHECK_ENDIAN__ -DDEBUG 64ccflags-y += -DDEBUG
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index bdf0790d89cc..d0bd5fff5f0a 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -23,7 +23,7 @@
23#define AAD_LEN 20 23#define AAD_LEN 20
24 24
25 25
26static void gf_mulx(u8 *pad) 26void gf_mulx(u8 *pad)
27{ 27{
28 int i, carry; 28 int i, carry;
29 29
@@ -35,9 +35,9 @@ static void gf_mulx(u8 *pad)
35 pad[AES_BLOCK_SIZE - 1] ^= 0x87; 35 pad[AES_BLOCK_SIZE - 1] ^= 0x87;
36} 36}
37 37
38static void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, 38void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem,
39 const u8 *addr[], const size_t *len, u8 *mac, 39 const u8 *addr[], const size_t *len, u8 *mac,
40 size_t mac_len) 40 size_t mac_len)
41{ 41{
42 u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE]; 42 u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE];
43 const u8 *pos, *end; 43 const u8 *pos, *end;
diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h
index 3702041f44fd..c827e1d5de8b 100644
--- a/net/mac80211/aes_cmac.h
+++ b/net/mac80211/aes_cmac.h
@@ -11,6 +11,10 @@
11 11
12#include <linux/crypto.h> 12#include <linux/crypto.h>
13 13
14void gf_mulx(u8 *pad);
15void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem,
16 const u8 *addr[], const size_t *len, u8 *mac,
17 size_t mac_len);
14struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], 18struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[],
15 size_t key_len); 19 size_t key_len);
16void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, 20void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad,
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index f6749dced021..3b5fd4188f2a 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -315,11 +315,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
315 mutex_lock(&sta->ampdu_mlme.mtx); 315 mutex_lock(&sta->ampdu_mlme.mtx);
316 316
317 if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) { 317 if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
318 tid_agg_rx = rcu_dereference_protected( 318 if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
319 sta->ampdu_mlme.tid_rx[tid],
320 lockdep_is_held(&sta->ampdu_mlme.mtx));
321
322 if (tid_agg_rx->dialog_token == dialog_token) {
323 ht_dbg_ratelimited(sta->sdata, 319 ht_dbg_ratelimited(sta->sdata,
324 "updated AddBA Req from %pM on tid %u\n", 320 "updated AddBA Req from %pM on tid %u\n",
325 sta->sta.addr, tid); 321 sta->sta.addr, tid);
@@ -396,7 +392,6 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
396 } 392 }
397 393
398 /* update data */ 394 /* update data */
399 tid_agg_rx->dialog_token = dialog_token;
400 tid_agg_rx->ssn = start_seq_num; 395 tid_agg_rx->ssn = start_seq_num;
401 tid_agg_rx->head_seq_num = start_seq_num; 396 tid_agg_rx->head_seq_num = start_seq_num;
402 tid_agg_rx->buf_size = buf_size; 397 tid_agg_rx->buf_size = buf_size;
@@ -418,6 +413,7 @@ end:
418 if (status == WLAN_STATUS_SUCCESS) { 413 if (status == WLAN_STATUS_SUCCESS) {
419 __set_bit(tid, sta->ampdu_mlme.agg_session_valid); 414 __set_bit(tid, sta->ampdu_mlme.agg_session_valid);
420 __clear_bit(tid, sta->ampdu_mlme.unexpected_agg); 415 __clear_bit(tid, sta->ampdu_mlme.unexpected_agg);
416 sta->ampdu_mlme.tid_rx_token[tid] = dialog_token;
421 } 417 }
422 mutex_unlock(&sta->ampdu_mlme.mtx); 418 mutex_unlock(&sta->ampdu_mlme.mtx);
423 419
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index fd6541f3ade3..e91e503bf992 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -357,10 +357,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
357 mutex_lock(&local->sta_mtx); 357 mutex_lock(&local->sta_mtx);
358 358
359 if (mac_addr) { 359 if (mac_addr) {
360 if (ieee80211_vif_is_mesh(&sdata->vif)) 360 sta = sta_info_get_bss(sdata, mac_addr);
361 sta = sta_info_get(sdata, mac_addr);
362 else
363 sta = sta_info_get_bss(sdata, mac_addr);
364 /* 361 /*
365 * The ASSOC test makes sure the driver is ready to 362 * The ASSOC test makes sure the driver is ready to
366 * receive the key. When wpa_supplicant has roamed 363 * receive the key. When wpa_supplicant has roamed
@@ -867,6 +864,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
867 } 864 }
868 sdata->needed_rx_chains = sdata->local->rx_chains; 865 sdata->needed_rx_chains = sdata->local->rx_chains;
869 866
867 sdata->vif.bss_conf.beacon_int = params->beacon_interval;
868
870 mutex_lock(&local->mtx); 869 mutex_lock(&local->mtx);
871 err = ieee80211_vif_use_channel(sdata, &params->chandef, 870 err = ieee80211_vif_use_channel(sdata, &params->chandef,
872 IEEE80211_CHANCTX_SHARED); 871 IEEE80211_CHANCTX_SHARED);
@@ -897,7 +896,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
897 vlan->vif.type); 896 vlan->vif.type);
898 } 897 }
899 898
900 sdata->vif.bss_conf.beacon_int = params->beacon_interval;
901 sdata->vif.bss_conf.dtim_period = params->dtim_period; 899 sdata->vif.bss_conf.dtim_period = params->dtim_period;
902 sdata->vif.bss_conf.enable_beacon = true; 900 sdata->vif.bss_conf.enable_beacon = true;
903 sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p; 901 sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p;
@@ -1523,9 +1521,6 @@ static int ieee80211_change_station(struct wiphy *wiphy,
1523 goto out_err; 1521 goto out_err;
1524 1522
1525 if (params->vlan && params->vlan != sta->sdata->dev) { 1523 if (params->vlan && params->vlan != sta->sdata->dev) {
1526 bool prev_4addr = false;
1527 bool new_4addr = false;
1528
1529 vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); 1524 vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
1530 1525
1531 if (params->vlan->ieee80211_ptr->use_4addr) { 1526 if (params->vlan->ieee80211_ptr->use_4addr) {
@@ -1535,26 +1530,21 @@ static int ieee80211_change_station(struct wiphy *wiphy,
1535 } 1530 }
1536 1531
1537 rcu_assign_pointer(vlansdata->u.vlan.sta, sta); 1532 rcu_assign_pointer(vlansdata->u.vlan.sta, sta);
1538 new_4addr = true;
1539 __ieee80211_check_fast_rx_iface(vlansdata); 1533 __ieee80211_check_fast_rx_iface(vlansdata);
1540 } 1534 }
1541 1535
1542 if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && 1536 if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
1543 sta->sdata->u.vlan.sta) { 1537 sta->sdata->u.vlan.sta)
1544 RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); 1538 RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL);
1545 prev_4addr = true; 1539
1546 } 1540 if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
1541 ieee80211_vif_dec_num_mcast(sta->sdata);
1547 1542
1548 sta->sdata = vlansdata; 1543 sta->sdata = vlansdata;
1549 ieee80211_check_fast_xmit(sta); 1544 ieee80211_check_fast_xmit(sta);
1550 1545
1551 if (sta->sta_state == IEEE80211_STA_AUTHORIZED && 1546 if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
1552 prev_4addr != new_4addr) { 1547 ieee80211_vif_inc_num_mcast(sta->sdata);
1553 if (new_4addr)
1554 atomic_dec(&sta->sdata->bss->num_mcast_sta);
1555 else
1556 atomic_inc(&sta->sdata->bss->num_mcast_sta);
1557 }
1558 1548
1559 ieee80211_send_layer2_update(sta); 1549 ieee80211_send_layer2_update(sta);
1560 } 1550 }
@@ -2480,13 +2470,6 @@ int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata,
2480 smps_mode == IEEE80211_SMPS_AUTOMATIC) 2470 smps_mode == IEEE80211_SMPS_AUTOMATIC)
2481 return 0; 2471 return 0;
2482 2472
2483 /* If no associated stations, there's no need to do anything */
2484 if (!atomic_read(&sdata->u.ap.num_mcast_sta)) {
2485 sdata->smps_mode = smps_mode;
2486 ieee80211_queue_work(&sdata->local->hw, &sdata->recalc_smps);
2487 return 0;
2488 }
2489
2490 ht_dbg(sdata, 2473 ht_dbg(sdata,
2491 "SMPS %d requested in AP mode, sending Action frame to %d stations\n", 2474 "SMPS %d requested in AP mode, sending Action frame to %d stations\n",
2492 smps_mode, atomic_read(&sdata->u.ap.num_mcast_sta)); 2475 smps_mode, atomic_read(&sdata->u.ap.num_mcast_sta));
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index f56e2f487d09..e02ba42ca827 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -210,6 +210,7 @@ static const char *hw_flag_names[] = {
210 FLAG(TX_AMSDU), 210 FLAG(TX_AMSDU),
211 FLAG(TX_FRAG_LIST), 211 FLAG(TX_FRAG_LIST),
212 FLAG(REPORTS_LOW_ACK), 212 FLAG(REPORTS_LOW_ACK),
213 FLAG(SUPPORTS_TX_FRAG),
213#undef FLAG 214#undef FLAG
214}; 215};
215 216
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index bcec1240f41d..1a05f85cb1f0 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -477,6 +477,7 @@ IEEE80211_IF_FILE_RW(tdls_wider_bw);
477IEEE80211_IF_FILE(num_mcast_sta, u.ap.num_mcast_sta, ATOMIC); 477IEEE80211_IF_FILE(num_mcast_sta, u.ap.num_mcast_sta, ATOMIC);
478IEEE80211_IF_FILE(num_sta_ps, u.ap.ps.num_sta_ps, ATOMIC); 478IEEE80211_IF_FILE(num_sta_ps, u.ap.ps.num_sta_ps, ATOMIC);
479IEEE80211_IF_FILE(dtim_count, u.ap.ps.dtim_count, DEC); 479IEEE80211_IF_FILE(dtim_count, u.ap.ps.dtim_count, DEC);
480IEEE80211_IF_FILE(num_mcast_sta_vlan, u.vlan.num_mcast_sta, ATOMIC);
480 481
481static ssize_t ieee80211_if_fmt_num_buffered_multicast( 482static ssize_t ieee80211_if_fmt_num_buffered_multicast(
482 const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) 483 const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
@@ -684,6 +685,13 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata)
684 DEBUGFS_ADD_MODE(tkip_mic_test, 0200); 685 DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
685} 686}
686 687
688static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
689{
690 /* add num_mcast_sta_vlan using name num_mcast_sta */
691 debugfs_create_file("num_mcast_sta", 0400, sdata->vif.debugfs_dir,
692 sdata, &num_mcast_sta_vlan_ops);
693}
694
687static void add_ibss_files(struct ieee80211_sub_if_data *sdata) 695static void add_ibss_files(struct ieee80211_sub_if_data *sdata)
688{ 696{
689 DEBUGFS_ADD_MODE(tsf, 0600); 697 DEBUGFS_ADD_MODE(tsf, 0600);
@@ -787,6 +795,9 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
787 case NL80211_IFTYPE_AP: 795 case NL80211_IFTYPE_AP:
788 add_ap_files(sdata); 796 add_ap_files(sdata);
789 break; 797 break;
798 case NL80211_IFTYPE_AP_VLAN:
799 add_vlan_files(sdata);
800 break;
790 case NL80211_IFTYPE_WDS: 801 case NL80211_IFTYPE_WDS:
791 add_wds_files(sdata); 802 add_wds_files(sdata);
792 break; 803 break;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index a2fcdb47a0e6..f6003b8c2c33 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -199,13 +199,18 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
199 "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n"); 199 "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n");
200 200
201 for (i = 0; i < IEEE80211_NUM_TIDS; i++) { 201 for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
202 bool tid_rx_valid;
203
202 tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]); 204 tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]);
203 tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]); 205 tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]);
206 tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid);
204 207
205 p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); 208 p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i);
206 p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_rx); 209 p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x",
210 tid_rx_valid);
207 p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", 211 p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x",
208 tid_rx ? tid_rx->dialog_token : 0); 212 tid_rx_valid ?
213 sta->ampdu_mlme.tid_rx_token[i] : 0);
209 p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", 214 p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x",
210 tid_rx ? tid_rx->ssn : 0); 215 tid_rx ? tid_rx->ssn : 0);
211 216
diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c
new file mode 100644
index 000000000000..ecfdd97758a3
--- /dev/null
+++ b/net/mac80211/fils_aead.c
@@ -0,0 +1,342 @@
1/*
2 * FILS AEAD for (Re)Association Request/Response frames
3 * Copyright 2016, Qualcomm Atheros, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <crypto/aes.h>
11#include <crypto/algapi.h>
12#include <crypto/skcipher.h>
13
14#include "ieee80211_i.h"
15#include "aes_cmac.h"
16#include "fils_aead.h"
17
18static int aes_s2v(struct crypto_cipher *tfm,
19 size_t num_elem, const u8 *addr[], size_t len[], u8 *v)
20{
21 u8 d[AES_BLOCK_SIZE], tmp[AES_BLOCK_SIZE];
22 size_t i;
23 const u8 *data[2];
24 size_t data_len[2], data_elems;
25
26 /* D = AES-CMAC(K, <zero>) */
27 memset(tmp, 0, AES_BLOCK_SIZE);
28 data[0] = tmp;
29 data_len[0] = AES_BLOCK_SIZE;
30 aes_cmac_vector(tfm, 1, data, data_len, d, AES_BLOCK_SIZE);
31
32 for (i = 0; i < num_elem - 1; i++) {
33 /* D = dbl(D) xor AES_CMAC(K, Si) */
34 gf_mulx(d); /* dbl */
35 aes_cmac_vector(tfm, 1, &addr[i], &len[i], tmp,
36 AES_BLOCK_SIZE);
37 crypto_xor(d, tmp, AES_BLOCK_SIZE);
38 }
39
40 if (len[i] >= AES_BLOCK_SIZE) {
41 /* len(Sn) >= 128 */
42 size_t j;
43 const u8 *pos;
44
45 /* T = Sn xorend D */
46
47 /* Use a temporary buffer to perform xorend on Sn (addr[i]) to
48 * avoid modifying the const input argument.
49 */
50 data[0] = addr[i];
51 data_len[0] = len[i] - AES_BLOCK_SIZE;
52 pos = addr[i] + data_len[0];
53 for (j = 0; j < AES_BLOCK_SIZE; j++)
54 tmp[j] = pos[j] ^ d[j];
55 data[1] = tmp;
56 data_len[1] = AES_BLOCK_SIZE;
57 data_elems = 2;
58 } else {
59 /* len(Sn) < 128 */
60 /* T = dbl(D) xor pad(Sn) */
61 gf_mulx(d); /* dbl */
62 memset(tmp, 0, AES_BLOCK_SIZE);
63 memcpy(tmp, addr[i], len[i]);
64 tmp[len[i]] = 0x80;
65 crypto_xor(d, tmp, AES_BLOCK_SIZE);
66 data[0] = d;
67 data_len[0] = sizeof(d);
68 data_elems = 1;
69 }
70 /* V = AES-CMAC(K, T) */
71 aes_cmac_vector(tfm, data_elems, data, data_len, v, AES_BLOCK_SIZE);
72
73 return 0;
74}
75
76/* Note: addr[] and len[] needs to have one extra slot at the end. */
77static int aes_siv_encrypt(const u8 *key, size_t key_len,
78 const u8 *plain, size_t plain_len,
79 size_t num_elem, const u8 *addr[],
80 size_t len[], u8 *out)
81{
82 u8 v[AES_BLOCK_SIZE];
83 struct crypto_cipher *tfm;
84 struct crypto_skcipher *tfm2;
85 struct skcipher_request *req;
86 int res;
87 struct scatterlist src[1], dst[1];
88 u8 *tmp;
89
90 key_len /= 2; /* S2V key || CTR key */
91
92 addr[num_elem] = plain;
93 len[num_elem] = plain_len;
94 num_elem++;
95
96 /* S2V */
97
98 tfm = crypto_alloc_cipher("aes", 0, 0);
99 if (IS_ERR(tfm))
100 return PTR_ERR(tfm);
101 /* K1 for S2V */
102 res = crypto_cipher_setkey(tfm, key, key_len);
103 if (!res)
104 res = aes_s2v(tfm, num_elem, addr, len, v);
105 crypto_free_cipher(tfm);
106 if (res)
107 return res;
108
109 /* Use a temporary buffer of the plaintext to handle need for
110 * overwriting this during AES-CTR.
111 */
112 tmp = kmemdup(plain, plain_len, GFP_KERNEL);
113 if (!tmp)
114 return -ENOMEM;
115
116 /* IV for CTR before encrypted data */
117 memcpy(out, v, AES_BLOCK_SIZE);
118
119 /* Synthetic IV to be used as the initial counter in CTR:
120 * Q = V bitand (1^64 || 0^1 || 1^31 || 0^1 || 1^31)
121 */
122 v[8] &= 0x7f;
123 v[12] &= 0x7f;
124
125 /* CTR */
126
127 tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0);
128 if (IS_ERR(tfm2)) {
129 kfree(tmp);
130 return PTR_ERR(tfm2);
131 }
132 /* K2 for CTR */
133 res = crypto_skcipher_setkey(tfm2, key + key_len, key_len);
134 if (res)
135 goto fail;
136
137 req = skcipher_request_alloc(tfm2, GFP_KERNEL);
138 if (!req) {
139 res = -ENOMEM;
140 goto fail;
141 }
142
143 sg_init_one(src, tmp, plain_len);
144 sg_init_one(dst, out + AES_BLOCK_SIZE, plain_len);
145 skcipher_request_set_crypt(req, src, dst, plain_len, v);
146 res = crypto_skcipher_encrypt(req);
147 skcipher_request_free(req);
148fail:
149 kfree(tmp);
150 crypto_free_skcipher(tfm2);
151 return res;
152}
153
154/* Note: addr[] and len[] needs to have one extra slot at the end. */
155static int aes_siv_decrypt(const u8 *key, size_t key_len,
156 const u8 *iv_crypt, size_t iv_c_len,
157 size_t num_elem, const u8 *addr[], size_t len[],
158 u8 *out)
159{
160 struct crypto_cipher *tfm;
161 struct crypto_skcipher *tfm2;
162 struct skcipher_request *req;
163 struct scatterlist src[1], dst[1];
164 size_t crypt_len;
165 int res;
166 u8 frame_iv[AES_BLOCK_SIZE], iv[AES_BLOCK_SIZE];
167 u8 check[AES_BLOCK_SIZE];
168
169 crypt_len = iv_c_len - AES_BLOCK_SIZE;
170 key_len /= 2; /* S2V key || CTR key */
171 addr[num_elem] = out;
172 len[num_elem] = crypt_len;
173 num_elem++;
174
175 memcpy(iv, iv_crypt, AES_BLOCK_SIZE);
176 memcpy(frame_iv, iv_crypt, AES_BLOCK_SIZE);
177
178 /* Synthetic IV to be used as the initial counter in CTR:
179 * Q = V bitand (1^64 || 0^1 || 1^31 || 0^1 || 1^31)
180 */
181 iv[8] &= 0x7f;
182 iv[12] &= 0x7f;
183
184 /* CTR */
185
186 tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0);
187 if (IS_ERR(tfm2))
188 return PTR_ERR(tfm2);
189 /* K2 for CTR */
190 res = crypto_skcipher_setkey(tfm2, key + key_len, key_len);
191 if (res) {
192 crypto_free_skcipher(tfm2);
193 return res;
194 }
195
196 req = skcipher_request_alloc(tfm2, GFP_KERNEL);
197 if (!req) {
198 crypto_free_skcipher(tfm2);
199 return -ENOMEM;
200 }
201
202 sg_init_one(src, iv_crypt + AES_BLOCK_SIZE, crypt_len);
203 sg_init_one(dst, out, crypt_len);
204 skcipher_request_set_crypt(req, src, dst, crypt_len, iv);
205 res = crypto_skcipher_decrypt(req);
206 skcipher_request_free(req);
207 crypto_free_skcipher(tfm2);
208 if (res)
209 return res;
210
211 /* S2V */
212
213 tfm = crypto_alloc_cipher("aes", 0, 0);
214 if (IS_ERR(tfm))
215 return PTR_ERR(tfm);
216 /* K1 for S2V */
217 res = crypto_cipher_setkey(tfm, key, key_len);
218 if (!res)
219 res = aes_s2v(tfm, num_elem, addr, len, check);
220 crypto_free_cipher(tfm);
221 if (res)
222 return res;
223 if (memcmp(check, frame_iv, AES_BLOCK_SIZE) != 0)
224 return -EINVAL;
225 return 0;
226}
227
228int fils_encrypt_assoc_req(struct sk_buff *skb,
229 struct ieee80211_mgd_assoc_data *assoc_data)
230{
231 struct ieee80211_mgmt *mgmt = (void *)skb->data;
232 u8 *capab, *ies, *encr;
233 const u8 *addr[5 + 1], *session;
234 size_t len[5 + 1];
235 size_t crypt_len;
236
237 if (ieee80211_is_reassoc_req(mgmt->frame_control)) {
238 capab = (u8 *)&mgmt->u.reassoc_req.capab_info;
239 ies = mgmt->u.reassoc_req.variable;
240 } else {
241 capab = (u8 *)&mgmt->u.assoc_req.capab_info;
242 ies = mgmt->u.assoc_req.variable;
243 }
244
245 session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION,
246 ies, skb->data + skb->len - ies);
247 if (!session || session[1] != 1 + 8)
248 return -EINVAL;
249 /* encrypt after FILS Session element */
250 encr = (u8 *)session + 2 + 1 + 8;
251
252 /* AES-SIV AAD vectors */
253
254 /* The STA's MAC address */
255 addr[0] = mgmt->sa;
256 len[0] = ETH_ALEN;
257 /* The AP's BSSID */
258 addr[1] = mgmt->da;
259 len[1] = ETH_ALEN;
260 /* The STA's nonce */
261 addr[2] = assoc_data->fils_nonces;
262 len[2] = FILS_NONCE_LEN;
263 /* The AP's nonce */
264 addr[3] = &assoc_data->fils_nonces[FILS_NONCE_LEN];
265 len[3] = FILS_NONCE_LEN;
266 /* The (Re)Association Request frame from the Capability Information
267 * field to the FILS Session element (both inclusive).
268 */
269 addr[4] = capab;
270 len[4] = encr - capab;
271
272 crypt_len = skb->data + skb->len - encr;
273 skb_put(skb, AES_BLOCK_SIZE);
274 return aes_siv_encrypt(assoc_data->fils_kek, assoc_data->fils_kek_len,
275 encr, crypt_len, 1, addr, len, encr);
276}
277
278int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata,
279 u8 *frame, size_t *frame_len,
280 struct ieee80211_mgd_assoc_data *assoc_data)
281{
282 struct ieee80211_mgmt *mgmt = (void *)frame;
283 u8 *capab, *ies, *encr;
284 const u8 *addr[5 + 1], *session;
285 size_t len[5 + 1];
286 int res;
287 size_t crypt_len;
288
289 if (*frame_len < 24 + 6)
290 return -EINVAL;
291
292 capab = (u8 *)&mgmt->u.assoc_resp.capab_info;
293 ies = mgmt->u.assoc_resp.variable;
294 session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION,
295 ies, frame + *frame_len - ies);
296 if (!session || session[1] != 1 + 8) {
297 mlme_dbg(sdata,
298 "No (valid) FILS Session element in (Re)Association Response frame from %pM",
299 mgmt->sa);
300 return -EINVAL;
301 }
302 /* decrypt after FILS Session element */
303 encr = (u8 *)session + 2 + 1 + 8;
304
305 /* AES-SIV AAD vectors */
306
307 /* The AP's BSSID */
308 addr[0] = mgmt->sa;
309 len[0] = ETH_ALEN;
310 /* The STA's MAC address */
311 addr[1] = mgmt->da;
312 len[1] = ETH_ALEN;
313 /* The AP's nonce */
314 addr[2] = &assoc_data->fils_nonces[FILS_NONCE_LEN];
315 len[2] = FILS_NONCE_LEN;
316 /* The STA's nonce */
317 addr[3] = assoc_data->fils_nonces;
318 len[3] = FILS_NONCE_LEN;
319 /* The (Re)Association Response frame from the Capability Information
320 * field to the FILS Session element (both inclusive).
321 */
322 addr[4] = capab;
323 len[4] = encr - capab;
324
325 crypt_len = frame + *frame_len - encr;
326 if (crypt_len < AES_BLOCK_SIZE) {
327 mlme_dbg(sdata,
328 "Not enough room for AES-SIV data after FILS Session element in (Re)Association Response frame from %pM",
329 mgmt->sa);
330 return -EINVAL;
331 }
332 res = aes_siv_decrypt(assoc_data->fils_kek, assoc_data->fils_kek_len,
333 encr, crypt_len, 5, addr, len, encr);
334 if (res != 0) {
335 mlme_dbg(sdata,
336 "AES-SIV decryption of (Re)Association Response frame from %pM failed",
337 mgmt->sa);
338 return res;
339 }
340 *frame_len -= AES_BLOCK_SIZE;
341 return 0;
342}
diff --git a/net/mac80211/fils_aead.h b/net/mac80211/fils_aead.h
new file mode 100644
index 000000000000..fbc65232f0b3
--- /dev/null
+++ b/net/mac80211/fils_aead.h
@@ -0,0 +1,19 @@
1/*
2 * FILS AEAD for (Re)Association Request/Response frames
3 * Copyright 2016, Qualcomm Atheros, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#ifndef FILS_AEAD_H
11#define FILS_AEAD_H
12
13int fils_encrypt_assoc_req(struct sk_buff *skb,
14 struct ieee80211_mgd_assoc_data *assoc_data);
15int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata,
16 u8 *frame, size_t *frame_len,
17 struct ieee80211_mgd_assoc_data *assoc_data);
18
19#endif /* FILS_AEAD_H */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 34c2add2c455..b2069fbd60f9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -84,6 +84,8 @@ struct ieee80211_local;
84#define IEEE80211_DEFAULT_MAX_SP_LEN \ 84#define IEEE80211_DEFAULT_MAX_SP_LEN \
85 IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL 85 IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL
86 86
87extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS];
88
87#define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */) 89#define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */)
88 90
89#define IEEE80211_MAX_NAN_INSTANCE_ID 255 91#define IEEE80211_MAX_NAN_INSTANCE_ID 255
@@ -157,7 +159,7 @@ enum ieee80211_bss_valid_data_flags {
157 IEEE80211_BSS_VALID_ERP = BIT(3) 159 IEEE80211_BSS_VALID_ERP = BIT(3)
158}; 160};
159 161
160typedef unsigned __bitwise__ ieee80211_tx_result; 162typedef unsigned __bitwise ieee80211_tx_result;
161#define TX_CONTINUE ((__force ieee80211_tx_result) 0u) 163#define TX_CONTINUE ((__force ieee80211_tx_result) 0u)
162#define TX_DROP ((__force ieee80211_tx_result) 1u) 164#define TX_DROP ((__force ieee80211_tx_result) 1u)
163#define TX_QUEUED ((__force ieee80211_tx_result) 2u) 165#define TX_QUEUED ((__force ieee80211_tx_result) 2u)
@@ -178,7 +180,7 @@ struct ieee80211_tx_data {
178}; 180};
179 181
180 182
181typedef unsigned __bitwise__ ieee80211_rx_result; 183typedef unsigned __bitwise ieee80211_rx_result;
182#define RX_CONTINUE ((__force ieee80211_rx_result) 0u) 184#define RX_CONTINUE ((__force ieee80211_rx_result) 0u)
183#define RX_DROP_UNUSABLE ((__force ieee80211_rx_result) 1u) 185#define RX_DROP_UNUSABLE ((__force ieee80211_rx_result) 1u)
184#define RX_DROP_MONITOR ((__force ieee80211_rx_result) 2u) 186#define RX_DROP_MONITOR ((__force ieee80211_rx_result) 2u)
@@ -307,6 +309,7 @@ struct ieee80211_if_vlan {
307 309
308 /* used for all tx if the VLAN is configured to 4-addr mode */ 310 /* used for all tx if the VLAN is configured to 4-addr mode */
309 struct sta_info __rcu *sta; 311 struct sta_info __rcu *sta;
312 atomic_t num_mcast_sta; /* number of stations receiving multicast */
310}; 313};
311 314
312struct mesh_stats { 315struct mesh_stats {
@@ -398,6 +401,10 @@ struct ieee80211_mgd_assoc_data {
398 401
399 struct ieee80211_vht_cap ap_vht_cap; 402 struct ieee80211_vht_cap ap_vht_cap;
400 403
404 u8 fils_nonces[2 * FILS_NONCE_LEN];
405 u8 fils_kek[FILS_MAX_KEK_LEN];
406 size_t fils_kek_len;
407
401 size_t ie_len; 408 size_t ie_len;
402 u8 ie[]; 409 u8 ie[];
403}; 410};
@@ -442,7 +449,7 @@ struct ieee80211_if_managed {
442 struct ieee80211_mgd_auth_data *auth_data; 449 struct ieee80211_mgd_auth_data *auth_data;
443 struct ieee80211_mgd_assoc_data *assoc_data; 450 struct ieee80211_mgd_assoc_data *assoc_data;
444 451
445 u8 bssid[ETH_ALEN]; 452 u8 bssid[ETH_ALEN] __aligned(2);
446 453
447 u16 aid; 454 u16 aid;
448 455
@@ -1527,6 +1534,23 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
1527 return false; 1534 return false;
1528} 1535}
1529 1536
1537void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata);
1538void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata);
1539
1540/* This function returns the number of multicast stations connected to this
1541 * interface. It returns -1 if that number is not tracked, that is for netdevs
1542 * not in AP or AP_VLAN mode or when using 4addr.
1543 */
1544static inline int
1545ieee80211_vif_get_num_mcast_if(struct ieee80211_sub_if_data *sdata)
1546{
1547 if (sdata->vif.type == NL80211_IFTYPE_AP)
1548 return atomic_read(&sdata->u.ap.num_mcast_sta);
1549 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta)
1550 return atomic_read(&sdata->u.vlan.num_mcast_sta);
1551 return -1;
1552}
1553
1530u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, 1554u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
1531 struct ieee80211_rx_status *status, 1555 struct ieee80211_rx_status *status,
1532 unsigned int mpdu_len, 1556 unsigned int mpdu_len,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 638ec0759078..41497b670e2b 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -150,15 +150,6 @@ void ieee80211_recalc_idle(struct ieee80211_local *local)
150 ieee80211_hw_config(local, change); 150 ieee80211_hw_config(local, change);
151} 151}
152 152
153static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
154{
155 if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN)
156 return -EINVAL;
157
158 dev->mtu = new_mtu;
159 return 0;
160}
161
162static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, 153static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr,
163 bool check_dup) 154 bool check_dup)
164{ 155{
@@ -1166,7 +1157,6 @@ static const struct net_device_ops ieee80211_dataif_ops = {
1166 .ndo_uninit = ieee80211_uninit, 1157 .ndo_uninit = ieee80211_uninit,
1167 .ndo_start_xmit = ieee80211_subif_start_xmit, 1158 .ndo_start_xmit = ieee80211_subif_start_xmit,
1168 .ndo_set_rx_mode = ieee80211_set_multicast_list, 1159 .ndo_set_rx_mode = ieee80211_set_multicast_list,
1169 .ndo_change_mtu = ieee80211_change_mtu,
1170 .ndo_set_mac_address = ieee80211_change_mac, 1160 .ndo_set_mac_address = ieee80211_change_mac,
1171 .ndo_select_queue = ieee80211_netdev_select_queue, 1161 .ndo_select_queue = ieee80211_netdev_select_queue,
1172 .ndo_get_stats64 = ieee80211_get_stats64, 1162 .ndo_get_stats64 = ieee80211_get_stats64,
@@ -1200,7 +1190,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
1200 .ndo_uninit = ieee80211_uninit, 1190 .ndo_uninit = ieee80211_uninit,
1201 .ndo_start_xmit = ieee80211_monitor_start_xmit, 1191 .ndo_start_xmit = ieee80211_monitor_start_xmit,
1202 .ndo_set_rx_mode = ieee80211_set_multicast_list, 1192 .ndo_set_rx_mode = ieee80211_set_multicast_list,
1203 .ndo_change_mtu = ieee80211_change_mtu,
1204 .ndo_set_mac_address = ieee80211_change_mac, 1193 .ndo_set_mac_address = ieee80211_change_mac,
1205 .ndo_select_queue = ieee80211_monitor_select_queue, 1194 .ndo_select_queue = ieee80211_monitor_select_queue,
1206 .ndo_get_stats64 = ieee80211_get_stats64, 1195 .ndo_get_stats64 = ieee80211_get_stats64,
@@ -1884,6 +1873,10 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
1884 1873
1885 netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops); 1874 netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops);
1886 1875
1876 /* MTU range: 256 - 2304 */
1877 ndev->min_mtu = 256;
1878 ndev->max_mtu = IEEE80211_MAX_DATA_LEN;
1879
1887 ret = register_netdevice(ndev); 1880 ret = register_netdevice(ndev);
1888 if (ret) { 1881 if (ret) {
1889 ieee80211_if_free(ndev); 1882 ieee80211_if_free(ndev);
@@ -2005,3 +1998,19 @@ void ieee80211_iface_exit(void)
2005{ 1998{
2006 unregister_netdevice_notifier(&mac80211_netdev_notifier); 1999 unregister_netdevice_notifier(&mac80211_netdev_notifier);
2007} 2000}
2001
2002void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata)
2003{
2004 if (sdata->vif.type == NL80211_IFTYPE_AP)
2005 atomic_inc(&sdata->u.ap.num_mcast_sta);
2006 else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
2007 atomic_inc(&sdata->u.vlan.num_mcast_sta);
2008}
2009
2010void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata)
2011{
2012 if (sdata->vif.type == NL80211_IFTYPE_AP)
2013 atomic_dec(&sdata->u.ap.num_mcast_sta);
2014 else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
2015 atomic_dec(&sdata->u.vlan.num_mcast_sta);
2016}
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index edd6f2945f69..a98fc2b5e0dc 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -265,7 +265,8 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
265 if (uni) { 265 if (uni) {
266 rcu_assign_pointer(sdata->default_unicast_key, key); 266 rcu_assign_pointer(sdata->default_unicast_key, key);
267 ieee80211_check_fast_xmit_iface(sdata); 267 ieee80211_check_fast_xmit_iface(sdata);
268 drv_set_default_unicast_key(sdata->local, sdata, idx); 268 if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN)
269 drv_set_default_unicast_key(sdata->local, sdata, idx);
269 } 270 }
270 271
271 if (multi) 272 if (multi)
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 1075ac24c8c5..1822c77f2b1c 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -549,6 +549,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
549 NL80211_FEATURE_MAC_ON_CREATE | 549 NL80211_FEATURE_MAC_ON_CREATE |
550 NL80211_FEATURE_USERSPACE_MPM | 550 NL80211_FEATURE_USERSPACE_MPM |
551 NL80211_FEATURE_FULL_AP_CLIENT_STATE; 551 NL80211_FEATURE_FULL_AP_CLIENT_STATE;
552 wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_FILS_STA);
552 553
553 if (!ops->hw_scan) 554 if (!ops->hw_scan)
554 wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | 555 wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
@@ -821,6 +822,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
821 !local->ops->tdls_recv_channel_switch)) 822 !local->ops->tdls_recv_channel_switch))
822 return -EOPNOTSUPP; 823 return -EOPNOTSUPP;
823 824
825 if (WARN_ON(ieee80211_hw_check(hw, SUPPORTS_TX_FRAG) &&
826 !local->ops->set_frag_threshold))
827 return -EINVAL;
828
824 if (WARN_ON(local->hw.wiphy->interface_modes & 829 if (WARN_ON(local->hw.wiphy->interface_modes &
825 BIT(NL80211_IFTYPE_NAN) && 830 BIT(NL80211_IFTYPE_NAN) &&
826 (!local->ops->start_nan || !local->ops->stop_nan))) 831 (!local->ops->start_nan || !local->ops->stop_nan)))
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 7486f2dab4ba..098ce9b179ee 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -30,6 +30,7 @@
30#include "driver-ops.h" 30#include "driver-ops.h"
31#include "rate.h" 31#include "rate.h"
32#include "led.h" 32#include "led.h"
33#include "fils_aead.h"
33 34
34#define IEEE80211_AUTH_TIMEOUT (HZ / 5) 35#define IEEE80211_AUTH_TIMEOUT (HZ / 5)
35#define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2) 36#define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2)
@@ -652,6 +653,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
652 2 + sizeof(struct ieee80211_ht_cap) + /* HT */ 653 2 + sizeof(struct ieee80211_ht_cap) + /* HT */
653 2 + sizeof(struct ieee80211_vht_cap) + /* VHT */ 654 2 + sizeof(struct ieee80211_vht_cap) + /* VHT */
654 assoc_data->ie_len + /* extra IEs */ 655 assoc_data->ie_len + /* extra IEs */
656 (assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
655 9, /* WMM */ 657 9, /* WMM */
656 GFP_KERNEL); 658 GFP_KERNEL);
657 if (!skb) 659 if (!skb)
@@ -875,6 +877,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
875 memcpy(pos, assoc_data->ie + offset, noffset - offset); 877 memcpy(pos, assoc_data->ie + offset, noffset - offset);
876 } 878 }
877 879
880 if (assoc_data->fils_kek_len &&
881 fils_encrypt_assoc_req(skb, assoc_data) < 0) {
882 dev_kfree_skb(skb);
883 return;
884 }
885
878 drv_mgd_prepare_tx(local, sdata); 886 drv_mgd_prepare_tx(local, sdata);
879 887
880 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; 888 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
@@ -2510,7 +2518,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
2510} 2518}
2511 2519
2512static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, 2520static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
2513 bool assoc) 2521 bool assoc, bool abandon)
2514{ 2522{
2515 struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; 2523 struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
2516 2524
@@ -2533,6 +2541,9 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
2533 mutex_lock(&sdata->local->mtx); 2541 mutex_lock(&sdata->local->mtx);
2534 ieee80211_vif_release_channel(sdata); 2542 ieee80211_vif_release_channel(sdata);
2535 mutex_unlock(&sdata->local->mtx); 2543 mutex_unlock(&sdata->local->mtx);
2544
2545 if (abandon)
2546 cfg80211_abandon_assoc(sdata->dev, assoc_data->bss);
2536 } 2547 }
2537 2548
2538 kfree(assoc_data); 2549 kfree(assoc_data);
@@ -2618,6 +2629,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
2618 case WLAN_AUTH_LEAP: 2629 case WLAN_AUTH_LEAP:
2619 case WLAN_AUTH_FT: 2630 case WLAN_AUTH_FT:
2620 case WLAN_AUTH_SAE: 2631 case WLAN_AUTH_SAE:
2632 case WLAN_AUTH_FILS_SK:
2633 case WLAN_AUTH_FILS_SK_PFS:
2634 case WLAN_AUTH_FILS_PK:
2621 break; 2635 break;
2622 case WLAN_AUTH_SHARED_KEY: 2636 case WLAN_AUTH_SHARED_KEY:
2623 if (ifmgd->auth_data->expected_transaction != 4) { 2637 if (ifmgd->auth_data->expected_transaction != 4) {
@@ -2762,7 +2776,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
2762 bssid, reason_code, 2776 bssid, reason_code,
2763 ieee80211_get_reason_code_string(reason_code)); 2777 ieee80211_get_reason_code_string(reason_code));
2764 2778
2765 ieee80211_destroy_assoc_data(sdata, false); 2779 ieee80211_destroy_assoc_data(sdata, false, true);
2766 2780
2767 cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); 2781 cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
2768 return; 2782 return;
@@ -3143,6 +3157,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
3143 reassoc ? "Rea" : "A", mgmt->sa, 3157 reassoc ? "Rea" : "A", mgmt->sa,
3144 capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); 3158 capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
3145 3159
3160 if (assoc_data->fils_kek_len &&
3161 fils_decrypt_assoc_resp(sdata, (u8 *)mgmt, &len, assoc_data) < 0)
3162 return;
3163
3146 pos = mgmt->u.assoc_resp.variable; 3164 pos = mgmt->u.assoc_resp.variable;
3147 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems); 3165 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);
3148 3166
@@ -3167,14 +3185,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
3167 if (status_code != WLAN_STATUS_SUCCESS) { 3185 if (status_code != WLAN_STATUS_SUCCESS) {
3168 sdata_info(sdata, "%pM denied association (code=%d)\n", 3186 sdata_info(sdata, "%pM denied association (code=%d)\n",
3169 mgmt->sa, status_code); 3187 mgmt->sa, status_code);
3170 ieee80211_destroy_assoc_data(sdata, false); 3188 ieee80211_destroy_assoc_data(sdata, false, false);
3171 event.u.mlme.status = MLME_DENIED; 3189 event.u.mlme.status = MLME_DENIED;
3172 event.u.mlme.reason = status_code; 3190 event.u.mlme.reason = status_code;
3173 drv_event_callback(sdata->local, sdata, &event); 3191 drv_event_callback(sdata->local, sdata, &event);
3174 } else { 3192 } else {
3175 if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { 3193 if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) {
3176 /* oops -- internal error -- send timeout for now */ 3194 /* oops -- internal error -- send timeout for now */
3177 ieee80211_destroy_assoc_data(sdata, false); 3195 ieee80211_destroy_assoc_data(sdata, false, false);
3178 cfg80211_assoc_timeout(sdata->dev, bss); 3196 cfg80211_assoc_timeout(sdata->dev, bss);
3179 return; 3197 return;
3180 } 3198 }
@@ -3187,13 +3205,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
3187 * recalc after assoc_data is NULL but before associated 3205 * recalc after assoc_data is NULL but before associated
3188 * is set can cause the interface to go idle 3206 * is set can cause the interface to go idle
3189 */ 3207 */
3190 ieee80211_destroy_assoc_data(sdata, true); 3208 ieee80211_destroy_assoc_data(sdata, true, false);
3191 3209
3192 /* get uapsd queues configuration */ 3210 /* get uapsd queues configuration */
3193 uapsd_queues = 0; 3211 uapsd_queues = 0;
3194 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) 3212 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
3195 if (sdata->tx_conf[ac].uapsd) 3213 if (sdata->tx_conf[ac].uapsd)
3196 uapsd_queues |= BIT(ac); 3214 uapsd_queues |= ieee80211_ac_to_qos_mask[ac];
3197 } 3215 }
3198 3216
3199 cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len, uapsd_queues); 3217 cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len, uapsd_queues);
@@ -3886,7 +3904,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
3886 .u.mlme.status = MLME_TIMEOUT, 3904 .u.mlme.status = MLME_TIMEOUT,
3887 }; 3905 };
3888 3906
3889 ieee80211_destroy_assoc_data(sdata, false); 3907 ieee80211_destroy_assoc_data(sdata, false, false);
3890 cfg80211_assoc_timeout(sdata->dev, bss); 3908 cfg80211_assoc_timeout(sdata->dev, bss);
3891 drv_event_callback(sdata->local, sdata, &event); 3909 drv_event_callback(sdata->local, sdata, &event);
3892 } 3910 }
@@ -4025,7 +4043,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata)
4025 WLAN_REASON_DEAUTH_LEAVING, 4043 WLAN_REASON_DEAUTH_LEAVING,
4026 false, frame_buf); 4044 false, frame_buf);
4027 if (ifmgd->assoc_data) 4045 if (ifmgd->assoc_data)
4028 ieee80211_destroy_assoc_data(sdata, false); 4046 ieee80211_destroy_assoc_data(sdata, false, true);
4029 if (ifmgd->auth_data) 4047 if (ifmgd->auth_data)
4030 ieee80211_destroy_auth_data(sdata, false); 4048 ieee80211_destroy_auth_data(sdata, false);
4031 cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, 4049 cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
@@ -4479,24 +4497,36 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
4479 case NL80211_AUTHTYPE_SAE: 4497 case NL80211_AUTHTYPE_SAE:
4480 auth_alg = WLAN_AUTH_SAE; 4498 auth_alg = WLAN_AUTH_SAE;
4481 break; 4499 break;
4500 case NL80211_AUTHTYPE_FILS_SK:
4501 auth_alg = WLAN_AUTH_FILS_SK;
4502 break;
4503 case NL80211_AUTHTYPE_FILS_SK_PFS:
4504 auth_alg = WLAN_AUTH_FILS_SK_PFS;
4505 break;
4506 case NL80211_AUTHTYPE_FILS_PK:
4507 auth_alg = WLAN_AUTH_FILS_PK;
4508 break;
4482 default: 4509 default:
4483 return -EOPNOTSUPP; 4510 return -EOPNOTSUPP;
4484 } 4511 }
4485 4512
4486 auth_data = kzalloc(sizeof(*auth_data) + req->sae_data_len + 4513 auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len +
4487 req->ie_len, GFP_KERNEL); 4514 req->ie_len, GFP_KERNEL);
4488 if (!auth_data) 4515 if (!auth_data)
4489 return -ENOMEM; 4516 return -ENOMEM;
4490 4517
4491 auth_data->bss = req->bss; 4518 auth_data->bss = req->bss;
4492 4519
4493 if (req->sae_data_len >= 4) { 4520 if (req->auth_data_len >= 4) {
4494 __le16 *pos = (__le16 *) req->sae_data; 4521 if (req->auth_type == NL80211_AUTHTYPE_SAE) {
4495 auth_data->sae_trans = le16_to_cpu(pos[0]); 4522 __le16 *pos = (__le16 *) req->auth_data;
4496 auth_data->sae_status = le16_to_cpu(pos[1]); 4523
4497 memcpy(auth_data->data, req->sae_data + 4, 4524 auth_data->sae_trans = le16_to_cpu(pos[0]);
4498 req->sae_data_len - 4); 4525 auth_data->sae_status = le16_to_cpu(pos[1]);
4499 auth_data->data_len += req->sae_data_len - 4; 4526 }
4527 memcpy(auth_data->data, req->auth_data + 4,
4528 req->auth_data_len - 4);
4529 auth_data->data_len += req->auth_data_len - 4;
4500 } 4530 }
4501 4531
4502 if (req->ie && req->ie_len) { 4532 if (req->ie && req->ie_len) {
@@ -4692,6 +4722,21 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4692 assoc_data->ie_len = req->ie_len; 4722 assoc_data->ie_len = req->ie_len;
4693 } 4723 }
4694 4724
4725 if (req->fils_kek) {
4726 /* should already be checked in cfg80211 - so warn */
4727 if (WARN_ON(req->fils_kek_len > FILS_MAX_KEK_LEN)) {
4728 err = -EINVAL;
4729 goto err_free;
4730 }
4731 memcpy(assoc_data->fils_kek, req->fils_kek,
4732 req->fils_kek_len);
4733 assoc_data->fils_kek_len = req->fils_kek_len;
4734 }
4735
4736 if (req->fils_nonces)
4737 memcpy(assoc_data->fils_nonces, req->fils_nonces,
4738 2 * FILS_NONCE_LEN);
4739
4695 assoc_data->bss = req->bss; 4740 assoc_data->bss = req->bss;
4696 4741
4697 if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) { 4742 if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) {
@@ -4907,7 +4952,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
4907 IEEE80211_STYPE_DEAUTH, 4952 IEEE80211_STYPE_DEAUTH,
4908 req->reason_code, tx, 4953 req->reason_code, tx,
4909 frame_buf); 4954 frame_buf);
4910 ieee80211_destroy_assoc_data(sdata, false); 4955 ieee80211_destroy_assoc_data(sdata, false, true);
4911 ieee80211_report_disconnect(sdata, frame_buf, 4956 ieee80211_report_disconnect(sdata, frame_buf,
4912 sizeof(frame_buf), true, 4957 sizeof(frame_buf), true,
4913 req->reason_code); 4958 req->reason_code);
@@ -4982,7 +5027,7 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
4982 sdata_lock(sdata); 5027 sdata_lock(sdata);
4983 if (ifmgd->assoc_data) { 5028 if (ifmgd->assoc_data) {
4984 struct cfg80211_bss *bss = ifmgd->assoc_data->bss; 5029 struct cfg80211_bss *bss = ifmgd->assoc_data->bss;
4985 ieee80211_destroy_assoc_data(sdata, false); 5030 ieee80211_destroy_assoc_data(sdata, false, false);
4986 cfg80211_assoc_timeout(sdata->dev, bss); 5031 cfg80211_assoc_timeout(sdata->dev, bss);
4987 } 5032 }
4988 if (ifmgd->auth_data) 5033 if (ifmgd->auth_data)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a47bbc973f2d..3e289a64ed43 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1394,13 +1394,15 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *pubsta, u8 tid)
1394 u8 ac = ieee802_1d_to_ac[tid & 7]; 1394 u8 ac = ieee802_1d_to_ac[tid & 7];
1395 1395
1396 /* 1396 /*
1397 * If this AC is not trigger-enabled do nothing. 1397 * If this AC is not trigger-enabled do nothing unless the
1398 * driver is calling us after it already checked.
1398 * 1399 *
1399 * NB: This could/should check a separate bitmap of trigger- 1400 * NB: This could/should check a separate bitmap of trigger-
1400 * enabled queues, but for now we only implement uAPSD w/o 1401 * enabled queues, but for now we only implement uAPSD w/o
1401 * TSPEC changes to the ACs, so they're always the same. 1402 * TSPEC changes to the ACs, so they're always the same.
1402 */ 1403 */
1403 if (!(sta->sta.uapsd_queues & BIT(ac))) 1404 if (!(sta->sta.uapsd_queues & ieee80211_ac_to_qos_mask[ac]) &&
1405 tid != IEEE80211_NUM_TIDS)
1404 return; 1406 return;
1405 1407
1406 /* if we are in a service period, do nothing */ 1408 /* if we are in a service period, do nothing */
@@ -2215,7 +2217,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
2215 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && 2217 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
2216 !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && 2218 !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
2217 (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { 2219 (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
2218 if (is_multicast_ether_addr(ehdr->h_dest)) { 2220 if (is_multicast_ether_addr(ehdr->h_dest) &&
2221 ieee80211_vif_get_num_mcast_if(sdata) != 0) {
2219 /* 2222 /*
2220 * send multicast frames both to higher layers in 2223 * send multicast frames both to higher layers in
2221 * local net stack and back to the wireless medium 2224 * local net stack and back to the wireless medium
@@ -2224,7 +2227,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
2224 if (!xmit_skb) 2227 if (!xmit_skb)
2225 net_info_ratelimited("%s: failed to clone multicast frame\n", 2228 net_info_ratelimited("%s: failed to clone multicast frame\n",
2226 dev->name); 2229 dev->name);
2227 } else { 2230 } else if (!is_multicast_ether_addr(ehdr->h_dest)) {
2228 dsta = sta_info_get(sdata, skb->data); 2231 dsta = sta_info_get(sdata, skb->data);
2229 if (dsta) { 2232 if (dsta) {
2230 /* 2233 /*
@@ -2469,7 +2472,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
2469 if (!ifmsh->mshcfg.dot11MeshForwarding) 2472 if (!ifmsh->mshcfg.dot11MeshForwarding)
2470 goto out; 2473 goto out;
2471 2474
2472 fwd_skb = skb_copy(skb, GFP_ATOMIC); 2475 fwd_skb = skb_copy_expand(skb, local->tx_headroom, 0, GFP_ATOMIC);
2473 if (!fwd_skb) { 2476 if (!fwd_skb) {
2474 net_info_ratelimited("%s: failed to clone mesh frame\n", 2477 net_info_ratelimited("%s: failed to clone mesh frame\n",
2475 sdata->name); 2478 sdata->name);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 8e05032689f0..b6cfcf038c11 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -709,7 +709,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
709 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { 709 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
710 unsigned long tids; 710 unsigned long tids;
711 711
712 if (ignore_for_tim & BIT(ac)) 712 if (ignore_for_tim & ieee80211_ac_to_qos_mask[ac])
713 continue; 713 continue;
714 714
715 indicate_tim |= !skb_queue_empty(&sta->tx_filtered[ac]) || 715 indicate_tim |= !skb_queue_empty(&sta->tx_filtered[ac]) ||
@@ -1389,7 +1389,7 @@ ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs,
1389 return true; 1389 return true;
1390 1390
1391 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { 1391 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
1392 if (ignored_acs & BIT(ac)) 1392 if (ignored_acs & ieee80211_ac_to_qos_mask[ac])
1393 continue; 1393 continue;
1394 1394
1395 if (!skb_queue_empty(&sta->tx_filtered[ac]) || 1395 if (!skb_queue_empty(&sta->tx_filtered[ac]) ||
@@ -1414,7 +1414,7 @@ ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs,
1414 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { 1414 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
1415 unsigned long tids; 1415 unsigned long tids;
1416 1416
1417 if (ignored_acs & BIT(ac)) 1417 if (ignored_acs & ieee80211_ac_to_qos_mask[ac])
1418 continue; 1418 continue;
1419 1419
1420 tids = ieee80211_tids_for_ac(ac); 1420 tids = ieee80211_tids_for_ac(ac);
@@ -1482,7 +1482,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
1482 BIT(find_highest_prio_tid(driver_release_tids)); 1482 BIT(find_highest_prio_tid(driver_release_tids));
1483 1483
1484 if (skb_queue_empty(&frames) && !driver_release_tids) { 1484 if (skb_queue_empty(&frames) && !driver_release_tids) {
1485 int tid; 1485 int tid, ac;
1486 1486
1487 /* 1487 /*
1488 * For PS-Poll, this can only happen due to a race condition 1488 * For PS-Poll, this can only happen due to a race condition
@@ -1500,7 +1500,10 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
1500 */ 1500 */
1501 1501
1502 /* This will evaluate to 1, 3, 5 or 7. */ 1502 /* This will evaluate to 1, 3, 5 or 7. */
1503 tid = 7 - ((ffs(~ignored_acs) - 1) << 1); 1503 for (ac = IEEE80211_AC_VO; ac < IEEE80211_NUM_ACS; ac++)
1504 if (ignored_acs & BIT(ac))
1505 continue;
1506 tid = 7 - 2 * ac;
1504 1507
1505 ieee80211_send_null_response(sta, tid, reason, true, false); 1508 ieee80211_send_null_response(sta, tid, reason, true, false);
1506 } else if (!driver_release_tids) { 1509 } else if (!driver_release_tids) {
@@ -1871,10 +1874,7 @@ int sta_info_move_state(struct sta_info *sta,
1871 if (!sta->sta.support_p2p_ps) 1874 if (!sta->sta.support_p2p_ps)
1872 ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); 1875 ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
1873 } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { 1876 } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
1874 if (sta->sdata->vif.type == NL80211_IFTYPE_AP || 1877 ieee80211_vif_dec_num_mcast(sta->sdata);
1875 (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
1876 !sta->sdata->u.vlan.sta))
1877 atomic_dec(&sta->sdata->bss->num_mcast_sta);
1878 clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); 1878 clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
1879 ieee80211_clear_fast_xmit(sta); 1879 ieee80211_clear_fast_xmit(sta);
1880 ieee80211_clear_fast_rx(sta); 1880 ieee80211_clear_fast_rx(sta);
@@ -1882,10 +1882,7 @@ int sta_info_move_state(struct sta_info *sta,
1882 break; 1882 break;
1883 case IEEE80211_STA_AUTHORIZED: 1883 case IEEE80211_STA_AUTHORIZED:
1884 if (sta->sta_state == IEEE80211_STA_ASSOC) { 1884 if (sta->sta_state == IEEE80211_STA_ASSOC) {
1885 if (sta->sdata->vif.type == NL80211_IFTYPE_AP || 1885 ieee80211_vif_inc_num_mcast(sta->sdata);
1886 (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
1887 !sta->sdata->u.vlan.sta))
1888 atomic_inc(&sta->sdata->bss->num_mcast_sta);
1889 set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); 1886 set_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
1890 ieee80211_check_fast_xmit(sta); 1887 ieee80211_check_fast_xmit(sta);
1891 ieee80211_check_fast_rx(sta); 1888 ieee80211_check_fast_rx(sta);
@@ -1975,6 +1972,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
1975 u16 brate; 1972 u16 brate;
1976 unsigned int shift; 1973 unsigned int shift;
1977 1974
1975 rinfo->flags = 0;
1978 sband = local->hw.wiphy->bands[(rate >> 4) & 0xf]; 1976 sband = local->hw.wiphy->bands[(rate >> 4) & 0xf];
1979 brate = sband->bitrates[rate & 0xf].bitrate; 1977 brate = sband->bitrates[rate & 0xf].bitrate;
1980 if (rinfo->bw == RATE_INFO_BW_5) 1978 if (rinfo->bw == RATE_INFO_BW_5)
@@ -1990,14 +1988,15 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
1990 rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; 1988 rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI;
1991} 1989}
1992 1990
1993static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) 1991static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
1994{ 1992{
1995 u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate); 1993 u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate);
1996 1994
1997 if (rate == STA_STATS_RATE_INVALID) 1995 if (rate == STA_STATS_RATE_INVALID)
1998 rinfo->flags = 0; 1996 return -EINVAL;
1999 else 1997
2000 sta_stats_decode_rate(sta->local, rate, rinfo); 1998 sta_stats_decode_rate(sta->local, rate, rinfo);
1999 return 0;
2001} 2000}
2002 2001
2003static void sta_set_tidstats(struct sta_info *sta, 2002static void sta_set_tidstats(struct sta_info *sta,
@@ -2202,8 +2201,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
2202 } 2201 }
2203 2202
2204 if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE))) { 2203 if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE))) {
2205 sta_set_rate_info_rx(sta, &sinfo->rxrate); 2204 if (sta_set_rate_info_rx(sta, &sinfo->rxrate) == 0)
2206 sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE); 2205 sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE);
2207 } 2206 }
2208 2207
2209 sinfo->filled |= BIT(NL80211_STA_INFO_TID_STATS); 2208 sinfo->filled |= BIT(NL80211_STA_INFO_TID_STATS);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index ed5fcb984a01..dd06ef0b8861 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -184,7 +184,6 @@ struct tid_ampdu_tx {
184 * @ssn: Starting Sequence Number expected to be aggregated. 184 * @ssn: Starting Sequence Number expected to be aggregated.
185 * @buf_size: buffer size for incoming A-MPDUs 185 * @buf_size: buffer size for incoming A-MPDUs
186 * @timeout: reset timer value (in TUs). 186 * @timeout: reset timer value (in TUs).
187 * @dialog_token: dialog token for aggregation session
188 * @rcu_head: RCU head used for freeing this struct 187 * @rcu_head: RCU head used for freeing this struct
189 * @reorder_lock: serializes access to reorder buffer, see below. 188 * @reorder_lock: serializes access to reorder buffer, see below.
190 * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and 189 * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
@@ -213,7 +212,6 @@ struct tid_ampdu_rx {
213 u16 ssn; 212 u16 ssn;
214 u16 buf_size; 213 u16 buf_size;
215 u16 timeout; 214 u16 timeout;
216 u8 dialog_token;
217 bool auto_seq; 215 bool auto_seq;
218 bool removed; 216 bool removed;
219}; 217};
@@ -225,6 +223,7 @@ struct tid_ampdu_rx {
225 * to tid_tx[idx], which are protected by the sta spinlock) 223 * to tid_tx[idx], which are protected by the sta spinlock)
226 * tid_start_tx is also protected by sta->lock. 224 * tid_start_tx is also protected by sta->lock.
227 * @tid_rx: aggregation info for Rx per TID -- RCU protected 225 * @tid_rx: aggregation info for Rx per TID -- RCU protected
226 * @tid_rx_token: dialog tokens for valid aggregation sessions
228 * @tid_rx_timer_expired: bitmap indicating on which TIDs the 227 * @tid_rx_timer_expired: bitmap indicating on which TIDs the
229 * RX timer expired until the work for it runs 228 * RX timer expired until the work for it runs
230 * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the 229 * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the
@@ -243,6 +242,7 @@ struct sta_ampdu_mlme {
243 struct mutex mtx; 242 struct mutex mtx;
244 /* rx */ 243 /* rx */
245 struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS]; 244 struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS];
245 u8 tid_rx_token[IEEE80211_NUM_TIDS];
246 unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; 246 unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
247 unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; 247 unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
248 unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; 248 unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index bd5f4be89435..2c21b7039136 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -331,9 +331,8 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
331 I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); 331 I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc);
332 return TX_DROP; 332 return TX_DROP;
333 } 333 }
334 } else if (unlikely(tx->sdata->vif.type == NL80211_IFTYPE_AP && 334 } else if (unlikely(ieee80211_is_data(hdr->frame_control) &&
335 ieee80211_is_data(hdr->frame_control) && 335 ieee80211_vif_get_num_mcast_if(tx->sdata) == 0)) {
336 !atomic_read(&tx->sdata->u.ap.num_mcast_sta))) {
337 /* 336 /*
338 * No associated STAs - no need to send multicast 337 * No associated STAs - no need to send multicast
339 * frames. 338 * frames.
@@ -935,7 +934,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
935 if (info->flags & IEEE80211_TX_CTL_DONTFRAG) 934 if (info->flags & IEEE80211_TX_CTL_DONTFRAG)
936 return TX_CONTINUE; 935 return TX_CONTINUE;
937 936
938 if (tx->local->ops->set_frag_threshold) 937 if (ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG))
939 return TX_CONTINUE; 938 return TX_CONTINUE;
940 939
941 /* 940 /*
@@ -2798,7 +2797,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
2798 2797
2799 /* fast-xmit doesn't handle fragmentation at all */ 2798 /* fast-xmit doesn't handle fragmentation at all */
2800 if (local->hw.wiphy->frag_threshold != (u32)-1 && 2799 if (local->hw.wiphy->frag_threshold != (u32)-1 &&
2801 !local->ops->set_frag_threshold) 2800 !ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG))
2802 goto out; 2801 goto out;
2803 2802
2804 rcu_read_lock(); 2803 rcu_read_lock();
@@ -3057,11 +3056,12 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
3057 struct ieee80211_local *local = sdata->local; 3056 struct ieee80211_local *local = sdata->local;
3058 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 3057 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
3059 struct ieee80211_hdr *hdr; 3058 struct ieee80211_hdr *hdr;
3060 struct ethhdr amsdu_hdr; 3059 struct ethhdr *amsdu_hdr;
3061 int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header); 3060 int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header);
3062 int subframe_len = skb->len - hdr_len; 3061 int subframe_len = skb->len - hdr_len;
3063 void *data; 3062 void *data;
3064 u8 *qc; 3063 u8 *qc, *h_80211_src, *h_80211_dst;
3064 const u8 *bssid;
3065 3065
3066 if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) 3066 if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
3067 return false; 3067 return false;
@@ -3069,19 +3069,44 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
3069 if (info->control.flags & IEEE80211_TX_CTRL_AMSDU) 3069 if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
3070 return true; 3070 return true;
3071 3071
3072 if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(amsdu_hdr), 3072 if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr),
3073 &subframe_len)) 3073 &subframe_len))
3074 return false; 3074 return false;
3075 3075
3076 amsdu_hdr.h_proto = cpu_to_be16(subframe_len); 3076 data = skb_push(skb, sizeof(*amsdu_hdr));
3077 memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN); 3077 memmove(data, data + sizeof(*amsdu_hdr), hdr_len);
3078 memcpy(amsdu_hdr.h_dest, skb->data + fast_tx->da_offs, ETH_ALEN); 3078 hdr = data;
3079 amsdu_hdr = data + hdr_len;
3080 /* h_80211_src/dst is addr* field within hdr */
3081 h_80211_src = data + fast_tx->sa_offs;
3082 h_80211_dst = data + fast_tx->da_offs;
3083
3084 amsdu_hdr->h_proto = cpu_to_be16(subframe_len);
3085 ether_addr_copy(amsdu_hdr->h_source, h_80211_src);
3086 ether_addr_copy(amsdu_hdr->h_dest, h_80211_dst);
3087
3088 /* according to IEEE 802.11-2012 8.3.2 table 8-19, the outer SA/DA
3089 * fields needs to be changed to BSSID for A-MSDU frames depending
3090 * on FromDS/ToDS values.
3091 */
3092 switch (sdata->vif.type) {
3093 case NL80211_IFTYPE_STATION:
3094 bssid = sdata->u.mgd.bssid;
3095 break;
3096 case NL80211_IFTYPE_AP:
3097 case NL80211_IFTYPE_AP_VLAN:
3098 bssid = sdata->vif.addr;
3099 break;
3100 default:
3101 bssid = NULL;
3102 }
3079 3103
3080 data = skb_push(skb, sizeof(amsdu_hdr)); 3104 if (bssid && ieee80211_has_fromds(hdr->frame_control))
3081 memmove(data, data + sizeof(amsdu_hdr), hdr_len); 3105 ether_addr_copy(h_80211_src, bssid);
3082 memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr)); 3106
3107 if (bssid && ieee80211_has_tods(hdr->frame_control))
3108 ether_addr_copy(h_80211_dst, bssid);
3083 3109
3084 hdr = data;
3085 qc = ieee80211_get_qos_ctl(hdr); 3110 qc = ieee80211_get_qos_ctl(hdr);
3086 *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT; 3111 *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT;
3087 3112
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 545c79a42a77..ac59fbd280df 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3308,10 +3308,11 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
3308 struct ieee80211_local *local = sdata->local; 3308 struct ieee80211_local *local = sdata->local;
3309 struct ieee80211_sub_if_data *sdata_iter; 3309 struct ieee80211_sub_if_data *sdata_iter;
3310 enum nl80211_iftype iftype = sdata->wdev.iftype; 3310 enum nl80211_iftype iftype = sdata->wdev.iftype;
3311 int num[NUM_NL80211_IFTYPES];
3312 struct ieee80211_chanctx *ctx; 3311 struct ieee80211_chanctx *ctx;
3313 int num_different_channels = 0;
3314 int total = 1; 3312 int total = 1;
3313 struct iface_combination_params params = {
3314 .radar_detect = radar_detect,
3315 };
3315 3316
3316 lockdep_assert_held(&local->chanctx_mtx); 3317 lockdep_assert_held(&local->chanctx_mtx);
3317 3318
@@ -3322,12 +3323,19 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
3322 !chandef->chan)) 3323 !chandef->chan))
3323 return -EINVAL; 3324 return -EINVAL;
3324 3325
3325 if (chandef)
3326 num_different_channels = 1;
3327
3328 if (WARN_ON(iftype >= NUM_NL80211_IFTYPES)) 3326 if (WARN_ON(iftype >= NUM_NL80211_IFTYPES))
3329 return -EINVAL; 3327 return -EINVAL;
3330 3328
3329 if (sdata->vif.type == NL80211_IFTYPE_AP ||
3330 sdata->vif.type == NL80211_IFTYPE_MESH_POINT) {
3331 /*
3332 * always passing this is harmless, since it'll be the
3333 * same value that cfg80211 finds if it finds the same
3334 * interface ... and that's always allowed
3335 */
3336 params.new_beacon_int = sdata->vif.bss_conf.beacon_int;
3337 }
3338
3331 /* Always allow software iftypes */ 3339 /* Always allow software iftypes */
3332 if (local->hw.wiphy->software_iftypes & BIT(iftype)) { 3340 if (local->hw.wiphy->software_iftypes & BIT(iftype)) {
3333 if (radar_detect) 3341 if (radar_detect)
@@ -3335,24 +3343,26 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
3335 return 0; 3343 return 0;
3336 } 3344 }
3337 3345
3338 memset(num, 0, sizeof(num)); 3346 if (chandef)
3347 params.num_different_channels = 1;
3339 3348
3340 if (iftype != NL80211_IFTYPE_UNSPECIFIED) 3349 if (iftype != NL80211_IFTYPE_UNSPECIFIED)
3341 num[iftype] = 1; 3350 params.iftype_num[iftype] = 1;
3342 3351
3343 list_for_each_entry(ctx, &local->chanctx_list, list) { 3352 list_for_each_entry(ctx, &local->chanctx_list, list) {
3344 if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) 3353 if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
3345 continue; 3354 continue;
3346 radar_detect |= ieee80211_chanctx_radar_detect(local, ctx); 3355 params.radar_detect |=
3356 ieee80211_chanctx_radar_detect(local, ctx);
3347 if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) { 3357 if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) {
3348 num_different_channels++; 3358 params.num_different_channels++;
3349 continue; 3359 continue;
3350 } 3360 }
3351 if (chandef && chanmode == IEEE80211_CHANCTX_SHARED && 3361 if (chandef && chanmode == IEEE80211_CHANCTX_SHARED &&
3352 cfg80211_chandef_compatible(chandef, 3362 cfg80211_chandef_compatible(chandef,
3353 &ctx->conf.def)) 3363 &ctx->conf.def))
3354 continue; 3364 continue;
3355 num_different_channels++; 3365 params.num_different_channels++;
3356 } 3366 }
3357 3367
3358 list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) { 3368 list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) {
@@ -3365,16 +3375,14 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
3365 local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype)) 3375 local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
3366 continue; 3376 continue;
3367 3377
3368 num[wdev_iter->iftype]++; 3378 params.iftype_num[wdev_iter->iftype]++;
3369 total++; 3379 total++;
3370 } 3380 }
3371 3381
3372 if (total == 1 && !radar_detect) 3382 if (total == 1 && !params.radar_detect)
3373 return 0; 3383 return 0;
3374 3384
3375 return cfg80211_check_combinations(local->hw.wiphy, 3385 return cfg80211_check_combinations(local->hw.wiphy, &params);
3376 num_different_channels,
3377 radar_detect, num);
3378} 3386}
3379 3387
3380static void 3388static void
@@ -3390,12 +3398,10 @@ ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c,
3390int ieee80211_max_num_channels(struct ieee80211_local *local) 3398int ieee80211_max_num_channels(struct ieee80211_local *local)
3391{ 3399{
3392 struct ieee80211_sub_if_data *sdata; 3400 struct ieee80211_sub_if_data *sdata;
3393 int num[NUM_NL80211_IFTYPES] = {};
3394 struct ieee80211_chanctx *ctx; 3401 struct ieee80211_chanctx *ctx;
3395 int num_different_channels = 0;
3396 u8 radar_detect = 0;
3397 u32 max_num_different_channels = 1; 3402 u32 max_num_different_channels = 1;
3398 int err; 3403 int err;
3404 struct iface_combination_params params = {0};
3399 3405
3400 lockdep_assert_held(&local->chanctx_mtx); 3406 lockdep_assert_held(&local->chanctx_mtx);
3401 3407
@@ -3403,17 +3409,17 @@ int ieee80211_max_num_channels(struct ieee80211_local *local)
3403 if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) 3409 if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
3404 continue; 3410 continue;
3405 3411
3406 num_different_channels++; 3412 params.num_different_channels++;
3407 3413
3408 radar_detect |= ieee80211_chanctx_radar_detect(local, ctx); 3414 params.radar_detect |=
3415 ieee80211_chanctx_radar_detect(local, ctx);
3409 } 3416 }
3410 3417
3411 list_for_each_entry_rcu(sdata, &local->interfaces, list) 3418 list_for_each_entry_rcu(sdata, &local->interfaces, list)
3412 num[sdata->wdev.iftype]++; 3419 params.iftype_num[sdata->wdev.iftype]++;
3413 3420
3414 err = cfg80211_iter_combinations(local->hw.wiphy, 3421 err = cfg80211_iter_combinations(local->hw.wiphy, &params,
3415 num_different_channels, radar_detect, 3422 ieee80211_iter_max_chans,
3416 num, ieee80211_iter_max_chans,
3417 &max_num_different_channels); 3423 &max_num_different_channels);
3418 if (err < 0) 3424 if (err < 0)
3419 return err; 3425 return err;
@@ -3456,3 +3462,10 @@ void ieee80211_txq_get_depth(struct ieee80211_txq *txq,
3456 *byte_cnt = txqi->tin.backlog_bytes + frag_bytes; 3462 *byte_cnt = txqi->tin.backlog_bytes + frag_bytes;
3457} 3463}
3458EXPORT_SYMBOL(ieee80211_txq_get_depth); 3464EXPORT_SYMBOL(ieee80211_txq_get_depth);
3465
3466const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS] = {
3467 IEEE80211_WMM_IE_STA_QOSINFO_AC_VO,
3468 IEEE80211_WMM_IE_STA_QOSINFO_AC_VI,
3469 IEEE80211_WMM_IE_STA_QOSINFO_AC_BE,
3470 IEEE80211_WMM_IE_STA_QOSINFO_AC_BK
3471};
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 9eb0aee9105b..3e3d3014e9ab 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -236,26 +236,35 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata,
236{ 236{
237 struct ieee80211_hdr *hdr = (void *)skb->data; 237 struct ieee80211_hdr *hdr = (void *)skb->data;
238 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 238 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
239 u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
240 u8 flags;
239 u8 *p; 241 u8 *p;
240 u8 ack_policy, tid;
241 242
242 if (!ieee80211_is_data_qos(hdr->frame_control)) 243 if (!ieee80211_is_data_qos(hdr->frame_control))
243 return; 244 return;
244 245
245 p = ieee80211_get_qos_ctl(hdr); 246 p = ieee80211_get_qos_ctl(hdr);
246 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
247 247
248 /* preserve EOSP bit */ 248 /* set up the first byte */
249 ack_policy = *p & IEEE80211_QOS_CTL_EOSP; 249
250 /*
251 * preserve everything but the TID and ACK policy
252 * (which we both write here)
253 */
254 flags = *p & ~(IEEE80211_QOS_CTL_TID_MASK |
255 IEEE80211_QOS_CTL_ACK_POLICY_MASK);
250 256
251 if (is_multicast_ether_addr(hdr->addr1) || 257 if (is_multicast_ether_addr(hdr->addr1) ||
252 sdata->noack_map & BIT(tid)) { 258 sdata->noack_map & BIT(tid)) {
253 ack_policy |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK; 259 flags |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK;
254 info->flags |= IEEE80211_TX_CTL_NO_ACK; 260 info->flags |= IEEE80211_TX_CTL_NO_ACK;
255 } 261 }
256 262
257 /* qos header is 2 bytes */ 263 *p = flags | tid;
258 *p++ = ack_policy | tid; 264
265 /* set up the second byte */
266 p++;
267
259 if (ieee80211_vif_is_mesh(&sdata->vif)) { 268 if (ieee80211_vif_is_mesh(&sdata->vif)) {
260 /* preserve RSPI and Mesh PS Level bit */ 269 /* preserve RSPI and Mesh PS Level bit */
261 *p &= ((IEEE80211_QOS_CTL_RSPI | 270 *p &= ((IEEE80211_QOS_CTL_RSPI |
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 42ce9bd4426f..8af6dd388d11 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -57,7 +57,7 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
57 57
58 if (info->control.hw_key && 58 if (info->control.hw_key &&
59 (info->flags & IEEE80211_TX_CTL_DONTFRAG || 59 (info->flags & IEEE80211_TX_CTL_DONTFRAG ||
60 tx->local->ops->set_frag_threshold) && 60 ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) &&
61 !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { 61 !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) {
62 /* hwaccel - with no need for SW-generated MMIC */ 62 /* hwaccel - with no need for SW-generated MMIC */
63 return TX_CONTINUE; 63 return TX_CONTINUE;
diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile
index 17a51e8389e2..5857bb1e1695 100644
--- a/net/mac802154/Makefile
+++ b/net/mac802154/Makefile
@@ -3,5 +3,3 @@ mac802154-objs := main.o rx.o tx.o mac_cmd.o mib.o \
3 iface.o llsec.o util.o cfg.o trace.o 3 iface.o llsec.o util.o cfg.o trace.o
4 4
5CFLAGS_trace.o := -I$(src) 5CFLAGS_trace.o := -I$(src)
6
7ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/mac802154/util.c b/net/mac802154/util.c
index f9fd0957ab67..7c03fb0ea34c 100644
--- a/net/mac802154/util.c
+++ b/net/mac802154/util.c
@@ -80,11 +80,11 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
80 80
81 if (skb->len > max_sifs_size) 81 if (skb->len > max_sifs_size)
82 hrtimer_start(&local->ifs_timer, 82 hrtimer_start(&local->ifs_timer,
83 ktime_set(0, hw->phy->lifs_period * NSEC_PER_USEC), 83 hw->phy->lifs_period * NSEC_PER_USEC,
84 HRTIMER_MODE_REL); 84 HRTIMER_MODE_REL);
85 else 85 else
86 hrtimer_start(&local->ifs_timer, 86 hrtimer_start(&local->ifs_timer,
87 ktime_set(0, hw->phy->sifs_period * NSEC_PER_USEC), 87 hw->phy->sifs_period * NSEC_PER_USEC,
88 HRTIMER_MODE_REL); 88 HRTIMER_MODE_REL);
89 } else { 89 } else {
90 ieee802154_wake_queue(hw); 90 ieee802154_wake_queue(hw);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index cf52cf30ac4b..2f7ccd934416 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -133,7 +133,6 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla,
133 struct mpls_iptunnel_encap *tun_encap_info; 133 struct mpls_iptunnel_encap *tun_encap_info;
134 struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1]; 134 struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
135 struct lwtunnel_state *newts; 135 struct lwtunnel_state *newts;
136 int tun_encap_info_len;
137 int ret; 136 int ret;
138 137
139 ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, 138 ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla,
@@ -144,13 +143,11 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla,
144 if (!tb[MPLS_IPTUNNEL_DST]) 143 if (!tb[MPLS_IPTUNNEL_DST])
145 return -EINVAL; 144 return -EINVAL;
146 145
147 tun_encap_info_len = sizeof(*tun_encap_info);
148 146
149 newts = lwtunnel_state_alloc(tun_encap_info_len); 147 newts = lwtunnel_state_alloc(sizeof(*tun_encap_info));
150 if (!newts) 148 if (!newts)
151 return -ENOMEM; 149 return -ENOMEM;
152 150
153 newts->len = tun_encap_info_len;
154 tun_encap_info = mpls_lwtunnel_encap(newts); 151 tun_encap_info = mpls_lwtunnel_encap(newts);
155 ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS, 152 ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
156 &tun_encap_info->labels, tun_encap_info->label); 153 &tun_encap_info->labels, tun_encap_info->label);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e8d56d9a4df2..63729b489c2c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -57,6 +57,10 @@ config NF_CONNTRACK
57config NF_LOG_COMMON 57config NF_LOG_COMMON
58 tristate 58 tristate
59 59
60config NF_LOG_NETDEV
61 tristate "Netdev packet logging"
62 select NF_LOG_COMMON
63
60if NF_CONNTRACK 64if NF_CONNTRACK
61 65
62config NF_CONNTRACK_MARK 66config NF_CONNTRACK_MARK
@@ -142,38 +146,38 @@ config NF_CONNTRACK_LABELS
142 to connection tracking entries. It selected by the connlabel match. 146 to connection tracking entries. It selected by the connlabel match.
143 147
144config NF_CT_PROTO_DCCP 148config NF_CT_PROTO_DCCP
145 tristate 'DCCP protocol connection tracking support' 149 bool 'DCCP protocol connection tracking support'
146 depends on NETFILTER_ADVANCED 150 depends on NETFILTER_ADVANCED
147 default IP_DCCP 151 default y
148 help 152 help
149 With this option enabled, the layer 3 independent connection 153 With this option enabled, the layer 3 independent connection
150 tracking code will be able to do state tracking on DCCP connections. 154 tracking code will be able to do state tracking on DCCP connections.
151 155
152 If unsure, say 'N'. 156 If unsure, say Y.
153 157
154config NF_CT_PROTO_GRE 158config NF_CT_PROTO_GRE
155 tristate 159 tristate
156 160
157config NF_CT_PROTO_SCTP 161config NF_CT_PROTO_SCTP
158 tristate 'SCTP protocol connection tracking support' 162 bool 'SCTP protocol connection tracking support'
159 depends on NETFILTER_ADVANCED 163 depends on NETFILTER_ADVANCED
160 default IP_SCTP 164 default y
161 help 165 help
162 With this option enabled, the layer 3 independent connection 166 With this option enabled, the layer 3 independent connection
163 tracking code will be able to do state tracking on SCTP connections. 167 tracking code will be able to do state tracking on SCTP connections.
164 168
165 If you want to compile it as a module, say M here and read 169 If unsure, say Y.
166 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
167 170
168config NF_CT_PROTO_UDPLITE 171config NF_CT_PROTO_UDPLITE
169 tristate 'UDP-Lite protocol connection tracking support' 172 bool 'UDP-Lite protocol connection tracking support'
170 depends on NETFILTER_ADVANCED 173 depends on NETFILTER_ADVANCED
174 default y
171 help 175 help
172 With this option enabled, the layer 3 independent connection 176 With this option enabled, the layer 3 independent connection
173 tracking code will be able to do state tracking on UDP-Lite 177 tracking code will be able to do state tracking on UDP-Lite
174 connections. 178 connections.
175 179
176 To compile it as a module, choose M here. If unsure, say N. 180 If unsure, say Y.
177 181
178config NF_CONNTRACK_AMANDA 182config NF_CONNTRACK_AMANDA
179 tristate "Amanda backup protocol support" 183 tristate "Amanda backup protocol support"
@@ -380,17 +384,17 @@ config NF_NAT_NEEDED
380 default y 384 default y
381 385
382config NF_NAT_PROTO_DCCP 386config NF_NAT_PROTO_DCCP
383 tristate 387 bool
384 depends on NF_NAT && NF_CT_PROTO_DCCP 388 depends on NF_NAT && NF_CT_PROTO_DCCP
385 default NF_NAT && NF_CT_PROTO_DCCP 389 default NF_NAT && NF_CT_PROTO_DCCP
386 390
387config NF_NAT_PROTO_UDPLITE 391config NF_NAT_PROTO_UDPLITE
388 tristate 392 bool
389 depends on NF_NAT && NF_CT_PROTO_UDPLITE 393 depends on NF_NAT && NF_CT_PROTO_UDPLITE
390 default NF_NAT && NF_CT_PROTO_UDPLITE 394 default NF_NAT && NF_CT_PROTO_UDPLITE
391 395
392config NF_NAT_PROTO_SCTP 396config NF_NAT_PROTO_SCTP
393 tristate 397 bool
394 default NF_NAT && NF_CT_PROTO_SCTP 398 default NF_NAT && NF_CT_PROTO_SCTP
395 depends on NF_NAT && NF_CT_PROTO_SCTP 399 depends on NF_NAT && NF_CT_PROTO_SCTP
396 select LIBCRC32C 400 select LIBCRC32C
@@ -474,6 +478,12 @@ config NFT_META
474 This option adds the "meta" expression that you can use to match and 478 This option adds the "meta" expression that you can use to match and
475 to set packet metainformation such as the packet mark. 479 to set packet metainformation such as the packet mark.
476 480
481config NFT_RT
482 tristate "Netfilter nf_tables routing module"
483 help
484 This option adds the "rt" expression that you can use to match
485 packet routing information such as the packet nexthop.
486
477config NFT_NUMGEN 487config NFT_NUMGEN
478 tristate "Netfilter nf_tables number generator module" 488 tristate "Netfilter nf_tables number generator module"
479 help 489 help
@@ -541,6 +551,12 @@ config NFT_NAT
541 This option adds the "nat" expression that you can use to perform 551 This option adds the "nat" expression that you can use to perform
542 typical Network Address Translation (NAT) packet transformations. 552 typical Network Address Translation (NAT) packet transformations.
543 553
554config NFT_OBJREF
555 tristate "Netfilter nf_tables stateful object reference module"
556 help
557 This option adds the "objref" expression that allows you to refer to
558 stateful objects, such as counters and quotas.
559
544config NFT_QUEUE 560config NFT_QUEUE
545 depends on NETFILTER_NETLINK_QUEUE 561 depends on NETFILTER_NETLINK_QUEUE
546 tristate "Netfilter nf_tables queue module" 562 tristate "Netfilter nf_tables queue module"
@@ -581,6 +597,19 @@ config NFT_HASH
581 This option adds the "hash" expression that you can use to perform 597 This option adds the "hash" expression that you can use to perform
582 a hash operation on registers. 598 a hash operation on registers.
583 599
600config NFT_FIB
601 tristate
602
603config NFT_FIB_INET
604 depends on NF_TABLES_INET
605 depends on NFT_FIB_IPV4
606 depends on NFT_FIB_IPV6
607 tristate "Netfilter nf_tables fib inet support"
608 help
609 This option allows using the FIB expression from the inet table.
610 The lookup will be delegated to the IPv4 or IPv6 FIB depending
611 on the protocol of the packet.
612
584if NF_TABLES_NETDEV 613if NF_TABLES_NETDEV
585 614
586config NF_DUP_NETDEV 615config NF_DUP_NETDEV
@@ -1409,9 +1438,10 @@ config NETFILTER_XT_MATCH_SOCKET
1409 tristate '"socket" match support' 1438 tristate '"socket" match support'
1410 depends on NETFILTER_XTABLES 1439 depends on NETFILTER_XTABLES
1411 depends on NETFILTER_ADVANCED 1440 depends on NETFILTER_ADVANCED
1412 depends on !NF_CONNTRACK || NF_CONNTRACK
1413 depends on IPV6 || IPV6=n 1441 depends on IPV6 || IPV6=n
1414 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n 1442 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
1443 depends on NF_SOCKET_IPV4
1444 depends on NF_SOCKET_IPV6
1415 select NF_DEFRAG_IPV4 1445 select NF_DEFRAG_IPV4
1416 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n 1446 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
1417 help 1447 help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c23c3c84416f..ca30d1960f1d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -5,6 +5,9 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
5nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o 5nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
6nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o 6nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
7nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o 7nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
8nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
9nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
10nf_conntrack-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o
8 11
9obj-$(CONFIG_NETFILTER) = netfilter.o 12obj-$(CONFIG_NETFILTER) = netfilter.o
10 13
@@ -16,11 +19,7 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
16# connection tracking 19# connection tracking
17obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o 20obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
18 21
19# SCTP protocol connection tracking
20obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
21obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o 22obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
22obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
23obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o
24 23
25# netlink interface for nf_conntrack 24# netlink interface for nf_conntrack
26obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o 25obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o
@@ -45,17 +44,20 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
45nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ 44nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
46 nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o 45 nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
47 46
47# NAT protocols (nf_nat)
48nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
49nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
50nf_nat-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
51
48# generic transport layer logging 52# generic transport layer logging
49obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o 53obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
50 54
55# packet logging for netdev family
56obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o
57
51obj-$(CONFIG_NF_NAT) += nf_nat.o 58obj-$(CONFIG_NF_NAT) += nf_nat.o
52obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o 59obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
53 60
54# NAT protocols (nf_nat)
55obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
56obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
57obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
58
59# NAT helpers 61# NAT helpers
60obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o 62obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
61obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o 63obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
@@ -81,10 +83,12 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o
81obj-$(CONFIG_NFT_COMPAT) += nft_compat.o 83obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
82obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o 84obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
83obj-$(CONFIG_NFT_META) += nft_meta.o 85obj-$(CONFIG_NFT_META) += nft_meta.o
86obj-$(CONFIG_NFT_RT) += nft_rt.o
84obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o 87obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
85obj-$(CONFIG_NFT_CT) += nft_ct.o 88obj-$(CONFIG_NFT_CT) += nft_ct.o
86obj-$(CONFIG_NFT_LIMIT) += nft_limit.o 89obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
87obj-$(CONFIG_NFT_NAT) += nft_nat.o 90obj-$(CONFIG_NFT_NAT) += nft_nat.o
91obj-$(CONFIG_NFT_OBJREF) += nft_objref.o
88obj-$(CONFIG_NFT_QUEUE) += nft_queue.o 92obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
89obj-$(CONFIG_NFT_QUOTA) += nft_quota.o 93obj-$(CONFIG_NFT_QUOTA) += nft_quota.o
90obj-$(CONFIG_NFT_REJECT) += nft_reject.o 94obj-$(CONFIG_NFT_REJECT) += nft_reject.o
@@ -96,6 +100,8 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o
96obj-$(CONFIG_NFT_MASQ) += nft_masq.o 100obj-$(CONFIG_NFT_MASQ) += nft_masq.o
97obj-$(CONFIG_NFT_REDIR) += nft_redir.o 101obj-$(CONFIG_NFT_REDIR) += nft_redir.o
98obj-$(CONFIG_NFT_HASH) += nft_hash.o 102obj-$(CONFIG_NFT_HASH) += nft_hash.o
103obj-$(CONFIG_NFT_FIB) += nft_fib.o
104obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
99 105
100# nf_tables netdev 106# nf_tables netdev
101obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o 107obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 004af030ef1a..ce6adfae521a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -102,17 +102,14 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
102 if (!entry) 102 if (!entry)
103 return -ENOMEM; 103 return -ENOMEM;
104 104
105 entry->orig_ops = reg; 105 nf_hook_entry_init(entry, reg);
106 entry->ops = *reg;
107 entry->next = NULL;
108 106
109 mutex_lock(&nf_hook_mutex); 107 mutex_lock(&nf_hook_mutex);
110 108
111 /* Find the spot in the list */ 109 /* Find the spot in the list */
112 while ((p = nf_entry_dereference(*pp)) != NULL) { 110 for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
113 if (reg->priority < p->orig_ops->priority) 111 if (reg->priority < nf_hook_entry_priority(p))
114 break; 112 break;
115 pp = &p->next;
116 } 113 }
117 rcu_assign_pointer(entry->next, p); 114 rcu_assign_pointer(entry->next, p);
118 rcu_assign_pointer(*pp, entry); 115 rcu_assign_pointer(*pp, entry);
@@ -139,12 +136,11 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
139 return; 136 return;
140 137
141 mutex_lock(&nf_hook_mutex); 138 mutex_lock(&nf_hook_mutex);
142 while ((p = nf_entry_dereference(*pp)) != NULL) { 139 for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
143 if (p->orig_ops == reg) { 140 if (nf_hook_entry_ops(p) == reg) {
144 rcu_assign_pointer(*pp, p->next); 141 rcu_assign_pointer(*pp, p->next);
145 break; 142 break;
146 } 143 }
147 pp = &p->next;
148 } 144 }
149 mutex_unlock(&nf_hook_mutex); 145 mutex_unlock(&nf_hook_mutex);
150 if (!p) { 146 if (!p) {
@@ -302,70 +298,40 @@ void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
302} 298}
303EXPORT_SYMBOL(_nf_unregister_hooks); 299EXPORT_SYMBOL(_nf_unregister_hooks);
304 300
305unsigned int nf_iterate(struct sk_buff *skb, 301/* Returns 1 if okfn() needs to be executed by the caller,
306 struct nf_hook_state *state, 302 * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
307 struct nf_hook_entry **entryp) 303int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
304 struct nf_hook_entry *entry)
308{ 305{
309 unsigned int verdict; 306 unsigned int verdict;
307 int ret;
310 308
311 /* 309 do {
312 * The caller must not block between calls to this 310 verdict = nf_hook_entry_hookfn(entry, skb, state);
313 * function because of risk of continuing from deleted element. 311 switch (verdict & NF_VERDICT_MASK) {
314 */ 312 case NF_ACCEPT:
315 while (*entryp) { 313 entry = rcu_dereference(entry->next);
316 if (state->thresh > (*entryp)->ops.priority) { 314 break;
317 *entryp = rcu_dereference((*entryp)->next); 315 case NF_DROP:
318 continue; 316 kfree_skb(skb);
319 } 317 ret = NF_DROP_GETERR(verdict);
320 318 if (ret == 0)
321 /* Optimization: we don't need to hold module 319 ret = -EPERM;
322 reference here, since function can't sleep. --RR */ 320 return ret;
323repeat: 321 case NF_QUEUE:
324 verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); 322 ret = nf_queue(skb, state, &entry, verdict);
325 if (verdict != NF_ACCEPT) { 323 if (ret == 1 && entry)
326#ifdef CONFIG_NETFILTER_DEBUG
327 if (unlikely((verdict & NF_VERDICT_MASK)
328 > NF_MAX_VERDICT)) {
329 NFDEBUG("Evil return from %p(%u).\n",
330 (*entryp)->ops.hook, state->hook);
331 *entryp = rcu_dereference((*entryp)->next);
332 continue; 324 continue;
333 } 325 return ret;
334#endif 326 default:
335 if (verdict != NF_REPEAT) 327 /* Implicit handling for NF_STOLEN, as well as any other
336 return verdict; 328 * non conventional verdicts.
337 goto repeat; 329 */
330 return 0;
338 } 331 }
339 *entryp = rcu_dereference((*entryp)->next); 332 } while (entry);
340 }
341 return NF_ACCEPT;
342}
343
344 333
345/* Returns 1 if okfn() needs to be executed by the caller, 334 return 1;
346 * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
347int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
348{
349 struct nf_hook_entry *entry;
350 unsigned int verdict;
351 int ret = 0;
352
353 entry = rcu_dereference(state->hook_entries);
354next_hook:
355 verdict = nf_iterate(skb, state, &entry);
356 if (verdict == NF_ACCEPT || verdict == NF_STOP) {
357 ret = 1;
358 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
359 kfree_skb(skb);
360 ret = NF_DROP_GETERR(verdict);
361 if (ret == 0)
362 ret = -EPERM;
363 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
364 ret = nf_queue(skb, state, &entry, verdict);
365 if (ret == 1 && entry)
366 goto next_hook;
367 }
368 return ret;
369} 335}
370EXPORT_SYMBOL(nf_hook_slow); 336EXPORT_SYMBOL(nf_hook_slow);
371 337
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 234a8ec82076..4083a8051f0f 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -99,6 +99,15 @@ config IP_SET_HASH_IPPORTNET
99 99
100 To compile it as a module, choose M here. If unsure, say N. 100 To compile it as a module, choose M here. If unsure, say N.
101 101
102config IP_SET_HASH_IPMAC
103 tristate "hash:ip,mac set support"
104 depends on IP_SET
105 help
106 This option adds the hash:ip,mac set type support, by which
107 one can store IPv4/IPv6 address and MAC (ethernet address) pairs in a set.
108
109 To compile it as a module, choose M here. If unsure, say N.
110
102config IP_SET_HASH_MAC 111config IP_SET_HASH_MAC
103 tristate "hash:mac set support" 112 tristate "hash:mac set support"
104 depends on IP_SET 113 depends on IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 3dbd5e958489..28ec148df02d 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o
14 14
15# hash types 15# hash types
16obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o 16obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
17obj-$(CONFIG_IP_SET_HASH_IPMAC) += ip_set_hash_ipmac.o
17obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o 18obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
18obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o 19obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
19obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o 20obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 2e8e7e5fb4a6..6f09a99298cd 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -22,6 +22,7 @@
22#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) 22#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt)
23#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) 23#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
24#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) 24#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy)
25#define mtype_memsize IPSET_TOKEN(MTYPE, _memsize)
25#define mtype_flush IPSET_TOKEN(MTYPE, _flush) 26#define mtype_flush IPSET_TOKEN(MTYPE, _flush)
26#define mtype_head IPSET_TOKEN(MTYPE, _head) 27#define mtype_head IPSET_TOKEN(MTYPE, _head)
27#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) 28#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set)
@@ -40,11 +41,8 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
40{ 41{
41 struct mtype *map = set->data; 42 struct mtype *map = set->data;
42 43
43 init_timer(&map->gc); 44 setup_timer(&map->gc, gc, (unsigned long)set);
44 map->gc.data = (unsigned long)set; 45 mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
45 map->gc.function = gc;
46 map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
47 add_timer(&map->gc);
48} 46}
49 47
50static void 48static void
@@ -82,6 +80,16 @@ mtype_flush(struct ip_set *set)
82 if (set->extensions & IPSET_EXT_DESTROY) 80 if (set->extensions & IPSET_EXT_DESTROY)
83 mtype_ext_cleanup(set); 81 mtype_ext_cleanup(set);
84 memset(map->members, 0, map->memsize); 82 memset(map->members, 0, map->memsize);
83 set->elements = 0;
84 set->ext_size = 0;
85}
86
87/* Calculate the actual memory size of the set data */
88static size_t
89mtype_memsize(const struct mtype *map, size_t dsize)
90{
91 return sizeof(*map) + map->memsize +
92 map->elements * dsize;
85} 93}
86 94
87static int 95static int
@@ -89,14 +97,15 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
89{ 97{
90 const struct mtype *map = set->data; 98 const struct mtype *map = set->data;
91 struct nlattr *nested; 99 struct nlattr *nested;
92 size_t memsize = sizeof(*map) + map->memsize; 100 size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size;
93 101
94 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 102 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
95 if (!nested) 103 if (!nested)
96 goto nla_put_failure; 104 goto nla_put_failure;
97 if (mtype_do_head(skb, map) || 105 if (mtype_do_head(skb, map) ||
98 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || 106 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
99 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) 107 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
108 nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
100 goto nla_put_failure; 109 goto nla_put_failure;
101 if (unlikely(ip_set_put_flags(skb, set))) 110 if (unlikely(ip_set_put_flags(skb, set)))
102 goto nla_put_failure; 111 goto nla_put_failure;
@@ -140,6 +149,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
140 if (ret == IPSET_ADD_FAILED) { 149 if (ret == IPSET_ADD_FAILED) {
141 if (SET_WITH_TIMEOUT(set) && 150 if (SET_WITH_TIMEOUT(set) &&
142 ip_set_timeout_expired(ext_timeout(x, set))) { 151 ip_set_timeout_expired(ext_timeout(x, set))) {
152 set->elements--;
143 ret = 0; 153 ret = 0;
144 } else if (!(flags & IPSET_FLAG_EXIST)) { 154 } else if (!(flags & IPSET_FLAG_EXIST)) {
145 set_bit(e->id, map->members); 155 set_bit(e->id, map->members);
@@ -148,6 +158,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
148 /* Element is re-added, cleanup extensions */ 158 /* Element is re-added, cleanup extensions */
149 ip_set_ext_destroy(set, x); 159 ip_set_ext_destroy(set, x);
150 } 160 }
161 if (ret > 0)
162 set->elements--;
151 163
152 if (SET_WITH_TIMEOUT(set)) 164 if (SET_WITH_TIMEOUT(set))
153#ifdef IP_SET_BITMAP_STORED_TIMEOUT 165#ifdef IP_SET_BITMAP_STORED_TIMEOUT
@@ -159,12 +171,13 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
159 if (SET_WITH_COUNTER(set)) 171 if (SET_WITH_COUNTER(set))
160 ip_set_init_counter(ext_counter(x, set), ext); 172 ip_set_init_counter(ext_counter(x, set), ext);
161 if (SET_WITH_COMMENT(set)) 173 if (SET_WITH_COMMENT(set))
162 ip_set_init_comment(ext_comment(x, set), ext); 174 ip_set_init_comment(set, ext_comment(x, set), ext);
163 if (SET_WITH_SKBINFO(set)) 175 if (SET_WITH_SKBINFO(set))
164 ip_set_init_skbinfo(ext_skbinfo(x, set), ext); 176 ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
165 177
166 /* Activate element */ 178 /* Activate element */
167 set_bit(e->id, map->members); 179 set_bit(e->id, map->members);
180 set->elements++;
168 181
169 return 0; 182 return 0;
170} 183}
@@ -181,6 +194,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
181 return -IPSET_ERR_EXIST; 194 return -IPSET_ERR_EXIST;
182 195
183 ip_set_ext_destroy(set, x); 196 ip_set_ext_destroy(set, x);
197 set->elements--;
184 if (SET_WITH_TIMEOUT(set) && 198 if (SET_WITH_TIMEOUT(set) &&
185 ip_set_timeout_expired(ext_timeout(x, set))) 199 ip_set_timeout_expired(ext_timeout(x, set)))
186 return -IPSET_ERR_EXIST; 200 return -IPSET_ERR_EXIST;
@@ -276,6 +290,7 @@ mtype_gc(unsigned long ul_set)
276 if (ip_set_timeout_expired(ext_timeout(x, set))) { 290 if (ip_set_timeout_expired(ext_timeout(x, set))) {
277 clear_bit(id, map->members); 291 clear_bit(id, map->members);
278 ip_set_ext_destroy(set, x); 292 ip_set_ext_destroy(set, x);
293 set->elements--;
279 } 294 }
280 } 295 }
281 spin_unlock_bh(&set->lock); 296 spin_unlock_bh(&set->lock);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index a748b0c2c981..c296f9b606d4 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -36,7 +36,7 @@ struct ip_set_net {
36 bool is_destroyed; /* all sets are destroyed */ 36 bool is_destroyed; /* all sets are destroyed */
37}; 37};
38 38
39static int ip_set_net_id __read_mostly; 39static unsigned int ip_set_net_id __read_mostly;
40 40
41static inline struct ip_set_net *ip_set_pernet(struct net *net) 41static inline struct ip_set_net *ip_set_pernet(struct net *net)
42{ 42{
@@ -324,7 +324,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
324} 324}
325EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); 325EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
326 326
327typedef void (*destroyer)(void *); 327typedef void (*destroyer)(struct ip_set *, void *);
328/* ipset data extension types, in size order */ 328/* ipset data extension types, in size order */
329 329
330const struct ip_set_ext_type ip_set_extensions[] = { 330const struct ip_set_ext_type ip_set_extensions[] = {
@@ -426,20 +426,20 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
426 if (!SET_WITH_SKBINFO(set)) 426 if (!SET_WITH_SKBINFO(set))
427 return -IPSET_ERR_SKBINFO; 427 return -IPSET_ERR_SKBINFO;
428 fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK])); 428 fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
429 ext->skbmark = fullmark >> 32; 429 ext->skbinfo.skbmark = fullmark >> 32;
430 ext->skbmarkmask = fullmark & 0xffffffff; 430 ext->skbinfo.skbmarkmask = fullmark & 0xffffffff;
431 } 431 }
432 if (tb[IPSET_ATTR_SKBPRIO]) { 432 if (tb[IPSET_ATTR_SKBPRIO]) {
433 if (!SET_WITH_SKBINFO(set)) 433 if (!SET_WITH_SKBINFO(set))
434 return -IPSET_ERR_SKBINFO; 434 return -IPSET_ERR_SKBINFO;
435 ext->skbprio = be32_to_cpu(nla_get_be32( 435 ext->skbinfo.skbprio =
436 tb[IPSET_ATTR_SKBPRIO])); 436 be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO]));
437 } 437 }
438 if (tb[IPSET_ATTR_SKBQUEUE]) { 438 if (tb[IPSET_ATTR_SKBQUEUE]) {
439 if (!SET_WITH_SKBINFO(set)) 439 if (!SET_WITH_SKBINFO(set))
440 return -IPSET_ERR_SKBINFO; 440 return -IPSET_ERR_SKBINFO;
441 ext->skbqueue = be16_to_cpu(nla_get_be16( 441 ext->skbinfo.skbqueue =
442 tb[IPSET_ATTR_SKBQUEUE])); 442 be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE]));
443 } 443 }
444 return 0; 444 return 0;
445} 445}
@@ -541,7 +541,7 @@ int
541ip_set_test(ip_set_id_t index, const struct sk_buff *skb, 541ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
542 const struct xt_action_param *par, struct ip_set_adt_opt *opt) 542 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
543{ 543{
544 struct ip_set *set = ip_set_rcu_get(par->net, index); 544 struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
545 int ret = 0; 545 int ret = 0;
546 546
547 BUG_ON(!set); 547 BUG_ON(!set);
@@ -579,7 +579,7 @@ int
579ip_set_add(ip_set_id_t index, const struct sk_buff *skb, 579ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
580 const struct xt_action_param *par, struct ip_set_adt_opt *opt) 580 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
581{ 581{
582 struct ip_set *set = ip_set_rcu_get(par->net, index); 582 struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
583 int ret; 583 int ret;
584 584
585 BUG_ON(!set); 585 BUG_ON(!set);
@@ -601,7 +601,7 @@ int
601ip_set_del(ip_set_id_t index, const struct sk_buff *skb, 601ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
602 const struct xt_action_param *par, struct ip_set_adt_opt *opt) 602 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
603{ 603{
604 struct ip_set *set = ip_set_rcu_get(par->net, index); 604 struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
605 int ret = 0; 605 int ret = 0;
606 606
607 BUG_ON(!set); 607 BUG_ON(!set);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index d32fd6b036bf..1b05d4a7d5a1 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -85,6 +85,8 @@ struct htable {
85}; 85};
86 86
87#define hbucket(h, i) ((h)->bucket[i]) 87#define hbucket(h, i) ((h)->bucket[i])
88#define ext_size(n, dsize) \
89 (sizeof(struct hbucket) + (n) * (dsize))
88 90
89#ifndef IPSET_NET_COUNT 91#ifndef IPSET_NET_COUNT
90#define IPSET_NET_COUNT 1 92#define IPSET_NET_COUNT 1
@@ -150,24 +152,34 @@ htable_bits(u32 hashsize)
150#define INIT_CIDR(cidr, host_mask) \ 152#define INIT_CIDR(cidr, host_mask) \
151 DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask)) 153 DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))
152 154
153#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128)
154
155#ifdef IP_SET_HASH_WITH_NET0 155#ifdef IP_SET_HASH_WITH_NET0
156/* cidr from 0 to SET_HOST_MASK() value and c = cidr + 1 */ 156/* cidr from 0 to HOST_MASK value and c = cidr + 1 */
157#define NLEN(family) (SET_HOST_MASK(family) + 1) 157#define NLEN (HOST_MASK + 1)
158#define CIDR_POS(c) ((c) - 1) 158#define CIDR_POS(c) ((c) - 1)
159#else 159#else
160/* cidr from 1 to SET_HOST_MASK() value and c = cidr + 1 */ 160/* cidr from 1 to HOST_MASK value and c = cidr + 1 */
161#define NLEN(family) SET_HOST_MASK(family) 161#define NLEN HOST_MASK
162#define CIDR_POS(c) ((c) - 2) 162#define CIDR_POS(c) ((c) - 2)
163#endif 163#endif
164 164
165#else 165#else
166#define NLEN(family) 0 166#define NLEN 0
167#endif /* IP_SET_HASH_WITH_NETS */ 167#endif /* IP_SET_HASH_WITH_NETS */
168 168
169#endif /* _IP_SET_HASH_GEN_H */ 169#endif /* _IP_SET_HASH_GEN_H */
170 170
171#ifndef MTYPE
172#error "MTYPE is not defined!"
173#endif
174
175#ifndef HTYPE
176#error "HTYPE is not defined!"
177#endif
178
179#ifndef HOST_MASK
180#error "HOST_MASK is not defined!"
181#endif
182
171/* Family dependent templates */ 183/* Family dependent templates */
172 184
173#undef ahash_data 185#undef ahash_data
@@ -191,7 +203,6 @@ htable_bits(u32 hashsize)
191#undef mtype_same_set 203#undef mtype_same_set
192#undef mtype_kadt 204#undef mtype_kadt
193#undef mtype_uadt 205#undef mtype_uadt
194#undef mtype
195 206
196#undef mtype_add 207#undef mtype_add
197#undef mtype_del 208#undef mtype_del
@@ -207,6 +218,7 @@ htable_bits(u32 hashsize)
207#undef mtype_variant 218#undef mtype_variant
208#undef mtype_data_match 219#undef mtype_data_match
209 220
221#undef htype
210#undef HKEY 222#undef HKEY
211 223
212#define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal) 224#define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal)
@@ -233,7 +245,6 @@ htable_bits(u32 hashsize)
233#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) 245#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set)
234#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) 246#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt)
235#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) 247#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
236#define mtype MTYPE
237 248
238#define mtype_add IPSET_TOKEN(MTYPE, _add) 249#define mtype_add IPSET_TOKEN(MTYPE, _add)
239#define mtype_del IPSET_TOKEN(MTYPE, _del) 250#define mtype_del IPSET_TOKEN(MTYPE, _del)
@@ -249,62 +260,54 @@ htable_bits(u32 hashsize)
249#define mtype_variant IPSET_TOKEN(MTYPE, _variant) 260#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
250#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) 261#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
251 262
252#ifndef MTYPE
253#error "MTYPE is not defined!"
254#endif
255
256#ifndef HOST_MASK
257#error "HOST_MASK is not defined!"
258#endif
259
260#ifndef HKEY_DATALEN 263#ifndef HKEY_DATALEN
261#define HKEY_DATALEN sizeof(struct mtype_elem) 264#define HKEY_DATALEN sizeof(struct mtype_elem)
262#endif 265#endif
263 266
264#define HKEY(data, initval, htable_bits) \ 267#define htype MTYPE
265(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval) \
266 & jhash_mask(htable_bits))
267 268
268#ifndef htype 269#define HKEY(data, initval, htable_bits) \
269#ifndef HTYPE 270({ \
270#error "HTYPE is not defined!" 271 const u32 *__k = (const u32 *)data; \
271#endif /* HTYPE */ 272 u32 __l = HKEY_DATALEN / sizeof(u32); \
272#define htype HTYPE 273 \
274 BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0); \
275 \
276 jhash2(__k, __l, initval) & jhash_mask(htable_bits); \
277})
273 278
274/* The generic hash structure */ 279/* The generic hash structure */
275struct htype { 280struct htype {
276 struct htable __rcu *table; /* the hash table */ 281 struct htable __rcu *table; /* the hash table */
282 struct timer_list gc; /* garbage collection when timeout enabled */
277 u32 maxelem; /* max elements in the hash */ 283 u32 maxelem; /* max elements in the hash */
278 u32 elements; /* current element (vs timeout) */
279 u32 initval; /* random jhash init value */ 284 u32 initval; /* random jhash init value */
280#ifdef IP_SET_HASH_WITH_MARKMASK 285#ifdef IP_SET_HASH_WITH_MARKMASK
281 u32 markmask; /* markmask value for mark mask to store */ 286 u32 markmask; /* markmask value for mark mask to store */
282#endif 287#endif
283 struct timer_list gc; /* garbage collection when timeout enabled */
284 struct mtype_elem next; /* temporary storage for uadd */
285#ifdef IP_SET_HASH_WITH_MULTI 288#ifdef IP_SET_HASH_WITH_MULTI
286 u8 ahash_max; /* max elements in an array block */ 289 u8 ahash_max; /* max elements in an array block */
287#endif 290#endif
288#ifdef IP_SET_HASH_WITH_NETMASK 291#ifdef IP_SET_HASH_WITH_NETMASK
289 u8 netmask; /* netmask value for subnets to store */ 292 u8 netmask; /* netmask value for subnets to store */
290#endif 293#endif
294 struct mtype_elem next; /* temporary storage for uadd */
291#ifdef IP_SET_HASH_WITH_NETS 295#ifdef IP_SET_HASH_WITH_NETS
292 struct net_prefixes nets[0]; /* book-keeping of prefixes */ 296 struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
293#endif 297#endif
294}; 298};
295#endif /* htype */
296 299
297#ifdef IP_SET_HASH_WITH_NETS 300#ifdef IP_SET_HASH_WITH_NETS
298/* Network cidr size book keeping when the hash stores different 301/* Network cidr size book keeping when the hash stores different
299 * sized networks. cidr == real cidr + 1 to support /0. 302 * sized networks. cidr == real cidr + 1 to support /0.
300 */ 303 */
301static void 304static void
302mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) 305mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
303{ 306{
304 int i, j; 307 int i, j;
305 308
306 /* Add in increasing prefix order, so larger cidr first */ 309 /* Add in increasing prefix order, so larger cidr first */
307 for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) { 310 for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
308 if (j != -1) { 311 if (j != -1) {
309 continue; 312 continue;
310 } else if (h->nets[i].cidr[n] < cidr) { 313 } else if (h->nets[i].cidr[n] < cidr) {
@@ -323,11 +326,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
323} 326}
324 327
325static void 328static void
326mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) 329mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
327{ 330{
328 u8 i, j, net_end = nets_length - 1; 331 u8 i, j, net_end = NLEN - 1;
329 332
330 for (i = 0; i < nets_length; i++) { 333 for (i = 0; i < NLEN; i++) {
331 if (h->nets[i].cidr[n] != cidr) 334 if (h->nets[i].cidr[n] != cidr)
332 continue; 335 continue;
333 h->nets[CIDR_POS(cidr)].nets[n]--; 336 h->nets[CIDR_POS(cidr)].nets[n]--;
@@ -343,24 +346,9 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
343 346
344/* Calculate the actual memory size of the set data */ 347/* Calculate the actual memory size of the set data */
345static size_t 348static size_t
346mtype_ahash_memsize(const struct htype *h, const struct htable *t, 349mtype_ahash_memsize(const struct htype *h, const struct htable *t)
347 u8 nets_length, size_t dsize)
348{ 350{
349 u32 i; 351 return sizeof(*h) + sizeof(*t);
350 struct hbucket *n;
351 size_t memsize = sizeof(*h) + sizeof(*t);
352
353#ifdef IP_SET_HASH_WITH_NETS
354 memsize += sizeof(struct net_prefixes) * nets_length;
355#endif
356 for (i = 0; i < jhash_size(t->htable_bits); i++) {
357 n = rcu_dereference_bh(hbucket(t, i));
358 if (!n)
359 continue;
360 memsize += sizeof(struct hbucket) + n->size * dsize;
361 }
362
363 return memsize;
364} 352}
365 353
366/* Get the ith element from the array block n */ 354/* Get the ith element from the array block n */
@@ -398,9 +386,10 @@ mtype_flush(struct ip_set *set)
398 kfree_rcu(n, rcu); 386 kfree_rcu(n, rcu);
399 } 387 }
400#ifdef IP_SET_HASH_WITH_NETS 388#ifdef IP_SET_HASH_WITH_NETS
401 memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); 389 memset(h->nets, 0, sizeof(h->nets));
402#endif 390#endif
403 h->elements = 0; 391 set->elements = 0;
392 set->ext_size = 0;
404} 393}
405 394
406/* Destroy the hashtable part of the set */ 395/* Destroy the hashtable part of the set */
@@ -444,11 +433,8 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
444{ 433{
445 struct htype *h = set->data; 434 struct htype *h = set->data;
446 435
447 init_timer(&h->gc); 436 setup_timer(&h->gc, gc, (unsigned long)set);
448 h->gc.data = (unsigned long)set; 437 mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
449 h->gc.function = gc;
450 h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
451 add_timer(&h->gc);
452 pr_debug("gc initialized, run in every %u\n", 438 pr_debug("gc initialized, run in every %u\n",
453 IPSET_GC_PERIOD(set->timeout)); 439 IPSET_GC_PERIOD(set->timeout));
454} 440}
@@ -473,12 +459,13 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
473 459
474/* Delete expired elements from the hashtable */ 460/* Delete expired elements from the hashtable */
475static void 461static void
476mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) 462mtype_expire(struct ip_set *set, struct htype *h)
477{ 463{
478 struct htable *t; 464 struct htable *t;
479 struct hbucket *n, *tmp; 465 struct hbucket *n, *tmp;
480 struct mtype_elem *data; 466 struct mtype_elem *data;
481 u32 i, j, d; 467 u32 i, j, d;
468 size_t dsize = set->dsize;
482#ifdef IP_SET_HASH_WITH_NETS 469#ifdef IP_SET_HASH_WITH_NETS
483 u8 k; 470 u8 k;
484#endif 471#endif
@@ -494,21 +481,20 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
494 continue; 481 continue;
495 } 482 }
496 data = ahash_data(n, j, dsize); 483 data = ahash_data(n, j, dsize);
497 if (ip_set_timeout_expired(ext_timeout(data, set))) { 484 if (!ip_set_timeout_expired(ext_timeout(data, set)))
498 pr_debug("expired %u/%u\n", i, j); 485 continue;
499 clear_bit(j, n->used); 486 pr_debug("expired %u/%u\n", i, j);
500 smp_mb__after_atomic(); 487 clear_bit(j, n->used);
488 smp_mb__after_atomic();
501#ifdef IP_SET_HASH_WITH_NETS 489#ifdef IP_SET_HASH_WITH_NETS
502 for (k = 0; k < IPSET_NET_COUNT; k++) 490 for (k = 0; k < IPSET_NET_COUNT; k++)
503 mtype_del_cidr(h, 491 mtype_del_cidr(h,
504 NCIDR_PUT(DCIDR_GET(data->cidr, 492 NCIDR_PUT(DCIDR_GET(data->cidr, k)),
505 k)), 493 k);
506 nets_length, k);
507#endif 494#endif
508 ip_set_ext_destroy(set, data); 495 ip_set_ext_destroy(set, data);
509 h->elements--; 496 set->elements--;
510 d++; 497 d++;
511 }
512 } 498 }
513 if (d >= AHASH_INIT_SIZE) { 499 if (d >= AHASH_INIT_SIZE) {
514 if (d >= n->size) { 500 if (d >= n->size) {
@@ -532,6 +518,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
532 d++; 518 d++;
533 } 519 }
534 tmp->pos = d; 520 tmp->pos = d;
521 set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
535 rcu_assign_pointer(hbucket(t, i), tmp); 522 rcu_assign_pointer(hbucket(t, i), tmp);
536 kfree_rcu(n, rcu); 523 kfree_rcu(n, rcu);
537 } 524 }
@@ -546,7 +533,7 @@ mtype_gc(unsigned long ul_set)
546 533
547 pr_debug("called\n"); 534 pr_debug("called\n");
548 spin_lock_bh(&set->lock); 535 spin_lock_bh(&set->lock);
549 mtype_expire(set, h, NLEN(set->family), set->dsize); 536 mtype_expire(set, h);
550 spin_unlock_bh(&set->lock); 537 spin_unlock_bh(&set->lock);
551 538
552 h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; 539 h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
@@ -563,7 +550,7 @@ mtype_resize(struct ip_set *set, bool retried)
563 struct htype *h = set->data; 550 struct htype *h = set->data;
564 struct htable *t, *orig; 551 struct htable *t, *orig;
565 u8 htable_bits; 552 u8 htable_bits;
566 size_t dsize = set->dsize; 553 size_t extsize, dsize = set->dsize;
567#ifdef IP_SET_HASH_WITH_NETS 554#ifdef IP_SET_HASH_WITH_NETS
568 u8 flags; 555 u8 flags;
569 struct mtype_elem *tmp; 556 struct mtype_elem *tmp;
@@ -606,6 +593,7 @@ retry:
606 /* There can't be another parallel resizing, but dumping is possible */ 593 /* There can't be another parallel resizing, but dumping is possible */
607 atomic_set(&orig->ref, 1); 594 atomic_set(&orig->ref, 1);
608 atomic_inc(&orig->uref); 595 atomic_inc(&orig->uref);
596 extsize = 0;
609 pr_debug("attempt to resize set %s from %u to %u, t %p\n", 597 pr_debug("attempt to resize set %s from %u to %u, t %p\n",
610 set->name, orig->htable_bits, htable_bits, orig); 598 set->name, orig->htable_bits, htable_bits, orig);
611 for (i = 0; i < jhash_size(orig->htable_bits); i++) { 599 for (i = 0; i < jhash_size(orig->htable_bits); i++) {
@@ -636,6 +624,7 @@ retry:
636 goto cleanup; 624 goto cleanup;
637 } 625 }
638 m->size = AHASH_INIT_SIZE; 626 m->size = AHASH_INIT_SIZE;
627 extsize = ext_size(AHASH_INIT_SIZE, dsize);
639 RCU_INIT_POINTER(hbucket(t, key), m); 628 RCU_INIT_POINTER(hbucket(t, key), m);
640 } else if (m->pos >= m->size) { 629 } else if (m->pos >= m->size) {
641 struct hbucket *ht; 630 struct hbucket *ht;
@@ -655,6 +644,7 @@ retry:
655 memcpy(ht, m, sizeof(struct hbucket) + 644 memcpy(ht, m, sizeof(struct hbucket) +
656 m->size * dsize); 645 m->size * dsize);
657 ht->size = m->size + AHASH_INIT_SIZE; 646 ht->size = m->size + AHASH_INIT_SIZE;
647 extsize += ext_size(AHASH_INIT_SIZE, dsize);
658 kfree(m); 648 kfree(m);
659 m = ht; 649 m = ht;
660 RCU_INIT_POINTER(hbucket(t, key), ht); 650 RCU_INIT_POINTER(hbucket(t, key), ht);
@@ -668,6 +658,7 @@ retry:
668 } 658 }
669 } 659 }
670 rcu_assign_pointer(h->table, t); 660 rcu_assign_pointer(h->table, t);
661 set->ext_size = extsize;
671 662
672 spin_unlock_bh(&set->lock); 663 spin_unlock_bh(&set->lock);
673 664
@@ -715,11 +706,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
715 bool deleted = false, forceadd = false, reuse = false; 706 bool deleted = false, forceadd = false, reuse = false;
716 u32 key, multi = 0; 707 u32 key, multi = 0;
717 708
718 if (h->elements >= h->maxelem) { 709 if (set->elements >= h->maxelem) {
719 if (SET_WITH_TIMEOUT(set)) 710 if (SET_WITH_TIMEOUT(set))
720 /* FIXME: when set is full, we slow down here */ 711 /* FIXME: when set is full, we slow down here */
721 mtype_expire(set, h, NLEN(set->family), set->dsize); 712 mtype_expire(set, h);
722 if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) 713 if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set))
723 forceadd = true; 714 forceadd = true;
724 } 715 }
725 716
@@ -727,20 +718,15 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
727 key = HKEY(value, h->initval, t->htable_bits); 718 key = HKEY(value, h->initval, t->htable_bits);
728 n = __ipset_dereference_protected(hbucket(t, key), 1); 719 n = __ipset_dereference_protected(hbucket(t, key), 1);
729 if (!n) { 720 if (!n) {
730 if (forceadd) { 721 if (forceadd || set->elements >= h->maxelem)
731 if (net_ratelimit())
732 pr_warn("Set %s is full, maxelem %u reached\n",
733 set->name, h->maxelem);
734 return -IPSET_ERR_HASH_FULL;
735 } else if (h->elements >= h->maxelem) {
736 goto set_full; 722 goto set_full;
737 }
738 old = NULL; 723 old = NULL;
739 n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize, 724 n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
740 GFP_ATOMIC); 725 GFP_ATOMIC);
741 if (!n) 726 if (!n)
742 return -ENOMEM; 727 return -ENOMEM;
743 n->size = AHASH_INIT_SIZE; 728 n->size = AHASH_INIT_SIZE;
729 set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
744 goto copy_elem; 730 goto copy_elem;
745 } 731 }
746 for (i = 0; i < n->pos; i++) { 732 for (i = 0; i < n->pos; i++) {
@@ -778,14 +764,14 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
778 for (i = 0; i < IPSET_NET_COUNT; i++) 764 for (i = 0; i < IPSET_NET_COUNT; i++)
779 mtype_del_cidr(h, 765 mtype_del_cidr(h,
780 NCIDR_PUT(DCIDR_GET(data->cidr, i)), 766 NCIDR_PUT(DCIDR_GET(data->cidr, i)),
781 NLEN(set->family), i); 767 i);
782#endif 768#endif
783 ip_set_ext_destroy(set, data); 769 ip_set_ext_destroy(set, data);
784 h->elements--; 770 set->elements--;
785 } 771 }
786 goto copy_data; 772 goto copy_data;
787 } 773 }
788 if (h->elements >= h->maxelem) 774 if (set->elements >= h->maxelem)
789 goto set_full; 775 goto set_full;
790 /* Create a new slot */ 776 /* Create a new slot */
791 if (n->pos >= n->size) { 777 if (n->pos >= n->size) {
@@ -804,17 +790,17 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
804 memcpy(n, old, sizeof(struct hbucket) + 790 memcpy(n, old, sizeof(struct hbucket) +
805 old->size * set->dsize); 791 old->size * set->dsize);
806 n->size = old->size + AHASH_INIT_SIZE; 792 n->size = old->size + AHASH_INIT_SIZE;
793 set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
807 } 794 }
808 795
809copy_elem: 796copy_elem:
810 j = n->pos++; 797 j = n->pos++;
811 data = ahash_data(n, j, set->dsize); 798 data = ahash_data(n, j, set->dsize);
812copy_data: 799copy_data:
813 h->elements++; 800 set->elements++;
814#ifdef IP_SET_HASH_WITH_NETS 801#ifdef IP_SET_HASH_WITH_NETS
815 for (i = 0; i < IPSET_NET_COUNT; i++) 802 for (i = 0; i < IPSET_NET_COUNT; i++)
816 mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), 803 mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
817 NLEN(set->family), i);
818#endif 804#endif
819 memcpy(data, d, sizeof(struct mtype_elem)); 805 memcpy(data, d, sizeof(struct mtype_elem));
820overwrite_extensions: 806overwrite_extensions:
@@ -824,7 +810,7 @@ overwrite_extensions:
824 if (SET_WITH_COUNTER(set)) 810 if (SET_WITH_COUNTER(set))
825 ip_set_init_counter(ext_counter(data, set), ext); 811 ip_set_init_counter(ext_counter(data, set), ext);
826 if (SET_WITH_COMMENT(set)) 812 if (SET_WITH_COMMENT(set))
827 ip_set_init_comment(ext_comment(data, set), ext); 813 ip_set_init_comment(set, ext_comment(data, set), ext);
828 if (SET_WITH_SKBINFO(set)) 814 if (SET_WITH_SKBINFO(set))
829 ip_set_init_skbinfo(ext_skbinfo(data, set), ext); 815 ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
830 /* Must come last for the case when timed out entry is reused */ 816 /* Must come last for the case when timed out entry is reused */
@@ -883,11 +869,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
883 smp_mb__after_atomic(); 869 smp_mb__after_atomic();
884 if (i + 1 == n->pos) 870 if (i + 1 == n->pos)
885 n->pos--; 871 n->pos--;
886 h->elements--; 872 set->elements--;
887#ifdef IP_SET_HASH_WITH_NETS 873#ifdef IP_SET_HASH_WITH_NETS
888 for (j = 0; j < IPSET_NET_COUNT; j++) 874 for (j = 0; j < IPSET_NET_COUNT; j++)
889 mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)), 875 mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
890 NLEN(set->family), j); 876 j);
891#endif 877#endif
892 ip_set_ext_destroy(set, data); 878 ip_set_ext_destroy(set, data);
893 879
@@ -896,6 +882,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
896 k++; 882 k++;
897 } 883 }
898 if (n->pos == 0 && k == 0) { 884 if (n->pos == 0 && k == 0) {
885 set->ext_size -= ext_size(n->size, dsize);
899 rcu_assign_pointer(hbucket(t, key), NULL); 886 rcu_assign_pointer(hbucket(t, key), NULL);
900 kfree_rcu(n, rcu); 887 kfree_rcu(n, rcu);
901 } else if (k >= AHASH_INIT_SIZE) { 888 } else if (k >= AHASH_INIT_SIZE) {
@@ -914,6 +901,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
914 k++; 901 k++;
915 } 902 }
916 tmp->pos = k; 903 tmp->pos = k;
904 set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
917 rcu_assign_pointer(hbucket(t, key), tmp); 905 rcu_assign_pointer(hbucket(t, key), tmp);
918 kfree_rcu(n, rcu); 906 kfree_rcu(n, rcu);
919 } 907 }
@@ -957,14 +945,13 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
957 int i, j = 0; 945 int i, j = 0;
958#endif 946#endif
959 u32 key, multi = 0; 947 u32 key, multi = 0;
960 u8 nets_length = NLEN(set->family);
961 948
962 pr_debug("test by nets\n"); 949 pr_debug("test by nets\n");
963 for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) { 950 for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
964#if IPSET_NET_COUNT == 2 951#if IPSET_NET_COUNT == 2
965 mtype_data_reset_elem(d, &orig); 952 mtype_data_reset_elem(d, &orig);
966 mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false); 953 mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
967 for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi; 954 for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi;
968 k++) { 955 k++) {
969 mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]), 956 mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
970 true); 957 true);
@@ -1021,7 +1008,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
1021 * try all possible network sizes 1008 * try all possible network sizes
1022 */ 1009 */
1023 for (i = 0; i < IPSET_NET_COUNT; i++) 1010 for (i = 0; i < IPSET_NET_COUNT; i++)
1024 if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family)) 1011 if (DCIDR_GET(d->cidr, i) != HOST_MASK)
1025 break; 1012 break;
1026 if (i == IPSET_NET_COUNT) { 1013 if (i == IPSET_NET_COUNT) {
1027 ret = mtype_test_cidrs(set, d, ext, mext, flags); 1014 ret = mtype_test_cidrs(set, d, ext, mext, flags);
@@ -1062,7 +1049,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
1062 1049
1063 rcu_read_lock_bh(); 1050 rcu_read_lock_bh();
1064 t = rcu_dereference_bh_nfnl(h->table); 1051 t = rcu_dereference_bh_nfnl(h->table);
1065 memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize); 1052 memsize = mtype_ahash_memsize(h, t) + set->ext_size;
1066 htable_bits = t->htable_bits; 1053 htable_bits = t->htable_bits;
1067 rcu_read_unlock_bh(); 1054 rcu_read_unlock_bh();
1068 1055
@@ -1083,7 +1070,8 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
1083 goto nla_put_failure; 1070 goto nla_put_failure;
1084#endif 1071#endif
1085 if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || 1072 if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
1086 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) 1073 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
1074 nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
1087 goto nla_put_failure; 1075 goto nla_put_failure;
1088 if (unlikely(ip_set_put_flags(skb, set))) 1076 if (unlikely(ip_set_put_flags(skb, set)))
1089 goto nla_put_failure; 1077 goto nla_put_failure;
@@ -1238,41 +1226,35 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1238 struct htype *h; 1226 struct htype *h;
1239 struct htable *t; 1227 struct htable *t;
1240 1228
1229 pr_debug("Create set %s with family %s\n",
1230 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
1231
1241#ifndef IP_SET_PROTO_UNDEF 1232#ifndef IP_SET_PROTO_UNDEF
1242 if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) 1233 if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
1243 return -IPSET_ERR_INVALID_FAMILY; 1234 return -IPSET_ERR_INVALID_FAMILY;
1244#endif 1235#endif
1245 1236
1246#ifdef IP_SET_HASH_WITH_MARKMASK
1247 markmask = 0xffffffff;
1248#endif
1249#ifdef IP_SET_HASH_WITH_NETMASK
1250 netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
1251 pr_debug("Create set %s with family %s\n",
1252 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
1253#endif
1254
1255 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || 1237 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
1256 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || 1238 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
1257 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || 1239 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
1258 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) 1240 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
1259 return -IPSET_ERR_PROTOCOL; 1241 return -IPSET_ERR_PROTOCOL;
1242
1260#ifdef IP_SET_HASH_WITH_MARKMASK 1243#ifdef IP_SET_HASH_WITH_MARKMASK
1261 /* Separated condition in order to avoid directive in argument list */ 1244 /* Separated condition in order to avoid directive in argument list */
1262 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK))) 1245 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
1263 return -IPSET_ERR_PROTOCOL; 1246 return -IPSET_ERR_PROTOCOL;
1264#endif
1265 1247
1266 if (tb[IPSET_ATTR_HASHSIZE]) { 1248 markmask = 0xffffffff;
1267 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); 1249 if (tb[IPSET_ATTR_MARKMASK]) {
1268 if (hashsize < IPSET_MIMINAL_HASHSIZE) 1250 markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
1269 hashsize = IPSET_MIMINAL_HASHSIZE; 1251 if (markmask == 0)
1252 return -IPSET_ERR_INVALID_MARKMASK;
1270 } 1253 }
1271 1254#endif
1272 if (tb[IPSET_ATTR_MAXELEM])
1273 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
1274 1255
1275#ifdef IP_SET_HASH_WITH_NETMASK 1256#ifdef IP_SET_HASH_WITH_NETMASK
1257 netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
1276 if (tb[IPSET_ATTR_NETMASK]) { 1258 if (tb[IPSET_ATTR_NETMASK]) {
1277 netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); 1259 netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
1278 1260
@@ -1282,33 +1264,21 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1282 return -IPSET_ERR_INVALID_NETMASK; 1264 return -IPSET_ERR_INVALID_NETMASK;
1283 } 1265 }
1284#endif 1266#endif
1285#ifdef IP_SET_HASH_WITH_MARKMASK
1286 if (tb[IPSET_ATTR_MARKMASK]) {
1287 markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
1288 1267
1289 if (markmask == 0) 1268 if (tb[IPSET_ATTR_HASHSIZE]) {
1290 return -IPSET_ERR_INVALID_MARKMASK; 1269 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
1270 if (hashsize < IPSET_MIMINAL_HASHSIZE)
1271 hashsize = IPSET_MIMINAL_HASHSIZE;
1291 } 1272 }
1292#endif 1273
1274 if (tb[IPSET_ATTR_MAXELEM])
1275 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
1293 1276
1294 hsize = sizeof(*h); 1277 hsize = sizeof(*h);
1295#ifdef IP_SET_HASH_WITH_NETS
1296 hsize += sizeof(struct net_prefixes) * NLEN(set->family);
1297#endif
1298 h = kzalloc(hsize, GFP_KERNEL); 1278 h = kzalloc(hsize, GFP_KERNEL);
1299 if (!h) 1279 if (!h)
1300 return -ENOMEM; 1280 return -ENOMEM;
1301 1281
1302 h->maxelem = maxelem;
1303#ifdef IP_SET_HASH_WITH_NETMASK
1304 h->netmask = netmask;
1305#endif
1306#ifdef IP_SET_HASH_WITH_MARKMASK
1307 h->markmask = markmask;
1308#endif
1309 get_random_bytes(&h->initval, sizeof(h->initval));
1310 set->timeout = IPSET_NO_TIMEOUT;
1311
1312 hbits = htable_bits(hashsize); 1282 hbits = htable_bits(hashsize);
1313 hsize = htable_size(hbits); 1283 hsize = htable_size(hbits);
1314 if (hsize == 0) { 1284 if (hsize == 0) {
@@ -1320,8 +1290,17 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1320 kfree(h); 1290 kfree(h);
1321 return -ENOMEM; 1291 return -ENOMEM;
1322 } 1292 }
1293 h->maxelem = maxelem;
1294#ifdef IP_SET_HASH_WITH_NETMASK
1295 h->netmask = netmask;
1296#endif
1297#ifdef IP_SET_HASH_WITH_MARKMASK
1298 h->markmask = markmask;
1299#endif
1300 get_random_bytes(&h->initval, sizeof(h->initval));
1301
1323 t->htable_bits = hbits; 1302 t->htable_bits = hbits;
1324 rcu_assign_pointer(h->table, t); 1303 RCU_INIT_POINTER(h->table, t);
1325 1304
1326 set->data = h; 1305 set->data = h;
1327#ifndef IP_SET_PROTO_UNDEF 1306#ifndef IP_SET_PROTO_UNDEF
@@ -1339,6 +1318,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1339 __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem))); 1318 __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
1340 } 1319 }
1341#endif 1320#endif
1321 set->timeout = IPSET_NO_TIMEOUT;
1342 if (tb[IPSET_ATTR_TIMEOUT]) { 1322 if (tb[IPSET_ATTR_TIMEOUT]) {
1343 set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); 1323 set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
1344#ifndef IP_SET_PROTO_UNDEF 1324#ifndef IP_SET_PROTO_UNDEF
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 9d6bf19f7b78..20bfbd315f61 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -82,7 +82,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
82 const struct xt_action_param *par, 82 const struct xt_action_param *par,
83 enum ipset_adt adt, struct ip_set_adt_opt *opt) 83 enum ipset_adt adt, struct ip_set_adt_opt *opt)
84{ 84{
85 const struct hash_ip *h = set->data; 85 const struct hash_ip4 *h = set->data;
86 ipset_adtfn adtfn = set->variant->adt[adt]; 86 ipset_adtfn adtfn = set->variant->adt[adt];
87 struct hash_ip4_elem e = { 0 }; 87 struct hash_ip4_elem e = { 0 };
88 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 88 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -101,7 +101,7 @@ static int
101hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], 101hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
102 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 102 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
103{ 103{
104 const struct hash_ip *h = set->data; 104 const struct hash_ip4 *h = set->data;
105 ipset_adtfn adtfn = set->variant->adt[adt]; 105 ipset_adtfn adtfn = set->variant->adt[adt];
106 struct hash_ip4_elem e = { 0 }; 106 struct hash_ip4_elem e = { 0 };
107 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 107 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -199,7 +199,7 @@ nla_put_failure:
199} 199}
200 200
201static inline void 201static inline void
202hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e) 202hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e)
203{ 203{
204} 204}
205 205
@@ -217,7 +217,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
217 const struct xt_action_param *par, 217 const struct xt_action_param *par,
218 enum ipset_adt adt, struct ip_set_adt_opt *opt) 218 enum ipset_adt adt, struct ip_set_adt_opt *opt)
219{ 219{
220 const struct hash_ip *h = set->data; 220 const struct hash_ip6 *h = set->data;
221 ipset_adtfn adtfn = set->variant->adt[adt]; 221 ipset_adtfn adtfn = set->variant->adt[adt];
222 struct hash_ip6_elem e = { { .all = { 0 } } }; 222 struct hash_ip6_elem e = { { .all = { 0 } } };
223 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 223 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -234,7 +234,7 @@ static int
234hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], 234hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
235 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 235 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
236{ 236{
237 const struct hash_ip *h = set->data; 237 const struct hash_ip6 *h = set->data;
238 ipset_adtfn adtfn = set->variant->adt[adt]; 238 ipset_adtfn adtfn = set->variant->adt[adt];
239 struct hash_ip6_elem e = { { .all = { 0 } } }; 239 struct hash_ip6_elem e = { { .all = { 0 } } };
240 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 240 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
new file mode 100644
index 000000000000..1ab5ed2f6839
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -0,0 +1,315 @@
1/* Copyright (C) 2016 Tomasz Chilinski <tomasz.chilinski@chilan.com>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8/* Kernel module implementing an IP set type: the hash:ip,mac type */
9
10#include <linux/jhash.h>
11#include <linux/module.h>
12#include <linux/ip.h>
13#include <linux/etherdevice.h>
14#include <linux/skbuff.h>
15#include <linux/errno.h>
16#include <linux/random.h>
17#include <linux/if_ether.h>
18#include <net/ip.h>
19#include <net/ipv6.h>
20#include <net/netlink.h>
21#include <net/tcp.h>
22
23#include <linux/netfilter.h>
24#include <linux/netfilter/ipset/pfxlen.h>
25#include <linux/netfilter/ipset/ip_set.h>
26#include <linux/netfilter/ipset/ip_set_hash.h>
27
28#define IPSET_TYPE_REV_MIN 0
29#define IPSET_TYPE_REV_MAX 0
30
31MODULE_LICENSE("GPL");
32MODULE_AUTHOR("Tomasz Chilinski <tomasz.chilinski@chilan.com>");
33IP_SET_MODULE_DESC("hash:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
34MODULE_ALIAS("ip_set_hash:ip,mac");
35
36/* Type specific function prefix */
37#define HTYPE hash_ipmac
38
39/* Zero valued element is not supported */
40static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
41
42/* IPv4 variant */
43
44/* Member elements */
45struct hash_ipmac4_elem {
46 /* Zero valued IP addresses cannot be stored */
47 __be32 ip;
48 union {
49 unsigned char ether[ETH_ALEN];
50 __be32 foo[2];
51 };
52};
53
54/* Common functions */
55
56static inline bool
57hash_ipmac4_data_equal(const struct hash_ipmac4_elem *e1,
58 const struct hash_ipmac4_elem *e2,
59 u32 *multi)
60{
61 return e1->ip == e2->ip && ether_addr_equal(e1->ether, e2->ether);
62}
63
64static bool
65hash_ipmac4_data_list(struct sk_buff *skb, const struct hash_ipmac4_elem *e)
66{
67 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip) ||
68 nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
69 goto nla_put_failure;
70 return false;
71
72nla_put_failure:
73 return true;
74}
75
76static inline void
77hash_ipmac4_data_next(struct hash_ipmac4_elem *next,
78 const struct hash_ipmac4_elem *e)
79{
80 next->ip = e->ip;
81}
82
83#define MTYPE hash_ipmac4
84#define PF 4
85#define HOST_MASK 32
86#define HKEY_DATALEN sizeof(struct hash_ipmac4_elem)
87#include "ip_set_hash_gen.h"
88
89static int
90hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb,
91 const struct xt_action_param *par,
92 enum ipset_adt adt, struct ip_set_adt_opt *opt)
93{
94 ipset_adtfn adtfn = set->variant->adt[adt];
95 struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } };
96 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
97
98 /* MAC can be src only */
99 if (!(opt->flags & IPSET_DIM_TWO_SRC))
100 return 0;
101
102 if (skb_mac_header(skb) < skb->head ||
103 (skb_mac_header(skb) + ETH_HLEN) > skb->data)
104 return -EINVAL;
105
106 memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
107 if (ether_addr_equal(e.ether, invalid_ether))
108 return -EINVAL;
109
110 ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
111
112 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
113}
114
115static int
116hash_ipmac4_uadt(struct ip_set *set, struct nlattr *tb[],
117 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
118{
119 ipset_adtfn adtfn = set->variant->adt[adt];
120 struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } };
121 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
122 int ret;
123
124 if (unlikely(!tb[IPSET_ATTR_IP] ||
125 !tb[IPSET_ATTR_ETHER] ||
126 nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN ||
127 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
128 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
129 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
130 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
131 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
132 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
133 return -IPSET_ERR_PROTOCOL;
134
135 if (tb[IPSET_ATTR_LINENO])
136 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
137
138 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
139 ip_set_get_extensions(set, tb, &ext);
140 if (ret)
141 return ret;
142 memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
143 if (ether_addr_equal(e.ether, invalid_ether))
144 return -IPSET_ERR_HASH_ELEM;
145
146 return adtfn(set, &e, &ext, &ext, flags);
147}
148
149/* IPv6 variant */
150
151/* Member elements */
152struct hash_ipmac6_elem {
153 /* Zero valued IP addresses cannot be stored */
154 union nf_inet_addr ip;
155 union {
156 unsigned char ether[ETH_ALEN];
157 __be32 foo[2];
158 };
159};
160
161/* Common functions */
162
163static inline bool
164hash_ipmac6_data_equal(const struct hash_ipmac6_elem *e1,
165 const struct hash_ipmac6_elem *e2,
166 u32 *multi)
167{
168 return ipv6_addr_equal(&e1->ip.in6, &e2->ip.in6) &&
169 ether_addr_equal(e1->ether, e2->ether);
170}
171
172static bool
173hash_ipmac6_data_list(struct sk_buff *skb, const struct hash_ipmac6_elem *e)
174{
175 if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
176 nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
177 goto nla_put_failure;
178 return false;
179
180nla_put_failure:
181 return true;
182}
183
184static inline void
185hash_ipmac6_data_next(struct hash_ipmac6_elem *next,
186 const struct hash_ipmac6_elem *e)
187{
188}
189
190#undef MTYPE
191#undef PF
192#undef HOST_MASK
193#undef HKEY_DATALEN
194
195#define MTYPE hash_ipmac6
196#define PF 6
197#define HOST_MASK 128
198#define HKEY_DATALEN sizeof(struct hash_ipmac6_elem)
199#define IP_SET_EMIT_CREATE
200#include "ip_set_hash_gen.h"
201
202static int
203hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb,
204 const struct xt_action_param *par,
205 enum ipset_adt adt, struct ip_set_adt_opt *opt)
206{
207 ipset_adtfn adtfn = set->variant->adt[adt];
208 struct hash_ipmac6_elem e = {
209 { .all = { 0 } },
210 { .foo[0] = 0, .foo[1] = 0 }
211 };
212 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
213
214 /* MAC can be src only */
215 if (!(opt->flags & IPSET_DIM_TWO_SRC))
216 return 0;
217
218 if (skb_mac_header(skb) < skb->head ||
219 (skb_mac_header(skb) + ETH_HLEN) > skb->data)
220 return -EINVAL;
221
222 memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
223 if (ether_addr_equal(e.ether, invalid_ether))
224 return -EINVAL;
225
226 ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
227
228 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
229}
230
231static int
232hash_ipmac6_uadt(struct ip_set *set, struct nlattr *tb[],
233 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
234{
235 ipset_adtfn adtfn = set->variant->adt[adt];
236 struct hash_ipmac6_elem e = {
237 { .all = { 0 } },
238 { .foo[0] = 0, .foo[1] = 0 }
239 };
240 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
241 int ret;
242
243 if (unlikely(!tb[IPSET_ATTR_IP] ||
244 !tb[IPSET_ATTR_ETHER] ||
245 nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN ||
246 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
247 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
248 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
249 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
250 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
251 !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
252 return -IPSET_ERR_PROTOCOL;
253
254 if (tb[IPSET_ATTR_LINENO])
255 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
256
257 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
258 ip_set_get_extensions(set, tb, &ext);
259 if (ret)
260 return ret;
261
262 memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
263 if (ether_addr_equal(e.ether, invalid_ether))
264 return -IPSET_ERR_HASH_ELEM;
265
266 return adtfn(set, &e, &ext, &ext, flags);
267}
268
269static struct ip_set_type hash_ipmac_type __read_mostly = {
270 .name = "hash:ip,mac",
271 .protocol = IPSET_PROTOCOL,
272 .features = IPSET_TYPE_IP | IPSET_TYPE_MAC,
273 .dimension = IPSET_DIM_TWO,
274 .family = NFPROTO_UNSPEC,
275 .revision_min = IPSET_TYPE_REV_MIN,
276 .revision_max = IPSET_TYPE_REV_MAX,
277 .create = hash_ipmac_create,
278 .create_policy = {
279 [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
280 [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
281 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
282 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
283 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
284 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
285 },
286 .adt_policy = {
287 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
288 [IPSET_ATTR_ETHER] = { .type = NLA_BINARY,
289 .len = ETH_ALEN },
290 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
291 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
292 [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
293 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
294 [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
295 [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
296 [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
297 [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
298 },
299 .me = THIS_MODULE,
300};
301
302static int __init
303hash_ipmac_init(void)
304{
305 return ip_set_type_register(&hash_ipmac_type);
306}
307
308static void __exit
309hash_ipmac_fini(void)
310{
311 ip_set_type_unregister(&hash_ipmac_type);
312}
313
314module_init(hash_ipmac_init);
315module_exit(hash_ipmac_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index a0695a2ab585..b64cf14e8352 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -85,7 +85,7 @@ hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb,
85 const struct xt_action_param *par, 85 const struct xt_action_param *par,
86 enum ipset_adt adt, struct ip_set_adt_opt *opt) 86 enum ipset_adt adt, struct ip_set_adt_opt *opt)
87{ 87{
88 const struct hash_ipmark *h = set->data; 88 const struct hash_ipmark4 *h = set->data;
89 ipset_adtfn adtfn = set->variant->adt[adt]; 89 ipset_adtfn adtfn = set->variant->adt[adt];
90 struct hash_ipmark4_elem e = { }; 90 struct hash_ipmark4_elem e = { };
91 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 91 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -101,7 +101,7 @@ static int
101hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], 101hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
102 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 102 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
103{ 103{
104 const struct hash_ipmark *h = set->data; 104 const struct hash_ipmark4 *h = set->data;
105 ipset_adtfn adtfn = set->variant->adt[adt]; 105 ipset_adtfn adtfn = set->variant->adt[adt];
106 struct hash_ipmark4_elem e = { }; 106 struct hash_ipmark4_elem e = { };
107 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 107 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -193,7 +193,7 @@ nla_put_failure:
193} 193}
194 194
195static inline void 195static inline void
196hash_ipmark6_data_next(struct hash_ipmark4_elem *next, 196hash_ipmark6_data_next(struct hash_ipmark6_elem *next,
197 const struct hash_ipmark6_elem *d) 197 const struct hash_ipmark6_elem *d)
198{ 198{
199} 199}
@@ -211,7 +211,7 @@ hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
211 const struct xt_action_param *par, 211 const struct xt_action_param *par,
212 enum ipset_adt adt, struct ip_set_adt_opt *opt) 212 enum ipset_adt adt, struct ip_set_adt_opt *opt)
213{ 213{
214 const struct hash_ipmark *h = set->data; 214 const struct hash_ipmark6 *h = set->data;
215 ipset_adtfn adtfn = set->variant->adt[adt]; 215 ipset_adtfn adtfn = set->variant->adt[adt];
216 struct hash_ipmark6_elem e = { }; 216 struct hash_ipmark6_elem e = { };
217 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 217 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -227,7 +227,7 @@ static int
227hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], 227hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
228 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 228 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
229{ 229{
230 const struct hash_ipmark *h = set->data; 230 const struct hash_ipmark6 *h = set->data;
231 ipset_adtfn adtfn = set->variant->adt[adt]; 231 ipset_adtfn adtfn = set->variant->adt[adt];
232 struct hash_ipmark6_elem e = { }; 232 struct hash_ipmark6_elem e = { };
233 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 233 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 9d84b3dff603..f438740e6c6a 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -108,7 +108,7 @@ static int
108hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], 108hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
109 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 109 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
110{ 110{
111 const struct hash_ipport *h = set->data; 111 const struct hash_ipport4 *h = set->data;
112 ipset_adtfn adtfn = set->variant->adt[adt]; 112 ipset_adtfn adtfn = set->variant->adt[adt];
113 struct hash_ipport4_elem e = { .ip = 0 }; 113 struct hash_ipport4_elem e = { .ip = 0 };
114 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 114 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -231,7 +231,7 @@ nla_put_failure:
231} 231}
232 232
233static inline void 233static inline void
234hash_ipport6_data_next(struct hash_ipport4_elem *next, 234hash_ipport6_data_next(struct hash_ipport6_elem *next,
235 const struct hash_ipport6_elem *d) 235 const struct hash_ipport6_elem *d)
236{ 236{
237 next->port = d->port; 237 next->port = d->port;
@@ -266,7 +266,7 @@ static int
266hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], 266hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
267 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 267 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
268{ 268{
269 const struct hash_ipport *h = set->data; 269 const struct hash_ipport6 *h = set->data;
270 ipset_adtfn adtfn = set->variant->adt[adt]; 270 ipset_adtfn adtfn = set->variant->adt[adt];
271 struct hash_ipport6_elem e = { .ip = { .all = { 0 } } }; 271 struct hash_ipport6_elem e = { .ip = { .all = { 0 } } };
272 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 272 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 215b7b942038..6215fb898c50 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -111,7 +111,7 @@ static int
111hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], 111hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
112 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 112 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
113{ 113{
114 const struct hash_ipportip *h = set->data; 114 const struct hash_ipportip4 *h = set->data;
115 ipset_adtfn adtfn = set->variant->adt[adt]; 115 ipset_adtfn adtfn = set->variant->adt[adt];
116 struct hash_ipportip4_elem e = { .ip = 0 }; 116 struct hash_ipportip4_elem e = { .ip = 0 };
117 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 117 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -241,7 +241,7 @@ nla_put_failure:
241} 241}
242 242
243static inline void 243static inline void
244hash_ipportip6_data_next(struct hash_ipportip4_elem *next, 244hash_ipportip6_data_next(struct hash_ipportip6_elem *next,
245 const struct hash_ipportip6_elem *d) 245 const struct hash_ipportip6_elem *d)
246{ 246{
247 next->port = d->port; 247 next->port = d->port;
@@ -277,7 +277,7 @@ static int
277hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], 277hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
278 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 278 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
279{ 279{
280 const struct hash_ipportip *h = set->data; 280 const struct hash_ipportip6 *h = set->data;
281 ipset_adtfn adtfn = set->variant->adt[adt]; 281 ipset_adtfn adtfn = set->variant->adt[adt];
282 struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } }; 282 struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } };
283 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 283 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 9ca719625ea3..5ab1b99a53c2 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -138,7 +138,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
138 const struct xt_action_param *par, 138 const struct xt_action_param *par,
139 enum ipset_adt adt, struct ip_set_adt_opt *opt) 139 enum ipset_adt adt, struct ip_set_adt_opt *opt)
140{ 140{
141 const struct hash_ipportnet *h = set->data; 141 const struct hash_ipportnet4 *h = set->data;
142 ipset_adtfn adtfn = set->variant->adt[adt]; 142 ipset_adtfn adtfn = set->variant->adt[adt];
143 struct hash_ipportnet4_elem e = { 143 struct hash_ipportnet4_elem e = {
144 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), 144 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -163,7 +163,7 @@ static int
163hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], 163hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
164 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 164 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
165{ 165{
166 const struct hash_ipportnet *h = set->data; 166 const struct hash_ipportnet4 *h = set->data;
167 ipset_adtfn adtfn = set->variant->adt[adt]; 167 ipset_adtfn adtfn = set->variant->adt[adt];
168 struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; 168 struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
169 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 169 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -370,7 +370,7 @@ nla_put_failure:
370} 370}
371 371
372static inline void 372static inline void
373hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next, 373hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next,
374 const struct hash_ipportnet6_elem *d) 374 const struct hash_ipportnet6_elem *d)
375{ 375{
376 next->port = d->port; 376 next->port = d->port;
@@ -389,7 +389,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
389 const struct xt_action_param *par, 389 const struct xt_action_param *par,
390 enum ipset_adt adt, struct ip_set_adt_opt *opt) 390 enum ipset_adt adt, struct ip_set_adt_opt *opt)
391{ 391{
392 const struct hash_ipportnet *h = set->data; 392 const struct hash_ipportnet6 *h = set->data;
393 ipset_adtfn adtfn = set->variant->adt[adt]; 393 ipset_adtfn adtfn = set->variant->adt[adt];
394 struct hash_ipportnet6_elem e = { 394 struct hash_ipportnet6_elem e = {
395 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), 395 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -414,7 +414,7 @@ static int
414hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], 414hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
415 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 415 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
416{ 416{
417 const struct hash_ipportnet *h = set->data; 417 const struct hash_ipportnet6 *h = set->data;
418 ipset_adtfn adtfn = set->variant->adt[adt]; 418 ipset_adtfn adtfn = set->variant->adt[adt];
419 struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 }; 419 struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 };
420 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 420 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 3e4bffdc1cc0..5d9e895452e7 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -117,7 +117,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
117 const struct xt_action_param *par, 117 const struct xt_action_param *par,
118 enum ipset_adt adt, struct ip_set_adt_opt *opt) 118 enum ipset_adt adt, struct ip_set_adt_opt *opt)
119{ 119{
120 const struct hash_net *h = set->data; 120 const struct hash_net4 *h = set->data;
121 ipset_adtfn adtfn = set->variant->adt[adt]; 121 ipset_adtfn adtfn = set->variant->adt[adt];
122 struct hash_net4_elem e = { 122 struct hash_net4_elem e = {
123 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), 123 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -139,7 +139,7 @@ static int
139hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], 139hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
140 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 140 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
141{ 141{
142 const struct hash_net *h = set->data; 142 const struct hash_net4 *h = set->data;
143 ipset_adtfn adtfn = set->variant->adt[adt]; 143 ipset_adtfn adtfn = set->variant->adt[adt];
144 struct hash_net4_elem e = { .cidr = HOST_MASK }; 144 struct hash_net4_elem e = { .cidr = HOST_MASK };
145 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 145 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -268,7 +268,7 @@ nla_put_failure:
268} 268}
269 269
270static inline void 270static inline void
271hash_net6_data_next(struct hash_net4_elem *next, 271hash_net6_data_next(struct hash_net6_elem *next,
272 const struct hash_net6_elem *d) 272 const struct hash_net6_elem *d)
273{ 273{
274} 274}
@@ -286,7 +286,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
286 const struct xt_action_param *par, 286 const struct xt_action_param *par,
287 enum ipset_adt adt, struct ip_set_adt_opt *opt) 287 enum ipset_adt adt, struct ip_set_adt_opt *opt)
288{ 288{
289 const struct hash_net *h = set->data; 289 const struct hash_net6 *h = set->data;
290 ipset_adtfn adtfn = set->variant->adt[adt]; 290 ipset_adtfn adtfn = set->variant->adt[adt];
291 struct hash_net6_elem e = { 291 struct hash_net6_elem e = {
292 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), 292 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index f0f688db6213..44cf11939c91 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -156,7 +156,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
156 const struct xt_action_param *par, 156 const struct xt_action_param *par,
157 enum ipset_adt adt, struct ip_set_adt_opt *opt) 157 enum ipset_adt adt, struct ip_set_adt_opt *opt)
158{ 158{
159 struct hash_netiface *h = set->data; 159 struct hash_netiface4 *h = set->data;
160 ipset_adtfn adtfn = set->variant->adt[adt]; 160 ipset_adtfn adtfn = set->variant->adt[adt];
161 struct hash_netiface4_elem e = { 161 struct hash_netiface4_elem e = {
162 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), 162 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -170,7 +170,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
170 ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); 170 ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
171 e.ip &= ip_set_netmask(e.cidr); 171 e.ip &= ip_set_netmask(e.cidr);
172 172
173#define IFACE(dir) (par->dir ? par->dir->name : "") 173#define IFACE(dir) (par->state->dir ? par->state->dir->name : "")
174#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) 174#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC)
175 175
176 if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { 176 if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
@@ -196,7 +196,7 @@ static int
196hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], 196hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
197 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 197 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
198{ 198{
199 struct hash_netiface *h = set->data; 199 struct hash_netiface4 *h = set->data;
200 ipset_adtfn adtfn = set->variant->adt[adt]; 200 ipset_adtfn adtfn = set->variant->adt[adt];
201 struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; 201 struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
202 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 202 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -348,7 +348,7 @@ nla_put_failure:
348} 348}
349 349
350static inline void 350static inline void
351hash_netiface6_data_next(struct hash_netiface4_elem *next, 351hash_netiface6_data_next(struct hash_netiface6_elem *next,
352 const struct hash_netiface6_elem *d) 352 const struct hash_netiface6_elem *d)
353{ 353{
354} 354}
@@ -367,7 +367,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
367 const struct xt_action_param *par, 367 const struct xt_action_param *par,
368 enum ipset_adt adt, struct ip_set_adt_opt *opt) 368 enum ipset_adt adt, struct ip_set_adt_opt *opt)
369{ 369{
370 struct hash_netiface *h = set->data; 370 struct hash_netiface6 *h = set->data;
371 ipset_adtfn adtfn = set->variant->adt[adt]; 371 ipset_adtfn adtfn = set->variant->adt[adt];
372 struct hash_netiface6_elem e = { 372 struct hash_netiface6_elem e = {
373 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), 373 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index a93dfebffa81..db614e13b193 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -143,7 +143,7 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
143 const struct xt_action_param *par, 143 const struct xt_action_param *par,
144 enum ipset_adt adt, struct ip_set_adt_opt *opt) 144 enum ipset_adt adt, struct ip_set_adt_opt *opt)
145{ 145{
146 const struct hash_netnet *h = set->data; 146 const struct hash_netnet4 *h = set->data;
147 ipset_adtfn adtfn = set->variant->adt[adt]; 147 ipset_adtfn adtfn = set->variant->adt[adt];
148 struct hash_netnet4_elem e = { }; 148 struct hash_netnet4_elem e = { };
149 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 149 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -165,7 +165,7 @@ static int
165hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], 165hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
166 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 166 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
167{ 167{
168 const struct hash_netnet *h = set->data; 168 const struct hash_netnet4 *h = set->data;
169 ipset_adtfn adtfn = set->variant->adt[adt]; 169 ipset_adtfn adtfn = set->variant->adt[adt];
170 struct hash_netnet4_elem e = { }; 170 struct hash_netnet4_elem e = { };
171 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 171 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -352,7 +352,7 @@ nla_put_failure:
352} 352}
353 353
354static inline void 354static inline void
355hash_netnet6_data_next(struct hash_netnet4_elem *next, 355hash_netnet6_data_next(struct hash_netnet6_elem *next,
356 const struct hash_netnet6_elem *d) 356 const struct hash_netnet6_elem *d)
357{ 357{
358} 358}
@@ -377,7 +377,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
377 const struct xt_action_param *par, 377 const struct xt_action_param *par,
378 enum ipset_adt adt, struct ip_set_adt_opt *opt) 378 enum ipset_adt adt, struct ip_set_adt_opt *opt)
379{ 379{
380 const struct hash_netnet *h = set->data; 380 const struct hash_netnet6 *h = set->data;
381 ipset_adtfn adtfn = set->variant->adt[adt]; 381 ipset_adtfn adtfn = set->variant->adt[adt];
382 struct hash_netnet6_elem e = { }; 382 struct hash_netnet6_elem e = { };
383 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 383 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 731813e0f08c..54b64b6cd0cd 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -133,7 +133,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
133 const struct xt_action_param *par, 133 const struct xt_action_param *par,
134 enum ipset_adt adt, struct ip_set_adt_opt *opt) 134 enum ipset_adt adt, struct ip_set_adt_opt *opt)
135{ 135{
136 const struct hash_netport *h = set->data; 136 const struct hash_netport4 *h = set->data;
137 ipset_adtfn adtfn = set->variant->adt[adt]; 137 ipset_adtfn adtfn = set->variant->adt[adt];
138 struct hash_netport4_elem e = { 138 struct hash_netport4_elem e = {
139 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), 139 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -157,7 +157,7 @@ static int
157hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], 157hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
158 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 158 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
159{ 159{
160 const struct hash_netport *h = set->data; 160 const struct hash_netport4 *h = set->data;
161 ipset_adtfn adtfn = set->variant->adt[adt]; 161 ipset_adtfn adtfn = set->variant->adt[adt];
162 struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; 162 struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
163 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 163 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -329,7 +329,7 @@ nla_put_failure:
329} 329}
330 330
331static inline void 331static inline void
332hash_netport6_data_next(struct hash_netport4_elem *next, 332hash_netport6_data_next(struct hash_netport6_elem *next,
333 const struct hash_netport6_elem *d) 333 const struct hash_netport6_elem *d)
334{ 334{
335 next->port = d->port; 335 next->port = d->port;
@@ -348,7 +348,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
348 const struct xt_action_param *par, 348 const struct xt_action_param *par,
349 enum ipset_adt adt, struct ip_set_adt_opt *opt) 349 enum ipset_adt adt, struct ip_set_adt_opt *opt)
350{ 350{
351 const struct hash_netport *h = set->data; 351 const struct hash_netport6 *h = set->data;
352 ipset_adtfn adtfn = set->variant->adt[adt]; 352 ipset_adtfn adtfn = set->variant->adt[adt];
353 struct hash_netport6_elem e = { 353 struct hash_netport6_elem e = {
354 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), 354 .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -372,7 +372,7 @@ static int
372hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], 372hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
373 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 373 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
374{ 374{
375 const struct hash_netport *h = set->data; 375 const struct hash_netport6 *h = set->data;
376 ipset_adtfn adtfn = set->variant->adt[adt]; 376 ipset_adtfn adtfn = set->variant->adt[adt];
377 struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 }; 377 struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 };
378 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 378 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 9a14c237830f..aff846960ac4 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -154,7 +154,7 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
154 const struct xt_action_param *par, 154 const struct xt_action_param *par,
155 enum ipset_adt adt, struct ip_set_adt_opt *opt) 155 enum ipset_adt adt, struct ip_set_adt_opt *opt)
156{ 156{
157 const struct hash_netportnet *h = set->data; 157 const struct hash_netportnet4 *h = set->data;
158 ipset_adtfn adtfn = set->variant->adt[adt]; 158 ipset_adtfn adtfn = set->variant->adt[adt];
159 struct hash_netportnet4_elem e = { }; 159 struct hash_netportnet4_elem e = { };
160 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 160 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -180,7 +180,7 @@ static int
180hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], 180hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
181 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 181 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
182{ 182{
183 const struct hash_netportnet *h = set->data; 183 const struct hash_netportnet4 *h = set->data;
184 ipset_adtfn adtfn = set->variant->adt[adt]; 184 ipset_adtfn adtfn = set->variant->adt[adt];
185 struct hash_netportnet4_elem e = { }; 185 struct hash_netportnet4_elem e = { };
186 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 186 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -406,7 +406,7 @@ nla_put_failure:
406} 406}
407 407
408static inline void 408static inline void
409hash_netportnet6_data_next(struct hash_netportnet4_elem *next, 409hash_netportnet6_data_next(struct hash_netportnet6_elem *next,
410 const struct hash_netportnet6_elem *d) 410 const struct hash_netportnet6_elem *d)
411{ 411{
412 next->port = d->port; 412 next->port = d->port;
@@ -432,7 +432,7 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
432 const struct xt_action_param *par, 432 const struct xt_action_param *par,
433 enum ipset_adt adt, struct ip_set_adt_opt *opt) 433 enum ipset_adt adt, struct ip_set_adt_opt *opt)
434{ 434{
435 const struct hash_netportnet *h = set->data; 435 const struct hash_netportnet6 *h = set->data;
436 ipset_adtfn adtfn = set->variant->adt[adt]; 436 ipset_adtfn adtfn = set->variant->adt[adt];
437 struct hash_netportnet6_elem e = { }; 437 struct hash_netportnet6_elem e = { };
438 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 438 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -458,7 +458,7 @@ static int
458hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], 458hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
459 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 459 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
460{ 460{
461 const struct hash_netportnet *h = set->data; 461 const struct hash_netportnet6 *h = set->data;
462 ipset_adtfn adtfn = set->variant->adt[adt]; 462 ipset_adtfn adtfn = set->variant->adt[adt];
463 struct hash_netportnet6_elem e = { }; 463 struct hash_netportnet6_elem e = { };
464 struct ip_set_ext ext = IP_SET_INIT_UEXT(set); 464 struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index a2a89e4e0a14..51077c53d76b 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -166,6 +166,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
166static inline void 166static inline void
167list_set_del(struct ip_set *set, struct set_elem *e) 167list_set_del(struct ip_set *set, struct set_elem *e)
168{ 168{
169 set->elements--;
169 list_del_rcu(&e->list); 170 list_del_rcu(&e->list);
170 call_rcu(&e->rcu, __list_set_del_rcu); 171 call_rcu(&e->rcu, __list_set_del_rcu);
171} 172}
@@ -227,7 +228,7 @@ list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext,
227 if (SET_WITH_COUNTER(set)) 228 if (SET_WITH_COUNTER(set))
228 ip_set_init_counter(ext_counter(e, set), ext); 229 ip_set_init_counter(ext_counter(e, set), ext);
229 if (SET_WITH_COMMENT(set)) 230 if (SET_WITH_COMMENT(set))
230 ip_set_init_comment(ext_comment(e, set), ext); 231 ip_set_init_comment(set, ext_comment(e, set), ext);
231 if (SET_WITH_SKBINFO(set)) 232 if (SET_WITH_SKBINFO(set))
232 ip_set_init_skbinfo(ext_skbinfo(e, set), ext); 233 ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
233 /* Update timeout last */ 234 /* Update timeout last */
@@ -309,6 +310,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
309 list_add_rcu(&e->list, &prev->list); 310 list_add_rcu(&e->list, &prev->list);
310 else 311 else
311 list_add_tail_rcu(&e->list, &map->members); 312 list_add_tail_rcu(&e->list, &map->members);
313 set->elements++;
312 314
313 return 0; 315 return 0;
314} 316}
@@ -419,6 +421,8 @@ list_set_flush(struct ip_set *set)
419 421
420 list_for_each_entry_safe(e, n, &map->members, list) 422 list_for_each_entry_safe(e, n, &map->members, list)
421 list_set_del(set, e); 423 list_set_del(set, e);
424 set->elements = 0;
425 set->ext_size = 0;
422} 426}
423 427
424static void 428static void
@@ -441,12 +445,12 @@ list_set_destroy(struct ip_set *set)
441 set->data = NULL; 445 set->data = NULL;
442} 446}
443 447
444static int 448/* Calculate the actual memory size of the set data */
445list_set_head(struct ip_set *set, struct sk_buff *skb) 449static size_t
450list_set_memsize(const struct list_set *map, size_t dsize)
446{ 451{
447 const struct list_set *map = set->data;
448 struct nlattr *nested;
449 struct set_elem *e; 452 struct set_elem *e;
453 size_t memsize;
450 u32 n = 0; 454 u32 n = 0;
451 455
452 rcu_read_lock(); 456 rcu_read_lock();
@@ -454,13 +458,25 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
454 n++; 458 n++;
455 rcu_read_unlock(); 459 rcu_read_unlock();
456 460
461 memsize = sizeof(*map) + n * dsize;
462
463 return memsize;
464}
465
466static int
467list_set_head(struct ip_set *set, struct sk_buff *skb)
468{
469 const struct list_set *map = set->data;
470 struct nlattr *nested;
471 size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size;
472
457 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 473 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
458 if (!nested) 474 if (!nested)
459 goto nla_put_failure; 475 goto nla_put_failure;
460 if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || 476 if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
461 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || 477 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
462 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, 478 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
463 htonl(sizeof(*map) + n * set->dsize))) 479 nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
464 goto nla_put_failure; 480 goto nla_put_failure;
465 if (unlikely(ip_set_put_flags(skb, set))) 481 if (unlikely(ip_set_put_flags(skb, set)))
466 goto nla_put_failure; 482 goto nla_put_failure;
@@ -570,11 +586,8 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
570{ 586{
571 struct list_set *map = set->data; 587 struct list_set *map = set->data;
572 588
573 init_timer(&map->gc); 589 setup_timer(&map->gc, gc, (unsigned long)set);
574 map->gc.data = (unsigned long)set; 590 mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
575 map->gc.function = gc;
576 map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
577 add_timer(&map->gc);
578} 591}
579 592
580/* Create list:set type of sets */ 593/* Create list:set type of sets */
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 2c1b498a7a27..db40050f8785 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -70,7 +70,7 @@ EXPORT_SYMBOL(ip_vs_get_debug_level);
70#endif 70#endif
71EXPORT_SYMBOL(ip_vs_new_conn_out); 71EXPORT_SYMBOL(ip_vs_new_conn_out);
72 72
73static int ip_vs_net_id __read_mostly; 73static unsigned int ip_vs_net_id __read_mostly;
74/* netns cnt used for uniqueness */ 74/* netns cnt used for uniqueness */
75static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); 75static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
76 76
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a6e44ef2ec9a..55e0169caa4c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -48,7 +48,7 @@
48#include <net/sock.h> 48#include <net/sock.h>
49#include <net/genetlink.h> 49#include <net/genetlink.h>
50 50
51#include <asm/uaccess.h> 51#include <linux/uaccess.h>
52 52
53#include <net/ip_vs.h> 53#include <net/ip_vs.h>
54 54
@@ -2840,14 +2840,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
2840 */ 2840 */
2841 2841
2842/* IPVS genetlink family */ 2842/* IPVS genetlink family */
2843static struct genl_family ip_vs_genl_family = { 2843static struct genl_family ip_vs_genl_family;
2844 .id = GENL_ID_GENERATE,
2845 .hdrsize = 0,
2846 .name = IPVS_GENL_NAME,
2847 .version = IPVS_GENL_VERSION,
2848 .maxattr = IPVS_CMD_ATTR_MAX,
2849 .netnsok = true, /* Make ipvsadm to work on netns */
2850};
2851 2844
2852/* Policy used for first-level command attributes */ 2845/* Policy used for first-level command attributes */
2853static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { 2846static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
@@ -3267,7 +3260,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3267 3260
3268 3261
3269 svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]); 3262 svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
3270 if (IS_ERR(svc) || svc == NULL) 3263 if (IS_ERR_OR_NULL(svc))
3271 goto out_err; 3264 goto out_err;
3272 3265
3273 /* Dump the destinations */ 3266 /* Dump the destinations */
@@ -3872,10 +3865,20 @@ static const struct genl_ops ip_vs_genl_ops[] = {
3872 }, 3865 },
3873}; 3866};
3874 3867
3868static struct genl_family ip_vs_genl_family __ro_after_init = {
3869 .hdrsize = 0,
3870 .name = IPVS_GENL_NAME,
3871 .version = IPVS_GENL_VERSION,
3872 .maxattr = IPVS_CMD_ATTR_MAX,
3873 .netnsok = true, /* Make ipvsadm to work on netns */
3874 .module = THIS_MODULE,
3875 .ops = ip_vs_genl_ops,
3876 .n_ops = ARRAY_SIZE(ip_vs_genl_ops),
3877};
3878
3875static int __init ip_vs_genl_register(void) 3879static int __init ip_vs_genl_register(void)
3876{ 3880{
3877 return genl_register_family_with_ops(&ip_vs_genl_family, 3881 return genl_register_family(&ip_vs_genl_family);
3878 ip_vs_genl_ops);
3879} 3882}
3880 3883
3881static void ip_vs_genl_unregister(void) 3884static void ip_vs_genl_unregister(void)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 01d3d894de46..4e1a98fcc8c3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -254,6 +254,54 @@ static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
254 return true; 254 return true;
255} 255}
256 256
257static inline bool decrement_ttl(struct netns_ipvs *ipvs,
258 int skb_af,
259 struct sk_buff *skb)
260{
261 struct net *net = ipvs->net;
262
263#ifdef CONFIG_IP_VS_IPV6
264 if (skb_af == AF_INET6) {
265 struct dst_entry *dst = skb_dst(skb);
266
267 /* check and decrement ttl */
268 if (ipv6_hdr(skb)->hop_limit <= 1) {
269 /* Force OUTPUT device used as source address */
270 skb->dev = dst->dev;
271 icmpv6_send(skb, ICMPV6_TIME_EXCEED,
272 ICMPV6_EXC_HOPLIMIT, 0);
273 __IP6_INC_STATS(net, ip6_dst_idev(dst),
274 IPSTATS_MIB_INHDRERRORS);
275
276 return false;
277 }
278
279 /* don't propagate ttl change to cloned packets */
280 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
281 return false;
282
283 ipv6_hdr(skb)->hop_limit--;
284 } else
285#endif
286 {
287 if (ip_hdr(skb)->ttl <= 1) {
288 /* Tell the sender its packet died... */
289 __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
290 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
291 return false;
292 }
293
294 /* don't propagate ttl change to cloned packets */
295 if (!skb_make_writable(skb, sizeof(struct iphdr)))
296 return false;
297
298 /* Decrease ttl */
299 ip_decrease_ttl(ip_hdr(skb));
300 }
301
302 return true;
303}
304
257/* Get route to destination or remote server */ 305/* Get route to destination or remote server */
258static int 306static int
259__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, 307__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
@@ -326,6 +374,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
326 return local; 374 return local;
327 } 375 }
328 376
377 if (!decrement_ttl(ipvs, skb_af, skb))
378 goto err_put;
379
329 if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { 380 if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
330 mtu = dst_mtu(&rt->dst); 381 mtu = dst_mtu(&rt->dst);
331 } else { 382 } else {
@@ -473,6 +524,9 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
473 return local; 524 return local;
474 } 525 }
475 526
527 if (!decrement_ttl(ipvs, skb_af, skb))
528 goto err_put;
529
476 /* MTU checking */ 530 /* MTU checking */
477 if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) 531 if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
478 mtu = dst_mtu(&rt->dst); 532 mtu = dst_mtu(&rt->dst);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0f87e5d21be7..3a073cd9fcf4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -783,7 +783,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
783 /* set conntrack timestamp, if enabled. */ 783 /* set conntrack timestamp, if enabled. */
784 tstamp = nf_conn_tstamp_find(ct); 784 tstamp = nf_conn_tstamp_find(ct);
785 if (tstamp) { 785 if (tstamp) {
786 if (skb->tstamp.tv64 == 0) 786 if (skb->tstamp == 0)
787 __net_timestamp(skb); 787 __net_timestamp(skb);
788 788
789 tstamp->start = ktime_to_ns(skb->tstamp); 789 tstamp->start = ktime_to_ns(skb->tstamp);
@@ -1338,7 +1338,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
1338 if (skb->nfct) 1338 if (skb->nfct)
1339 goto out; 1339 goto out;
1340 } 1340 }
1341 1341repeat:
1342 ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, 1342 ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
1343 l3proto, l4proto, &set_reply, &ctinfo); 1343 l3proto, l4proto, &set_reply, &ctinfo);
1344 if (!ct) { 1344 if (!ct) {
@@ -1370,6 +1370,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
1370 NF_CT_STAT_INC_ATOMIC(net, invalid); 1370 NF_CT_STAT_INC_ATOMIC(net, invalid);
1371 if (ret == -NF_DROP) 1371 if (ret == -NF_DROP)
1372 NF_CT_STAT_INC_ATOMIC(net, drop); 1372 NF_CT_STAT_INC_ATOMIC(net, drop);
1373 /* Special case: TCP tracker reports an attempt to reopen a
1374 * closed/aborted connection. We have to go back and create a
1375 * fresh conntrack.
1376 */
1377 if (ret == -NF_REPEAT)
1378 goto repeat;
1373 ret = -ret; 1379 ret = -ret;
1374 goto out; 1380 goto out;
1375 } 1381 }
@@ -1377,15 +1383,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
1377 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) 1383 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
1378 nf_conntrack_event_cache(IPCT_REPLY, ct); 1384 nf_conntrack_event_cache(IPCT_REPLY, ct);
1379out: 1385out:
1380 if (tmpl) { 1386 if (tmpl)
1381 /* Special case: we have to repeat this hook, assign the 1387 nf_ct_put(tmpl);
1382 * template again to this packet. We assume that this packet
1383 * has no conntrack assigned. This is used by nf_ct_tcp. */
1384 if (ret == NF_REPEAT)
1385 skb->nfct = (struct nf_conntrack *)tmpl;
1386 else
1387 nf_ct_put(tmpl);
1388 }
1389 1388
1390 return ret; 1389 return ret;
1391} 1390}
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 8d2c7d8c666a..2d6ee1803415 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -125,6 +125,54 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
125} 125}
126EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); 126EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
127 127
128int nf_ct_netns_get(struct net *net, u8 nfproto)
129{
130 const struct nf_conntrack_l3proto *l3proto;
131 int ret;
132
133 might_sleep();
134
135 ret = nf_ct_l3proto_try_module_get(nfproto);
136 if (ret < 0)
137 return ret;
138
139 /* we already have a reference, can't fail */
140 rcu_read_lock();
141 l3proto = __nf_ct_l3proto_find(nfproto);
142 rcu_read_unlock();
143
144 if (!l3proto->net_ns_get)
145 return 0;
146
147 ret = l3proto->net_ns_get(net);
148 if (ret < 0)
149 nf_ct_l3proto_module_put(nfproto);
150
151 return ret;
152}
153EXPORT_SYMBOL_GPL(nf_ct_netns_get);
154
155void nf_ct_netns_put(struct net *net, u8 nfproto)
156{
157 const struct nf_conntrack_l3proto *l3proto;
158
159 might_sleep();
160
161 /* same as nf_conntrack_netns_get(), reference assumed */
162 rcu_read_lock();
163 l3proto = __nf_ct_l3proto_find(nfproto);
164 rcu_read_unlock();
165
166 if (WARN_ON(!l3proto))
167 return;
168
169 if (l3proto->net_ns_put)
170 l3proto->net_ns_put(net);
171
172 nf_ct_l3proto_module_put(nfproto);
173}
174EXPORT_SYMBOL_GPL(nf_ct_netns_put);
175
128struct nf_conntrack_l4proto * 176struct nf_conntrack_l4proto *
129nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num) 177nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
130{ 178{
@@ -190,20 +238,19 @@ out_unlock:
190} 238}
191EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); 239EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
192 240
241#ifdef CONFIG_SYSCTL
242extern unsigned int nf_conntrack_default_on;
243
193int nf_ct_l3proto_pernet_register(struct net *net, 244int nf_ct_l3proto_pernet_register(struct net *net,
194 struct nf_conntrack_l3proto *proto) 245 struct nf_conntrack_l3proto *proto)
195{ 246{
196 int ret; 247 if (nf_conntrack_default_on == 0)
197 248 return 0;
198 if (proto->init_net) {
199 ret = proto->init_net(net);
200 if (ret < 0)
201 return ret;
202 }
203 249
204 return 0; 250 return proto->net_ns_get ? proto->net_ns_get(net) : 0;
205} 251}
206EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); 252EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
253#endif
207 254
208void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) 255void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
209{ 256{
@@ -224,6 +271,16 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
224void nf_ct_l3proto_pernet_unregister(struct net *net, 271void nf_ct_l3proto_pernet_unregister(struct net *net,
225 struct nf_conntrack_l3proto *proto) 272 struct nf_conntrack_l3proto *proto)
226{ 273{
274 /*
275 * nf_conntrack_default_on *might* have registered hooks.
276 * ->net_ns_put must cope with more puts() than get(), i.e.
277 * if nf_conntrack_default_on was 0 at time of
278 * nf_ct_l3proto_pernet_register invocation this net_ns_put()
279 * should be a noop.
280 */
281 if (proto->net_ns_put)
282 proto->net_ns_put(net);
283
227 /* Remove all contrack entries for this protocol */ 284 /* Remove all contrack entries for this protocol */
228 nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0); 285 nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
229} 286}
@@ -281,15 +338,15 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net,
281 338
282/* FIXME: Allow NULL functions and sub in pointers to generic for 339/* FIXME: Allow NULL functions and sub in pointers to generic for
283 them. --RR */ 340 them. --RR */
284int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto) 341int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
285{ 342{
286 int ret = 0; 343 int ret = 0;
287 344
288 if (l4proto->l3proto >= PF_MAX) 345 if (l4proto->l3proto >= PF_MAX)
289 return -EBUSY; 346 return -EBUSY;
290 347
291 if ((l4proto->to_nlattr && !l4proto->nlattr_size) 348 if ((l4proto->to_nlattr && !l4proto->nlattr_size) ||
292 || (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size)) 349 (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
293 return -EINVAL; 350 return -EINVAL;
294 351
295 mutex_lock(&nf_ct_proto_mutex); 352 mutex_lock(&nf_ct_proto_mutex);
@@ -307,7 +364,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto)
307 } 364 }
308 365
309 for (i = 0; i < MAX_NF_CT_PROTO; i++) 366 for (i = 0; i < MAX_NF_CT_PROTO; i++)
310 RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic); 367 RCU_INIT_POINTER(proto_array[i],
368 &nf_conntrack_l4proto_generic);
311 369
312 /* Before making proto_array visible to lockless readers, 370 /* Before making proto_array visible to lockless readers,
313 * we must make sure its content is committed to memory. 371 * we must make sure its content is committed to memory.
@@ -335,10 +393,10 @@ out_unlock:
335 mutex_unlock(&nf_ct_proto_mutex); 393 mutex_unlock(&nf_ct_proto_mutex);
336 return ret; 394 return ret;
337} 395}
338EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); 396EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
339 397
340int nf_ct_l4proto_pernet_register(struct net *net, 398int nf_ct_l4proto_pernet_register_one(struct net *net,
341 struct nf_conntrack_l4proto *l4proto) 399 struct nf_conntrack_l4proto *l4proto)
342{ 400{
343 int ret = 0; 401 int ret = 0;
344 struct nf_proto_net *pn = NULL; 402 struct nf_proto_net *pn = NULL;
@@ -361,9 +419,9 @@ int nf_ct_l4proto_pernet_register(struct net *net,
361out: 419out:
362 return ret; 420 return ret;
363} 421}
364EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); 422EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
365 423
366void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) 424void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
367{ 425{
368 BUG_ON(l4proto->l3proto >= PF_MAX); 426 BUG_ON(l4proto->l3proto >= PF_MAX);
369 427
@@ -378,10 +436,10 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
378 436
379 synchronize_rcu(); 437 synchronize_rcu();
380} 438}
381EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); 439EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
382 440
383void nf_ct_l4proto_pernet_unregister(struct net *net, 441void nf_ct_l4proto_pernet_unregister_one(struct net *net,
384 struct nf_conntrack_l4proto *l4proto) 442 struct nf_conntrack_l4proto *l4proto)
385{ 443{
386 struct nf_proto_net *pn = NULL; 444 struct nf_proto_net *pn = NULL;
387 445
@@ -395,6 +453,66 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
395 /* Remove all contrack entries for this protocol */ 453 /* Remove all contrack entries for this protocol */
396 nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0); 454 nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
397} 455}
456EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
457
458int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
459 unsigned int num_proto)
460{
461 int ret = -EINVAL, ver;
462 unsigned int i;
463
464 for (i = 0; i < num_proto; i++) {
465 ret = nf_ct_l4proto_register_one(l4proto[i]);
466 if (ret < 0)
467 break;
468 }
469 if (i != num_proto) {
470 ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
471 pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n",
472 ver, l4proto[i]->name, ver);
473 nf_ct_l4proto_unregister(l4proto, i);
474 }
475 return ret;
476}
477EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
478
479int nf_ct_l4proto_pernet_register(struct net *net,
480 struct nf_conntrack_l4proto *l4proto[],
481 unsigned int num_proto)
482{
483 int ret = -EINVAL;
484 unsigned int i;
485
486 for (i = 0; i < num_proto; i++) {
487 ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]);
488 if (ret < 0)
489 break;
490 }
491 if (i != num_proto) {
492 pr_err("nf_conntrack_%s%d: pernet registration failed\n",
493 l4proto[i]->name,
494 l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
495 nf_ct_l4proto_pernet_unregister(net, l4proto, i);
496 }
497 return ret;
498}
499EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
500
501void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
502 unsigned int num_proto)
503{
504 while (num_proto-- != 0)
505 nf_ct_l4proto_unregister_one(l4proto[num_proto]);
506}
507EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
508
509void nf_ct_l4proto_pernet_unregister(struct net *net,
510 struct nf_conntrack_l4proto *l4proto[],
511 unsigned int num_proto)
512{
513 while (num_proto-- != 0)
514 nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
515}
398EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); 516EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
399 517
400int nf_conntrack_proto_pernet_init(struct net *net) 518int nf_conntrack_proto_pernet_init(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index a45bee52dccc..b68ce6ac13b3 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -9,7 +9,6 @@
9 * 9 *
10 */ 10 */
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/module.h>
13#include <linux/init.h> 12#include <linux/init.h>
14#include <linux/sysctl.h> 13#include <linux/sysctl.h>
15#include <linux/spinlock.h> 14#include <linux/spinlock.h>
@@ -384,17 +383,9 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
384 }, 383 },
385}; 384};
386 385
387/* this module per-net specifics */ 386static inline struct nf_dccp_net *dccp_pernet(struct net *net)
388static int dccp_net_id __read_mostly;
389struct dccp_net {
390 struct nf_proto_net pn;
391 int dccp_loose;
392 unsigned int dccp_timeout[CT_DCCP_MAX + 1];
393};
394
395static inline struct dccp_net *dccp_pernet(struct net *net)
396{ 387{
397 return net_generic(net, dccp_net_id); 388 return &net->ct.nf_ct_proto.dccp;
398} 389}
399 390
400static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 391static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
@@ -424,7 +415,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
424 unsigned int dataoff, unsigned int *timeouts) 415 unsigned int dataoff, unsigned int *timeouts)
425{ 416{
426 struct net *net = nf_ct_net(ct); 417 struct net *net = nf_ct_net(ct);
427 struct dccp_net *dn; 418 struct nf_dccp_net *dn;
428 struct dccp_hdr _dh, *dh; 419 struct dccp_hdr _dh, *dh;
429 const char *msg; 420 const char *msg;
430 u_int8_t state; 421 u_int8_t state;
@@ -719,7 +710,7 @@ static int dccp_nlattr_size(void)
719static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], 710static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
720 struct net *net, void *data) 711 struct net *net, void *data)
721{ 712{
722 struct dccp_net *dn = dccp_pernet(net); 713 struct nf_dccp_net *dn = dccp_pernet(net);
723 unsigned int *timeouts = data; 714 unsigned int *timeouts = data;
724 int i; 715 int i;
725 716
@@ -820,7 +811,7 @@ static struct ctl_table dccp_sysctl_table[] = {
820#endif /* CONFIG_SYSCTL */ 811#endif /* CONFIG_SYSCTL */
821 812
822static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, 813static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
823 struct dccp_net *dn) 814 struct nf_dccp_net *dn)
824{ 815{
825#ifdef CONFIG_SYSCTL 816#ifdef CONFIG_SYSCTL
826 if (pn->ctl_table) 817 if (pn->ctl_table)
@@ -850,7 +841,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
850 841
851static int dccp_init_net(struct net *net, u_int16_t proto) 842static int dccp_init_net(struct net *net, u_int16_t proto)
852{ 843{
853 struct dccp_net *dn = dccp_pernet(net); 844 struct nf_dccp_net *dn = dccp_pernet(net);
854 struct nf_proto_net *pn = &dn->pn; 845 struct nf_proto_net *pn = &dn->pn;
855 846
856 if (!pn->users) { 847 if (!pn->users) {
@@ -868,7 +859,7 @@ static int dccp_init_net(struct net *net, u_int16_t proto)
868 return dccp_kmemdup_sysctl_table(net, pn, dn); 859 return dccp_kmemdup_sysctl_table(net, pn, dn);
869} 860}
870 861
871static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { 862struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
872 .l3proto = AF_INET, 863 .l3proto = AF_INET,
873 .l4proto = IPPROTO_DCCP, 864 .l4proto = IPPROTO_DCCP,
874 .name = "dccp", 865 .name = "dccp",
@@ -898,11 +889,11 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
898 .nla_policy = dccp_timeout_nla_policy, 889 .nla_policy = dccp_timeout_nla_policy,
899 }, 890 },
900#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 891#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
901 .net_id = &dccp_net_id,
902 .init_net = dccp_init_net, 892 .init_net = dccp_init_net,
903}; 893};
894EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
904 895
905static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { 896struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
906 .l3proto = AF_INET6, 897 .l3proto = AF_INET6,
907 .l4proto = IPPROTO_DCCP, 898 .l4proto = IPPROTO_DCCP,
908 .name = "dccp", 899 .name = "dccp",
@@ -932,78 +923,6 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
932 .nla_policy = dccp_timeout_nla_policy, 923 .nla_policy = dccp_timeout_nla_policy,
933 }, 924 },
934#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 925#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
935 .net_id = &dccp_net_id,
936 .init_net = dccp_init_net, 926 .init_net = dccp_init_net,
937}; 927};
938 928EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp6);
939static __net_init int dccp_net_init(struct net *net)
940{
941 int ret = 0;
942 ret = nf_ct_l4proto_pernet_register(net, &dccp_proto4);
943 if (ret < 0) {
944 pr_err("nf_conntrack_dccp4: pernet registration failed.\n");
945 goto out;
946 }
947 ret = nf_ct_l4proto_pernet_register(net, &dccp_proto6);
948 if (ret < 0) {
949 pr_err("nf_conntrack_dccp6: pernet registration failed.\n");
950 goto cleanup_dccp4;
951 }
952 return 0;
953cleanup_dccp4:
954 nf_ct_l4proto_pernet_unregister(net, &dccp_proto4);
955out:
956 return ret;
957}
958
959static __net_exit void dccp_net_exit(struct net *net)
960{
961 nf_ct_l4proto_pernet_unregister(net, &dccp_proto6);
962 nf_ct_l4proto_pernet_unregister(net, &dccp_proto4);
963}
964
965static struct pernet_operations dccp_net_ops = {
966 .init = dccp_net_init,
967 .exit = dccp_net_exit,
968 .id = &dccp_net_id,
969 .size = sizeof(struct dccp_net),
970};
971
972static int __init nf_conntrack_proto_dccp_init(void)
973{
974 int ret;
975
976 ret = register_pernet_subsys(&dccp_net_ops);
977 if (ret < 0)
978 goto out_pernet;
979
980 ret = nf_ct_l4proto_register(&dccp_proto4);
981 if (ret < 0)
982 goto out_dccp4;
983
984 ret = nf_ct_l4proto_register(&dccp_proto6);
985 if (ret < 0)
986 goto out_dccp6;
987
988 return 0;
989out_dccp6:
990 nf_ct_l4proto_unregister(&dccp_proto4);
991out_dccp4:
992 unregister_pernet_subsys(&dccp_net_ops);
993out_pernet:
994 return ret;
995}
996
997static void __exit nf_conntrack_proto_dccp_fini(void)
998{
999 nf_ct_l4proto_unregister(&dccp_proto6);
1000 nf_ct_l4proto_unregister(&dccp_proto4);
1001 unregister_pernet_subsys(&dccp_net_ops);
1002}
1003
1004module_init(nf_conntrack_proto_dccp_init);
1005module_exit(nf_conntrack_proto_dccp_fini);
1006
1007MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
1008MODULE_DESCRIPTION("DCCP connection tracking protocol helper");
1009MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9a715f88b2f1..87bb40a3feb5 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -53,7 +53,7 @@ static unsigned int gre_timeouts[GRE_CT_MAX] = {
53 [GRE_CT_REPLIED] = 180*HZ, 53 [GRE_CT_REPLIED] = 180*HZ,
54}; 54};
55 55
56static int proto_gre_net_id __read_mostly; 56static unsigned int proto_gre_net_id __read_mostly;
57struct netns_proto_gre { 57struct netns_proto_gre {
58 struct nf_proto_net nf; 58 struct nf_proto_net nf;
59 rwlock_t keymap_lock; 59 rwlock_t keymap_lock;
@@ -396,7 +396,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
396static int proto_gre_net_init(struct net *net) 396static int proto_gre_net_init(struct net *net)
397{ 397{
398 int ret = 0; 398 int ret = 0;
399 ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4); 399
400 ret = nf_ct_l4proto_pernet_register_one(net,
401 &nf_conntrack_l4proto_gre4);
400 if (ret < 0) 402 if (ret < 0)
401 pr_err("nf_conntrack_gre4: pernet registration failed.\n"); 403 pr_err("nf_conntrack_gre4: pernet registration failed.\n");
402 return ret; 404 return ret;
@@ -404,7 +406,7 @@ static int proto_gre_net_init(struct net *net)
404 406
405static void proto_gre_net_exit(struct net *net) 407static void proto_gre_net_exit(struct net *net)
406{ 408{
407 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4); 409 nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4);
408 nf_ct_gre_keymap_flush(net); 410 nf_ct_gre_keymap_flush(net);
409} 411}
410 412
@@ -422,8 +424,7 @@ static int __init nf_ct_proto_gre_init(void)
422 ret = register_pernet_subsys(&proto_gre_net_ops); 424 ret = register_pernet_subsys(&proto_gre_net_ops);
423 if (ret < 0) 425 if (ret < 0)
424 goto out_pernet; 426 goto out_pernet;
425 427 ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4);
426 ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4);
427 if (ret < 0) 428 if (ret < 0)
428 goto out_gre4; 429 goto out_gre4;
429 430
@@ -436,7 +437,7 @@ out_pernet:
436 437
437static void __exit nf_ct_proto_gre_fini(void) 438static void __exit nf_ct_proto_gre_fini(void)
438{ 439{
439 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); 440 nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4);
440 unregister_pernet_subsys(&proto_gre_net_ops); 441 unregister_pernet_subsys(&proto_gre_net_ops);
441} 442}
442 443
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 982ea62606c7..a0efde38da44 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -15,7 +15,6 @@
15#include <linux/types.h> 15#include <linux/types.h>
16#include <linux/timer.h> 16#include <linux/timer.h>
17#include <linux/netfilter.h> 17#include <linux/netfilter.h>
18#include <linux/module.h>
19#include <linux/in.h> 18#include <linux/in.h>
20#include <linux/ip.h> 19#include <linux/ip.h>
21#include <linux/sctp.h> 20#include <linux/sctp.h>
@@ -144,15 +143,9 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
144 } 143 }
145}; 144};
146 145
147static int sctp_net_id __read_mostly; 146static inline struct nf_sctp_net *sctp_pernet(struct net *net)
148struct sctp_net {
149 struct nf_proto_net pn;
150 unsigned int timeouts[SCTP_CONNTRACK_MAX];
151};
152
153static inline struct sctp_net *sctp_pernet(struct net *net)
154{ 147{
155 return net_generic(net, sctp_net_id); 148 return &net->ct.nf_ct_proto.sctp;
156} 149}
157 150
158static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 151static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
@@ -600,7 +593,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[],
600 struct net *net, void *data) 593 struct net *net, void *data)
601{ 594{
602 unsigned int *timeouts = data; 595 unsigned int *timeouts = data;
603 struct sctp_net *sn = sctp_pernet(net); 596 struct nf_sctp_net *sn = sctp_pernet(net);
604 int i; 597 int i;
605 598
606 /* set default SCTP timeouts. */ 599 /* set default SCTP timeouts. */
@@ -708,7 +701,7 @@ static struct ctl_table sctp_sysctl_table[] = {
708#endif 701#endif
709 702
710static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, 703static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
711 struct sctp_net *sn) 704 struct nf_sctp_net *sn)
712{ 705{
713#ifdef CONFIG_SYSCTL 706#ifdef CONFIG_SYSCTL
714 if (pn->ctl_table) 707 if (pn->ctl_table)
@@ -735,7 +728,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
735 728
736static int sctp_init_net(struct net *net, u_int16_t proto) 729static int sctp_init_net(struct net *net, u_int16_t proto)
737{ 730{
738 struct sctp_net *sn = sctp_pernet(net); 731 struct nf_sctp_net *sn = sctp_pernet(net);
739 struct nf_proto_net *pn = &sn->pn; 732 struct nf_proto_net *pn = &sn->pn;
740 733
741 if (!pn->users) { 734 if (!pn->users) {
@@ -748,7 +741,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto)
748 return sctp_kmemdup_sysctl_table(pn, sn); 741 return sctp_kmemdup_sysctl_table(pn, sn);
749} 742}
750 743
751static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { 744struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
752 .l3proto = PF_INET, 745 .l3proto = PF_INET,
753 .l4proto = IPPROTO_SCTP, 746 .l4proto = IPPROTO_SCTP,
754 .name = "sctp", 747 .name = "sctp",
@@ -778,11 +771,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
778 .nla_policy = sctp_timeout_nla_policy, 771 .nla_policy = sctp_timeout_nla_policy,
779 }, 772 },
780#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 773#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
781 .net_id = &sctp_net_id,
782 .init_net = sctp_init_net, 774 .init_net = sctp_init_net,
783}; 775};
776EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
784 777
785static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { 778struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
786 .l3proto = PF_INET6, 779 .l3proto = PF_INET6,
787 .l4proto = IPPROTO_SCTP, 780 .l4proto = IPPROTO_SCTP,
788 .name = "sctp", 781 .name = "sctp",
@@ -812,81 +805,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
812 }, 805 },
813#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 806#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
814#endif 807#endif
815 .net_id = &sctp_net_id,
816 .init_net = sctp_init_net, 808 .init_net = sctp_init_net,
817}; 809};
818 810EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp6);
819static int sctp_net_init(struct net *net)
820{
821 int ret = 0;
822
823 ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp4);
824 if (ret < 0) {
825 pr_err("nf_conntrack_sctp4: pernet registration failed.\n");
826 goto out;
827 }
828 ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp6);
829 if (ret < 0) {
830 pr_err("nf_conntrack_sctp6: pernet registration failed.\n");
831 goto cleanup_sctp4;
832 }
833 return 0;
834
835cleanup_sctp4:
836 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4);
837out:
838 return ret;
839}
840
841static void sctp_net_exit(struct net *net)
842{
843 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp6);
844 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4);
845}
846
847static struct pernet_operations sctp_net_ops = {
848 .init = sctp_net_init,
849 .exit = sctp_net_exit,
850 .id = &sctp_net_id,
851 .size = sizeof(struct sctp_net),
852};
853
854static int __init nf_conntrack_proto_sctp_init(void)
855{
856 int ret;
857
858 ret = register_pernet_subsys(&sctp_net_ops);
859 if (ret < 0)
860 goto out_pernet;
861
862 ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4);
863 if (ret < 0)
864 goto out_sctp4;
865
866 ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp6);
867 if (ret < 0)
868 goto out_sctp6;
869
870 return 0;
871out_sctp6:
872 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
873out_sctp4:
874 unregister_pernet_subsys(&sctp_net_ops);
875out_pernet:
876 return ret;
877}
878
879static void __exit nf_conntrack_proto_sctp_fini(void)
880{
881 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
882 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
883 unregister_pernet_subsys(&sctp_net_ops);
884}
885
886module_init(nf_conntrack_proto_sctp_init);
887module_exit(nf_conntrack_proto_sctp_fini);
888
889MODULE_LICENSE("GPL");
890MODULE_AUTHOR("Kiran Kumar Immidi");
891MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
892MODULE_ALIAS("ip_conntrack_proto_sctp");
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 029206e8dec4..c35f7bf05d8c 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -9,7 +9,6 @@
9 9
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/timer.h> 11#include <linux/timer.h>
12#include <linux/module.h>
13#include <linux/udp.h> 12#include <linux/udp.h>
14#include <linux/seq_file.h> 13#include <linux/seq_file.h>
15#include <linux/skbuff.h> 14#include <linux/skbuff.h>
@@ -24,26 +23,14 @@
24#include <net/netfilter/nf_conntrack_ecache.h> 23#include <net/netfilter/nf_conntrack_ecache.h>
25#include <net/netfilter/nf_log.h> 24#include <net/netfilter/nf_log.h>
26 25
27enum udplite_conntrack {
28 UDPLITE_CT_UNREPLIED,
29 UDPLITE_CT_REPLIED,
30 UDPLITE_CT_MAX
31};
32
33static unsigned int udplite_timeouts[UDPLITE_CT_MAX] = { 26static unsigned int udplite_timeouts[UDPLITE_CT_MAX] = {
34 [UDPLITE_CT_UNREPLIED] = 30*HZ, 27 [UDPLITE_CT_UNREPLIED] = 30*HZ,
35 [UDPLITE_CT_REPLIED] = 180*HZ, 28 [UDPLITE_CT_REPLIED] = 180*HZ,
36}; 29};
37 30
38static int udplite_net_id __read_mostly; 31static inline struct nf_udplite_net *udplite_pernet(struct net *net)
39struct udplite_net {
40 struct nf_proto_net pn;
41 unsigned int timeouts[UDPLITE_CT_MAX];
42};
43
44static inline struct udplite_net *udplite_pernet(struct net *net)
45{ 32{
46 return net_generic(net, udplite_net_id); 33 return &net->ct.nf_ct_proto.udplite;
47} 34}
48 35
49static bool udplite_pkt_to_tuple(const struct sk_buff *skb, 36static bool udplite_pkt_to_tuple(const struct sk_buff *skb,
@@ -178,7 +165,7 @@ static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[],
178 struct net *net, void *data) 165 struct net *net, void *data)
179{ 166{
180 unsigned int *timeouts = data; 167 unsigned int *timeouts = data;
181 struct udplite_net *un = udplite_pernet(net); 168 struct nf_udplite_net *un = udplite_pernet(net);
182 169
183 /* set default timeouts for UDPlite. */ 170 /* set default timeouts for UDPlite. */
184 timeouts[UDPLITE_CT_UNREPLIED] = un->timeouts[UDPLITE_CT_UNREPLIED]; 171 timeouts[UDPLITE_CT_UNREPLIED] = un->timeouts[UDPLITE_CT_UNREPLIED];
@@ -237,7 +224,7 @@ static struct ctl_table udplite_sysctl_table[] = {
237#endif /* CONFIG_SYSCTL */ 224#endif /* CONFIG_SYSCTL */
238 225
239static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn, 226static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn,
240 struct udplite_net *un) 227 struct nf_udplite_net *un)
241{ 228{
242#ifdef CONFIG_SYSCTL 229#ifdef CONFIG_SYSCTL
243 if (pn->ctl_table) 230 if (pn->ctl_table)
@@ -257,7 +244,7 @@ static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn,
257 244
258static int udplite_init_net(struct net *net, u_int16_t proto) 245static int udplite_init_net(struct net *net, u_int16_t proto)
259{ 246{
260 struct udplite_net *un = udplite_pernet(net); 247 struct nf_udplite_net *un = udplite_pernet(net);
261 struct nf_proto_net *pn = &un->pn; 248 struct nf_proto_net *pn = &un->pn;
262 249
263 if (!pn->users) { 250 if (!pn->users) {
@@ -270,7 +257,7 @@ static int udplite_init_net(struct net *net, u_int16_t proto)
270 return udplite_kmemdup_sysctl_table(pn, un); 257 return udplite_kmemdup_sysctl_table(pn, un);
271} 258}
272 259
273static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = 260struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
274{ 261{
275 .l3proto = PF_INET, 262 .l3proto = PF_INET,
276 .l4proto = IPPROTO_UDPLITE, 263 .l4proto = IPPROTO_UDPLITE,
@@ -299,11 +286,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
299 .nla_policy = udplite_timeout_nla_policy, 286 .nla_policy = udplite_timeout_nla_policy,
300 }, 287 },
301#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 288#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
302 .net_id = &udplite_net_id,
303 .init_net = udplite_init_net, 289 .init_net = udplite_init_net,
304}; 290};
291EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4);
305 292
306static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = 293struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
307{ 294{
308 .l3proto = PF_INET6, 295 .l3proto = PF_INET6,
309 .l4proto = IPPROTO_UDPLITE, 296 .l4proto = IPPROTO_UDPLITE,
@@ -332,78 +319,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
332 .nla_policy = udplite_timeout_nla_policy, 319 .nla_policy = udplite_timeout_nla_policy,
333 }, 320 },
334#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 321#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
335 .net_id = &udplite_net_id,
336 .init_net = udplite_init_net, 322 .init_net = udplite_init_net,
337}; 323};
338 324EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6);
339static int udplite_net_init(struct net *net)
340{
341 int ret = 0;
342
343 ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite4);
344 if (ret < 0) {
345 pr_err("nf_conntrack_udplite4: pernet registration failed.\n");
346 goto out;
347 }
348 ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite6);
349 if (ret < 0) {
350 pr_err("nf_conntrack_udplite6: pernet registration failed.\n");
351 goto cleanup_udplite4;
352 }
353 return 0;
354
355cleanup_udplite4:
356 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4);
357out:
358 return ret;
359}
360
361static void udplite_net_exit(struct net *net)
362{
363 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite6);
364 nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4);
365}
366
367static struct pernet_operations udplite_net_ops = {
368 .init = udplite_net_init,
369 .exit = udplite_net_exit,
370 .id = &udplite_net_id,
371 .size = sizeof(struct udplite_net),
372};
373
374static int __init nf_conntrack_proto_udplite_init(void)
375{
376 int ret;
377
378 ret = register_pernet_subsys(&udplite_net_ops);
379 if (ret < 0)
380 goto out_pernet;
381
382 ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4);
383 if (ret < 0)
384 goto out_udplite4;
385
386 ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite6);
387 if (ret < 0)
388 goto out_udplite6;
389
390 return 0;
391out_udplite6:
392 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
393out_udplite4:
394 unregister_pernet_subsys(&udplite_net_ops);
395out_pernet:
396 return ret;
397}
398
399static void __exit nf_conntrack_proto_udplite_exit(void)
400{
401 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6);
402 nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
403 unregister_pernet_subsys(&udplite_net_ops);
404}
405
406module_init(nf_conntrack_proto_udplite_init);
407module_exit(nf_conntrack_proto_udplite_exit);
408
409MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5f446cd9f3fd..d009ae663453 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -452,6 +452,9 @@ static int log_invalid_proto_max __read_mostly = 255;
452/* size the user *wants to set */ 452/* size the user *wants to set */
453static unsigned int nf_conntrack_htable_size_user __read_mostly; 453static unsigned int nf_conntrack_htable_size_user __read_mostly;
454 454
455extern unsigned int nf_conntrack_default_on;
456unsigned int nf_conntrack_default_on __read_mostly = 1;
457
455static int 458static int
456nf_conntrack_hash_sysctl(struct ctl_table *table, int write, 459nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
457 void __user *buffer, size_t *lenp, loff_t *ppos) 460 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -517,6 +520,13 @@ static struct ctl_table nf_ct_sysctl_table[] = {
517 .mode = 0644, 520 .mode = 0644,
518 .proc_handler = proc_dointvec, 521 .proc_handler = proc_dointvec,
519 }, 522 },
523 {
524 .procname = "nf_conntrack_default_on",
525 .data = &nf_conntrack_default_on,
526 .maxlen = sizeof(unsigned int),
527 .mode = 0644,
528 .proc_handler = proc_dointvec,
529 },
520 { } 530 { }
521}; 531};
522 532
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index 7ec69723940f..c9d7f95768ab 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -14,24 +14,41 @@
14#include <linux/netfilter/nf_tables.h> 14#include <linux/netfilter/nf_tables.h>
15#include <net/netfilter/nf_tables.h> 15#include <net/netfilter/nf_tables.h>
16 16
17static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
18{
19 if (skb_mac_header_was_set(skb))
20 skb_push(skb, skb->mac_len);
21
22 skb->dev = dev;
23 dev_queue_xmit(skb);
24}
25
26void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif)
27{
28 struct net_device *dev;
29
30 dev = dev_get_by_index_rcu(nft_net(pkt), oif);
31 if (!dev) {
32 kfree_skb(pkt->skb);
33 return;
34 }
35
36 nf_do_netdev_egress(pkt->skb, dev);
37}
38EXPORT_SYMBOL_GPL(nf_fwd_netdev_egress);
39
17void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) 40void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
18{ 41{
19 struct net_device *dev; 42 struct net_device *dev;
20 struct sk_buff *skb; 43 struct sk_buff *skb;
21 44
22 dev = dev_get_by_index_rcu(pkt->net, oif); 45 dev = dev_get_by_index_rcu(nft_net(pkt), oif);
23 if (dev == NULL) 46 if (dev == NULL)
24 return; 47 return;
25 48
26 skb = skb_clone(pkt->skb, GFP_ATOMIC); 49 skb = skb_clone(pkt->skb, GFP_ATOMIC);
27 if (skb == NULL) 50 if (skb)
28 return; 51 nf_do_netdev_egress(skb, dev);
29
30 if (skb_mac_header_was_set(skb))
31 skb_push(skb, skb->mac_len);
32
33 skb->dev = dev;
34 dev_queue_xmit(skb);
35} 52}
36EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); 53EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
37 54
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 9fdb655f85bc..c46d214d5323 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -11,11 +11,6 @@
11#define NFDEBUG(format, args...) 11#define NFDEBUG(format, args...)
12#endif 12#endif
13 13
14
15/* core.c */
16unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state,
17 struct nf_hook_entry **entryp);
18
19/* nf_queue.c */ 14/* nf_queue.c */
20int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, 15int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
21 struct nf_hook_entry **entryp, unsigned int verdict); 16 struct nf_hook_entry **entryp, unsigned int verdict);
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index 119fe1cb1ea9..dc61399e30be 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -175,6 +175,34 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
175} 175}
176EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); 176EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
177 177
178/* bridge and netdev logging families share this code. */
179void nf_log_l2packet(struct net *net, u_int8_t pf,
180 __be16 protocol,
181 unsigned int hooknum,
182 const struct sk_buff *skb,
183 const struct net_device *in,
184 const struct net_device *out,
185 const struct nf_loginfo *loginfo,
186 const char *prefix)
187{
188 switch (protocol) {
189 case htons(ETH_P_IP):
190 nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out,
191 loginfo, "%s", prefix);
192 break;
193 case htons(ETH_P_IPV6):
194 nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out,
195 loginfo, "%s", prefix);
196 break;
197 case htons(ETH_P_ARP):
198 case htons(ETH_P_RARP):
199 nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out,
200 loginfo, "%s", prefix);
201 break;
202 }
203}
204EXPORT_SYMBOL_GPL(nf_log_l2packet);
205
178static int __init nf_log_common_init(void) 206static int __init nf_log_common_init(void)
179{ 207{
180 return 0; 208 return 0;
diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c
new file mode 100644
index 000000000000..350eb147754d
--- /dev/null
+++ b/net/netfilter/nf_log_netdev.c
@@ -0,0 +1,81 @@
1/*
2 * (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/spinlock.h>
11#include <linux/skbuff.h>
12#include <linux/ip.h>
13#include <net/route.h>
14
15#include <linux/netfilter.h>
16#include <net/netfilter/nf_log.h>
17
18static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
19 unsigned int hooknum,
20 const struct sk_buff *skb,
21 const struct net_device *in,
22 const struct net_device *out,
23 const struct nf_loginfo *loginfo,
24 const char *prefix)
25{
26 nf_log_l2packet(net, pf, skb->protocol, hooknum, skb, in, out,
27 loginfo, prefix);
28}
29
30static struct nf_logger nf_netdev_logger __read_mostly = {
31 .name = "nf_log_netdev",
32 .type = NF_LOG_TYPE_LOG,
33 .logfn = nf_log_netdev_packet,
34 .me = THIS_MODULE,
35};
36
37static int __net_init nf_log_netdev_net_init(struct net *net)
38{
39 return nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger);
40}
41
42static void __net_exit nf_log_netdev_net_exit(struct net *net)
43{
44 nf_log_unset(net, &nf_netdev_logger);
45}
46
47static struct pernet_operations nf_log_netdev_net_ops = {
48 .init = nf_log_netdev_net_init,
49 .exit = nf_log_netdev_net_exit,
50};
51
52static int __init nf_log_netdev_init(void)
53{
54 int ret;
55
56 /* Request to load the real packet loggers. */
57 nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG);
58 nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG);
59 nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG);
60
61 ret = register_pernet_subsys(&nf_log_netdev_net_ops);
62 if (ret < 0)
63 return ret;
64
65 nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger);
66 return 0;
67}
68
69static void __exit nf_log_netdev_exit(void)
70{
71 unregister_pernet_subsys(&nf_log_netdev_net_ops);
72 nf_log_unregister(&nf_netdev_logger);
73}
74
75module_init(nf_log_netdev_init);
76module_exit(nf_log_netdev_exit);
77
78MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
79MODULE_DESCRIPTION("Netfilter netdev packet logging");
80MODULE_LICENSE("GPL");
81MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 5b9c884a452e..94b14c5a8b17 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -682,6 +682,18 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
682 &nf_nat_l4proto_tcp); 682 &nf_nat_l4proto_tcp);
683 RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP], 683 RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP],
684 &nf_nat_l4proto_udp); 684 &nf_nat_l4proto_udp);
685#ifdef CONFIG_NF_NAT_PROTO_DCCP
686 RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP],
687 &nf_nat_l4proto_dccp);
688#endif
689#ifdef CONFIG_NF_NAT_PROTO_SCTP
690 RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP],
691 &nf_nat_l4proto_sctp);
692#endif
693#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
694 RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDPLITE],
695 &nf_nat_l4proto_udplite);
696#endif
685 mutex_unlock(&nf_nat_proto_mutex); 697 mutex_unlock(&nf_nat_proto_mutex);
686 698
687 RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto); 699 RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index 15c47b246d0d..269fcd5dc34c 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -10,8 +10,6 @@
10 */ 10 */
11 11
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/skbuff.h> 13#include <linux/skbuff.h>
16#include <linux/dccp.h> 14#include <linux/dccp.h>
17 15
@@ -73,7 +71,7 @@ dccp_manip_pkt(struct sk_buff *skb,
73 return true; 71 return true;
74} 72}
75 73
76static const struct nf_nat_l4proto nf_nat_l4proto_dccp = { 74const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
77 .l4proto = IPPROTO_DCCP, 75 .l4proto = IPPROTO_DCCP,
78 .manip_pkt = dccp_manip_pkt, 76 .manip_pkt = dccp_manip_pkt,
79 .in_range = nf_nat_l4proto_in_range, 77 .in_range = nf_nat_l4proto_in_range,
@@ -82,35 +80,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
82 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, 80 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
83#endif 81#endif
84}; 82};
85
86static int __init nf_nat_proto_dccp_init(void)
87{
88 int err;
89
90 err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
91 if (err < 0)
92 goto err1;
93 err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
94 if (err < 0)
95 goto err2;
96 return 0;
97
98err2:
99 nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
100err1:
101 return err;
102}
103
104static void __exit nf_nat_proto_dccp_fini(void)
105{
106 nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
107 nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
108
109}
110
111module_init(nf_nat_proto_dccp_init);
112module_exit(nf_nat_proto_dccp_fini);
113
114MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
115MODULE_DESCRIPTION("DCCP NAT protocol helper");
116MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index cbc7ade1487b..31d358691af0 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -7,9 +7,7 @@
7 */ 7 */
8 8
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/sctp.h> 10#include <linux/sctp.h>
12#include <linux/module.h>
13#include <net/sctp/checksum.h> 11#include <net/sctp/checksum.h>
14 12
15#include <net/netfilter/nf_nat_l4proto.h> 13#include <net/netfilter/nf_nat_l4proto.h>
@@ -49,12 +47,15 @@ sctp_manip_pkt(struct sk_buff *skb,
49 hdr->dest = tuple->dst.u.sctp.port; 47 hdr->dest = tuple->dst.u.sctp.port;
50 } 48 }
51 49
52 hdr->checksum = sctp_compute_cksum(skb, hdroff); 50 if (skb->ip_summed != CHECKSUM_PARTIAL) {
51 hdr->checksum = sctp_compute_cksum(skb, hdroff);
52 skb->ip_summed = CHECKSUM_NONE;
53 }
53 54
54 return true; 55 return true;
55} 56}
56 57
57static const struct nf_nat_l4proto nf_nat_l4proto_sctp = { 58const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
58 .l4proto = IPPROTO_SCTP, 59 .l4proto = IPPROTO_SCTP,
59 .manip_pkt = sctp_manip_pkt, 60 .manip_pkt = sctp_manip_pkt,
60 .in_range = nf_nat_l4proto_in_range, 61 .in_range = nf_nat_l4proto_in_range,
@@ -63,34 +64,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
63 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, 64 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
64#endif 65#endif
65}; 66};
66
67static int __init nf_nat_proto_sctp_init(void)
68{
69 int err;
70
71 err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
72 if (err < 0)
73 goto err1;
74 err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
75 if (err < 0)
76 goto err2;
77 return 0;
78
79err2:
80 nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
81err1:
82 return err;
83}
84
85static void __exit nf_nat_proto_sctp_exit(void)
86{
87 nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
88 nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
89}
90
91module_init(nf_nat_proto_sctp_init);
92module_exit(nf_nat_proto_sctp_exit);
93
94MODULE_LICENSE("GPL");
95MODULE_DESCRIPTION("SCTP NAT protocol helper");
96MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c
index 58340c97bd83..366bfbfd82a1 100644
--- a/net/netfilter/nf_nat_proto_udplite.c
+++ b/net/netfilter/nf_nat_proto_udplite.c
@@ -8,11 +8,9 @@
8 */ 8 */
9 9
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/init.h>
12#include <linux/udp.h> 11#include <linux/udp.h>
13 12
14#include <linux/netfilter.h> 13#include <linux/netfilter.h>
15#include <linux/module.h>
16#include <net/netfilter/nf_nat.h> 14#include <net/netfilter/nf_nat.h>
17#include <net/netfilter/nf_nat_l3proto.h> 15#include <net/netfilter/nf_nat_l3proto.h>
18#include <net/netfilter/nf_nat_l4proto.h> 16#include <net/netfilter/nf_nat_l4proto.h>
@@ -64,7 +62,7 @@ udplite_manip_pkt(struct sk_buff *skb,
64 return true; 62 return true;
65} 63}
66 64
67static const struct nf_nat_l4proto nf_nat_l4proto_udplite = { 65const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
68 .l4proto = IPPROTO_UDPLITE, 66 .l4proto = IPPROTO_UDPLITE,
69 .manip_pkt = udplite_manip_pkt, 67 .manip_pkt = udplite_manip_pkt,
70 .in_range = nf_nat_l4proto_in_range, 68 .in_range = nf_nat_l4proto_in_range,
@@ -73,34 +71,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
73 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, 71 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
74#endif 72#endif
75}; 73};
76
77static int __init nf_nat_proto_udplite_init(void)
78{
79 int err;
80
81 err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
82 if (err < 0)
83 goto err1;
84 err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_udplite);
85 if (err < 0)
86 goto err2;
87 return 0;
88
89err2:
90 nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
91err1:
92 return err;
93}
94
95static void __exit nf_nat_proto_udplite_fini(void)
96{
97 nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_udplite);
98 nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
99}
100
101module_init(nf_nat_proto_udplite_init);
102module_exit(nf_nat_proto_udplite_fini);
103
104MODULE_LICENSE("GPL");
105MODULE_DESCRIPTION("UDP-Lite NAT protocol helper");
106MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 8f08d759844a..4a7662486f44 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -108,7 +108,7 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry)
108} 108}
109 109
110static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, 110static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
111 unsigned int queuenum) 111 struct nf_hook_entry *hook_entry, unsigned int queuenum)
112{ 112{
113 int status = -ENOENT; 113 int status = -ENOENT;
114 struct nf_queue_entry *entry = NULL; 114 struct nf_queue_entry *entry = NULL;
@@ -136,6 +136,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
136 *entry = (struct nf_queue_entry) { 136 *entry = (struct nf_queue_entry) {
137 .skb = skb, 137 .skb = skb,
138 .state = *state, 138 .state = *state,
139 .hook = hook_entry,
139 .size = sizeof(*entry) + afinfo->route_key_size, 140 .size = sizeof(*entry) + afinfo->route_key_size,
140 }; 141 };
141 142
@@ -163,8 +164,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
163 struct nf_hook_entry *entry = *entryp; 164 struct nf_hook_entry *entry = *entryp;
164 int ret; 165 int ret;
165 166
166 RCU_INIT_POINTER(state->hook_entries, entry); 167 ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS);
167 ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
168 if (ret < 0) { 168 if (ret < 0) {
169 if (ret == -ESRCH && 169 if (ret == -ESRCH &&
170 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) { 170 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
@@ -177,22 +177,38 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
177 return 0; 177 return 0;
178} 178}
179 179
180static unsigned int nf_iterate(struct sk_buff *skb,
181 struct nf_hook_state *state,
182 struct nf_hook_entry **entryp)
183{
184 unsigned int verdict;
185
186 do {
187repeat:
188 verdict = nf_hook_entry_hookfn((*entryp), skb, state);
189 if (verdict != NF_ACCEPT) {
190 if (verdict != NF_REPEAT)
191 return verdict;
192 goto repeat;
193 }
194 *entryp = rcu_dereference((*entryp)->next);
195 } while (*entryp);
196
197 return NF_ACCEPT;
198}
199
180void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) 200void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
181{ 201{
182 struct nf_hook_entry *hook_entry; 202 struct nf_hook_entry *hook_entry = entry->hook;
183 struct sk_buff *skb = entry->skb; 203 struct sk_buff *skb = entry->skb;
184 const struct nf_afinfo *afinfo; 204 const struct nf_afinfo *afinfo;
185 struct nf_hook_ops *elem;
186 int err; 205 int err;
187 206
188 hook_entry = rcu_dereference(entry->state.hook_entries);
189 elem = &hook_entry->ops;
190
191 nf_queue_entry_release_refs(entry); 207 nf_queue_entry_release_refs(entry);
192 208
193 /* Continue traversal iff userspace said ok... */ 209 /* Continue traversal iff userspace said ok... */
194 if (verdict == NF_REPEAT) 210 if (verdict == NF_REPEAT)
195 verdict = elem->hook(elem->priv, skb, &entry->state); 211 verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
196 212
197 if (verdict == NF_ACCEPT) { 213 if (verdict == NF_ACCEPT) {
198 afinfo = nf_get_afinfo(entry->state.pf); 214 afinfo = nf_get_afinfo(entry->state.pf);
@@ -200,8 +216,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
200 verdict = NF_DROP; 216 verdict = NF_DROP;
201 } 217 }
202 218
203 entry->state.thresh = INT_MIN;
204
205 if (verdict == NF_ACCEPT) { 219 if (verdict == NF_ACCEPT) {
206 hook_entry = rcu_dereference(hook_entry->next); 220 hook_entry = rcu_dereference(hook_entry->next);
207 if (hook_entry) 221 if (hook_entry)
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index c8a4a48bced9..7c6d1fbe38b9 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -24,7 +24,7 @@
24#include <net/netfilter/nf_conntrack_synproxy.h> 24#include <net/netfilter/nf_conntrack_synproxy.h>
25#include <net/netfilter/nf_conntrack_zones.h> 25#include <net/netfilter/nf_conntrack_zones.h>
26 26
27int synproxy_net_id; 27unsigned int synproxy_net_id;
28EXPORT_SYMBOL_GPL(synproxy_net_id); 28EXPORT_SYMBOL_GPL(synproxy_net_id);
29 29
30bool 30bool
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e5194f6f906c..a019a87e58ee 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -22,6 +22,7 @@
22#include <net/sock.h> 22#include <net/sock.h>
23 23
24static LIST_HEAD(nf_tables_expressions); 24static LIST_HEAD(nf_tables_expressions);
25static LIST_HEAD(nf_tables_objects);
25 26
26/** 27/**
27 * nft_register_afinfo - register nf_tables address family info 28 * nft_register_afinfo - register nf_tables address family info
@@ -110,12 +111,12 @@ static void nft_ctx_init(struct nft_ctx *ctx,
110 ctx->seq = nlh->nlmsg_seq; 111 ctx->seq = nlh->nlmsg_seq;
111} 112}
112 113
113static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type, 114static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
114 u32 size) 115 int msg_type, u32 size, gfp_t gfp)
115{ 116{
116 struct nft_trans *trans; 117 struct nft_trans *trans;
117 118
118 trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL); 119 trans = kzalloc(sizeof(struct nft_trans) + size, gfp);
119 if (trans == NULL) 120 if (trans == NULL)
120 return NULL; 121 return NULL;
121 122
@@ -125,6 +126,12 @@ static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
125 return trans; 126 return trans;
126} 127}
127 128
129static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
130 int msg_type, u32 size)
131{
132 return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
133}
134
128static void nft_trans_destroy(struct nft_trans *trans) 135static void nft_trans_destroy(struct nft_trans *trans)
129{ 136{
130 list_del(&trans->list); 137 list_del(&trans->list);
@@ -304,6 +311,38 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
304 return err; 311 return err;
305} 312}
306 313
314static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type,
315 struct nft_object *obj)
316{
317 struct nft_trans *trans;
318
319 trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_obj));
320 if (trans == NULL)
321 return -ENOMEM;
322
323 if (msg_type == NFT_MSG_NEWOBJ)
324 nft_activate_next(ctx->net, obj);
325
326 nft_trans_obj(trans) = obj;
327 list_add_tail(&trans->list, &ctx->net->nft.commit_list);
328
329 return 0;
330}
331
332static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
333{
334 int err;
335
336 err = nft_trans_obj_add(ctx, NFT_MSG_DELOBJ, obj);
337 if (err < 0)
338 return err;
339
340 nft_deactivate_next(ctx->net, obj);
341 ctx->table->use--;
342
343 return err;
344}
345
307/* 346/*
308 * Tables 347 * Tables
309 */ 348 */
@@ -688,6 +727,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
688 nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); 727 nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
689 INIT_LIST_HEAD(&table->chains); 728 INIT_LIST_HEAD(&table->chains);
690 INIT_LIST_HEAD(&table->sets); 729 INIT_LIST_HEAD(&table->sets);
730 INIT_LIST_HEAD(&table->objects);
691 table->flags = flags; 731 table->flags = flags;
692 732
693 nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); 733 nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
@@ -709,6 +749,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
709{ 749{
710 int err; 750 int err;
711 struct nft_chain *chain, *nc; 751 struct nft_chain *chain, *nc;
752 struct nft_object *obj, *ne;
712 struct nft_set *set, *ns; 753 struct nft_set *set, *ns;
713 754
714 list_for_each_entry(chain, &ctx->table->chains, list) { 755 list_for_each_entry(chain, &ctx->table->chains, list) {
@@ -735,6 +776,12 @@ static int nft_flush_table(struct nft_ctx *ctx)
735 goto out; 776 goto out;
736 } 777 }
737 778
779 list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) {
780 err = nft_delobj(ctx, obj);
781 if (err < 0)
782 goto out;
783 }
784
738 list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) { 785 list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) {
739 if (!nft_is_active_next(ctx->net, chain)) 786 if (!nft_is_active_next(ctx->net, chain))
740 continue; 787 continue;
@@ -2411,6 +2458,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
2411 [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 }, 2458 [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 },
2412 [NFTA_SET_USERDATA] = { .type = NLA_BINARY, 2459 [NFTA_SET_USERDATA] = { .type = NLA_BINARY,
2413 .len = NFT_USERDATA_MAXLEN }, 2460 .len = NFT_USERDATA_MAXLEN },
2461 [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 },
2414}; 2462};
2415 2463
2416static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { 2464static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2462,6 +2510,7 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
2462 } 2510 }
2463 return ERR_PTR(-ENOENT); 2511 return ERR_PTR(-ENOENT);
2464} 2512}
2513EXPORT_SYMBOL_GPL(nf_tables_set_lookup);
2465 2514
2466struct nft_set *nf_tables_set_lookup_byid(const struct net *net, 2515struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
2467 const struct nlattr *nla, 2516 const struct nlattr *nla,
@@ -2480,6 +2529,7 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
2480 } 2529 }
2481 return ERR_PTR(-ENOENT); 2530 return ERR_PTR(-ENOENT);
2482} 2531}
2532EXPORT_SYMBOL_GPL(nf_tables_set_lookup_byid);
2483 2533
2484static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, 2534static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
2485 const char *name) 2535 const char *name)
@@ -2568,6 +2618,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
2568 if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen))) 2618 if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen)))
2569 goto nla_put_failure; 2619 goto nla_put_failure;
2570 } 2620 }
2621 if (set->flags & NFT_SET_OBJECT &&
2622 nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype)))
2623 goto nla_put_failure;
2571 2624
2572 if (set->timeout && 2625 if (set->timeout &&
2573 nla_put_be64(skb, NFTA_SET_TIMEOUT, 2626 nla_put_be64(skb, NFTA_SET_TIMEOUT,
@@ -2797,7 +2850,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
2797 unsigned int size; 2850 unsigned int size;
2798 bool create; 2851 bool create;
2799 u64 timeout; 2852 u64 timeout;
2800 u32 ktype, dtype, flags, policy, gc_int; 2853 u32 ktype, dtype, flags, policy, gc_int, objtype;
2801 struct nft_set_desc desc; 2854 struct nft_set_desc desc;
2802 unsigned char *udata; 2855 unsigned char *udata;
2803 u16 udlen; 2856 u16 udlen;
@@ -2827,11 +2880,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
2827 flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); 2880 flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
2828 if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | 2881 if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
2829 NFT_SET_INTERVAL | NFT_SET_TIMEOUT | 2882 NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
2830 NFT_SET_MAP | NFT_SET_EVAL)) 2883 NFT_SET_MAP | NFT_SET_EVAL |
2884 NFT_SET_OBJECT))
2831 return -EINVAL; 2885 return -EINVAL;
2832 /* Only one of both operations is supported */ 2886 /* Only one of these operations is supported */
2833 if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) == 2887 if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) ==
2834 (NFT_SET_MAP | NFT_SET_EVAL)) 2888 (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT))
2835 return -EOPNOTSUPP; 2889 return -EOPNOTSUPP;
2836 } 2890 }
2837 2891
@@ -2856,6 +2910,19 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
2856 } else if (flags & NFT_SET_MAP) 2910 } else if (flags & NFT_SET_MAP)
2857 return -EINVAL; 2911 return -EINVAL;
2858 2912
2913 if (nla[NFTA_SET_OBJ_TYPE] != NULL) {
2914 if (!(flags & NFT_SET_OBJECT))
2915 return -EINVAL;
2916
2917 objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
2918 if (objtype == NFT_OBJECT_UNSPEC ||
2919 objtype > NFT_OBJECT_MAX)
2920 return -EINVAL;
2921 } else if (flags & NFT_SET_OBJECT)
2922 return -EINVAL;
2923 else
2924 objtype = NFT_OBJECT_UNSPEC;
2925
2859 timeout = 0; 2926 timeout = 0;
2860 if (nla[NFTA_SET_TIMEOUT] != NULL) { 2927 if (nla[NFTA_SET_TIMEOUT] != NULL) {
2861 if (!(flags & NFT_SET_TIMEOUT)) 2928 if (!(flags & NFT_SET_TIMEOUT))
@@ -2943,6 +3010,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
2943 set->ktype = ktype; 3010 set->ktype = ktype;
2944 set->klen = desc.klen; 3011 set->klen = desc.klen;
2945 set->dtype = dtype; 3012 set->dtype = dtype;
3013 set->objtype = objtype;
2946 set->dlen = desc.dlen; 3014 set->dlen = desc.dlen;
2947 set->flags = flags; 3015 set->flags = flags;
2948 set->size = desc.size; 3016 set->size = desc.size;
@@ -3064,6 +3132,7 @@ bind:
3064 list_add_tail_rcu(&binding->list, &set->bindings); 3132 list_add_tail_rcu(&binding->list, &set->bindings);
3065 return 0; 3133 return 0;
3066} 3134}
3135EXPORT_SYMBOL_GPL(nf_tables_bind_set);
3067 3136
3068void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, 3137void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
3069 struct nft_set_binding *binding) 3138 struct nft_set_binding *binding)
@@ -3074,6 +3143,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
3074 nft_is_active(ctx->net, set)) 3143 nft_is_active(ctx->net, set))
3075 nf_tables_set_destroy(ctx, set); 3144 nf_tables_set_destroy(ctx, set);
3076} 3145}
3146EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
3077 3147
3078const struct nft_set_ext_type nft_set_ext_types[] = { 3148const struct nft_set_ext_type nft_set_ext_types[] = {
3079 [NFT_SET_EXT_KEY] = { 3149 [NFT_SET_EXT_KEY] = {
@@ -3085,6 +3155,10 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
3085 [NFT_SET_EXT_EXPR] = { 3155 [NFT_SET_EXT_EXPR] = {
3086 .align = __alignof__(struct nft_expr), 3156 .align = __alignof__(struct nft_expr),
3087 }, 3157 },
3158 [NFT_SET_EXT_OBJREF] = {
3159 .len = sizeof(struct nft_object *),
3160 .align = __alignof__(struct nft_object *),
3161 },
3088 [NFT_SET_EXT_FLAGS] = { 3162 [NFT_SET_EXT_FLAGS] = {
3089 .len = sizeof(u8), 3163 .len = sizeof(u8),
3090 .align = __alignof__(u8), 3164 .align = __alignof__(u8),
@@ -3173,6 +3247,11 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
3173 nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0) 3247 nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0)
3174 goto nla_put_failure; 3248 goto nla_put_failure;
3175 3249
3250 if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
3251 nla_put_string(skb, NFTA_SET_ELEM_OBJREF,
3252 (*nft_set_ext_obj(ext))->name) < 0)
3253 goto nla_put_failure;
3254
3176 if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && 3255 if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
3177 nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, 3256 nla_put_be32(skb, NFTA_SET_ELEM_FLAGS,
3178 htonl(*nft_set_ext_flags(ext)))) 3257 htonl(*nft_set_ext_flags(ext))))
@@ -3467,7 +3546,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
3467 nft_data_uninit(nft_set_ext_data(ext), set->dtype); 3546 nft_data_uninit(nft_set_ext_data(ext), set->dtype);
3468 if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) 3547 if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
3469 nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); 3548 nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
3470 3549 if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
3550 (*nft_set_ext_obj(ext))->use--;
3471 kfree(elem); 3551 kfree(elem);
3472} 3552}
3473EXPORT_SYMBOL_GPL(nft_set_elem_destroy); 3553EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
@@ -3492,11 +3572,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3492 const struct nlattr *attr, u32 nlmsg_flags) 3572 const struct nlattr *attr, u32 nlmsg_flags)
3493{ 3573{
3494 struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; 3574 struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
3575 u8 genmask = nft_genmask_next(ctx->net);
3495 struct nft_data_desc d1, d2; 3576 struct nft_data_desc d1, d2;
3496 struct nft_set_ext_tmpl tmpl; 3577 struct nft_set_ext_tmpl tmpl;
3497 struct nft_set_ext *ext, *ext2; 3578 struct nft_set_ext *ext, *ext2;
3498 struct nft_set_elem elem; 3579 struct nft_set_elem elem;
3499 struct nft_set_binding *binding; 3580 struct nft_set_binding *binding;
3581 struct nft_object *obj = NULL;
3500 struct nft_userdata *udata; 3582 struct nft_userdata *udata;
3501 struct nft_data data; 3583 struct nft_data data;
3502 enum nft_registers dreg; 3584 enum nft_registers dreg;
@@ -3559,6 +3641,20 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3559 nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); 3641 nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
3560 } 3642 }
3561 3643
3644 if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
3645 if (!(set->flags & NFT_SET_OBJECT)) {
3646 err = -EINVAL;
3647 goto err2;
3648 }
3649 obj = nf_tables_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF],
3650 set->objtype, genmask);
3651 if (IS_ERR(obj)) {
3652 err = PTR_ERR(obj);
3653 goto err2;
3654 }
3655 nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
3656 }
3657
3562 if (nla[NFTA_SET_ELEM_DATA] != NULL) { 3658 if (nla[NFTA_SET_ELEM_DATA] != NULL) {
3563 err = nft_data_init(ctx, &data, sizeof(data), &d2, 3659 err = nft_data_init(ctx, &data, sizeof(data), &d2,
3564 nla[NFTA_SET_ELEM_DATA]); 3660 nla[NFTA_SET_ELEM_DATA]);
@@ -3617,6 +3713,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3617 udata->len = ulen - 1; 3713 udata->len = ulen - 1;
3618 nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); 3714 nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
3619 } 3715 }
3716 if (obj) {
3717 *nft_set_ext_obj(ext) = obj;
3718 obj->use++;
3719 }
3620 3720
3621 trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); 3721 trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
3622 if (trans == NULL) 3722 if (trans == NULL)
@@ -3626,10 +3726,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3626 err = set->ops->insert(ctx->net, set, &elem, &ext2); 3726 err = set->ops->insert(ctx->net, set, &elem, &ext2);
3627 if (err) { 3727 if (err) {
3628 if (err == -EEXIST) { 3728 if (err == -EEXIST) {
3629 if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && 3729 if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
3630 nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && 3730 nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
3631 memcmp(nft_set_ext_data(ext), 3731 memcmp(nft_set_ext_data(ext),
3632 nft_set_ext_data(ext2), set->dlen) != 0) 3732 nft_set_ext_data(ext2), set->dlen) != 0) ||
3733 (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
3734 nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) &&
3735 *nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2)))
3633 err = -EBUSY; 3736 err = -EBUSY;
3634 else if (!(nlmsg_flags & NLM_F_EXCL)) 3737 else if (!(nlmsg_flags & NLM_F_EXCL))
3635 err = 0; 3738 err = 0;
@@ -3779,6 +3882,34 @@ err1:
3779 return err; 3882 return err;
3780} 3883}
3781 3884
3885static int nft_flush_set(const struct nft_ctx *ctx,
3886 const struct nft_set *set,
3887 const struct nft_set_iter *iter,
3888 const struct nft_set_elem *elem)
3889{
3890 struct nft_trans *trans;
3891 int err;
3892
3893 trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
3894 sizeof(struct nft_trans_elem), GFP_ATOMIC);
3895 if (!trans)
3896 return -ENOMEM;
3897
3898 if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) {
3899 err = -ENOENT;
3900 goto err1;
3901 }
3902
3903 nft_trans_elem_set(trans) = (struct nft_set *)set;
3904 nft_trans_elem(trans) = *((struct nft_set_elem *)elem);
3905 list_add_tail(&trans->list, &ctx->net->nft.commit_list);
3906
3907 return 0;
3908err1:
3909 kfree(trans);
3910 return err;
3911}
3912
3782static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, 3913static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
3783 struct sk_buff *skb, const struct nlmsghdr *nlh, 3914 struct sk_buff *skb, const struct nlmsghdr *nlh,
3784 const struct nlattr * const nla[]) 3915 const struct nlattr * const nla[])
@@ -3789,9 +3920,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
3789 struct nft_ctx ctx; 3920 struct nft_ctx ctx;
3790 int rem, err = 0; 3921 int rem, err = 0;
3791 3922
3792 if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
3793 return -EINVAL;
3794
3795 err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); 3923 err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
3796 if (err < 0) 3924 if (err < 0)
3797 return err; 3925 return err;
@@ -3803,6 +3931,18 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
3803 if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) 3931 if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
3804 return -EBUSY; 3932 return -EBUSY;
3805 3933
3934 if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) {
3935 struct nft_set_dump_args args = {
3936 .iter = {
3937 .genmask = genmask,
3938 .fn = nft_flush_set,
3939 },
3940 };
3941 set->ops->walk(&ctx, set, &args.iter);
3942
3943 return args.iter.err;
3944 }
3945
3806 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { 3946 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
3807 err = nft_del_setelem(&ctx, set, attr); 3947 err = nft_del_setelem(&ctx, set, attr);
3808 if (err < 0) 3948 if (err < 0)
@@ -3838,6 +3978,500 @@ struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
3838} 3978}
3839EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); 3979EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);
3840 3980
3981/*
3982 * Stateful objects
3983 */
3984
3985/**
3986 * nft_register_obj- register nf_tables stateful object type
3987 * @obj: object type
3988 *
3989 * Registers the object type for use with nf_tables. Returns zero on
3990 * success or a negative errno code otherwise.
3991 */
3992int nft_register_obj(struct nft_object_type *obj_type)
3993{
3994 if (obj_type->type == NFT_OBJECT_UNSPEC)
3995 return -EINVAL;
3996
3997 nfnl_lock(NFNL_SUBSYS_NFTABLES);
3998 list_add_rcu(&obj_type->list, &nf_tables_objects);
3999 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
4000 return 0;
4001}
4002EXPORT_SYMBOL_GPL(nft_register_obj);
4003
4004/**
4005 * nft_unregister_obj - unregister nf_tables object type
4006 * @obj: object type
4007 *
4008 * Unregisters the object type for use with nf_tables.
4009 */
4010void nft_unregister_obj(struct nft_object_type *obj_type)
4011{
4012 nfnl_lock(NFNL_SUBSYS_NFTABLES);
4013 list_del_rcu(&obj_type->list);
4014 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
4015}
4016EXPORT_SYMBOL_GPL(nft_unregister_obj);
4017
4018struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
4019 const struct nlattr *nla,
4020 u32 objtype, u8 genmask)
4021{
4022 struct nft_object *obj;
4023
4024 list_for_each_entry(obj, &table->objects, list) {
4025 if (!nla_strcmp(nla, obj->name) &&
4026 objtype == obj->type->type &&
4027 nft_active_genmask(obj, genmask))
4028 return obj;
4029 }
4030 return ERR_PTR(-ENOENT);
4031}
4032EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
4033
4034static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
4035 [NFTA_OBJ_TABLE] = { .type = NLA_STRING },
4036 [NFTA_OBJ_NAME] = { .type = NLA_STRING },
4037 [NFTA_OBJ_TYPE] = { .type = NLA_U32 },
4038 [NFTA_OBJ_DATA] = { .type = NLA_NESTED },
4039};
4040
4041static struct nft_object *nft_obj_init(const struct nft_object_type *type,
4042 const struct nlattr *attr)
4043{
4044 struct nlattr *tb[type->maxattr + 1];
4045 struct nft_object *obj;
4046 int err;
4047
4048 if (attr) {
4049 err = nla_parse_nested(tb, type->maxattr, attr, type->policy);
4050 if (err < 0)
4051 goto err1;
4052 } else {
4053 memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
4054 }
4055
4056 err = -ENOMEM;
4057 obj = kzalloc(sizeof(struct nft_object) + type->size, GFP_KERNEL);
4058 if (obj == NULL)
4059 goto err1;
4060
4061 err = type->init((const struct nlattr * const *)tb, obj);
4062 if (err < 0)
4063 goto err2;
4064
4065 obj->type = type;
4066 return obj;
4067err2:
4068 kfree(obj);
4069err1:
4070 return ERR_PTR(err);
4071}
4072
4073static int nft_object_dump(struct sk_buff *skb, unsigned int attr,
4074 struct nft_object *obj, bool reset)
4075{
4076 struct nlattr *nest;
4077
4078 nest = nla_nest_start(skb, attr);
4079 if (!nest)
4080 goto nla_put_failure;
4081 if (obj->type->dump(skb, obj, reset) < 0)
4082 goto nla_put_failure;
4083 nla_nest_end(skb, nest);
4084 return 0;
4085
4086nla_put_failure:
4087 return -1;
4088}
4089
4090static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
4091{
4092 const struct nft_object_type *type;
4093
4094 list_for_each_entry(type, &nf_tables_objects, list) {
4095 if (objtype == type->type)
4096 return type;
4097 }
4098 return NULL;
4099}
4100
4101static const struct nft_object_type *nft_obj_type_get(u32 objtype)
4102{
4103 const struct nft_object_type *type;
4104
4105 type = __nft_obj_type_get(objtype);
4106 if (type != NULL && try_module_get(type->owner))
4107 return type;
4108
4109#ifdef CONFIG_MODULES
4110 if (type == NULL) {
4111 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
4112 request_module("nft-obj-%u", objtype);
4113 nfnl_lock(NFNL_SUBSYS_NFTABLES);
4114 if (__nft_obj_type_get(objtype))
4115 return ERR_PTR(-EAGAIN);
4116 }
4117#endif
4118 return ERR_PTR(-ENOENT);
4119}
4120
4121static int nf_tables_newobj(struct net *net, struct sock *nlsk,
4122 struct sk_buff *skb, const struct nlmsghdr *nlh,
4123 const struct nlattr * const nla[])
4124{
4125 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
4126 const struct nft_object_type *type;
4127 u8 genmask = nft_genmask_next(net);
4128 int family = nfmsg->nfgen_family;
4129 struct nft_af_info *afi;
4130 struct nft_table *table;
4131 struct nft_object *obj;
4132 struct nft_ctx ctx;
4133 u32 objtype;
4134 int err;
4135
4136 if (!nla[NFTA_OBJ_TYPE] ||
4137 !nla[NFTA_OBJ_NAME] ||
4138 !nla[NFTA_OBJ_DATA])
4139 return -EINVAL;
4140
4141 afi = nf_tables_afinfo_lookup(net, family, true);
4142 if (IS_ERR(afi))
4143 return PTR_ERR(afi);
4144
4145 table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
4146 if (IS_ERR(table))
4147 return PTR_ERR(table);
4148
4149 objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
4150 obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
4151 if (IS_ERR(obj)) {
4152 err = PTR_ERR(obj);
4153 if (err != -ENOENT)
4154 return err;
4155
4156 obj = NULL;
4157 }
4158
4159 if (obj != NULL) {
4160 if (nlh->nlmsg_flags & NLM_F_EXCL)
4161 return -EEXIST;
4162
4163 return 0;
4164 }
4165
4166 nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
4167
4168 type = nft_obj_type_get(objtype);
4169 if (IS_ERR(type))
4170 return PTR_ERR(type);
4171
4172 obj = nft_obj_init(type, nla[NFTA_OBJ_DATA]);
4173 if (IS_ERR(obj)) {
4174 err = PTR_ERR(obj);
4175 goto err1;
4176 }
4177 obj->table = table;
4178 nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN);
4179
4180 err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
4181 if (err < 0)
4182 goto err2;
4183
4184 list_add_tail_rcu(&obj->list, &table->objects);
4185 table->use++;
4186 return 0;
4187err2:
4188 if (obj->type->destroy)
4189 obj->type->destroy(obj);
4190 kfree(obj);
4191err1:
4192 module_put(type->owner);
4193 return err;
4194}
4195
4196static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
4197 u32 portid, u32 seq, int event, u32 flags,
4198 int family, const struct nft_table *table,
4199 struct nft_object *obj, bool reset)
4200{
4201 struct nfgenmsg *nfmsg;
4202 struct nlmsghdr *nlh;
4203
4204 event |= NFNL_SUBSYS_NFTABLES << 8;
4205 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
4206 if (nlh == NULL)
4207 goto nla_put_failure;
4208
4209 nfmsg = nlmsg_data(nlh);
4210 nfmsg->nfgen_family = family;
4211 nfmsg->version = NFNETLINK_V0;
4212 nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
4213
4214 if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
4215 nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
4216 nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) ||
4217 nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
4218 nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
4219 goto nla_put_failure;
4220
4221 nlmsg_end(skb, nlh);
4222 return 0;
4223
4224nla_put_failure:
4225 nlmsg_trim(skb, nlh);
4226 return -1;
4227}
4228
4229struct nft_obj_filter {
4230 char table[NFT_OBJ_MAXNAMELEN];
4231 u32 type;
4232};
4233
4234static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
4235{
4236 const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
4237 const struct nft_af_info *afi;
4238 const struct nft_table *table;
4239 unsigned int idx = 0, s_idx = cb->args[0];
4240 struct nft_obj_filter *filter = cb->data;
4241 struct net *net = sock_net(skb->sk);
4242 int family = nfmsg->nfgen_family;
4243 struct nft_object *obj;
4244 bool reset = false;
4245
4246 if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
4247 reset = true;
4248
4249 rcu_read_lock();
4250 cb->seq = net->nft.base_seq;
4251
4252 list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
4253 if (family != NFPROTO_UNSPEC && family != afi->family)
4254 continue;
4255
4256 list_for_each_entry_rcu(table, &afi->tables, list) {
4257 list_for_each_entry_rcu(obj, &table->objects, list) {
4258 if (!nft_is_active(net, obj))
4259 goto cont;
4260 if (idx < s_idx)
4261 goto cont;
4262 if (idx > s_idx)
4263 memset(&cb->args[1], 0,
4264 sizeof(cb->args) - sizeof(cb->args[0]));
4265 if (filter->table[0] &&
4266 strcmp(filter->table, table->name))
4267 goto cont;
4268 if (filter->type != NFT_OBJECT_UNSPEC &&
4269 obj->type->type != filter->type)
4270 goto cont;
4271
4272 if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
4273 cb->nlh->nlmsg_seq,
4274 NFT_MSG_NEWOBJ,
4275 NLM_F_MULTI | NLM_F_APPEND,
4276 afi->family, table, obj, reset) < 0)
4277 goto done;
4278
4279 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
4280cont:
4281 idx++;
4282 }
4283 }
4284 }
4285done:
4286 rcu_read_unlock();
4287
4288 cb->args[0] = idx;
4289 return skb->len;
4290}
4291
4292static int nf_tables_dump_obj_done(struct netlink_callback *cb)
4293{
4294 kfree(cb->data);
4295
4296 return 0;
4297}
4298
4299static struct nft_obj_filter *
4300nft_obj_filter_alloc(const struct nlattr * const nla[])
4301{
4302 struct nft_obj_filter *filter;
4303
4304 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
4305 if (!filter)
4306 return ERR_PTR(-ENOMEM);
4307
4308 if (nla[NFTA_OBJ_TABLE])
4309 nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE],
4310 NFT_TABLE_MAXNAMELEN);
4311 if (nla[NFTA_OBJ_TYPE])
4312 filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
4313
4314 return filter;
4315}
4316
4317static int nf_tables_getobj(struct net *net, struct sock *nlsk,
4318 struct sk_buff *skb, const struct nlmsghdr *nlh,
4319 const struct nlattr * const nla[])
4320{
4321 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
4322 u8 genmask = nft_genmask_cur(net);
4323 int family = nfmsg->nfgen_family;
4324 const struct nft_af_info *afi;
4325 const struct nft_table *table;
4326 struct nft_object *obj;
4327 struct sk_buff *skb2;
4328 bool reset = false;
4329 u32 objtype;
4330 int err;
4331
4332 if (nlh->nlmsg_flags & NLM_F_DUMP) {
4333 struct netlink_dump_control c = {
4334 .dump = nf_tables_dump_obj,
4335 .done = nf_tables_dump_obj_done,
4336 };
4337
4338 if (nla[NFTA_OBJ_TABLE] ||
4339 nla[NFTA_OBJ_TYPE]) {
4340 struct nft_obj_filter *filter;
4341
4342 filter = nft_obj_filter_alloc(nla);
4343 if (IS_ERR(filter))
4344 return -ENOMEM;
4345
4346 c.data = filter;
4347 }
4348 return netlink_dump_start(nlsk, skb, nlh, &c);
4349 }
4350
4351 if (!nla[NFTA_OBJ_NAME] ||
4352 !nla[NFTA_OBJ_TYPE])
4353 return -EINVAL;
4354
4355 afi = nf_tables_afinfo_lookup(net, family, false);
4356 if (IS_ERR(afi))
4357 return PTR_ERR(afi);
4358
4359 table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
4360 if (IS_ERR(table))
4361 return PTR_ERR(table);
4362
4363 objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
4364 obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
4365 if (IS_ERR(obj))
4366 return PTR_ERR(obj);
4367
4368 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
4369 if (!skb2)
4370 return -ENOMEM;
4371
4372 if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
4373 reset = true;
4374
4375 err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
4376 nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
4377 family, table, obj, reset);
4378 if (err < 0)
4379 goto err;
4380
4381 return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
4382err:
4383 kfree_skb(skb2);
4384 return err;
4385
4386 return 0;
4387}
4388
4389static void nft_obj_destroy(struct nft_object *obj)
4390{
4391 if (obj->type->destroy)
4392 obj->type->destroy(obj);
4393
4394 module_put(obj->type->owner);
4395 kfree(obj);
4396}
4397
4398static int nf_tables_delobj(struct net *net, struct sock *nlsk,
4399 struct sk_buff *skb, const struct nlmsghdr *nlh,
4400 const struct nlattr * const nla[])
4401{
4402 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
4403 u8 genmask = nft_genmask_next(net);
4404 int family = nfmsg->nfgen_family;
4405 struct nft_af_info *afi;
4406 struct nft_table *table;
4407 struct nft_object *obj;
4408 struct nft_ctx ctx;
4409 u32 objtype;
4410
4411 if (!nla[NFTA_OBJ_TYPE] ||
4412 !nla[NFTA_OBJ_NAME])
4413 return -EINVAL;
4414
4415 afi = nf_tables_afinfo_lookup(net, family, true);
4416 if (IS_ERR(afi))
4417 return PTR_ERR(afi);
4418
4419 table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
4420 if (IS_ERR(table))
4421 return PTR_ERR(table);
4422
4423 objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
4424 obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
4425 if (IS_ERR(obj))
4426 return PTR_ERR(obj);
4427 if (obj->use > 0)
4428 return -EBUSY;
4429
4430 nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
4431
4432 return nft_delobj(&ctx, obj);
4433}
4434
4435int nft_obj_notify(struct net *net, struct nft_table *table,
4436 struct nft_object *obj, u32 portid, u32 seq, int event,
4437 int family, int report, gfp_t gfp)
4438{
4439 struct sk_buff *skb;
4440 int err;
4441
4442 if (!report &&
4443 !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
4444 return 0;
4445
4446 err = -ENOBUFS;
4447 skb = nlmsg_new(NLMSG_GOODSIZE, gfp);
4448 if (skb == NULL)
4449 goto err;
4450
4451 err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family,
4452 table, obj, false);
4453 if (err < 0) {
4454 kfree_skb(skb);
4455 goto err;
4456 }
4457
4458 err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp);
4459err:
4460 if (err < 0) {
4461 nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
4462 }
4463 return err;
4464}
4465EXPORT_SYMBOL_GPL(nft_obj_notify);
4466
4467static int nf_tables_obj_notify(const struct nft_ctx *ctx,
4468 struct nft_object *obj, int event)
4469{
4470 return nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid,
4471 ctx->seq, event, ctx->afi->family, ctx->report,
4472 GFP_KERNEL);
4473}
4474
3841static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, 4475static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
3842 u32 portid, u32 seq) 4476 u32 portid, u32 seq)
3843{ 4477{
@@ -3998,6 +4632,26 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
3998 [NFT_MSG_GETGEN] = { 4632 [NFT_MSG_GETGEN] = {
3999 .call = nf_tables_getgen, 4633 .call = nf_tables_getgen,
4000 }, 4634 },
4635 [NFT_MSG_NEWOBJ] = {
4636 .call_batch = nf_tables_newobj,
4637 .attr_count = NFTA_OBJ_MAX,
4638 .policy = nft_obj_policy,
4639 },
4640 [NFT_MSG_GETOBJ] = {
4641 .call = nf_tables_getobj,
4642 .attr_count = NFTA_OBJ_MAX,
4643 .policy = nft_obj_policy,
4644 },
4645 [NFT_MSG_DELOBJ] = {
4646 .call_batch = nf_tables_delobj,
4647 .attr_count = NFTA_OBJ_MAX,
4648 .policy = nft_obj_policy,
4649 },
4650 [NFT_MSG_GETOBJ_RESET] = {
4651 .call = nf_tables_getobj,
4652 .attr_count = NFTA_OBJ_MAX,
4653 .policy = nft_obj_policy,
4654 },
4001}; 4655};
4002 4656
4003static void nft_chain_commit_update(struct nft_trans *trans) 4657static void nft_chain_commit_update(struct nft_trans *trans)
@@ -4040,6 +4694,9 @@ static void nf_tables_commit_release(struct nft_trans *trans)
4040 nft_set_elem_destroy(nft_trans_elem_set(trans), 4694 nft_set_elem_destroy(nft_trans_elem_set(trans),
4041 nft_trans_elem(trans).priv, true); 4695 nft_trans_elem(trans).priv, true);
4042 break; 4696 break;
4697 case NFT_MSG_DELOBJ:
4698 nft_obj_destroy(nft_trans_obj(trans));
4699 break;
4043 } 4700 }
4044 kfree(trans); 4701 kfree(trans);
4045} 4702}
@@ -4147,6 +4804,17 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
4147 atomic_dec(&te->set->nelems); 4804 atomic_dec(&te->set->nelems);
4148 te->set->ndeact--; 4805 te->set->ndeact--;
4149 break; 4806 break;
4807 case NFT_MSG_NEWOBJ:
4808 nft_clear(net, nft_trans_obj(trans));
4809 nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
4810 NFT_MSG_NEWOBJ);
4811 nft_trans_destroy(trans);
4812 break;
4813 case NFT_MSG_DELOBJ:
4814 list_del_rcu(&nft_trans_obj(trans)->list);
4815 nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
4816 NFT_MSG_DELOBJ);
4817 break;
4150 } 4818 }
4151 } 4819 }
4152 4820
@@ -4181,6 +4849,9 @@ static void nf_tables_abort_release(struct nft_trans *trans)
4181 nft_set_elem_destroy(nft_trans_elem_set(trans), 4849 nft_set_elem_destroy(nft_trans_elem_set(trans),
4182 nft_trans_elem(trans).priv, true); 4850 nft_trans_elem(trans).priv, true);
4183 break; 4851 break;
4852 case NFT_MSG_NEWOBJ:
4853 nft_obj_destroy(nft_trans_obj(trans));
4854 break;
4184 } 4855 }
4185 kfree(trans); 4856 kfree(trans);
4186} 4857}
@@ -4261,6 +4932,15 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
4261 4932
4262 nft_trans_destroy(trans); 4933 nft_trans_destroy(trans);
4263 break; 4934 break;
4935 case NFT_MSG_NEWOBJ:
4936 trans->ctx.table->use--;
4937 list_del_rcu(&nft_trans_obj(trans)->list);
4938 break;
4939 case NFT_MSG_DELOBJ:
4940 trans->ctx.table->use++;
4941 nft_clear(trans->ctx.net, nft_trans_obj(trans));
4942 nft_trans_destroy(trans);
4943 break;
4264 } 4944 }
4265 } 4945 }
4266 4946
@@ -4807,6 +5487,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
4807{ 5487{
4808 struct nft_table *table, *nt; 5488 struct nft_table *table, *nt;
4809 struct nft_chain *chain, *nc; 5489 struct nft_chain *chain, *nc;
5490 struct nft_object *obj, *ne;
4810 struct nft_rule *rule, *nr; 5491 struct nft_rule *rule, *nr;
4811 struct nft_set *set, *ns; 5492 struct nft_set *set, *ns;
4812 struct nft_ctx ctx = { 5493 struct nft_ctx ctx = {
@@ -4833,6 +5514,11 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
4833 table->use--; 5514 table->use--;
4834 nft_set_destroy(set); 5515 nft_set_destroy(set);
4835 } 5516 }
5517 list_for_each_entry_safe(obj, ne, &table->objects, list) {
5518 list_del(&obj->list);
5519 table->use--;
5520 nft_obj_destroy(obj);
5521 }
4836 list_for_each_entry_safe(chain, nc, &table->chains, list) { 5522 list_for_each_entry_safe(chain, nc, &table->chains, list) {
4837 list_del(&chain->list); 5523 list_del(&chain->list);
4838 table->use--; 5524 table->use--;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 0dd5c695482f..65dbeadcb118 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -53,10 +53,10 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info,
53 53
54 nft_trace_notify(info); 54 nft_trace_notify(info);
55 55
56 nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, 56 nf_log_trace(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb,
57 pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", 57 nft_in(pkt), nft_out(pkt), &trace_loginfo,
58 chain->table->name, chain->name, comments[type], 58 "TRACE: %s:%s:%s:%u ",
59 rulenum); 59 chain->table->name, chain->name, comments[type], rulenum);
60} 60}
61 61
62static inline void nft_trace_packet(struct nft_traceinfo *info, 62static inline void nft_trace_packet(struct nft_traceinfo *info,
@@ -124,7 +124,7 @@ unsigned int
124nft_do_chain(struct nft_pktinfo *pkt, void *priv) 124nft_do_chain(struct nft_pktinfo *pkt, void *priv)
125{ 125{
126 const struct nft_chain *chain = priv, *basechain = chain; 126 const struct nft_chain *chain = priv, *basechain = chain;
127 const struct net *net = pkt->net; 127 const struct net *net = nft_net(pkt);
128 const struct nft_rule *rule; 128 const struct nft_rule *rule;
129 const struct nft_expr *expr, *last; 129 const struct nft_expr *expr, *last;
130 struct nft_regs regs; 130 struct nft_regs regs;
@@ -178,6 +178,7 @@ next_rule:
178 case NF_ACCEPT: 178 case NF_ACCEPT:
179 case NF_DROP: 179 case NF_DROP:
180 case NF_QUEUE: 180 case NF_QUEUE:
181 case NF_STOLEN:
181 nft_trace_packet(&info, chain, rule, 182 nft_trace_packet(&info, chain, rule,
182 rulenum, NFT_TRACETYPE_RULE); 183 rulenum, NFT_TRACETYPE_RULE);
183 return regs.verdict.code; 184 return regs.verdict.code;
@@ -231,68 +232,40 @@ next_rule:
231} 232}
232EXPORT_SYMBOL_GPL(nft_do_chain); 233EXPORT_SYMBOL_GPL(nft_do_chain);
233 234
235static struct nft_expr_type *nft_basic_types[] = {
236 &nft_imm_type,
237 &nft_cmp_type,
238 &nft_lookup_type,
239 &nft_bitwise_type,
240 &nft_byteorder_type,
241 &nft_payload_type,
242 &nft_dynset_type,
243 &nft_range_type,
244};
245
234int __init nf_tables_core_module_init(void) 246int __init nf_tables_core_module_init(void)
235{ 247{
236 int err; 248 int err, i;
237
238 err = nft_immediate_module_init();
239 if (err < 0)
240 goto err1;
241
242 err = nft_cmp_module_init();
243 if (err < 0)
244 goto err2;
245
246 err = nft_lookup_module_init();
247 if (err < 0)
248 goto err3;
249
250 err = nft_bitwise_module_init();
251 if (err < 0)
252 goto err4;
253 249
254 err = nft_byteorder_module_init(); 250 for (i = 0; i < ARRAY_SIZE(nft_basic_types); i++) {
255 if (err < 0) 251 err = nft_register_expr(nft_basic_types[i]);
256 goto err5; 252 if (err)
257 253 goto err;
258 err = nft_payload_module_init(); 254 }
259 if (err < 0)
260 goto err6;
261
262 err = nft_dynset_module_init();
263 if (err < 0)
264 goto err7;
265
266 err = nft_range_module_init();
267 if (err < 0)
268 goto err8;
269 255
270 return 0; 256 return 0;
271err8: 257
272 nft_dynset_module_exit(); 258err:
273err7: 259 while (i-- > 0)
274 nft_payload_module_exit(); 260 nft_unregister_expr(nft_basic_types[i]);
275err6:
276 nft_byteorder_module_exit();
277err5:
278 nft_bitwise_module_exit();
279err4:
280 nft_lookup_module_exit();
281err3:
282 nft_cmp_module_exit();
283err2:
284 nft_immediate_module_exit();
285err1:
286 return err; 261 return err;
287} 262}
288 263
289void nf_tables_core_module_exit(void) 264void nf_tables_core_module_exit(void)
290{ 265{
291 nft_dynset_module_exit(); 266 int i;
292 nft_payload_module_exit(); 267
293 nft_byteorder_module_exit(); 268 i = ARRAY_SIZE(nft_basic_types);
294 nft_bitwise_module_exit(); 269 while (i-- > 0)
295 nft_lookup_module_exit(); 270 nft_unregister_expr(nft_basic_types[i]);
296 nft_cmp_module_exit();
297 nft_immediate_module_exit();
298} 271}
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index ab695f8e2d29..12eb9041dca2 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -171,7 +171,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
171 unsigned int size; 171 unsigned int size;
172 int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE; 172 int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE;
173 173
174 if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE)) 174 if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE))
175 return; 175 return;
176 176
177 size = nlmsg_total_size(sizeof(struct nfgenmsg)) + 177 size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
@@ -207,7 +207,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
207 nfmsg->version = NFNETLINK_V0; 207 nfmsg->version = NFNETLINK_V0;
208 nfmsg->res_id = 0; 208 nfmsg->res_id = 0;
209 209
210 if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf))) 210 if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt))))
211 goto nla_put_failure; 211 goto nla_put_failure;
212 212
213 if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) 213 if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
@@ -249,7 +249,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
249 goto nla_put_failure; 249 goto nla_put_failure;
250 250
251 if (!info->packet_dumped) { 251 if (!info->packet_dumped) {
252 if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out)) 252 if (nf_trace_fill_dev_info(skb, nft_in(pkt), nft_out(pkt)))
253 goto nla_put_failure; 253 goto nla_put_failure;
254 254
255 if (nf_trace_fill_pkt_info(skb, pkt)) 255 if (nf_trace_fill_pkt_info(skb, pkt))
@@ -258,7 +258,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
258 } 258 }
259 259
260 nlmsg_end(skb, nlh); 260 nlmsg_end(skb, nlh);
261 nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); 261 nfnetlink_send(skb, nft_net(pkt), 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
262 return; 262 return;
263 263
264 nla_put_failure: 264 nla_put_failure:
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 2278d9ab723b..a09fa9fd8f3d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -22,7 +22,7 @@
22#include <linux/sockios.h> 22#include <linux/sockios.h>
23#include <linux/net.h> 23#include <linux/net.h>
24#include <linux/skbuff.h> 24#include <linux/skbuff.h>
25#include <asm/uaccess.h> 25#include <linux/uaccess.h>
26#include <net/sock.h> 26#include <net/sock.h>
27#include <linux/init.h> 27#include <linux/init.h>
28 28
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index eb086a192c5a..08247bf7d7b8 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -80,7 +80,7 @@ struct nfulnl_instance {
80 80
81#define INSTANCE_BUCKETS 16 81#define INSTANCE_BUCKETS 16
82 82
83static int nfnl_log_net_id __read_mostly; 83static unsigned int nfnl_log_net_id __read_mostly;
84 84
85struct nfnl_log_net { 85struct nfnl_log_net {
86 spinlock_t instances_lock; 86 spinlock_t instances_lock;
@@ -330,7 +330,7 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
330 * message. WARNING: has to be <= 128k due to slab restrictions */ 330 * message. WARNING: has to be <= 128k due to slab restrictions */
331 331
332 n = max(inst_size, pkt_size); 332 n = max(inst_size, pkt_size);
333 skb = alloc_skb(n, GFP_ATOMIC); 333 skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
334 if (!skb) { 334 if (!skb) {
335 if (n > pkt_size) { 335 if (n > pkt_size) {
336 /* try to allocate only as much as we need for current 336 /* try to allocate only as much as we need for current
@@ -538,7 +538,7 @@ __build_packet_message(struct nfnl_log_net *log,
538 goto nla_put_failure; 538 goto nla_put_failure;
539 } 539 }
540 540
541 if (skb->tstamp.tv64) { 541 if (skb->tstamp) {
542 struct nfulnl_msg_packet_timestamp ts; 542 struct nfulnl_msg_packet_timestamp ts;
543 struct timespec64 kts = ktime_to_timespec64(skb->tstamp); 543 struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
544 ts.sec = cpu_to_be64(kts.tv_sec); 544 ts.sec = cpu_to_be64(kts.tv_sec);
@@ -1152,6 +1152,7 @@ MODULE_ALIAS_NF_LOGGER(AF_INET, 1);
1152MODULE_ALIAS_NF_LOGGER(AF_INET6, 1); 1152MODULE_ALIAS_NF_LOGGER(AF_INET6, 1);
1153MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1); 1153MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1);
1154MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */ 1154MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */
1155MODULE_ALIAS_NF_LOGGER(5, 1); /* NFPROTO_NETDEV */
1155 1156
1156module_init(nfnetlink_log_init); 1157module_init(nfnetlink_log_init);
1157module_exit(nfnetlink_log_fini); 1158module_exit(nfnetlink_log_fini);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index af832c526048..3ee0b8a000a4 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -69,7 +69,7 @@ struct nfqnl_instance {
69 * Following fields are dirtied for each queued packet, 69 * Following fields are dirtied for each queued packet,
70 * keep them in same cache line if possible. 70 * keep them in same cache line if possible.
71 */ 71 */
72 spinlock_t lock; 72 spinlock_t lock ____cacheline_aligned_in_smp;
73 unsigned int queue_total; 73 unsigned int queue_total;
74 unsigned int id_sequence; /* 'sequence' of pkt ids */ 74 unsigned int id_sequence; /* 'sequence' of pkt ids */
75 struct list_head queue_list; /* packets in queue */ 75 struct list_head queue_list; /* packets in queue */
@@ -77,7 +77,7 @@ struct nfqnl_instance {
77 77
78typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); 78typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
79 79
80static int nfnl_queue_net_id __read_mostly; 80static unsigned int nfnl_queue_net_id __read_mostly;
81 81
82#define INSTANCE_BUCKETS 16 82#define INSTANCE_BUCKETS 16
83struct nfnl_queue_net { 83struct nfnl_queue_net {
@@ -384,7 +384,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
384 + nla_total_size(sizeof(u_int32_t)) /* skbinfo */ 384 + nla_total_size(sizeof(u_int32_t)) /* skbinfo */
385 + nla_total_size(sizeof(u_int32_t)); /* cap_len */ 385 + nla_total_size(sizeof(u_int32_t)); /* cap_len */
386 386
387 if (entskb->tstamp.tv64) 387 if (entskb->tstamp)
388 size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); 388 size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
389 389
390 size += nfqnl_get_bridge_size(entry); 390 size += nfqnl_get_bridge_size(entry);
@@ -555,7 +555,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
555 if (nfqnl_put_bridge(entry, skb) < 0) 555 if (nfqnl_put_bridge(entry, skb) < 0)
556 goto nla_put_failure; 556 goto nla_put_failure;
557 557
558 if (entskb->tstamp.tv64) { 558 if (entskb->tstamp) {
559 struct nfqnl_msg_packet_timestamp ts; 559 struct nfqnl_msg_packet_timestamp ts;
560 struct timespec64 kts = ktime_to_timespec64(entskb->tstamp); 560 struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
561 561
@@ -919,7 +919,7 @@ static struct notifier_block nfqnl_dev_notifier = {
919 919
920static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr) 920static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr)
921{ 921{
922 return rcu_access_pointer(entry->state.hook_entries) == 922 return rcu_access_pointer(entry->hook) ==
923 (struct nf_hook_entry *)entry_ptr; 923 (struct nf_hook_entry *)entry_ptr;
924} 924}
925 925
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 31c15ed2e5fc..877d9acd91ef 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -121,7 +121,6 @@ nla_put_failure:
121 return -1; 121 return -1;
122} 122}
123 123
124static struct nft_expr_type nft_bitwise_type;
125static const struct nft_expr_ops nft_bitwise_ops = { 124static const struct nft_expr_ops nft_bitwise_ops = {
126 .type = &nft_bitwise_type, 125 .type = &nft_bitwise_type,
127 .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), 126 .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
@@ -130,20 +129,10 @@ static const struct nft_expr_ops nft_bitwise_ops = {
130 .dump = nft_bitwise_dump, 129 .dump = nft_bitwise_dump,
131}; 130};
132 131
133static struct nft_expr_type nft_bitwise_type __read_mostly = { 132struct nft_expr_type nft_bitwise_type __read_mostly = {
134 .name = "bitwise", 133 .name = "bitwise",
135 .ops = &nft_bitwise_ops, 134 .ops = &nft_bitwise_ops,
136 .policy = nft_bitwise_policy, 135 .policy = nft_bitwise_policy,
137 .maxattr = NFTA_BITWISE_MAX, 136 .maxattr = NFTA_BITWISE_MAX,
138 .owner = THIS_MODULE, 137 .owner = THIS_MODULE,
139}; 138};
140
141int __init nft_bitwise_module_init(void)
142{
143 return nft_register_expr(&nft_bitwise_type);
144}
145
146void nft_bitwise_module_exit(void)
147{
148 nft_unregister_expr(&nft_bitwise_type);
149}
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index ee63d981268d..13d4e421a6b3 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -169,7 +169,6 @@ nla_put_failure:
169 return -1; 169 return -1;
170} 170}
171 171
172static struct nft_expr_type nft_byteorder_type;
173static const struct nft_expr_ops nft_byteorder_ops = { 172static const struct nft_expr_ops nft_byteorder_ops = {
174 .type = &nft_byteorder_type, 173 .type = &nft_byteorder_type,
175 .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), 174 .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
@@ -178,20 +177,10 @@ static const struct nft_expr_ops nft_byteorder_ops = {
178 .dump = nft_byteorder_dump, 177 .dump = nft_byteorder_dump,
179}; 178};
180 179
181static struct nft_expr_type nft_byteorder_type __read_mostly = { 180struct nft_expr_type nft_byteorder_type __read_mostly = {
182 .name = "byteorder", 181 .name = "byteorder",
183 .ops = &nft_byteorder_ops, 182 .ops = &nft_byteorder_ops,
184 .policy = nft_byteorder_policy, 183 .policy = nft_byteorder_policy,
185 .maxattr = NFTA_BYTEORDER_MAX, 184 .maxattr = NFTA_BYTEORDER_MAX,
186 .owner = THIS_MODULE, 185 .owner = THIS_MODULE,
187}; 186};
188
189int __init nft_byteorder_module_init(void)
190{
191 return nft_register_expr(&nft_byteorder_type);
192}
193
194void nft_byteorder_module_exit(void)
195{
196 nft_unregister_expr(&nft_byteorder_type);
197}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 2e53739812b1..2b96effeadc1 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -84,9 +84,6 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
84 if (err < 0) 84 if (err < 0)
85 return err; 85 return err;
86 86
87 if (desc.len > U8_MAX)
88 return -ERANGE;
89
90 priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); 87 priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
91 priv->len = desc.len; 88 priv->len = desc.len;
92 return 0; 89 return 0;
@@ -110,7 +107,6 @@ nla_put_failure:
110 return -1; 107 return -1;
111} 108}
112 109
113static struct nft_expr_type nft_cmp_type;
114static const struct nft_expr_ops nft_cmp_ops = { 110static const struct nft_expr_ops nft_cmp_ops = {
115 .type = &nft_cmp_type, 111 .type = &nft_cmp_type,
116 .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), 112 .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
@@ -211,20 +207,10 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
211 return &nft_cmp_ops; 207 return &nft_cmp_ops;
212} 208}
213 209
214static struct nft_expr_type nft_cmp_type __read_mostly = { 210struct nft_expr_type nft_cmp_type __read_mostly = {
215 .name = "cmp", 211 .name = "cmp",
216 .select_ops = nft_cmp_select_ops, 212 .select_ops = nft_cmp_select_ops,
217 .policy = nft_cmp_policy, 213 .policy = nft_cmp_policy,
218 .maxattr = NFTA_CMP_MAX, 214 .maxattr = NFTA_CMP_MAX,
219 .owner = THIS_MODULE, 215 .owner = THIS_MODULE,
220}; 216};
221
222int __init nft_cmp_module_init(void)
223{
224 return nft_register_expr(&nft_cmp_type);
225}
226
227void nft_cmp_module_exit(void)
228{
229 nft_unregister_expr(&nft_cmp_type);
230}
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 77db8358ab14..7f8422213341 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -18,105 +18,197 @@
18#include <net/netfilter/nf_tables.h> 18#include <net/netfilter/nf_tables.h>
19 19
20struct nft_counter { 20struct nft_counter {
21 u64 bytes; 21 s64 bytes;
22 u64 packets; 22 s64 packets;
23};
24
25struct nft_counter_percpu {
26 struct nft_counter counter;
27 struct u64_stats_sync syncp;
28}; 23};
29 24
30struct nft_counter_percpu_priv { 25struct nft_counter_percpu_priv {
31 struct nft_counter_percpu __percpu *counter; 26 struct nft_counter __percpu *counter;
32}; 27};
33 28
34static void nft_counter_eval(const struct nft_expr *expr, 29static DEFINE_PER_CPU(seqcount_t, nft_counter_seq);
35 struct nft_regs *regs, 30
36 const struct nft_pktinfo *pkt) 31static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv,
32 struct nft_regs *regs,
33 const struct nft_pktinfo *pkt)
37{ 34{
38 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); 35 struct nft_counter *this_cpu;
39 struct nft_counter_percpu *this_cpu; 36 seqcount_t *myseq;
40 37
41 local_bh_disable(); 38 local_bh_disable();
42 this_cpu = this_cpu_ptr(priv->counter); 39 this_cpu = this_cpu_ptr(priv->counter);
43 u64_stats_update_begin(&this_cpu->syncp); 40 myseq = this_cpu_ptr(&nft_counter_seq);
44 this_cpu->counter.bytes += pkt->skb->len; 41
45 this_cpu->counter.packets++; 42 write_seqcount_begin(myseq);
46 u64_stats_update_end(&this_cpu->syncp); 43
44 this_cpu->bytes += pkt->skb->len;
45 this_cpu->packets++;
46
47 write_seqcount_end(myseq);
47 local_bh_enable(); 48 local_bh_enable();
48} 49}
49 50
50static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter, 51static inline void nft_counter_obj_eval(struct nft_object *obj,
52 struct nft_regs *regs,
53 const struct nft_pktinfo *pkt)
54{
55 struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
56
57 nft_counter_do_eval(priv, regs, pkt);
58}
59
60static int nft_counter_do_init(const struct nlattr * const tb[],
61 struct nft_counter_percpu_priv *priv)
62{
63 struct nft_counter __percpu *cpu_stats;
64 struct nft_counter *this_cpu;
65
66 cpu_stats = alloc_percpu(struct nft_counter);
67 if (cpu_stats == NULL)
68 return -ENOMEM;
69
70 preempt_disable();
71 this_cpu = this_cpu_ptr(cpu_stats);
72 if (tb[NFTA_COUNTER_PACKETS]) {
73 this_cpu->packets =
74 be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
75 }
76 if (tb[NFTA_COUNTER_BYTES]) {
77 this_cpu->bytes =
78 be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
79 }
80 preempt_enable();
81 priv->counter = cpu_stats;
82 return 0;
83}
84
85static int nft_counter_obj_init(const struct nlattr * const tb[],
86 struct nft_object *obj)
87{
88 struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
89
90 return nft_counter_do_init(tb, priv);
91}
92
93static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv)
94{
95 free_percpu(priv->counter);
96}
97
98static void nft_counter_obj_destroy(struct nft_object *obj)
99{
100 struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
101
102 nft_counter_do_destroy(priv);
103}
104
105static void nft_counter_reset(struct nft_counter_percpu_priv __percpu *priv,
51 struct nft_counter *total) 106 struct nft_counter *total)
52{ 107{
53 const struct nft_counter_percpu *cpu_stats; 108 struct nft_counter *this_cpu;
109
110 local_bh_disable();
111 this_cpu = this_cpu_ptr(priv->counter);
112 this_cpu->packets -= total->packets;
113 this_cpu->bytes -= total->bytes;
114 local_bh_enable();
115}
116
117static void nft_counter_fetch(struct nft_counter_percpu_priv *priv,
118 struct nft_counter *total)
119{
120 struct nft_counter *this_cpu;
121 const seqcount_t *myseq;
54 u64 bytes, packets; 122 u64 bytes, packets;
55 unsigned int seq; 123 unsigned int seq;
56 int cpu; 124 int cpu;
57 125
58 memset(total, 0, sizeof(*total)); 126 memset(total, 0, sizeof(*total));
59 for_each_possible_cpu(cpu) { 127 for_each_possible_cpu(cpu) {
60 cpu_stats = per_cpu_ptr(counter, cpu); 128 myseq = per_cpu_ptr(&nft_counter_seq, cpu);
129 this_cpu = per_cpu_ptr(priv->counter, cpu);
61 do { 130 do {
62 seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); 131 seq = read_seqcount_begin(myseq);
63 bytes = cpu_stats->counter.bytes; 132 bytes = this_cpu->bytes;
64 packets = cpu_stats->counter.packets; 133 packets = this_cpu->packets;
65 } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); 134 } while (read_seqcount_retry(myseq, seq));
66 135
67 total->packets += packets; 136 total->bytes += bytes;
68 total->bytes += bytes; 137 total->packets += packets;
69 } 138 }
70} 139}
71 140
72static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) 141static int nft_counter_do_dump(struct sk_buff *skb,
142 struct nft_counter_percpu_priv *priv,
143 bool reset)
73{ 144{
74 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
75 struct nft_counter total; 145 struct nft_counter total;
76 146
77 nft_counter_fetch(priv->counter, &total); 147 nft_counter_fetch(priv, &total);
78 148
79 if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), 149 if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes),
80 NFTA_COUNTER_PAD) || 150 NFTA_COUNTER_PAD) ||
81 nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets), 151 nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets),
82 NFTA_COUNTER_PAD)) 152 NFTA_COUNTER_PAD))
83 goto nla_put_failure; 153 goto nla_put_failure;
154
155 if (reset)
156 nft_counter_reset(priv, &total);
157
84 return 0; 158 return 0;
85 159
86nla_put_failure: 160nla_put_failure:
87 return -1; 161 return -1;
88} 162}
89 163
164static int nft_counter_obj_dump(struct sk_buff *skb,
165 struct nft_object *obj, bool reset)
166{
167 struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
168
169 return nft_counter_do_dump(skb, priv, reset);
170}
171
90static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { 172static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
91 [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, 173 [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 },
92 [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, 174 [NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
93}; 175};
94 176
177static struct nft_object_type nft_counter_obj __read_mostly = {
178 .type = NFT_OBJECT_COUNTER,
179 .size = sizeof(struct nft_counter_percpu_priv),
180 .maxattr = NFTA_COUNTER_MAX,
181 .policy = nft_counter_policy,
182 .eval = nft_counter_obj_eval,
183 .init = nft_counter_obj_init,
184 .destroy = nft_counter_obj_destroy,
185 .dump = nft_counter_obj_dump,
186 .owner = THIS_MODULE,
187};
188
189static void nft_counter_eval(const struct nft_expr *expr,
190 struct nft_regs *regs,
191 const struct nft_pktinfo *pkt)
192{
193 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
194
195 nft_counter_do_eval(priv, regs, pkt);
196}
197
198static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
199{
200 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
201
202 return nft_counter_do_dump(skb, priv, false);
203}
204
95static int nft_counter_init(const struct nft_ctx *ctx, 205static int nft_counter_init(const struct nft_ctx *ctx,
96 const struct nft_expr *expr, 206 const struct nft_expr *expr,
97 const struct nlattr * const tb[]) 207 const struct nlattr * const tb[])
98{ 208{
99 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); 209 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
100 struct nft_counter_percpu __percpu *cpu_stats;
101 struct nft_counter_percpu *this_cpu;
102
103 cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
104 if (cpu_stats == NULL)
105 return -ENOMEM;
106 210
107 preempt_disable(); 211 return nft_counter_do_init(tb, priv);
108 this_cpu = this_cpu_ptr(cpu_stats);
109 if (tb[NFTA_COUNTER_PACKETS]) {
110 this_cpu->counter.packets =
111 be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
112 }
113 if (tb[NFTA_COUNTER_BYTES]) {
114 this_cpu->counter.bytes =
115 be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
116 }
117 preempt_enable();
118 priv->counter = cpu_stats;
119 return 0;
120} 212}
121 213
122static void nft_counter_destroy(const struct nft_ctx *ctx, 214static void nft_counter_destroy(const struct nft_ctx *ctx,
@@ -124,28 +216,27 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
124{ 216{
125 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); 217 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
126 218
127 free_percpu(priv->counter); 219 nft_counter_do_destroy(priv);
128} 220}
129 221
130static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src) 222static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
131{ 223{
132 struct nft_counter_percpu_priv *priv = nft_expr_priv(src); 224 struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
133 struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst); 225 struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
134 struct nft_counter_percpu __percpu *cpu_stats; 226 struct nft_counter __percpu *cpu_stats;
135 struct nft_counter_percpu *this_cpu; 227 struct nft_counter *this_cpu;
136 struct nft_counter total; 228 struct nft_counter total;
137 229
138 nft_counter_fetch(priv->counter, &total); 230 nft_counter_fetch(priv, &total);
139 231
140 cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu, 232 cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
141 GFP_ATOMIC);
142 if (cpu_stats == NULL) 233 if (cpu_stats == NULL)
143 return -ENOMEM; 234 return -ENOMEM;
144 235
145 preempt_disable(); 236 preempt_disable();
146 this_cpu = this_cpu_ptr(cpu_stats); 237 this_cpu = this_cpu_ptr(cpu_stats);
147 this_cpu->counter.packets = total.packets; 238 this_cpu->packets = total.packets;
148 this_cpu->counter.bytes = total.bytes; 239 this_cpu->bytes = total.bytes;
149 preempt_enable(); 240 preempt_enable();
150 241
151 priv_clone->counter = cpu_stats; 242 priv_clone->counter = cpu_stats;
@@ -174,12 +265,29 @@ static struct nft_expr_type nft_counter_type __read_mostly = {
174 265
175static int __init nft_counter_module_init(void) 266static int __init nft_counter_module_init(void)
176{ 267{
177 return nft_register_expr(&nft_counter_type); 268 int cpu, err;
269
270 for_each_possible_cpu(cpu)
271 seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
272
273 err = nft_register_obj(&nft_counter_obj);
274 if (err < 0)
275 return err;
276
277 err = nft_register_expr(&nft_counter_type);
278 if (err < 0)
279 goto err1;
280
281 return 0;
282err1:
283 nft_unregister_obj(&nft_counter_obj);
284 return err;
178} 285}
179 286
180static void __exit nft_counter_module_exit(void) 287static void __exit nft_counter_module_exit(void)
181{ 288{
182 nft_unregister_expr(&nft_counter_type); 289 nft_unregister_expr(&nft_counter_type);
290 nft_unregister_obj(&nft_counter_obj);
183} 291}
184 292
185module_init(nft_counter_module_init); 293module_init(nft_counter_module_init);
@@ -188,3 +296,4 @@ module_exit(nft_counter_module_exit);
188MODULE_LICENSE("GPL"); 296MODULE_LICENSE("GPL");
189MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 297MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
190MODULE_ALIAS_NFT_EXPR("counter"); 298MODULE_ALIAS_NFT_EXPR("counter");
299MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index d7b0d171172a..e6baeaebe653 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> 2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -207,37 +208,37 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
207 [NFTA_CT_SREG] = { .type = NLA_U32 }, 208 [NFTA_CT_SREG] = { .type = NLA_U32 },
208}; 209};
209 210
210static int nft_ct_l3proto_try_module_get(uint8_t family) 211static int nft_ct_netns_get(struct net *net, uint8_t family)
211{ 212{
212 int err; 213 int err;
213 214
214 if (family == NFPROTO_INET) { 215 if (family == NFPROTO_INET) {
215 err = nf_ct_l3proto_try_module_get(NFPROTO_IPV4); 216 err = nf_ct_netns_get(net, NFPROTO_IPV4);
216 if (err < 0) 217 if (err < 0)
217 goto err1; 218 goto err1;
218 err = nf_ct_l3proto_try_module_get(NFPROTO_IPV6); 219 err = nf_ct_netns_get(net, NFPROTO_IPV6);
219 if (err < 0) 220 if (err < 0)
220 goto err2; 221 goto err2;
221 } else { 222 } else {
222 err = nf_ct_l3proto_try_module_get(family); 223 err = nf_ct_netns_get(net, family);
223 if (err < 0) 224 if (err < 0)
224 goto err1; 225 goto err1;
225 } 226 }
226 return 0; 227 return 0;
227 228
228err2: 229err2:
229 nf_ct_l3proto_module_put(NFPROTO_IPV4); 230 nf_ct_netns_put(net, NFPROTO_IPV4);
230err1: 231err1:
231 return err; 232 return err;
232} 233}
233 234
234static void nft_ct_l3proto_module_put(uint8_t family) 235static void nft_ct_netns_put(struct net *net, uint8_t family)
235{ 236{
236 if (family == NFPROTO_INET) { 237 if (family == NFPROTO_INET) {
237 nf_ct_l3proto_module_put(NFPROTO_IPV4); 238 nf_ct_netns_put(net, NFPROTO_IPV4);
238 nf_ct_l3proto_module_put(NFPROTO_IPV6); 239 nf_ct_netns_put(net, NFPROTO_IPV6);
239 } else 240 } else
240 nf_ct_l3proto_module_put(family); 241 nf_ct_netns_put(net, family);
241} 242}
242 243
243static int nft_ct_get_init(const struct nft_ctx *ctx, 244static int nft_ct_get_init(const struct nft_ctx *ctx,
@@ -341,7 +342,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
341 if (err < 0) 342 if (err < 0)
342 return err; 343 return err;
343 344
344 err = nft_ct_l3proto_try_module_get(ctx->afi->family); 345 err = nft_ct_netns_get(ctx->net, ctx->afi->family);
345 if (err < 0) 346 if (err < 0)
346 return err; 347 return err;
347 348
@@ -389,7 +390,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
389 if (err < 0) 390 if (err < 0)
390 goto err1; 391 goto err1;
391 392
392 err = nft_ct_l3proto_try_module_get(ctx->afi->family); 393 err = nft_ct_netns_get(ctx->net, ctx->afi->family);
393 if (err < 0) 394 if (err < 0)
394 goto err1; 395 goto err1;
395 396
@@ -404,7 +405,7 @@ err1:
404static void nft_ct_get_destroy(const struct nft_ctx *ctx, 405static void nft_ct_get_destroy(const struct nft_ctx *ctx,
405 const struct nft_expr *expr) 406 const struct nft_expr *expr)
406{ 407{
407 nft_ct_l3proto_module_put(ctx->afi->family); 408 nf_ct_netns_put(ctx->net, ctx->afi->family);
408} 409}
409 410
410static void nft_ct_set_destroy(const struct nft_ctx *ctx, 411static void nft_ct_set_destroy(const struct nft_ctx *ctx,
@@ -422,7 +423,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
422 break; 423 break;
423 } 424 }
424 425
425 nft_ct_l3proto_module_put(ctx->afi->family); 426 nft_ct_netns_put(ctx->net, ctx->afi->family);
426} 427}
427 428
428static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) 429static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -518,15 +519,61 @@ static struct nft_expr_type nft_ct_type __read_mostly = {
518 .owner = THIS_MODULE, 519 .owner = THIS_MODULE,
519}; 520};
520 521
522static void nft_notrack_eval(const struct nft_expr *expr,
523 struct nft_regs *regs,
524 const struct nft_pktinfo *pkt)
525{
526 struct sk_buff *skb = pkt->skb;
527 enum ip_conntrack_info ctinfo;
528 struct nf_conn *ct;
529
530 ct = nf_ct_get(pkt->skb, &ctinfo);
531 /* Previously seen (loopback or untracked)? Ignore. */
532 if (ct)
533 return;
534
535 ct = nf_ct_untracked_get();
536 atomic_inc(&ct->ct_general.use);
537 skb->nfct = &ct->ct_general;
538 skb->nfctinfo = IP_CT_NEW;
539}
540
541static struct nft_expr_type nft_notrack_type;
542static const struct nft_expr_ops nft_notrack_ops = {
543 .type = &nft_notrack_type,
544 .size = NFT_EXPR_SIZE(0),
545 .eval = nft_notrack_eval,
546};
547
548static struct nft_expr_type nft_notrack_type __read_mostly = {
549 .name = "notrack",
550 .ops = &nft_notrack_ops,
551 .owner = THIS_MODULE,
552};
553
521static int __init nft_ct_module_init(void) 554static int __init nft_ct_module_init(void)
522{ 555{
556 int err;
557
523 BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE); 558 BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE);
524 559
525 return nft_register_expr(&nft_ct_type); 560 err = nft_register_expr(&nft_ct_type);
561 if (err < 0)
562 return err;
563
564 err = nft_register_expr(&nft_notrack_type);
565 if (err < 0)
566 goto err1;
567
568 return 0;
569err1:
570 nft_unregister_expr(&nft_ct_type);
571 return err;
526} 572}
527 573
528static void __exit nft_ct_module_exit(void) 574static void __exit nft_ct_module_exit(void)
529{ 575{
576 nft_unregister_expr(&nft_notrack_type);
530 nft_unregister_expr(&nft_ct_type); 577 nft_unregister_expr(&nft_ct_type);
531} 578}
532 579
@@ -536,3 +583,4 @@ module_exit(nft_ct_module_exit);
536MODULE_LICENSE("GPL"); 583MODULE_LICENSE("GPL");
537MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 584MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
538MODULE_ALIAS_NFT_EXPR("ct"); 585MODULE_ALIAS_NFT_EXPR("ct");
586MODULE_ALIAS_NFT_EXPR("notrack");
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 31ca94793aa9..7de2f46734a4 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -268,7 +268,6 @@ nla_put_failure:
268 return -1; 268 return -1;
269} 269}
270 270
271static struct nft_expr_type nft_dynset_type;
272static const struct nft_expr_ops nft_dynset_ops = { 271static const struct nft_expr_ops nft_dynset_ops = {
273 .type = &nft_dynset_type, 272 .type = &nft_dynset_type,
274 .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)), 273 .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)),
@@ -278,20 +277,10 @@ static const struct nft_expr_ops nft_dynset_ops = {
278 .dump = nft_dynset_dump, 277 .dump = nft_dynset_dump,
279}; 278};
280 279
281static struct nft_expr_type nft_dynset_type __read_mostly = { 280struct nft_expr_type nft_dynset_type __read_mostly = {
282 .name = "dynset", 281 .name = "dynset",
283 .ops = &nft_dynset_ops, 282 .ops = &nft_dynset_ops,
284 .policy = nft_dynset_policy, 283 .policy = nft_dynset_policy,
285 .maxattr = NFTA_DYNSET_MAX, 284 .maxattr = NFTA_DYNSET_MAX,
286 .owner = THIS_MODULE, 285 .owner = THIS_MODULE,
287}; 286};
288
289int __init nft_dynset_module_init(void)
290{
291 return nft_register_expr(&nft_dynset_type);
292}
293
294void nft_dynset_module_exit(void)
295{
296 nft_unregister_expr(&nft_dynset_type);
297}
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
new file mode 100644
index 000000000000..29a4906adc27
--- /dev/null
+++ b/net/netfilter/nft_fib.c
@@ -0,0 +1,159 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License version 2 as
4 * published by the Free Software Foundation.
5 *
6 * Generic part shared by ipv4 and ipv6 backends.
7 */
8
9#include <linux/kernel.h>
10#include <linux/init.h>
11#include <linux/module.h>
12#include <linux/netlink.h>
13#include <linux/netfilter.h>
14#include <linux/netfilter/nf_tables.h>
15#include <net/netfilter/nf_tables_core.h>
16#include <net/netfilter/nf_tables.h>
17#include <net/netfilter/nft_fib.h>
18
19const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
20 [NFTA_FIB_DREG] = { .type = NLA_U32 },
21 [NFTA_FIB_RESULT] = { .type = NLA_U32 },
22 [NFTA_FIB_FLAGS] = { .type = NLA_U32 },
23};
24EXPORT_SYMBOL(nft_fib_policy);
25
26#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
27 NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)
28
29int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
30 const struct nft_data **data)
31{
32 const struct nft_fib *priv = nft_expr_priv(expr);
33 unsigned int hooks;
34
35 switch (priv->result) {
36 case NFT_FIB_RESULT_OIF: /* fallthrough */
37 case NFT_FIB_RESULT_OIFNAME:
38 hooks = (1 << NF_INET_PRE_ROUTING);
39 break;
40 case NFT_FIB_RESULT_ADDRTYPE:
41 if (priv->flags & NFTA_FIB_F_IIF)
42 hooks = (1 << NF_INET_PRE_ROUTING) |
43 (1 << NF_INET_LOCAL_IN) |
44 (1 << NF_INET_FORWARD);
45 else if (priv->flags & NFTA_FIB_F_OIF)
46 hooks = (1 << NF_INET_LOCAL_OUT) |
47 (1 << NF_INET_POST_ROUTING) |
48 (1 << NF_INET_FORWARD);
49 else
50 hooks = (1 << NF_INET_LOCAL_IN) |
51 (1 << NF_INET_LOCAL_OUT) |
52 (1 << NF_INET_FORWARD) |
53 (1 << NF_INET_PRE_ROUTING) |
54 (1 << NF_INET_POST_ROUTING);
55
56 break;
57 default:
58 return -EINVAL;
59 }
60
61 return nft_chain_validate_hooks(ctx->chain, hooks);
62}
63EXPORT_SYMBOL_GPL(nft_fib_validate);
64
65int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
66 const struct nlattr * const tb[])
67{
68 struct nft_fib *priv = nft_expr_priv(expr);
69 unsigned int len;
70 int err;
71
72 if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS])
73 return -EINVAL;
74
75 priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS]));
76
77 if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL))
78 return -EINVAL;
79
80 if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) ==
81 (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR))
82 return -EINVAL;
83 if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) ==
84 (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF))
85 return -EINVAL;
86 if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0)
87 return -EINVAL;
88
89 priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
90 priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]);
91
92 switch (priv->result) {
93 case NFT_FIB_RESULT_OIF:
94 if (priv->flags & NFTA_FIB_F_OIF)
95 return -EINVAL;
96 len = sizeof(int);
97 break;
98 case NFT_FIB_RESULT_OIFNAME:
99 if (priv->flags & NFTA_FIB_F_OIF)
100 return -EINVAL;
101 len = IFNAMSIZ;
102 break;
103 case NFT_FIB_RESULT_ADDRTYPE:
104 len = sizeof(u32);
105 break;
106 default:
107 return -EINVAL;
108 }
109
110 err = nft_validate_register_store(ctx, priv->dreg, NULL,
111 NFT_DATA_VALUE, len);
112 if (err < 0)
113 return err;
114
115 return nft_fib_validate(ctx, expr, NULL);
116}
117EXPORT_SYMBOL_GPL(nft_fib_init);
118
119int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
120{
121 const struct nft_fib *priv = nft_expr_priv(expr);
122
123 if (nft_dump_register(skb, NFTA_FIB_DREG, priv->dreg))
124 return -1;
125
126 if (nla_put_be32(skb, NFTA_FIB_RESULT, htonl(priv->result)))
127 return -1;
128
129 if (nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(priv->flags)))
130 return -1;
131
132 return 0;
133}
134EXPORT_SYMBOL_GPL(nft_fib_dump);
135
136void nft_fib_store_result(void *reg, enum nft_fib_result r,
137 const struct nft_pktinfo *pkt, int index)
138{
139 struct net_device *dev;
140 u32 *dreg = reg;
141
142 switch (r) {
143 case NFT_FIB_RESULT_OIF:
144 *dreg = index;
145 break;
146 case NFT_FIB_RESULT_OIFNAME:
147 dev = dev_get_by_index_rcu(nft_net(pkt), index);
148 strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
149 break;
150 default:
151 WARN_ON_ONCE(1);
152 *dreg = 0;
153 break;
154 }
155}
156EXPORT_SYMBOL_GPL(nft_fib_store_result);
157
158MODULE_LICENSE("GPL");
159MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c
new file mode 100644
index 000000000000..9120fc7228f4
--- /dev/null
+++ b/net/netfilter/nft_fib_inet.c
@@ -0,0 +1,82 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License version 2 as
4 * published by the Free Software Foundation.
5 */
6
7#include <linux/kernel.h>
8#include <linux/init.h>
9#include <linux/module.h>
10#include <linux/netlink.h>
11#include <linux/netfilter.h>
12#include <linux/netfilter/nf_tables.h>
13#include <net/netfilter/nf_tables_core.h>
14#include <net/netfilter/nf_tables.h>
15
16#include <net/netfilter/nft_fib.h>
17
18static void nft_fib_inet_eval(const struct nft_expr *expr,
19 struct nft_regs *regs,
20 const struct nft_pktinfo *pkt)
21{
22 const struct nft_fib *priv = nft_expr_priv(expr);
23
24 switch (nft_pf(pkt)) {
25 case NFPROTO_IPV4:
26 switch (priv->result) {
27 case NFT_FIB_RESULT_OIF:
28 case NFT_FIB_RESULT_OIFNAME:
29 return nft_fib4_eval(expr, regs, pkt);
30 case NFT_FIB_RESULT_ADDRTYPE:
31 return nft_fib4_eval_type(expr, regs, pkt);
32 }
33 break;
34 case NFPROTO_IPV6:
35 switch (priv->result) {
36 case NFT_FIB_RESULT_OIF:
37 case NFT_FIB_RESULT_OIFNAME:
38 return nft_fib6_eval(expr, regs, pkt);
39 case NFT_FIB_RESULT_ADDRTYPE:
40 return nft_fib6_eval_type(expr, regs, pkt);
41 }
42 break;
43 }
44
45 regs->verdict.code = NF_DROP;
46}
47
48static struct nft_expr_type nft_fib_inet_type;
49static const struct nft_expr_ops nft_fib_inet_ops = {
50 .type = &nft_fib_inet_type,
51 .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
52 .eval = nft_fib_inet_eval,
53 .init = nft_fib_init,
54 .dump = nft_fib_dump,
55 .validate = nft_fib_validate,
56};
57
58static struct nft_expr_type nft_fib_inet_type __read_mostly = {
59 .family = NFPROTO_INET,
60 .name = "fib",
61 .ops = &nft_fib_inet_ops,
62 .policy = nft_fib_policy,
63 .maxattr = NFTA_FIB_MAX,
64 .owner = THIS_MODULE,
65};
66
67static int __init nft_fib_inet_module_init(void)
68{
69 return nft_register_expr(&nft_fib_inet_type);
70}
71
72static void __exit nft_fib_inet_module_exit(void)
73{
74 nft_unregister_expr(&nft_fib_inet_type);
75}
76
77module_init(nft_fib_inet_module_init);
78module_exit(nft_fib_inet_module_exit);
79
80MODULE_LICENSE("GPL");
81MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
82MODULE_ALIAS_NFT_AF_EXPR(1, "fib");
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 763ebc3e0b2b..ce13a50b9189 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -26,8 +26,8 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr,
26 struct nft_fwd_netdev *priv = nft_expr_priv(expr); 26 struct nft_fwd_netdev *priv = nft_expr_priv(expr);
27 int oif = regs->data[priv->sreg_dev]; 27 int oif = regs->data[priv->sreg_dev];
28 28
29 nf_dup_netdev_egress(pkt, oif); 29 nf_fwd_netdev_egress(pkt, oif);
30 regs->verdict.code = NF_DROP; 30 regs->verdict.code = NF_STOLEN;
31} 31}
32 32
33static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = { 33static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = {
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index d5447a22275c..eb2721af898d 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -58,7 +58,6 @@ static int nft_hash_init(const struct nft_ctx *ctx,
58 if (!tb[NFTA_HASH_SREG] || 58 if (!tb[NFTA_HASH_SREG] ||
59 !tb[NFTA_HASH_DREG] || 59 !tb[NFTA_HASH_DREG] ||
60 !tb[NFTA_HASH_LEN] || 60 !tb[NFTA_HASH_LEN] ||
61 !tb[NFTA_HASH_SEED] ||
62 !tb[NFTA_HASH_MODULUS]) 61 !tb[NFTA_HASH_MODULUS])
63 return -EINVAL; 62 return -EINVAL;
64 63
@@ -83,7 +82,10 @@ static int nft_hash_init(const struct nft_ctx *ctx,
83 if (priv->offset + priv->modulus - 1 < priv->offset) 82 if (priv->offset + priv->modulus - 1 < priv->offset)
84 return -EOVERFLOW; 83 return -EOVERFLOW;
85 84
86 priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); 85 if (tb[NFTA_HASH_SEED])
86 priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
87 else
88 get_random_bytes(&priv->seed, sizeof(priv->seed));
87 89
88 return nft_validate_register_load(priv->sreg, len) && 90 return nft_validate_register_load(priv->sreg, len) &&
89 nft_validate_register_store(ctx, priv->dreg, NULL, 91 nft_validate_register_store(ctx, priv->dreg, NULL,
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index d17018ff54e6..728baf88295a 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -54,9 +54,6 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
54 if (err < 0) 54 if (err < 0)
55 return err; 55 return err;
56 56
57 if (desc.len > U8_MAX)
58 return -ERANGE;
59
60 priv->dlen = desc.len; 57 priv->dlen = desc.len;
61 58
62 priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); 59 priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
@@ -105,7 +102,6 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
105 return 0; 102 return 0;
106} 103}
107 104
108static struct nft_expr_type nft_imm_type;
109static const struct nft_expr_ops nft_imm_ops = { 105static const struct nft_expr_ops nft_imm_ops = {
110 .type = &nft_imm_type, 106 .type = &nft_imm_type,
111 .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), 107 .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
@@ -116,20 +112,10 @@ static const struct nft_expr_ops nft_imm_ops = {
116 .validate = nft_immediate_validate, 112 .validate = nft_immediate_validate,
117}; 113};
118 114
119static struct nft_expr_type nft_imm_type __read_mostly = { 115struct nft_expr_type nft_imm_type __read_mostly = {
120 .name = "immediate", 116 .name = "immediate",
121 .ops = &nft_imm_ops, 117 .ops = &nft_imm_ops,
122 .policy = nft_immediate_policy, 118 .policy = nft_immediate_policy,
123 .maxattr = NFTA_IMMEDIATE_MAX, 119 .maxattr = NFTA_IMMEDIATE_MAX,
124 .owner = THIS_MODULE, 120 .owner = THIS_MODULE,
125}; 121};
126
127int __init nft_immediate_module_init(void)
128{
129 return nft_register_expr(&nft_imm_type);
130}
131
132void nft_immediate_module_exit(void)
133{
134 nft_unregister_expr(&nft_imm_type);
135}
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 1b01404bb33f..6271e40a3dd6 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -32,8 +32,9 @@ static void nft_log_eval(const struct nft_expr *expr,
32{ 32{
33 const struct nft_log *priv = nft_expr_priv(expr); 33 const struct nft_log *priv = nft_expr_priv(expr);
34 34
35 nf_log_packet(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, 35 nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb,
36 pkt->out, &priv->loginfo, "%s", priv->prefix); 36 nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s",
37 priv->prefix);
37} 38}
38 39
39static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { 40static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 8166b6994cc7..d4f97fa7e21d 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -35,9 +35,8 @@ static void nft_lookup_eval(const struct nft_expr *expr,
35 const struct nft_set_ext *ext; 35 const struct nft_set_ext *ext;
36 bool found; 36 bool found;
37 37
38 found = set->ops->lookup(pkt->net, set, &regs->data[priv->sreg], &ext) ^ 38 found = set->ops->lookup(nft_net(pkt), set, &regs->data[priv->sreg],
39 priv->invert; 39 &ext) ^ priv->invert;
40
41 if (!found) { 40 if (!found) {
42 regs->verdict.code = NFT_BREAK; 41 regs->verdict.code = NFT_BREAK;
43 return; 42 return;
@@ -155,7 +154,6 @@ nla_put_failure:
155 return -1; 154 return -1;
156} 155}
157 156
158static struct nft_expr_type nft_lookup_type;
159static const struct nft_expr_ops nft_lookup_ops = { 157static const struct nft_expr_ops nft_lookup_ops = {
160 .type = &nft_lookup_type, 158 .type = &nft_lookup_type,
161 .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), 159 .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
@@ -165,20 +163,10 @@ static const struct nft_expr_ops nft_lookup_ops = {
165 .dump = nft_lookup_dump, 163 .dump = nft_lookup_dump,
166}; 164};
167 165
168static struct nft_expr_type nft_lookup_type __read_mostly = { 166struct nft_expr_type nft_lookup_type __read_mostly = {
169 .name = "lookup", 167 .name = "lookup",
170 .ops = &nft_lookup_ops, 168 .ops = &nft_lookup_ops,
171 .policy = nft_lookup_policy, 169 .policy = nft_lookup_policy,
172 .maxattr = NFTA_LOOKUP_MAX, 170 .maxattr = NFTA_LOOKUP_MAX,
173 .owner = THIS_MODULE, 171 .owner = THIS_MODULE,
174}; 172};
175
176int __init nft_lookup_module_init(void)
177{
178 return nft_register_expr(&nft_lookup_type);
179}
180
181void nft_lookup_module_exit(void)
182{
183 nft_unregister_expr(&nft_lookup_type);
184}
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 81b5ad6165ac..11ce016cd479 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> 2 * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
@@ -77,7 +77,7 @@ int nft_masq_init(const struct nft_ctx *ctx,
77 } 77 }
78 } 78 }
79 79
80 return 0; 80 return nf_ct_netns_get(ctx->net, ctx->afi->family);
81} 81}
82EXPORT_SYMBOL_GPL(nft_masq_init); 82EXPORT_SYMBOL_GPL(nft_masq_init);
83 83
@@ -105,4 +105,4 @@ nla_put_failure:
105EXPORT_SYMBOL_GPL(nft_masq_dump); 105EXPORT_SYMBOL_GPL(nft_masq_dump);
106 106
107MODULE_LICENSE("GPL"); 107MODULE_LICENSE("GPL");
108MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); 108MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 6c1e0246706e..66c7f4b4c49b 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -36,7 +36,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
36{ 36{
37 const struct nft_meta *priv = nft_expr_priv(expr); 37 const struct nft_meta *priv = nft_expr_priv(expr);
38 const struct sk_buff *skb = pkt->skb; 38 const struct sk_buff *skb = pkt->skb;
39 const struct net_device *in = pkt->in, *out = pkt->out; 39 const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
40 struct sock *sk; 40 struct sock *sk;
41 u32 *dest = &regs->data[priv->dreg]; 41 u32 *dest = &regs->data[priv->dreg];
42 42
@@ -49,7 +49,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
49 *(__be16 *)dest = skb->protocol; 49 *(__be16 *)dest = skb->protocol;
50 break; 50 break;
51 case NFT_META_NFPROTO: 51 case NFT_META_NFPROTO:
52 *dest = pkt->pf; 52 *dest = nft_pf(pkt);
53 break; 53 break;
54 case NFT_META_L4PROTO: 54 case NFT_META_L4PROTO:
55 if (!pkt->tprot_set) 55 if (!pkt->tprot_set)
@@ -146,7 +146,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
146 break; 146 break;
147 } 147 }
148 148
149 switch (pkt->pf) { 149 switch (nft_pf(pkt)) {
150 case NFPROTO_IPV4: 150 case NFPROTO_IPV4:
151 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) 151 if (ipv4_is_multicast(ip_hdr(skb)->daddr))
152 *dest = PACKET_MULTICAST; 152 *dest = PACKET_MULTICAST;
@@ -310,6 +310,11 @@ int nft_meta_set_validate(const struct nft_ctx *ctx,
310 case NFPROTO_NETDEV: 310 case NFPROTO_NETDEV:
311 hooks = 1 << NF_NETDEV_INGRESS; 311 hooks = 1 << NF_NETDEV_INGRESS;
312 break; 312 break;
313 case NFPROTO_IPV4:
314 case NFPROTO_IPV6:
315 case NFPROTO_INET:
316 hooks = 1 << NF_INET_PRE_ROUTING;
317 break;
313 default: 318 default:
314 return -EOPNOTSUPP; 319 return -EOPNOTSUPP;
315 } 320 }
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index ee2d71753746..19a7bf3236f9 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -209,7 +209,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
209 return -EINVAL; 209 return -EINVAL;
210 } 210 }
211 211
212 return 0; 212 return nf_ct_netns_get(ctx->net, family);
213} 213}
214 214
215static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) 215static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -257,12 +257,21 @@ nla_put_failure:
257 return -1; 257 return -1;
258} 258}
259 259
260static void
261nft_nat_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
262{
263 const struct nft_nat *priv = nft_expr_priv(expr);
264
265 nf_ct_netns_put(ctx->net, priv->family);
266}
267
260static struct nft_expr_type nft_nat_type; 268static struct nft_expr_type nft_nat_type;
261static const struct nft_expr_ops nft_nat_ops = { 269static const struct nft_expr_ops nft_nat_ops = {
262 .type = &nft_nat_type, 270 .type = &nft_nat_type,
263 .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), 271 .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)),
264 .eval = nft_nat_eval, 272 .eval = nft_nat_eval,
265 .init = nft_nat_init, 273 .init = nft_nat_init,
274 .destroy = nft_nat_destroy,
266 .dump = nft_nat_dump, 275 .dump = nft_nat_dump,
267 .validate = nft_nat_validate, 276 .validate = nft_nat_validate,
268}; 277};
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 55bc5ab78d4a..a66b36097b8f 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -65,7 +65,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
65 return -EOVERFLOW; 65 return -EOVERFLOW;
66 66
67 priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); 67 priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
68 atomic_set(&priv->counter, 0); 68 atomic_set(&priv->counter, priv->modulus - 1);
69 69
70 return nft_validate_register_store(ctx, priv->dreg, NULL, 70 return nft_validate_register_store(ctx, priv->dreg, NULL,
71 NFT_DATA_VALUE, sizeof(u32)); 71 NFT_DATA_VALUE, sizeof(u32));
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
new file mode 100644
index 000000000000..415a65ba2b85
--- /dev/null
+++ b/net/netfilter/nft_objref.c
@@ -0,0 +1,226 @@
1/*
2 * Copyright (c) 2012-2016 Pablo Neira Ayuso <pablo@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 */
8
9#include <linux/init.h>
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/netlink.h>
13#include <linux/netfilter.h>
14#include <linux/netfilter/nf_tables.h>
15#include <net/netfilter/nf_tables.h>
16
17#define nft_objref_priv(expr) *((struct nft_object **)nft_expr_priv(expr))
18
19static void nft_objref_eval(const struct nft_expr *expr,
20 struct nft_regs *regs,
21 const struct nft_pktinfo *pkt)
22{
23 struct nft_object *obj = nft_objref_priv(expr);
24
25 obj->type->eval(obj, regs, pkt);
26}
27
28static int nft_objref_init(const struct nft_ctx *ctx,
29 const struct nft_expr *expr,
30 const struct nlattr * const tb[])
31{
32 struct nft_object *obj = nft_objref_priv(expr);
33 u8 genmask = nft_genmask_next(ctx->net);
34 u32 objtype;
35
36 if (!tb[NFTA_OBJREF_IMM_NAME] ||
37 !tb[NFTA_OBJREF_IMM_TYPE])
38 return -EINVAL;
39
40 objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE]));
41 obj = nf_tables_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
42 genmask);
43 if (IS_ERR(obj))
44 return -ENOENT;
45
46 nft_objref_priv(expr) = obj;
47 obj->use++;
48
49 return 0;
50}
51
52static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
53{
54 const struct nft_object *obj = nft_objref_priv(expr);
55
56 if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
57 nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type)))
58 goto nla_put_failure;
59
60 return 0;
61
62nla_put_failure:
63 return -1;
64}
65
66static void nft_objref_destroy(const struct nft_ctx *ctx,
67 const struct nft_expr *expr)
68{
69 struct nft_object *obj = nft_objref_priv(expr);
70
71 obj->use--;
72}
73
74static struct nft_expr_type nft_objref_type;
75static const struct nft_expr_ops nft_objref_ops = {
76 .type = &nft_objref_type,
77 .size = NFT_EXPR_SIZE(sizeof(struct nft_object *)),
78 .eval = nft_objref_eval,
79 .init = nft_objref_init,
80 .destroy = nft_objref_destroy,
81 .dump = nft_objref_dump,
82};
83
84struct nft_objref_map {
85 struct nft_set *set;
86 enum nft_registers sreg:8;
87 struct nft_set_binding binding;
88};
89
90static void nft_objref_map_eval(const struct nft_expr *expr,
91 struct nft_regs *regs,
92 const struct nft_pktinfo *pkt)
93{
94 struct nft_objref_map *priv = nft_expr_priv(expr);
95 const struct nft_set *set = priv->set;
96 const struct nft_set_ext *ext;
97 struct nft_object *obj;
98 bool found;
99
100 found = set->ops->lookup(nft_net(pkt), set, &regs->data[priv->sreg],
101 &ext);
102 if (!found) {
103 regs->verdict.code = NFT_BREAK;
104 return;
105 }
106 obj = *nft_set_ext_obj(ext);
107 obj->type->eval(obj, regs, pkt);
108}
109
110static int nft_objref_map_init(const struct nft_ctx *ctx,
111 const struct nft_expr *expr,
112 const struct nlattr * const tb[])
113{
114 struct nft_objref_map *priv = nft_expr_priv(expr);
115 u8 genmask = nft_genmask_next(ctx->net);
116 struct nft_set *set;
117 int err;
118
119 set = nf_tables_set_lookup(ctx->table, tb[NFTA_OBJREF_SET_NAME], genmask);
120 if (IS_ERR(set)) {
121 if (tb[NFTA_OBJREF_SET_ID]) {
122 set = nf_tables_set_lookup_byid(ctx->net,
123 tb[NFTA_OBJREF_SET_ID],
124 genmask);
125 }
126 if (IS_ERR(set))
127 return PTR_ERR(set);
128 }
129
130 if (!(set->flags & NFT_SET_OBJECT))
131 return -EINVAL;
132
133 priv->sreg = nft_parse_register(tb[NFTA_OBJREF_SET_SREG]);
134 err = nft_validate_register_load(priv->sreg, set->klen);
135 if (err < 0)
136 return err;
137
138 priv->binding.flags = set->flags & NFT_SET_OBJECT;
139
140 err = nf_tables_bind_set(ctx, set, &priv->binding);
141 if (err < 0)
142 return err;
143
144 priv->set = set;
145 return 0;
146}
147
148static int nft_objref_map_dump(struct sk_buff *skb, const struct nft_expr *expr)
149{
150 const struct nft_objref_map *priv = nft_expr_priv(expr);
151
152 if (nft_dump_register(skb, NFTA_OBJREF_SET_SREG, priv->sreg) ||
153 nla_put_string(skb, NFTA_OBJREF_SET_NAME, priv->set->name))
154 goto nla_put_failure;
155
156 return 0;
157
158nla_put_failure:
159 return -1;
160}
161
162static void nft_objref_map_destroy(const struct nft_ctx *ctx,
163 const struct nft_expr *expr)
164{
165 struct nft_objref_map *priv = nft_expr_priv(expr);
166
167 nf_tables_unbind_set(ctx, priv->set, &priv->binding);
168}
169
170static struct nft_expr_type nft_objref_type;
171static const struct nft_expr_ops nft_objref_map_ops = {
172 .type = &nft_objref_type,
173 .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
174 .eval = nft_objref_map_eval,
175 .init = nft_objref_map_init,
176 .destroy = nft_objref_map_destroy,
177 .dump = nft_objref_map_dump,
178};
179
180static const struct nft_expr_ops *
181nft_objref_select_ops(const struct nft_ctx *ctx,
182 const struct nlattr * const tb[])
183{
184 if (tb[NFTA_OBJREF_SET_SREG] &&
185 (tb[NFTA_OBJREF_SET_NAME] ||
186 tb[NFTA_OBJREF_SET_ID]))
187 return &nft_objref_map_ops;
188 else if (tb[NFTA_OBJREF_IMM_NAME] &&
189 tb[NFTA_OBJREF_IMM_TYPE])
190 return &nft_objref_ops;
191
192 return ERR_PTR(-EOPNOTSUPP);
193}
194
195static const struct nla_policy nft_objref_policy[NFTA_OBJREF_MAX + 1] = {
196 [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING },
197 [NFTA_OBJREF_IMM_TYPE] = { .type = NLA_U32 },
198 [NFTA_OBJREF_SET_SREG] = { .type = NLA_U32 },
199 [NFTA_OBJREF_SET_NAME] = { .type = NLA_STRING },
200 [NFTA_OBJREF_SET_ID] = { .type = NLA_U32 },
201};
202
203static struct nft_expr_type nft_objref_type __read_mostly = {
204 .name = "objref",
205 .select_ops = nft_objref_select_ops,
206 .policy = nft_objref_policy,
207 .maxattr = NFTA_OBJREF_MAX,
208 .owner = THIS_MODULE,
209};
210
211static int __init nft_objref_module_init(void)
212{
213 return nft_register_expr(&nft_objref_type);
214}
215
216static void __exit nft_objref_module_exit(void)
217{
218 nft_unregister_expr(&nft_objref_type);
219}
220
221module_init(nft_objref_module_init);
222module_exit(nft_objref_module_exit);
223
224MODULE_LICENSE("GPL");
225MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
226MODULE_ALIAS_NFT_EXPR("objref");
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index b2f88617611a..36d2b1096546 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> 2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -17,6 +18,10 @@
17#include <linux/netfilter/nf_tables.h> 18#include <linux/netfilter/nf_tables.h>
18#include <net/netfilter/nf_tables_core.h> 19#include <net/netfilter/nf_tables_core.h>
19#include <net/netfilter/nf_tables.h> 20#include <net/netfilter/nf_tables.h>
21/* For layer 4 checksum field offset. */
22#include <linux/tcp.h>
23#include <linux/udp.h>
24#include <linux/icmpv6.h>
20 25
21/* add vlan header into the user buffer for if tag was removed by offloads */ 26/* add vlan header into the user buffer for if tag was removed by offloads */
22static bool 27static bool
@@ -148,7 +153,6 @@ nla_put_failure:
148 return -1; 153 return -1;
149} 154}
150 155
151static struct nft_expr_type nft_payload_type;
152static const struct nft_expr_ops nft_payload_ops = { 156static const struct nft_expr_ops nft_payload_ops = {
153 .type = &nft_payload_type, 157 .type = &nft_payload_type,
154 .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), 158 .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)),
@@ -165,6 +169,87 @@ const struct nft_expr_ops nft_payload_fast_ops = {
165 .dump = nft_payload_dump, 169 .dump = nft_payload_dump,
166}; 170};
167 171
172static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum)
173{
174 *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), fsum), tsum));
175 if (*sum == 0)
176 *sum = CSUM_MANGLED_0;
177}
178
179static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff)
180{
181 struct udphdr *uh, _uh;
182
183 uh = skb_header_pointer(skb, thoff, sizeof(_uh), &_uh);
184 if (!uh)
185 return false;
186
187 return uh->check;
188}
189
190static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
191 struct sk_buff *skb,
192 unsigned int *l4csum_offset)
193{
194 switch (pkt->tprot) {
195 case IPPROTO_TCP:
196 *l4csum_offset = offsetof(struct tcphdr, check);
197 break;
198 case IPPROTO_UDP:
199 if (!nft_payload_udp_checksum(skb, pkt->xt.thoff))
200 return -1;
201 /* Fall through. */
202 case IPPROTO_UDPLITE:
203 *l4csum_offset = offsetof(struct udphdr, check);
204 break;
205 case IPPROTO_ICMPV6:
206 *l4csum_offset = offsetof(struct icmp6hdr, icmp6_cksum);
207 break;
208 default:
209 return -1;
210 }
211
212 *l4csum_offset += pkt->xt.thoff;
213 return 0;
214}
215
216static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt,
217 struct sk_buff *skb,
218 __wsum fsum, __wsum tsum)
219{
220 int l4csum_offset;
221 __sum16 sum;
222
223 /* If we cannot determine layer 4 checksum offset or this packet doesn't
224 * require layer 4 checksum recalculation, skip this packet.
225 */
226 if (nft_payload_l4csum_offset(pkt, skb, &l4csum_offset) < 0)
227 return 0;
228
229 if (skb_copy_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0)
230 return -1;
231
232 /* Checksum mangling for an arbitrary amount of bytes, based on
233 * inet_proto_csum_replace*() functions.
234 */
235 if (skb->ip_summed != CHECKSUM_PARTIAL) {
236 nft_csum_replace(&sum, fsum, tsum);
237 if (skb->ip_summed == CHECKSUM_COMPLETE) {
238 skb->csum = ~csum_add(csum_sub(~(skb->csum), fsum),
239 tsum);
240 }
241 } else {
242 sum = ~csum_fold(csum_add(csum_sub(csum_unfold(sum), fsum),
243 tsum));
244 }
245
246 if (!skb_make_writable(skb, l4csum_offset + sizeof(sum)) ||
247 skb_store_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0)
248 return -1;
249
250 return 0;
251}
252
168static void nft_payload_set_eval(const struct nft_expr *expr, 253static void nft_payload_set_eval(const struct nft_expr *expr,
169 struct nft_regs *regs, 254 struct nft_regs *regs,
170 const struct nft_pktinfo *pkt) 255 const struct nft_pktinfo *pkt)
@@ -205,14 +290,15 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
205 290
206 fsum = skb_checksum(skb, offset, priv->len, 0); 291 fsum = skb_checksum(skb, offset, priv->len, 0);
207 tsum = csum_partial(src, priv->len, 0); 292 tsum = csum_partial(src, priv->len, 0);
208 sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum), 293 nft_csum_replace(&sum, fsum, tsum);
209 tsum));
210 if (sum == 0)
211 sum = CSUM_MANGLED_0;
212 294
213 if (!skb_make_writable(skb, csum_offset + sizeof(sum)) || 295 if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
214 skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) 296 skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
215 goto err; 297 goto err;
298
299 if (priv->csum_flags &&
300 nft_payload_l4csum_update(pkt, skb, fsum, tsum) < 0)
301 goto err;
216 } 302 }
217 303
218 if (!skb_make_writable(skb, max(offset + priv->len, 0)) || 304 if (!skb_make_writable(skb, max(offset + priv->len, 0)) ||
@@ -241,6 +327,15 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
241 if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) 327 if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
242 priv->csum_offset = 328 priv->csum_offset =
243 ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); 329 ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
330 if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) {
331 u32 flags;
332
333 flags = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_FLAGS]));
334 if (flags & ~NFT_PAYLOAD_L4CSUM_PSEUDOHDR)
335 return -EINVAL;
336
337 priv->csum_flags = flags;
338 }
244 339
245 switch (priv->csum_type) { 340 switch (priv->csum_type) {
246 case NFT_PAYLOAD_CSUM_NONE: 341 case NFT_PAYLOAD_CSUM_NONE:
@@ -263,7 +358,8 @@ static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr
263 nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) || 358 nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) ||
264 nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) || 359 nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) ||
265 nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET, 360 nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET,
266 htonl(priv->csum_offset))) 361 htonl(priv->csum_offset)) ||
362 nla_put_be32(skb, NFTA_PAYLOAD_CSUM_FLAGS, htonl(priv->csum_flags)))
267 goto nla_put_failure; 363 goto nla_put_failure;
268 return 0; 364 return 0;
269 365
@@ -320,20 +416,10 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
320 return &nft_payload_ops; 416 return &nft_payload_ops;
321} 417}
322 418
323static struct nft_expr_type nft_payload_type __read_mostly = { 419struct nft_expr_type nft_payload_type __read_mostly = {
324 .name = "payload", 420 .name = "payload",
325 .select_ops = nft_payload_select_ops, 421 .select_ops = nft_payload_select_ops,
326 .policy = nft_payload_policy, 422 .policy = nft_payload_policy,
327 .maxattr = NFTA_PAYLOAD_MAX, 423 .maxattr = NFTA_PAYLOAD_MAX,
328 .owner = THIS_MODULE, 424 .owner = THIS_MODULE,
329}; 425};
330
331int __init nft_payload_module_init(void)
332{
333 return nft_register_expr(&nft_payload_type);
334}
335
336void nft_payload_module_exit(void)
337{
338 nft_unregister_expr(&nft_payload_type);
339}
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 393d359a1889..3e19fa1230dc 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -43,7 +43,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
43 queue = priv->queuenum + cpu % priv->queues_total; 43 queue = priv->queuenum + cpu % priv->queues_total;
44 } else { 44 } else {
45 queue = nfqueue_hash(pkt->skb, queue, 45 queue = nfqueue_hash(pkt->skb, queue,
46 priv->queues_total, pkt->pf, 46 priv->queues_total, nft_pf(pkt),
47 jhash_initval); 47 jhash_initval);
48 } 48 }
49 } 49 }
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index c00104c07095..bd6efc53f26d 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -17,38 +17,59 @@
17 17
18struct nft_quota { 18struct nft_quota {
19 u64 quota; 19 u64 quota;
20 bool invert; 20 unsigned long flags;
21 atomic64_t remain; 21 atomic64_t consumed;
22}; 22};
23 23
24static inline bool nft_overquota(struct nft_quota *priv, 24static inline bool nft_overquota(struct nft_quota *priv,
25 const struct nft_pktinfo *pkt) 25 const struct sk_buff *skb)
26{ 26{
27 return atomic64_sub_return(pkt->skb->len, &priv->remain) < 0; 27 return atomic64_add_return(skb->len, &priv->consumed) >= priv->quota;
28} 28}
29 29
30static void nft_quota_eval(const struct nft_expr *expr, 30static inline bool nft_quota_invert(struct nft_quota *priv)
31 struct nft_regs *regs,
32 const struct nft_pktinfo *pkt)
33{ 31{
34 struct nft_quota *priv = nft_expr_priv(expr); 32 return priv->flags & NFT_QUOTA_F_INV;
33}
35 34
36 if (nft_overquota(priv, pkt) ^ priv->invert) 35static inline void nft_quota_do_eval(struct nft_quota *priv,
36 struct nft_regs *regs,
37 const struct nft_pktinfo *pkt)
38{
39 if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv))
37 regs->verdict.code = NFT_BREAK; 40 regs->verdict.code = NFT_BREAK;
38} 41}
39 42
40static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = { 43static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = {
41 [NFTA_QUOTA_BYTES] = { .type = NLA_U64 }, 44 [NFTA_QUOTA_BYTES] = { .type = NLA_U64 },
42 [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 }, 45 [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 },
46 [NFTA_QUOTA_CONSUMED] = { .type = NLA_U64 },
43}; 47};
44 48
45static int nft_quota_init(const struct nft_ctx *ctx, 49#define NFT_QUOTA_DEPLETED_BIT 1 /* From NFT_QUOTA_F_DEPLETED. */
46 const struct nft_expr *expr, 50
47 const struct nlattr * const tb[]) 51static void nft_quota_obj_eval(struct nft_object *obj,
52 struct nft_regs *regs,
53 const struct nft_pktinfo *pkt)
48{ 54{
49 struct nft_quota *priv = nft_expr_priv(expr); 55 struct nft_quota *priv = nft_obj_data(obj);
50 u32 flags = 0; 56 bool overquota;
51 u64 quota; 57
58 overquota = nft_overquota(priv, pkt->skb);
59 if (overquota ^ nft_quota_invert(priv))
60 regs->verdict.code = NFT_BREAK;
61
62 if (overquota &&
63 !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
64 nft_obj_notify(nft_net(pkt), obj->table, obj, 0, 0,
65 NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC);
66}
67
68static int nft_quota_do_init(const struct nlattr * const tb[],
69 struct nft_quota *priv)
70{
71 unsigned long flags = 0;
72 u64 quota, consumed = 0;
52 73
53 if (!tb[NFTA_QUOTA_BYTES]) 74 if (!tb[NFTA_QUOTA_BYTES])
54 return -EINVAL; 75 return -EINVAL;
@@ -57,26 +78,60 @@ static int nft_quota_init(const struct nft_ctx *ctx,
57 if (quota > S64_MAX) 78 if (quota > S64_MAX)
58 return -EOVERFLOW; 79 return -EOVERFLOW;
59 80
81 if (tb[NFTA_QUOTA_CONSUMED]) {
82 consumed = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_CONSUMED]));
83 if (consumed > quota)
84 return -EINVAL;
85 }
86
60 if (tb[NFTA_QUOTA_FLAGS]) { 87 if (tb[NFTA_QUOTA_FLAGS]) {
61 flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS])); 88 flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS]));
62 if (flags & ~NFT_QUOTA_F_INV) 89 if (flags & ~NFT_QUOTA_F_INV)
63 return -EINVAL; 90 return -EINVAL;
91 if (flags & NFT_QUOTA_F_DEPLETED)
92 return -EOPNOTSUPP;
64 } 93 }
65 94
66 priv->quota = quota; 95 priv->quota = quota;
67 priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false; 96 priv->flags = flags;
68 atomic64_set(&priv->remain, quota); 97 atomic64_set(&priv->consumed, consumed);
69 98
70 return 0; 99 return 0;
71} 100}
72 101
73static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr) 102static int nft_quota_obj_init(const struct nlattr * const tb[],
103 struct nft_object *obj)
104{
105 struct nft_quota *priv = nft_obj_data(obj);
106
107 return nft_quota_do_init(tb, priv);
108}
109
110static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
111 bool reset)
74{ 112{
75 const struct nft_quota *priv = nft_expr_priv(expr); 113 u32 flags = priv->flags;
76 u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0; 114 u64 consumed;
115
116 if (reset) {
117 consumed = atomic64_xchg(&priv->consumed, 0);
118 if (test_and_clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
119 flags |= NFT_QUOTA_F_DEPLETED;
120 } else {
121 consumed = atomic64_read(&priv->consumed);
122 }
123
124 /* Since we inconditionally increment consumed quota for each packet
125 * that we see, don't go over the quota boundary in what we send to
126 * userspace.
127 */
128 if (consumed > priv->quota)
129 consumed = priv->quota;
77 130
78 if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota), 131 if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
79 NFTA_QUOTA_PAD) || 132 NFTA_QUOTA_PAD) ||
133 nla_put_be64(skb, NFTA_QUOTA_CONSUMED, cpu_to_be64(consumed),
134 NFTA_QUOTA_PAD) ||
80 nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags))) 135 nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags)))
81 goto nla_put_failure; 136 goto nla_put_failure;
82 return 0; 137 return 0;
@@ -85,6 +140,50 @@ nla_put_failure:
85 return -1; 140 return -1;
86} 141}
87 142
143static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
144 bool reset)
145{
146 struct nft_quota *priv = nft_obj_data(obj);
147
148 return nft_quota_do_dump(skb, priv, reset);
149}
150
151static struct nft_object_type nft_quota_obj __read_mostly = {
152 .type = NFT_OBJECT_QUOTA,
153 .size = sizeof(struct nft_quota),
154 .maxattr = NFTA_QUOTA_MAX,
155 .policy = nft_quota_policy,
156 .init = nft_quota_obj_init,
157 .eval = nft_quota_obj_eval,
158 .dump = nft_quota_obj_dump,
159 .owner = THIS_MODULE,
160};
161
162static void nft_quota_eval(const struct nft_expr *expr,
163 struct nft_regs *regs,
164 const struct nft_pktinfo *pkt)
165{
166 struct nft_quota *priv = nft_expr_priv(expr);
167
168 nft_quota_do_eval(priv, regs, pkt);
169}
170
171static int nft_quota_init(const struct nft_ctx *ctx,
172 const struct nft_expr *expr,
173 const struct nlattr * const tb[])
174{
175 struct nft_quota *priv = nft_expr_priv(expr);
176
177 return nft_quota_do_init(tb, priv);
178}
179
180static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
181{
182 struct nft_quota *priv = nft_expr_priv(expr);
183
184 return nft_quota_do_dump(skb, priv, false);
185}
186
88static struct nft_expr_type nft_quota_type; 187static struct nft_expr_type nft_quota_type;
89static const struct nft_expr_ops nft_quota_ops = { 188static const struct nft_expr_ops nft_quota_ops = {
90 .type = &nft_quota_type, 189 .type = &nft_quota_type,
@@ -105,12 +204,26 @@ static struct nft_expr_type nft_quota_type __read_mostly = {
105 204
106static int __init nft_quota_module_init(void) 205static int __init nft_quota_module_init(void)
107{ 206{
108 return nft_register_expr(&nft_quota_type); 207 int err;
208
209 err = nft_register_obj(&nft_quota_obj);
210 if (err < 0)
211 return err;
212
213 err = nft_register_expr(&nft_quota_type);
214 if (err < 0)
215 goto err1;
216
217 return 0;
218err1:
219 nft_unregister_obj(&nft_quota_obj);
220 return err;
109} 221}
110 222
111static void __exit nft_quota_module_exit(void) 223static void __exit nft_quota_module_exit(void)
112{ 224{
113 nft_unregister_expr(&nft_quota_type); 225 nft_unregister_expr(&nft_quota_type);
226 nft_unregister_obj(&nft_quota_obj);
114} 227}
115 228
116module_init(nft_quota_module_init); 229module_init(nft_quota_module_init);
@@ -119,3 +232,4 @@ module_exit(nft_quota_module_exit);
119MODULE_LICENSE("GPL"); 232MODULE_LICENSE("GPL");
120MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); 233MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
121MODULE_ALIAS_NFT_EXPR("quota"); 234MODULE_ALIAS_NFT_EXPR("quota");
235MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_QUOTA);
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 8f0aaaea1376..9edc74eedc10 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -128,7 +128,6 @@ nla_put_failure:
128 return -1; 128 return -1;
129} 129}
130 130
131static struct nft_expr_type nft_range_type;
132static const struct nft_expr_ops nft_range_ops = { 131static const struct nft_expr_ops nft_range_ops = {
133 .type = &nft_range_type, 132 .type = &nft_range_type,
134 .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)), 133 .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)),
@@ -137,20 +136,10 @@ static const struct nft_expr_ops nft_range_ops = {
137 .dump = nft_range_dump, 136 .dump = nft_range_dump,
138}; 137};
139 138
140static struct nft_expr_type nft_range_type __read_mostly = { 139struct nft_expr_type nft_range_type __read_mostly = {
141 .name = "range", 140 .name = "range",
142 .ops = &nft_range_ops, 141 .ops = &nft_range_ops,
143 .policy = nft_range_policy, 142 .policy = nft_range_policy,
144 .maxattr = NFTA_RANGE_MAX, 143 .maxattr = NFTA_RANGE_MAX,
145 .owner = THIS_MODULE, 144 .owner = THIS_MODULE,
146}; 145};
147
148int __init nft_range_module_init(void)
149{
150 return nft_register_expr(&nft_range_type);
151}
152
153void nft_range_module_exit(void)
154{
155 nft_unregister_expr(&nft_range_type);
156}
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 03f7bf40ae75..40dcd05146d5 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> 2 * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
@@ -79,7 +79,7 @@ int nft_redir_init(const struct nft_ctx *ctx,
79 return -EINVAL; 79 return -EINVAL;
80 } 80 }
81 81
82 return 0; 82 return nf_ct_netns_get(ctx->net, ctx->afi->family);
83} 83}
84EXPORT_SYMBOL_GPL(nft_redir_init); 84EXPORT_SYMBOL_GPL(nft_redir_init);
85 85
@@ -108,4 +108,4 @@ nla_put_failure:
108EXPORT_SYMBOL_GPL(nft_redir_dump); 108EXPORT_SYMBOL_GPL(nft_redir_dump);
109 109
110MODULE_LICENSE("GPL"); 110MODULE_LICENSE("GPL");
111MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); 111MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index e79d9ca2ffee..9e90a02cb104 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -23,36 +23,36 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
23{ 23{
24 struct nft_reject *priv = nft_expr_priv(expr); 24 struct nft_reject *priv = nft_expr_priv(expr);
25 25
26 switch (pkt->pf) { 26 switch (nft_pf(pkt)) {
27 case NFPROTO_IPV4: 27 case NFPROTO_IPV4:
28 switch (priv->type) { 28 switch (priv->type) {
29 case NFT_REJECT_ICMP_UNREACH: 29 case NFT_REJECT_ICMP_UNREACH:
30 nf_send_unreach(pkt->skb, priv->icmp_code, 30 nf_send_unreach(pkt->skb, priv->icmp_code,
31 pkt->hook); 31 nft_hook(pkt));
32 break; 32 break;
33 case NFT_REJECT_TCP_RST: 33 case NFT_REJECT_TCP_RST:
34 nf_send_reset(pkt->net, pkt->skb, pkt->hook); 34 nf_send_reset(nft_net(pkt), pkt->skb, nft_hook(pkt));
35 break; 35 break;
36 case NFT_REJECT_ICMPX_UNREACH: 36 case NFT_REJECT_ICMPX_UNREACH:
37 nf_send_unreach(pkt->skb, 37 nf_send_unreach(pkt->skb,
38 nft_reject_icmp_code(priv->icmp_code), 38 nft_reject_icmp_code(priv->icmp_code),
39 pkt->hook); 39 nft_hook(pkt));
40 break; 40 break;
41 } 41 }
42 break; 42 break;
43 case NFPROTO_IPV6: 43 case NFPROTO_IPV6:
44 switch (priv->type) { 44 switch (priv->type) {
45 case NFT_REJECT_ICMP_UNREACH: 45 case NFT_REJECT_ICMP_UNREACH:
46 nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, 46 nf_send_unreach6(nft_net(pkt), pkt->skb,
47 pkt->hook); 47 priv->icmp_code, nft_hook(pkt));
48 break; 48 break;
49 case NFT_REJECT_TCP_RST: 49 case NFT_REJECT_TCP_RST:
50 nf_send_reset6(pkt->net, pkt->skb, pkt->hook); 50 nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt));
51 break; 51 break;
52 case NFT_REJECT_ICMPX_UNREACH: 52 case NFT_REJECT_ICMPX_UNREACH:
53 nf_send_unreach6(pkt->net, pkt->skb, 53 nf_send_unreach6(nft_net(pkt), pkt->skb,
54 nft_reject_icmpv6_code(priv->icmp_code), 54 nft_reject_icmpv6_code(priv->icmp_code),
55 pkt->hook); 55 nft_hook(pkt));
56 break; 56 break;
57 } 57 }
58 break; 58 break;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
new file mode 100644
index 000000000000..d3eb640bc784
--- /dev/null
+++ b/net/netfilter/nft_rt.c
@@ -0,0 +1,153 @@
1/*
2 * Copyright (c) 2016 Anders K. Pedersen <akp@cohaesio.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/kernel.h>
10#include <linux/init.h>
11#include <linux/module.h>
12#include <linux/netlink.h>
13#include <linux/netfilter.h>
14#include <linux/netfilter/nf_tables.h>
15#include <net/dst.h>
16#include <net/ip6_route.h>
17#include <net/route.h>
18#include <net/netfilter/nf_tables.h>
19#include <net/netfilter/nf_tables_core.h>
20
21struct nft_rt {
22 enum nft_rt_keys key:8;
23 enum nft_registers dreg:8;
24};
25
26void nft_rt_get_eval(const struct nft_expr *expr,
27 struct nft_regs *regs,
28 const struct nft_pktinfo *pkt)
29{
30 const struct nft_rt *priv = nft_expr_priv(expr);
31 const struct sk_buff *skb = pkt->skb;
32 u32 *dest = &regs->data[priv->dreg];
33 const struct dst_entry *dst;
34
35 dst = skb_dst(skb);
36 if (!dst)
37 goto err;
38
39 switch (priv->key) {
40#ifdef CONFIG_IP_ROUTE_CLASSID
41 case NFT_RT_CLASSID:
42 *dest = dst->tclassid;
43 break;
44#endif
45 case NFT_RT_NEXTHOP4:
46 if (nft_pf(pkt) != NFPROTO_IPV4)
47 goto err;
48
49 *dest = rt_nexthop((const struct rtable *)dst,
50 ip_hdr(skb)->daddr);
51 break;
52 case NFT_RT_NEXTHOP6:
53 if (nft_pf(pkt) != NFPROTO_IPV6)
54 goto err;
55
56 memcpy(dest, rt6_nexthop((struct rt6_info *)dst,
57 &ipv6_hdr(skb)->daddr),
58 sizeof(struct in6_addr));
59 break;
60 default:
61 WARN_ON(1);
62 goto err;
63 }
64 return;
65
66err:
67 regs->verdict.code = NFT_BREAK;
68}
69
70const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
71 [NFTA_RT_DREG] = { .type = NLA_U32 },
72 [NFTA_RT_KEY] = { .type = NLA_U32 },
73};
74
75int nft_rt_get_init(const struct nft_ctx *ctx,
76 const struct nft_expr *expr,
77 const struct nlattr * const tb[])
78{
79 struct nft_rt *priv = nft_expr_priv(expr);
80 unsigned int len;
81
82 if (tb[NFTA_RT_KEY] == NULL ||
83 tb[NFTA_RT_DREG] == NULL)
84 return -EINVAL;
85
86 priv->key = ntohl(nla_get_be32(tb[NFTA_RT_KEY]));
87 switch (priv->key) {
88#ifdef CONFIG_IP_ROUTE_CLASSID
89 case NFT_RT_CLASSID:
90#endif
91 case NFT_RT_NEXTHOP4:
92 len = sizeof(u32);
93 break;
94 case NFT_RT_NEXTHOP6:
95 len = sizeof(struct in6_addr);
96 break;
97 default:
98 return -EOPNOTSUPP;
99 }
100
101 priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]);
102 return nft_validate_register_store(ctx, priv->dreg, NULL,
103 NFT_DATA_VALUE, len);
104}
105
106int nft_rt_get_dump(struct sk_buff *skb,
107 const struct nft_expr *expr)
108{
109 const struct nft_rt *priv = nft_expr_priv(expr);
110
111 if (nla_put_be32(skb, NFTA_RT_KEY, htonl(priv->key)))
112 goto nla_put_failure;
113 if (nft_dump_register(skb, NFTA_RT_DREG, priv->dreg))
114 goto nla_put_failure;
115 return 0;
116
117nla_put_failure:
118 return -1;
119}
120
121static struct nft_expr_type nft_rt_type;
122static const struct nft_expr_ops nft_rt_get_ops = {
123 .type = &nft_rt_type,
124 .size = NFT_EXPR_SIZE(sizeof(struct nft_rt)),
125 .eval = nft_rt_get_eval,
126 .init = nft_rt_get_init,
127 .dump = nft_rt_get_dump,
128};
129
130static struct nft_expr_type nft_rt_type __read_mostly = {
131 .name = "rt",
132 .ops = &nft_rt_get_ops,
133 .policy = nft_rt_policy,
134 .maxattr = NFTA_RT_MAX,
135 .owner = THIS_MODULE,
136};
137
138static int __init nft_rt_module_init(void)
139{
140 return nft_register_expr(&nft_rt_type);
141}
142
143static void __exit nft_rt_module_exit(void)
144{
145 nft_unregister_expr(&nft_rt_type);
146}
147
148module_init(nft_rt_module_init);
149module_exit(nft_rt_module_exit);
150
151MODULE_LICENSE("GPL");
152MODULE_AUTHOR("Anders K. Pedersen <akp@cohaesio.com>");
153MODULE_ALIAS_NFT_EXPR("rt");
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index a3dface3e6e6..1e20e2bbb6d9 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -167,6 +167,19 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set,
167 nft_set_elem_clear_busy(&he->ext); 167 nft_set_elem_clear_busy(&he->ext);
168} 168}
169 169
170static bool nft_hash_deactivate_one(const struct net *net,
171 const struct nft_set *set, void *priv)
172{
173 struct nft_hash_elem *he = priv;
174
175 if (!nft_set_elem_mark_busy(&he->ext) ||
176 !nft_is_active(net, &he->ext)) {
177 nft_set_elem_change_active(net, set, &he->ext);
178 return true;
179 }
180 return false;
181}
182
170static void *nft_hash_deactivate(const struct net *net, 183static void *nft_hash_deactivate(const struct net *net,
171 const struct nft_set *set, 184 const struct nft_set *set,
172 const struct nft_set_elem *elem) 185 const struct nft_set_elem *elem)
@@ -181,13 +194,10 @@ static void *nft_hash_deactivate(const struct net *net,
181 194
182 rcu_read_lock(); 195 rcu_read_lock();
183 he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); 196 he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
184 if (he != NULL) { 197 if (he != NULL &&
185 if (!nft_set_elem_mark_busy(&he->ext) || 198 !nft_hash_deactivate_one(net, set, he))
186 !nft_is_active(net, &he->ext)) 199 he = NULL;
187 nft_set_elem_change_active(net, set, &he->ext); 200
188 else
189 he = NULL;
190 }
191 rcu_read_unlock(); 201 rcu_read_unlock();
192 202
193 return he; 203 return he;
@@ -387,6 +397,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
387 .insert = nft_hash_insert, 397 .insert = nft_hash_insert,
388 .activate = nft_hash_activate, 398 .activate = nft_hash_activate,
389 .deactivate = nft_hash_deactivate, 399 .deactivate = nft_hash_deactivate,
400 .deactivate_one = nft_hash_deactivate_one,
390 .remove = nft_hash_remove, 401 .remove = nft_hash_remove,
391 .lookup = nft_hash_lookup, 402 .lookup = nft_hash_lookup,
392 .update = nft_hash_update, 403 .update = nft_hash_update,
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 36493a7cae88..08376e50f6cd 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -171,6 +171,15 @@ static void nft_rbtree_activate(const struct net *net,
171 nft_set_elem_change_active(net, set, &rbe->ext); 171 nft_set_elem_change_active(net, set, &rbe->ext);
172} 172}
173 173
174static bool nft_rbtree_deactivate_one(const struct net *net,
175 const struct nft_set *set, void *priv)
176{
177 struct nft_rbtree_elem *rbe = priv;
178
179 nft_set_elem_change_active(net, set, &rbe->ext);
180 return true;
181}
182
174static void *nft_rbtree_deactivate(const struct net *net, 183static void *nft_rbtree_deactivate(const struct net *net,
175 const struct nft_set *set, 184 const struct nft_set *set,
176 const struct nft_set_elem *elem) 185 const struct nft_set_elem *elem)
@@ -204,7 +213,7 @@ static void *nft_rbtree_deactivate(const struct net *net,
204 parent = parent->rb_right; 213 parent = parent->rb_right;
205 continue; 214 continue;
206 } 215 }
207 nft_set_elem_change_active(net, set, &rbe->ext); 216 nft_rbtree_deactivate_one(net, set, rbe);
208 return rbe; 217 return rbe;
209 } 218 }
210 } 219 }
@@ -295,6 +304,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
295 .insert = nft_rbtree_insert, 304 .insert = nft_rbtree_insert,
296 .remove = nft_rbtree_remove, 305 .remove = nft_rbtree_remove,
297 .deactivate = nft_rbtree_deactivate, 306 .deactivate = nft_rbtree_deactivate,
307 .deactivate_one = nft_rbtree_deactivate_one,
298 .activate = nft_rbtree_activate, 308 .activate = nft_rbtree_activate,
299 .lookup = nft_rbtree_lookup, 309 .lookup = nft_rbtree_lookup,
300 .walk = nft_rbtree_walk, 310 .walk = nft_rbtree_walk,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index fc4977456c30..2ff499680cc6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -40,6 +40,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
40MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); 40MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
41 41
42#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) 42#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
43#define XT_PCPU_BLOCK_SIZE 4096
43 44
44struct compat_delta { 45struct compat_delta {
45 unsigned int offset; /* offset in kernel */ 46 unsigned int offset; /* offset in kernel */
@@ -958,7 +959,9 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
958 if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) 959 if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
959 info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); 960 info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
960 if (!info) { 961 if (!info) {
961 info = vmalloc(sz); 962 info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN |
963 __GFP_NORETRY | __GFP_HIGHMEM,
964 PAGE_KERNEL);
962 if (!info) 965 if (!info)
963 return NULL; 966 return NULL;
964 } 967 }
@@ -982,7 +985,7 @@ void xt_free_table_info(struct xt_table_info *info)
982} 985}
983EXPORT_SYMBOL(xt_free_table_info); 986EXPORT_SYMBOL(xt_free_table_info);
984 987
985/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ 988/* Find table by name, grabs mutex & ref. Returns NULL on error. */
986struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, 989struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
987 const char *name) 990 const char *name)
988{ 991{
@@ -1615,6 +1618,59 @@ void xt_proto_fini(struct net *net, u_int8_t af)
1615} 1618}
1616EXPORT_SYMBOL_GPL(xt_proto_fini); 1619EXPORT_SYMBOL_GPL(xt_proto_fini);
1617 1620
1621/**
1622 * xt_percpu_counter_alloc - allocate x_tables rule counter
1623 *
1624 * @state: pointer to xt_percpu allocation state
1625 * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
1626 *
1627 * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
1628 * contain the address of the real (percpu) counter.
1629 *
1630 * Rule evaluation needs to use xt_get_this_cpu_counter() helper
1631 * to fetch the real percpu counter.
1632 *
1633 * To speed up allocation and improve data locality, a 4kb block is
1634 * allocated.
1635 *
1636 * xt_percpu_counter_alloc_state contains the base address of the
1637 * allocated page and the current sub-offset.
1638 *
1639 * returns false on error.
1640 */
1641bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
1642 struct xt_counters *counter)
1643{
1644 BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2));
1645
1646 if (nr_cpu_ids <= 1)
1647 return true;
1648
1649 if (!state->mem) {
1650 state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE,
1651 XT_PCPU_BLOCK_SIZE);
1652 if (!state->mem)
1653 return false;
1654 }
1655 counter->pcnt = (__force unsigned long)(state->mem + state->off);
1656 state->off += sizeof(*counter);
1657 if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) {
1658 state->mem = NULL;
1659 state->off = 0;
1660 }
1661 return true;
1662}
1663EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);
1664
1665void xt_percpu_counter_free(struct xt_counters *counters)
1666{
1667 unsigned long pcnt = counters->pcnt;
1668
1669 if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0)
1670 free_percpu((void __percpu *)pcnt);
1671}
1672EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
1673
1618static int __net_init xt_net_init(struct net *net) 1674static int __net_init xt_net_init(struct net *net)
1619{ 1675{
1620 int i; 1676 int i;
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index 4973cbddc446..19247a17e511 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -132,9 +132,9 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
132 goto errout; 132 goto errout;
133 133
134 audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s", 134 audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
135 info->type, par->hooknum, skb->len, 135 info->type, xt_hooknum(par), skb->len,
136 par->in ? par->in->name : "?", 136 xt_in(par) ? xt_inname(par) : "?",
137 par->out ? par->out->name : "?"); 137 xt_out(par) ? xt_outname(par) : "?");
138 138
139 if (skb->mark) 139 if (skb->mark)
140 audit_log_format(ab, " mark=%#x", skb->mark); 140 audit_log_format(ab, " mark=%#x", skb->mark);
@@ -144,7 +144,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
144 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, 144 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
145 ntohs(eth_hdr(skb)->h_proto)); 145 ntohs(eth_hdr(skb)->h_proto));
146 146
147 if (par->family == NFPROTO_BRIDGE) { 147 if (xt_family(par) == NFPROTO_BRIDGE) {
148 switch (eth_hdr(skb)->h_proto) { 148 switch (eth_hdr(skb)->h_proto) {
149 case htons(ETH_P_IP): 149 case htons(ETH_P_IP):
150 audit_ip4(ab, skb); 150 audit_ip4(ab, skb);
@@ -157,7 +157,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
157 } 157 }
158 } 158 }
159 159
160 switch (par->family) { 160 switch (xt_family(par)) {
161 case NFPROTO_IPV4: 161 case NFPROTO_IPV4:
162 audit_ip4(ab, skb); 162 audit_ip4(ab, skb);
163 break; 163 break;
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index e04dc282e3bb..da56c06a443c 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -106,7 +106,7 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par)
106 return -EINVAL; 106 return -EINVAL;
107 } 107 }
108 108
109 ret = nf_ct_l3proto_try_module_get(par->family); 109 ret = nf_ct_netns_get(par->net, par->family);
110 if (ret < 0) 110 if (ret < 0)
111 pr_info("cannot load conntrack support for proto=%u\n", 111 pr_info("cannot load conntrack support for proto=%u\n",
112 par->family); 112 par->family);
@@ -115,7 +115,7 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par)
115 115
116static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par) 116static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par)
117{ 117{
118 nf_ct_l3proto_module_put(par->family); 118 nf_ct_netns_put(par->net, par->family);
119} 119}
120 120
121static struct xt_target connsecmark_tg_reg __read_mostly = { 121static struct xt_target connsecmark_tg_reg __read_mostly = {
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 6669e68d589e..95c750358747 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -216,7 +216,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
216 goto err1; 216 goto err1;
217#endif 217#endif
218 218
219 ret = nf_ct_l3proto_try_module_get(par->family); 219 ret = nf_ct_netns_get(par->net, par->family);
220 if (ret < 0) 220 if (ret < 0)
221 goto err1; 221 goto err1;
222 222
@@ -260,7 +260,7 @@ out:
260err3: 260err3:
261 nf_ct_tmpl_free(ct); 261 nf_ct_tmpl_free(ct);
262err2: 262err2:
263 nf_ct_l3proto_module_put(par->family); 263 nf_ct_netns_put(par->net, par->family);
264err1: 264err1:
265 return ret; 265 return ret;
266} 266}
@@ -341,7 +341,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
341 if (help) 341 if (help)
342 module_put(help->helper->me); 342 module_put(help->helper->me);
343 343
344 nf_ct_l3proto_module_put(par->family); 344 nf_ct_netns_put(par->net, par->family);
345 345
346 xt_ct_destroy_timeout(ct); 346 xt_ct_destroy_timeout(ct);
347 nf_ct_put(info->ct); 347 nf_ct_put(info->ct);
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index 1763ab82bcd7..c3b2017ebe41 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -32,15 +32,15 @@ static unsigned int
32log_tg(struct sk_buff *skb, const struct xt_action_param *par) 32log_tg(struct sk_buff *skb, const struct xt_action_param *par)
33{ 33{
34 const struct xt_log_info *loginfo = par->targinfo; 34 const struct xt_log_info *loginfo = par->targinfo;
35 struct net *net = xt_net(par);
35 struct nf_loginfo li; 36 struct nf_loginfo li;
36 struct net *net = par->net;
37 37
38 li.type = NF_LOG_TYPE_LOG; 38 li.type = NF_LOG_TYPE_LOG;
39 li.u.log.level = loginfo->level; 39 li.u.log.level = loginfo->level;
40 li.u.log.logflags = loginfo->logflags; 40 li.u.log.logflags = loginfo->logflags;
41 41
42 nf_log_packet(net, par->family, par->hooknum, skb, par->in, par->out, 42 nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par),
43 &li, "%s", loginfo->prefix); 43 xt_out(par), &li, "%s", loginfo->prefix);
44 return XT_CONTINUE; 44 return XT_CONTINUE;
45} 45}
46 46
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index b253e07cb1c5..e45a01255e70 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -33,8 +33,8 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
33 netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ 33 netmask.ip6[i] = ~(range->min_addr.ip6[i] ^
34 range->max_addr.ip6[i]); 34 range->max_addr.ip6[i]);
35 35
36 if (par->hooknum == NF_INET_PRE_ROUTING || 36 if (xt_hooknum(par) == NF_INET_PRE_ROUTING ||
37 par->hooknum == NF_INET_LOCAL_OUT) 37 xt_hooknum(par) == NF_INET_LOCAL_OUT)
38 new_addr.in6 = ipv6_hdr(skb)->daddr; 38 new_addr.in6 = ipv6_hdr(skb)->daddr;
39 else 39 else
40 new_addr.in6 = ipv6_hdr(skb)->saddr; 40 new_addr.in6 = ipv6_hdr(skb)->saddr;
@@ -51,7 +51,7 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
51 newrange.min_proto = range->min_proto; 51 newrange.min_proto = range->min_proto;
52 newrange.max_proto = range->max_proto; 52 newrange.max_proto = range->max_proto;
53 53
54 return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); 54 return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
55} 55}
56 56
57static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) 57static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
@@ -60,7 +60,12 @@ static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
60 60
61 if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) 61 if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
62 return -EINVAL; 62 return -EINVAL;
63 return 0; 63 return nf_ct_netns_get(par->net, par->family);
64}
65
66static void netmap_tg_destroy(const struct xt_tgdtor_param *par)
67{
68 nf_ct_netns_put(par->net, par->family);
64} 69}
65 70
66static unsigned int 71static unsigned int
@@ -72,16 +77,16 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
72 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; 77 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
73 struct nf_nat_range newrange; 78 struct nf_nat_range newrange;
74 79
75 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || 80 NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING ||
76 par->hooknum == NF_INET_POST_ROUTING || 81 xt_hooknum(par) == NF_INET_POST_ROUTING ||
77 par->hooknum == NF_INET_LOCAL_OUT || 82 xt_hooknum(par) == NF_INET_LOCAL_OUT ||
78 par->hooknum == NF_INET_LOCAL_IN); 83 xt_hooknum(par) == NF_INET_LOCAL_IN);
79 ct = nf_ct_get(skb, &ctinfo); 84 ct = nf_ct_get(skb, &ctinfo);
80 85
81 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); 86 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
82 87
83 if (par->hooknum == NF_INET_PRE_ROUTING || 88 if (xt_hooknum(par) == NF_INET_PRE_ROUTING ||
84 par->hooknum == NF_INET_LOCAL_OUT) 89 xt_hooknum(par) == NF_INET_LOCAL_OUT)
85 new_ip = ip_hdr(skb)->daddr & ~netmask; 90 new_ip = ip_hdr(skb)->daddr & ~netmask;
86 else 91 else
87 new_ip = ip_hdr(skb)->saddr & ~netmask; 92 new_ip = ip_hdr(skb)->saddr & ~netmask;
@@ -96,7 +101,7 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
96 newrange.max_proto = mr->range[0].max; 101 newrange.max_proto = mr->range[0].max;
97 102
98 /* Hand modified range to generic setup. */ 103 /* Hand modified range to generic setup. */
99 return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); 104 return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
100} 105}
101 106
102static int netmap_tg4_check(const struct xt_tgchk_param *par) 107static int netmap_tg4_check(const struct xt_tgchk_param *par)
@@ -111,7 +116,7 @@ static int netmap_tg4_check(const struct xt_tgchk_param *par)
111 pr_debug("bad rangesize %u.\n", mr->rangesize); 116 pr_debug("bad rangesize %u.\n", mr->rangesize);
112 return -EINVAL; 117 return -EINVAL;
113 } 118 }
114 return 0; 119 return nf_ct_netns_get(par->net, par->family);
115} 120}
116 121
117static struct xt_target netmap_tg_reg[] __read_mostly = { 122static struct xt_target netmap_tg_reg[] __read_mostly = {
@@ -127,6 +132,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = {
127 (1 << NF_INET_LOCAL_OUT) | 132 (1 << NF_INET_LOCAL_OUT) |
128 (1 << NF_INET_LOCAL_IN), 133 (1 << NF_INET_LOCAL_IN),
129 .checkentry = netmap_tg6_checkentry, 134 .checkentry = netmap_tg6_checkentry,
135 .destroy = netmap_tg_destroy,
130 .me = THIS_MODULE, 136 .me = THIS_MODULE,
131 }, 137 },
132 { 138 {
@@ -141,6 +147,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = {
141 (1 << NF_INET_LOCAL_OUT) | 147 (1 << NF_INET_LOCAL_OUT) |
142 (1 << NF_INET_LOCAL_IN), 148 (1 << NF_INET_LOCAL_IN),
143 .checkentry = netmap_tg4_check, 149 .checkentry = netmap_tg4_check,
150 .destroy = netmap_tg_destroy,
144 .me = THIS_MODULE, 151 .me = THIS_MODULE,
145 }, 152 },
146}; 153};
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 8668a5c18dc3..c7f8958cea4a 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -25,8 +25,8 @@ static unsigned int
25nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) 25nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
26{ 26{
27 const struct xt_nflog_info *info = par->targinfo; 27 const struct xt_nflog_info *info = par->targinfo;
28 struct net *net = xt_net(par);
28 struct nf_loginfo li; 29 struct nf_loginfo li;
29 struct net *net = par->net;
30 30
31 li.type = NF_LOG_TYPE_ULOG; 31 li.type = NF_LOG_TYPE_ULOG;
32 li.u.ulog.copy_len = info->len; 32 li.u.ulog.copy_len = info->len;
@@ -37,8 +37,8 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
37 if (info->flags & XT_NFLOG_F_COPY_LEN) 37 if (info->flags & XT_NFLOG_F_COPY_LEN)
38 li.u.ulog.flags |= NF_LOG_F_COPY_LEN; 38 li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
39 39
40 nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in, 40 nfulnl_log_packet(net, xt_family(par), xt_hooknum(par), skb,
41 par->out, &li, info->prefix); 41 xt_in(par), xt_out(par), &li, info->prefix);
42 return XT_CONTINUE; 42 return XT_CONTINUE;
43} 43}
44 44
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 8f1779ff7e30..a360b99a958a 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -43,7 +43,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
43 43
44 if (info->queues_total > 1) { 44 if (info->queues_total > 1) {
45 queue = nfqueue_hash(skb, queue, info->queues_total, 45 queue = nfqueue_hash(skb, queue, info->queues_total,
46 par->family, jhash_initval); 46 xt_family(par), jhash_initval);
47 } 47 }
48 return NF_QUEUE_NR(queue); 48 return NF_QUEUE_NR(queue);
49} 49}
@@ -98,7 +98,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
98 queue = info->queuenum + cpu % info->queues_total; 98 queue = info->queuenum + cpu % info->queues_total;
99 } else { 99 } else {
100 queue = nfqueue_hash(skb, queue, info->queues_total, 100 queue = nfqueue_hash(skb, queue, info->queues_total,
101 par->family, jhash_initval); 101 xt_family(par), jhash_initval);
102 } 102 }
103 } 103 }
104 104
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index dbd6c4a12b97..91a373a3f534 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -63,7 +63,7 @@ void xt_rateest_put(struct xt_rateest *est)
63 mutex_lock(&xt_rateest_mutex); 63 mutex_lock(&xt_rateest_mutex);
64 if (--est->refcnt == 0) { 64 if (--est->refcnt == 0) {
65 hlist_del(&est->list); 65 hlist_del(&est->list);
66 gen_kill_estimator(&est->bstats, &est->rstats); 66 gen_kill_estimator(&est->rate_est);
67 /* 67 /*
68 * gen_estimator est_timer() might access est->lock or bstats, 68 * gen_estimator est_timer() might access est->lock or bstats,
69 * wait a RCU grace period before freeing 'est' 69 * wait a RCU grace period before freeing 'est'
@@ -132,7 +132,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
132 cfg.est.interval = info->interval; 132 cfg.est.interval = info->interval;
133 cfg.est.ewma_log = info->ewma_log; 133 cfg.est.ewma_log = info->ewma_log;
134 134
135 ret = gen_new_estimator(&est->bstats, NULL, &est->rstats, 135 ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est,
136 &est->lock, NULL, &cfg.opt); 136 &est->lock, NULL, &cfg.opt);
137 if (ret < 0) 137 if (ret < 0)
138 goto err2; 138 goto err2;
diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c
index 03f0b370e178..98a4c6d4f1cb 100644
--- a/net/netfilter/xt_REDIRECT.c
+++ b/net/netfilter/xt_REDIRECT.c
@@ -31,7 +31,7 @@
31static unsigned int 31static unsigned int
32redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) 32redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par)
33{ 33{
34 return nf_nat_redirect_ipv6(skb, par->targinfo, par->hooknum); 34 return nf_nat_redirect_ipv6(skb, par->targinfo, xt_hooknum(par));
35} 35}
36 36
37static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) 37static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
@@ -40,7 +40,13 @@ static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
40 40
41 if (range->flags & NF_NAT_RANGE_MAP_IPS) 41 if (range->flags & NF_NAT_RANGE_MAP_IPS)
42 return -EINVAL; 42 return -EINVAL;
43 return 0; 43
44 return nf_ct_netns_get(par->net, par->family);
45}
46
47static void redirect_tg_destroy(const struct xt_tgdtor_param *par)
48{
49 nf_ct_netns_put(par->net, par->family);
44} 50}
45 51
46/* FIXME: Take multiple ranges --RR */ 52/* FIXME: Take multiple ranges --RR */
@@ -56,13 +62,13 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par)
56 pr_debug("bad rangesize %u.\n", mr->rangesize); 62 pr_debug("bad rangesize %u.\n", mr->rangesize);
57 return -EINVAL; 63 return -EINVAL;
58 } 64 }
59 return 0; 65 return nf_ct_netns_get(par->net, par->family);
60} 66}
61 67
62static unsigned int 68static unsigned int
63redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) 69redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par)
64{ 70{
65 return nf_nat_redirect_ipv4(skb, par->targinfo, par->hooknum); 71 return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par));
66} 72}
67 73
68static struct xt_target redirect_tg_reg[] __read_mostly = { 74static struct xt_target redirect_tg_reg[] __read_mostly = {
@@ -72,6 +78,7 @@ static struct xt_target redirect_tg_reg[] __read_mostly = {
72 .revision = 0, 78 .revision = 0,
73 .table = "nat", 79 .table = "nat",
74 .checkentry = redirect_tg6_checkentry, 80 .checkentry = redirect_tg6_checkentry,
81 .destroy = redirect_tg_destroy,
75 .target = redirect_tg6, 82 .target = redirect_tg6,
76 .targetsize = sizeof(struct nf_nat_range), 83 .targetsize = sizeof(struct nf_nat_range),
77 .hooks = (1 << NF_INET_PRE_ROUTING) | 84 .hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -85,6 +92,7 @@ static struct xt_target redirect_tg_reg[] __read_mostly = {
85 .table = "nat", 92 .table = "nat",
86 .target = redirect_tg4, 93 .target = redirect_tg4,
87 .checkentry = redirect_tg4_check, 94 .checkentry = redirect_tg4_check,
95 .destroy = redirect_tg_destroy,
88 .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), 96 .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
89 .hooks = (1 << NF_INET_PRE_ROUTING) | 97 .hooks = (1 << NF_INET_PRE_ROUTING) |
90 (1 << NF_INET_LOCAL_OUT), 98 (1 << NF_INET_LOCAL_OUT),
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 872db2d0e2a9..27241a767f17 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
108 return -1; 108 return -1;
109 109
110 if (info->mss == XT_TCPMSS_CLAMP_PMTU) { 110 if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
111 struct net *net = par->net; 111 struct net *net = xt_net(par);
112 unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); 112 unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
113 unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu); 113 unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu);
114 114
@@ -172,7 +172,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
172 * length IPv6 header of 60, ergo the default MSS value is 1220 172 * length IPv6 header of 60, ergo the default MSS value is 1220
173 * Since no MSS was provided, we must use the default values 173 * Since no MSS was provided, we must use the default values
174 */ 174 */
175 if (par->family == NFPROTO_IPV4) 175 if (xt_family(par) == NFPROTO_IPV4)
176 newmss = min(newmss, (u16)536); 176 newmss = min(newmss, (u16)536);
177 else 177 else
178 newmss = min(newmss, (u16)1220); 178 newmss = min(newmss, (u16)1220);
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 0471db4032c5..1c57ace75ae6 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -33,7 +33,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
33 const struct xt_tee_tginfo *info = par->targinfo; 33 const struct xt_tee_tginfo *info = par->targinfo;
34 int oif = info->priv ? info->priv->oif : 0; 34 int oif = info->priv ? info->priv->oif : 0;
35 35
36 nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif); 36 nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif);
37 37
38 return XT_CONTINUE; 38 return XT_CONTINUE;
39} 39}
@@ -45,7 +45,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
45 const struct xt_tee_tginfo *info = par->targinfo; 45 const struct xt_tee_tginfo *info = par->targinfo;
46 int oif = info->priv ? info->priv->oif : 0; 46 int oif = info->priv ? info->priv->oif : 0;
47 47
48 nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif); 48 nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif);
49 49
50 return XT_CONTINUE; 50 return XT_CONTINUE;
51} 51}
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 663c4c3c9072..80cb7babeb64 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -364,7 +364,8 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
364{ 364{
365 const struct xt_tproxy_target_info *tgi = par->targinfo; 365 const struct xt_tproxy_target_info *tgi = par->targinfo;
366 366
367 return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); 367 return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport,
368 tgi->mark_mask, tgi->mark_value);
368} 369}
369 370
370static unsigned int 371static unsigned int
@@ -372,7 +373,8 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
372{ 373{
373 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; 374 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
374 375
375 return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); 376 return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport,
377 tgi->mark_mask, tgi->mark_value);
376} 378}
377 379
378#ifdef XT_TPROXY_HAVE_IPV6 380#ifdef XT_TPROXY_HAVE_IPV6
@@ -442,7 +444,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
442 * to a listener socket if there's one */ 444 * to a listener socket if there's one */
443 struct sock *sk2; 445 struct sock *sk2;
444 446
445 sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, 447 sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
446 &iph->saddr, 448 &iph->saddr,
447 tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), 449 tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
448 hp->source, 450 hp->source,
@@ -485,10 +487,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
485 * addresses, this happens if the redirect already happened 487 * addresses, this happens if the redirect already happened
486 * and the current packet belongs to an already established 488 * and the current packet belongs to an already established
487 * connection */ 489 * connection */
488 sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, 490 sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
489 &iph->saddr, &iph->daddr, 491 &iph->saddr, &iph->daddr,
490 hp->source, hp->dest, 492 hp->source, hp->dest,
491 par->in, NFT_LOOKUP_ESTABLISHED); 493 xt_in(par), NFT_LOOKUP_ESTABLISHED);
492 494
493 laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr); 495 laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
494 lport = tgi->lport ? tgi->lport : hp->dest; 496 lport = tgi->lport ? tgi->lport : hp->dest;
@@ -500,10 +502,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
500 else if (!sk) 502 else if (!sk)
501 /* no there's no established connection, check if 503 /* no there's no established connection, check if
502 * there's a listener on the redirected addr/port */ 504 * there's a listener on the redirected addr/port */
503 sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, 505 sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp,
504 tproto, &iph->saddr, laddr, 506 tproto, &iph->saddr, laddr,
505 hp->source, lport, 507 hp->source, lport,
506 par->in, NFT_LOOKUP_LISTENER); 508 xt_in(par), NFT_LOOKUP_LISTENER);
507 509
508 /* NOTE: assign_sock consumes our sk reference */ 510 /* NOTE: assign_sock consumes our sk reference */
509 if (sk && tproxy_sk_is_transparent(sk)) { 511 if (sk && tproxy_sk_is_transparent(sk)) {
@@ -529,6 +531,11 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
529static int tproxy_tg6_check(const struct xt_tgchk_param *par) 531static int tproxy_tg6_check(const struct xt_tgchk_param *par)
530{ 532{
531 const struct ip6t_ip6 *i = par->entryinfo; 533 const struct ip6t_ip6 *i = par->entryinfo;
534 int err;
535
536 err = nf_defrag_ipv6_enable(par->net);
537 if (err)
538 return err;
532 539
533 if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && 540 if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) &&
534 !(i->invflags & IP6T_INV_PROTO)) 541 !(i->invflags & IP6T_INV_PROTO))
@@ -543,6 +550,11 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par)
543static int tproxy_tg4_check(const struct xt_tgchk_param *par) 550static int tproxy_tg4_check(const struct xt_tgchk_param *par)
544{ 551{
545 const struct ipt_ip *i = par->entryinfo; 552 const struct ipt_ip *i = par->entryinfo;
553 int err;
554
555 err = nf_defrag_ipv4_enable(par->net);
556 if (err)
557 return err;
546 558
547 if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) 559 if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
548 && !(i->invflags & IPT_INV_PROTO)) 560 && !(i->invflags & IPT_INV_PROTO))
@@ -594,11 +606,6 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
594 606
595static int __init tproxy_tg_init(void) 607static int __init tproxy_tg_init(void)
596{ 608{
597 nf_defrag_ipv4_enable();
598#ifdef XT_TPROXY_HAVE_IPV6
599 nf_defrag_ipv6_enable();
600#endif
601
602 return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg)); 609 return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg));
603} 610}
604 611
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 11d6091991a4..e329dabde35f 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev,
125static bool 125static bool
126addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) 126addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
127{ 127{
128 struct net *net = par->net; 128 struct net *net = xt_net(par);
129 const struct xt_addrtype_info *info = par->matchinfo; 129 const struct xt_addrtype_info *info = par->matchinfo;
130 const struct iphdr *iph = ip_hdr(skb); 130 const struct iphdr *iph = ip_hdr(skb);
131 bool ret = true; 131 bool ret = true;
@@ -143,19 +143,19 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
143static bool 143static bool
144addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) 144addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
145{ 145{
146 struct net *net = par->net; 146 struct net *net = xt_net(par);
147 const struct xt_addrtype_info_v1 *info = par->matchinfo; 147 const struct xt_addrtype_info_v1 *info = par->matchinfo;
148 const struct iphdr *iph; 148 const struct iphdr *iph;
149 const struct net_device *dev = NULL; 149 const struct net_device *dev = NULL;
150 bool ret = true; 150 bool ret = true;
151 151
152 if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) 152 if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN)
153 dev = par->in; 153 dev = xt_in(par);
154 else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) 154 else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
155 dev = par->out; 155 dev = xt_out(par);
156 156
157#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 157#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
158 if (par->family == NFPROTO_IPV6) 158 if (xt_family(par) == NFPROTO_IPV6)
159 return addrtype_mt6(net, dev, skb, info); 159 return addrtype_mt6(net, dev, skb, info);
160#endif 160#endif
161 iph = ip_hdr(skb); 161 iph = ip_hdr(skb);
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index dffee9d47ec4..2dedaa23ab0a 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -10,6 +10,7 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/filter.h> 12#include <linux/filter.h>
13#include <linux/bpf.h>
13 14
14#include <linux/netfilter/xt_bpf.h> 15#include <linux/netfilter/xt_bpf.h>
15#include <linux/netfilter/x_tables.h> 16#include <linux/netfilter/x_tables.h>
@@ -20,15 +21,15 @@ MODULE_LICENSE("GPL");
20MODULE_ALIAS("ipt_bpf"); 21MODULE_ALIAS("ipt_bpf");
21MODULE_ALIAS("ip6t_bpf"); 22MODULE_ALIAS("ip6t_bpf");
22 23
23static int bpf_mt_check(const struct xt_mtchk_param *par) 24static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
25 struct bpf_prog **ret)
24{ 26{
25 struct xt_bpf_info *info = par->matchinfo;
26 struct sock_fprog_kern program; 27 struct sock_fprog_kern program;
27 28
28 program.len = info->bpf_program_num_elem; 29 program.len = len;
29 program.filter = info->bpf_program; 30 program.filter = insns;
30 31
31 if (bpf_prog_create(&info->filter, &program)) { 32 if (bpf_prog_create(ret, &program)) {
32 pr_info("bpf: check failed: parse error\n"); 33 pr_info("bpf: check failed: parse error\n");
33 return -EINVAL; 34 return -EINVAL;
34 } 35 }
@@ -36,6 +37,42 @@ static int bpf_mt_check(const struct xt_mtchk_param *par)
36 return 0; 37 return 0;
37} 38}
38 39
40static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
41{
42 struct bpf_prog *prog;
43
44 prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
45 if (IS_ERR(prog))
46 return PTR_ERR(prog);
47
48 *ret = prog;
49 return 0;
50}
51
52static int bpf_mt_check(const struct xt_mtchk_param *par)
53{
54 struct xt_bpf_info *info = par->matchinfo;
55
56 return __bpf_mt_check_bytecode(info->bpf_program,
57 info->bpf_program_num_elem,
58 &info->filter);
59}
60
61static int bpf_mt_check_v1(const struct xt_mtchk_param *par)
62{
63 struct xt_bpf_info_v1 *info = par->matchinfo;
64
65 if (info->mode == XT_BPF_MODE_BYTECODE)
66 return __bpf_mt_check_bytecode(info->bpf_program,
67 info->bpf_program_num_elem,
68 &info->filter);
69 else if (info->mode == XT_BPF_MODE_FD_PINNED ||
70 info->mode == XT_BPF_MODE_FD_ELF)
71 return __bpf_mt_check_fd(info->fd, &info->filter);
72 else
73 return -EINVAL;
74}
75
39static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) 76static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
40{ 77{
41 const struct xt_bpf_info *info = par->matchinfo; 78 const struct xt_bpf_info *info = par->matchinfo;
@@ -43,31 +80,58 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
43 return BPF_PROG_RUN(info->filter, skb); 80 return BPF_PROG_RUN(info->filter, skb);
44} 81}
45 82
83static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
84{
85 const struct xt_bpf_info_v1 *info = par->matchinfo;
86
87 return !!bpf_prog_run_save_cb(info->filter, (struct sk_buff *) skb);
88}
89
46static void bpf_mt_destroy(const struct xt_mtdtor_param *par) 90static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
47{ 91{
48 const struct xt_bpf_info *info = par->matchinfo; 92 const struct xt_bpf_info *info = par->matchinfo;
93
94 bpf_prog_destroy(info->filter);
95}
96
97static void bpf_mt_destroy_v1(const struct xt_mtdtor_param *par)
98{
99 const struct xt_bpf_info_v1 *info = par->matchinfo;
100
49 bpf_prog_destroy(info->filter); 101 bpf_prog_destroy(info->filter);
50} 102}
51 103
52static struct xt_match bpf_mt_reg __read_mostly = { 104static struct xt_match bpf_mt_reg[] __read_mostly = {
53 .name = "bpf", 105 {
54 .revision = 0, 106 .name = "bpf",
55 .family = NFPROTO_UNSPEC, 107 .revision = 0,
56 .checkentry = bpf_mt_check, 108 .family = NFPROTO_UNSPEC,
57 .match = bpf_mt, 109 .checkentry = bpf_mt_check,
58 .destroy = bpf_mt_destroy, 110 .match = bpf_mt,
59 .matchsize = sizeof(struct xt_bpf_info), 111 .destroy = bpf_mt_destroy,
60 .me = THIS_MODULE, 112 .matchsize = sizeof(struct xt_bpf_info),
113 .me = THIS_MODULE,
114 },
115 {
116 .name = "bpf",
117 .revision = 1,
118 .family = NFPROTO_UNSPEC,
119 .checkentry = bpf_mt_check_v1,
120 .match = bpf_mt_v1,
121 .destroy = bpf_mt_destroy_v1,
122 .matchsize = sizeof(struct xt_bpf_info_v1),
123 .me = THIS_MODULE,
124 },
61}; 125};
62 126
63static int __init bpf_mt_init(void) 127static int __init bpf_mt_init(void)
64{ 128{
65 return xt_register_match(&bpf_mt_reg); 129 return xt_register_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg));
66} 130}
67 131
68static void __exit bpf_mt_exit(void) 132static void __exit bpf_mt_exit(void)
69{ 133{
70 xt_unregister_match(&bpf_mt_reg); 134 xt_unregister_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg));
71} 135}
72 136
73module_init(bpf_mt_init); 137module_init(bpf_mt_init);
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 96fa26b20b67..9a9884a39c0e 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -112,7 +112,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
112 * know, matches should not alter packets, but we are doing this here 112 * know, matches should not alter packets, but we are doing this here
113 * because we would need to add a PKTTYPE target for this sole purpose. 113 * because we would need to add a PKTTYPE target for this sole purpose.
114 */ 114 */
115 if (!xt_cluster_is_multicast_addr(skb, par->family) && 115 if (!xt_cluster_is_multicast_addr(skb, xt_family(par)) &&
116 skb->pkt_type == PACKET_MULTICAST) { 116 skb->pkt_type == PACKET_MULTICAST) {
117 pskb->pkt_type = PACKET_HOST; 117 pskb->pkt_type = PACKET_HOST;
118 } 118 }
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index d4bec261e74e..cad0b7b5eb35 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -110,7 +110,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
110 sinfo->direction != XT_CONNBYTES_DIR_BOTH) 110 sinfo->direction != XT_CONNBYTES_DIR_BOTH)
111 return -EINVAL; 111 return -EINVAL;
112 112
113 ret = nf_ct_l3proto_try_module_get(par->family); 113 ret = nf_ct_netns_get(par->net, par->family);
114 if (ret < 0) 114 if (ret < 0)
115 pr_info("cannot load conntrack support for proto=%u\n", 115 pr_info("cannot load conntrack support for proto=%u\n",
116 par->family); 116 par->family);
@@ -129,7 +129,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
129 129
130static void connbytes_mt_destroy(const struct xt_mtdtor_param *par) 130static void connbytes_mt_destroy(const struct xt_mtdtor_param *par)
131{ 131{
132 nf_ct_l3proto_module_put(par->family); 132 nf_ct_netns_put(par->net, par->family);
133} 133}
134 134
135static struct xt_match connbytes_mt_reg __read_mostly = { 135static struct xt_match connbytes_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c
index 03d66f1c5e69..7827128d5a95 100644
--- a/net/netfilter/xt_connlabel.c
+++ b/net/netfilter/xt_connlabel.c
@@ -61,7 +61,7 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par)
61 return -EINVAL; 61 return -EINVAL;
62 } 62 }
63 63
64 ret = nf_ct_l3proto_try_module_get(par->family); 64 ret = nf_ct_netns_get(par->net, par->family);
65 if (ret < 0) { 65 if (ret < 0) {
66 pr_info("cannot load conntrack support for proto=%u\n", 66 pr_info("cannot load conntrack support for proto=%u\n",
67 par->family); 67 par->family);
@@ -70,14 +70,14 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par)
70 70
71 ret = nf_connlabels_get(par->net, info->bit); 71 ret = nf_connlabels_get(par->net, info->bit);
72 if (ret < 0) 72 if (ret < 0)
73 nf_ct_l3proto_module_put(par->family); 73 nf_ct_netns_put(par->net, par->family);
74 return ret; 74 return ret;
75} 75}
76 76
77static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) 77static void connlabel_mt_destroy(const struct xt_mtdtor_param *par)
78{ 78{
79 nf_connlabels_put(par->net); 79 nf_connlabels_put(par->net);
80 nf_ct_l3proto_module_put(par->family); 80 nf_ct_netns_put(par->net, par->family);
81} 81}
82 82
83static struct xt_match connlabels_mt_reg __read_mostly = { 83static struct xt_match connlabels_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index b6dc322593a3..2aff2b7c4689 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -317,7 +317,7 @@ static int count_them(struct net *net,
317static bool 317static bool
318connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) 318connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
319{ 319{
320 struct net *net = par->net; 320 struct net *net = xt_net(par);
321 const struct xt_connlimit_info *info = par->matchinfo; 321 const struct xt_connlimit_info *info = par->matchinfo;
322 union nf_inet_addr addr; 322 union nf_inet_addr addr;
323 struct nf_conntrack_tuple tuple; 323 struct nf_conntrack_tuple tuple;
@@ -332,11 +332,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
332 tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 332 tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
333 zone = nf_ct_zone(ct); 333 zone = nf_ct_zone(ct);
334 } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), 334 } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
335 par->family, net, &tuple)) { 335 xt_family(par), net, &tuple)) {
336 goto hotdrop; 336 goto hotdrop;
337 } 337 }
338 338
339 if (par->family == NFPROTO_IPV6) { 339 if (xt_family(par) == NFPROTO_IPV6) {
340 const struct ipv6hdr *iph = ipv6_hdr(skb); 340 const struct ipv6hdr *iph = ipv6_hdr(skb);
341 memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? 341 memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
342 &iph->daddr : &iph->saddr, sizeof(addr.ip6)); 342 &iph->daddr : &iph->saddr, sizeof(addr.ip6));
@@ -347,7 +347,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
347 } 347 }
348 348
349 connections = count_them(net, info->data, tuple_ptr, &addr, 349 connections = count_them(net, info->data, tuple_ptr, &addr,
350 &info->mask, par->family, zone); 350 &info->mask, xt_family(par), zone);
351 if (connections == 0) 351 if (connections == 0)
352 /* kmalloc failed, drop it entirely */ 352 /* kmalloc failed, drop it entirely */
353 goto hotdrop; 353 goto hotdrop;
@@ -368,7 +368,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
368 368
369 net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd)); 369 net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd));
370 370
371 ret = nf_ct_l3proto_try_module_get(par->family); 371 ret = nf_ct_netns_get(par->net, par->family);
372 if (ret < 0) { 372 if (ret < 0) {
373 pr_info("cannot load conntrack support for " 373 pr_info("cannot load conntrack support for "
374 "address family %u\n", par->family); 374 "address family %u\n", par->family);
@@ -378,7 +378,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
378 /* init private data */ 378 /* init private data */
379 info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); 379 info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
380 if (info->data == NULL) { 380 if (info->data == NULL) {
381 nf_ct_l3proto_module_put(par->family); 381 nf_ct_netns_put(par->net, par->family);
382 return -ENOMEM; 382 return -ENOMEM;
383 } 383 }
384 384
@@ -414,7 +414,7 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
414 const struct xt_connlimit_info *info = par->matchinfo; 414 const struct xt_connlimit_info *info = par->matchinfo;
415 unsigned int i; 415 unsigned int i;
416 416
417 nf_ct_l3proto_module_put(par->family); 417 nf_ct_netns_put(par->net, par->family);
418 418
419 for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) 419 for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
420 destroy_tree(&info->data->climit_root4[i]); 420 destroy_tree(&info->data->climit_root4[i]);
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index b83e158e116a..9935d5029b0e 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -77,7 +77,7 @@ static int connmark_tg_check(const struct xt_tgchk_param *par)
77{ 77{
78 int ret; 78 int ret;
79 79
80 ret = nf_ct_l3proto_try_module_get(par->family); 80 ret = nf_ct_netns_get(par->net, par->family);
81 if (ret < 0) 81 if (ret < 0)
82 pr_info("cannot load conntrack support for proto=%u\n", 82 pr_info("cannot load conntrack support for proto=%u\n",
83 par->family); 83 par->family);
@@ -86,7 +86,7 @@ static int connmark_tg_check(const struct xt_tgchk_param *par)
86 86
87static void connmark_tg_destroy(const struct xt_tgdtor_param *par) 87static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
88{ 88{
89 nf_ct_l3proto_module_put(par->family); 89 nf_ct_netns_put(par->net, par->family);
90} 90}
91 91
92static bool 92static bool
@@ -107,7 +107,7 @@ static int connmark_mt_check(const struct xt_mtchk_param *par)
107{ 107{
108 int ret; 108 int ret;
109 109
110 ret = nf_ct_l3proto_try_module_get(par->family); 110 ret = nf_ct_netns_get(par->net, par->family);
111 if (ret < 0) 111 if (ret < 0)
112 pr_info("cannot load conntrack support for proto=%u\n", 112 pr_info("cannot load conntrack support for proto=%u\n",
113 par->family); 113 par->family);
@@ -116,7 +116,7 @@ static int connmark_mt_check(const struct xt_mtchk_param *par)
116 116
117static void connmark_mt_destroy(const struct xt_mtdtor_param *par) 117static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
118{ 118{
119 nf_ct_l3proto_module_put(par->family); 119 nf_ct_netns_put(par->net, par->family);
120} 120}
121 121
122static struct xt_target connmark_tg_reg __read_mostly = { 122static struct xt_target connmark_tg_reg __read_mostly = {
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index a3b8f697cfc5..c0fb217bc649 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -200,22 +200,22 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
200 return false; 200 return false;
201 201
202 if (info->match_flags & XT_CONNTRACK_ORIGSRC) 202 if (info->match_flags & XT_CONNTRACK_ORIGSRC)
203 if (conntrack_mt_origsrc(ct, info, par->family) ^ 203 if (conntrack_mt_origsrc(ct, info, xt_family(par)) ^
204 !(info->invert_flags & XT_CONNTRACK_ORIGSRC)) 204 !(info->invert_flags & XT_CONNTRACK_ORIGSRC))
205 return false; 205 return false;
206 206
207 if (info->match_flags & XT_CONNTRACK_ORIGDST) 207 if (info->match_flags & XT_CONNTRACK_ORIGDST)
208 if (conntrack_mt_origdst(ct, info, par->family) ^ 208 if (conntrack_mt_origdst(ct, info, xt_family(par)) ^
209 !(info->invert_flags & XT_CONNTRACK_ORIGDST)) 209 !(info->invert_flags & XT_CONNTRACK_ORIGDST))
210 return false; 210 return false;
211 211
212 if (info->match_flags & XT_CONNTRACK_REPLSRC) 212 if (info->match_flags & XT_CONNTRACK_REPLSRC)
213 if (conntrack_mt_replsrc(ct, info, par->family) ^ 213 if (conntrack_mt_replsrc(ct, info, xt_family(par)) ^
214 !(info->invert_flags & XT_CONNTRACK_REPLSRC)) 214 !(info->invert_flags & XT_CONNTRACK_REPLSRC))
215 return false; 215 return false;
216 216
217 if (info->match_flags & XT_CONNTRACK_REPLDST) 217 if (info->match_flags & XT_CONNTRACK_REPLDST)
218 if (conntrack_mt_repldst(ct, info, par->family) ^ 218 if (conntrack_mt_repldst(ct, info, xt_family(par)) ^
219 !(info->invert_flags & XT_CONNTRACK_REPLDST)) 219 !(info->invert_flags & XT_CONNTRACK_REPLDST))
220 return false; 220 return false;
221 221
@@ -271,7 +271,7 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par)
271{ 271{
272 int ret; 272 int ret;
273 273
274 ret = nf_ct_l3proto_try_module_get(par->family); 274 ret = nf_ct_netns_get(par->net, par->family);
275 if (ret < 0) 275 if (ret < 0)
276 pr_info("cannot load conntrack support for proto=%u\n", 276 pr_info("cannot load conntrack support for proto=%u\n",
277 par->family); 277 par->family);
@@ -280,7 +280,7 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par)
280 280
281static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) 281static void conntrack_mt_destroy(const struct xt_mtdtor_param *par)
282{ 282{
283 nf_ct_l3proto_module_put(par->family); 283 nf_ct_netns_put(par->net, par->family);
284} 284}
285 285
286static struct xt_match conntrack_mt_reg[] __read_mostly = { 286static struct xt_match conntrack_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c
index d9202cdd25c9..96ebe1cdefec 100644
--- a/net/netfilter/xt_devgroup.c
+++ b/net/netfilter/xt_devgroup.c
@@ -24,12 +24,12 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
24 const struct xt_devgroup_info *info = par->matchinfo; 24 const struct xt_devgroup_info *info = par->matchinfo;
25 25
26 if (info->flags & XT_DEVGROUP_MATCH_SRC && 26 if (info->flags & XT_DEVGROUP_MATCH_SRC &&
27 (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^ 27 (((info->src_group ^ xt_in(par)->group) & info->src_mask ? 1 : 0) ^
28 ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0))) 28 ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0)))
29 return false; 29 return false;
30 30
31 if (info->flags & XT_DEVGROUP_MATCH_DST && 31 if (info->flags & XT_DEVGROUP_MATCH_DST &&
32 (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^ 32 (((info->dst_group ^ xt_out(par)->group) & info->dst_mask ? 1 : 0) ^
33 ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0))) 33 ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0)))
34 return false; 34 return false;
35 35
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 64670fc5d0e1..236ac8008909 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -58,7 +58,7 @@ static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par)
58{ 58{
59 const struct xt_tos_match_info *info = par->matchinfo; 59 const struct xt_tos_match_info *info = par->matchinfo;
60 60
61 if (par->family == NFPROTO_IPV4) 61 if (xt_family(par) == NFPROTO_IPV4)
62 return ((ip_hdr(skb)->tos & info->tos_mask) == 62 return ((ip_hdr(skb)->tos & info->tos_mask) ==
63 info->tos_value) ^ !!info->invert; 63 info->tos_value) ^ !!info->invert;
64 else 64 else
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index b89b688e9d01..10063408141d 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -49,7 +49,7 @@ struct hashlimit_net {
49 struct proc_dir_entry *ip6t_hashlimit; 49 struct proc_dir_entry *ip6t_hashlimit;
50}; 50};
51 51
52static int hashlimit_net_id; 52static unsigned int hashlimit_net_id;
53static inline struct hashlimit_net *hashlimit_pernet(struct net *net) 53static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
54{ 54{
55 return net_generic(net, hashlimit_net_id); 55 return net_generic(net, hashlimit_net_id);
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index f679dd4c272a..38a78151c0e9 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -59,7 +59,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par)
59 struct xt_helper_info *info = par->matchinfo; 59 struct xt_helper_info *info = par->matchinfo;
60 int ret; 60 int ret;
61 61
62 ret = nf_ct_l3proto_try_module_get(par->family); 62 ret = nf_ct_netns_get(par->net, par->family);
63 if (ret < 0) { 63 if (ret < 0) {
64 pr_info("cannot load conntrack support for proto=%u\n", 64 pr_info("cannot load conntrack support for proto=%u\n",
65 par->family); 65 par->family);
@@ -71,7 +71,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par)
71 71
72static void helper_mt_destroy(const struct xt_mtdtor_param *par) 72static void helper_mt_destroy(const struct xt_mtdtor_param *par)
73{ 73{
74 nf_ct_l3proto_module_put(par->family); 74 nf_ct_netns_put(par->net, par->family);
75} 75}
76 76
77static struct xt_match helper_mt_reg __read_mostly = { 77static struct xt_match helper_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 71a9d95e0a81..0fdc89064488 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -48,9 +48,9 @@ static bool
48ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) 48ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
49{ 49{
50 const struct xt_ipvs_mtinfo *data = par->matchinfo; 50 const struct xt_ipvs_mtinfo *data = par->matchinfo;
51 struct netns_ipvs *ipvs = net_ipvs(par->net); 51 struct netns_ipvs *ipvs = net_ipvs(xt_net(par));
52 /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */ 52 /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
53 const u_int8_t family = par->family; 53 const u_int8_t family = xt_family(par);
54 struct ip_vs_iphdr iph; 54 struct ip_vs_iphdr iph;
55 struct ip_vs_protocol *pp; 55 struct ip_vs_protocol *pp;
56 struct ip_vs_conn *cp; 56 struct ip_vs_conn *cp;
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index ac1d3c3d09e7..1cde0e4985b7 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -42,29 +42,43 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
42 e = minfo->ports[++i]; 42 e = minfo->ports[++i];
43 pr_debug("src or dst matches with %d-%d?\n", s, e); 43 pr_debug("src or dst matches with %d-%d?\n", s, e);
44 44
45 if (minfo->flags == XT_MULTIPORT_SOURCE 45 switch (minfo->flags) {
46 && src >= s && src <= e) 46 case XT_MULTIPORT_SOURCE:
47 return true ^ minfo->invert; 47 if (src >= s && src <= e)
48 if (minfo->flags == XT_MULTIPORT_DESTINATION 48 return true ^ minfo->invert;
49 && dst >= s && dst <= e) 49 break;
50 return true ^ minfo->invert; 50 case XT_MULTIPORT_DESTINATION:
51 if (minfo->flags == XT_MULTIPORT_EITHER 51 if (dst >= s && dst <= e)
52 && ((dst >= s && dst <= e) 52 return true ^ minfo->invert;
53 || (src >= s && src <= e))) 53 break;
54 return true ^ minfo->invert; 54 case XT_MULTIPORT_EITHER:
55 if ((dst >= s && dst <= e) ||
56 (src >= s && src <= e))
57 return true ^ minfo->invert;
58 break;
59 default:
60 break;
61 }
55 } else { 62 } else {
56 /* exact port matching */ 63 /* exact port matching */
57 pr_debug("src or dst matches with %d?\n", s); 64 pr_debug("src or dst matches with %d?\n", s);
58 65
59 if (minfo->flags == XT_MULTIPORT_SOURCE 66 switch (minfo->flags) {
60 && src == s) 67 case XT_MULTIPORT_SOURCE:
61 return true ^ minfo->invert; 68 if (src == s)
62 if (minfo->flags == XT_MULTIPORT_DESTINATION 69 return true ^ minfo->invert;
63 && dst == s) 70 break;
64 return true ^ minfo->invert; 71 case XT_MULTIPORT_DESTINATION:
65 if (minfo->flags == XT_MULTIPORT_EITHER 72 if (dst == s)
66 && (src == s || dst == s)) 73 return true ^ minfo->invert;
67 return true ^ minfo->invert; 74 break;
75 case XT_MULTIPORT_EITHER:
76 if (src == s || dst == s)
77 return true ^ minfo->invert;
78 break;
79 default:
80 break;
81 }
68 } 82 }
69 } 83 }
70 84
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index bea7464cc43f..8107b3eb865f 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -23,7 +23,17 @@ static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par)
23 par->target->name); 23 par->target->name);
24 return -EINVAL; 24 return -EINVAL;
25 } 25 }
26 return 0; 26 return nf_ct_netns_get(par->net, par->family);
27}
28
29static int xt_nat_checkentry(const struct xt_tgchk_param *par)
30{
31 return nf_ct_netns_get(par->net, par->family);
32}
33
34static void xt_nat_destroy(const struct xt_tgdtor_param *par)
35{
36 nf_ct_netns_put(par->net, par->family);
27} 37}
28 38
29static void xt_nat_convert_range(struct nf_nat_range *dst, 39static void xt_nat_convert_range(struct nf_nat_range *dst,
@@ -106,6 +116,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
106 .name = "SNAT", 116 .name = "SNAT",
107 .revision = 0, 117 .revision = 0,
108 .checkentry = xt_nat_checkentry_v0, 118 .checkentry = xt_nat_checkentry_v0,
119 .destroy = xt_nat_destroy,
109 .target = xt_snat_target_v0, 120 .target = xt_snat_target_v0,
110 .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), 121 .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
111 .family = NFPROTO_IPV4, 122 .family = NFPROTO_IPV4,
@@ -118,6 +129,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
118 .name = "DNAT", 129 .name = "DNAT",
119 .revision = 0, 130 .revision = 0,
120 .checkentry = xt_nat_checkentry_v0, 131 .checkentry = xt_nat_checkentry_v0,
132 .destroy = xt_nat_destroy,
121 .target = xt_dnat_target_v0, 133 .target = xt_dnat_target_v0,
122 .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), 134 .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
123 .family = NFPROTO_IPV4, 135 .family = NFPROTO_IPV4,
@@ -129,6 +141,8 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
129 { 141 {
130 .name = "SNAT", 142 .name = "SNAT",
131 .revision = 1, 143 .revision = 1,
144 .checkentry = xt_nat_checkentry,
145 .destroy = xt_nat_destroy,
132 .target = xt_snat_target_v1, 146 .target = xt_snat_target_v1,
133 .targetsize = sizeof(struct nf_nat_range), 147 .targetsize = sizeof(struct nf_nat_range),
134 .table = "nat", 148 .table = "nat",
@@ -139,6 +153,8 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
139 { 153 {
140 .name = "DNAT", 154 .name = "DNAT",
141 .revision = 1, 155 .revision = 1,
156 .checkentry = xt_nat_checkentry,
157 .destroy = xt_nat_destroy,
142 .target = xt_dnat_target_v1, 158 .target = xt_dnat_target_v1,
143 .targetsize = sizeof(struct nf_nat_range), 159 .targetsize = sizeof(struct nf_nat_range),
144 .table = "nat", 160 .table = "nat",
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index cf327593852a..cc0518fe598e 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
26 26
27 nfnl_acct_update(skb, info->nfacct); 27 nfnl_acct_update(skb, info->nfacct);
28 28
29 overquota = nfnl_acct_overquota(par->net, skb, info->nfacct); 29 overquota = nfnl_acct_overquota(xt_net(par), skb, info->nfacct);
30 30
31 return overquota == NFACCT_UNDERQUOTA ? false : true; 31 return overquota == NFACCT_UNDERQUOTA ? false : true;
32} 32}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 2455b69b5810..c05fefcec238 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -201,7 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
201 unsigned char opts[MAX_IPOPTLEN]; 201 unsigned char opts[MAX_IPOPTLEN];
202 const struct xt_osf_finger *kf; 202 const struct xt_osf_finger *kf;
203 const struct xt_osf_user_finger *f; 203 const struct xt_osf_user_finger *f;
204 struct net *net = p->net; 204 struct net *net = xt_net(p);
205 205
206 if (!info) 206 if (!info)
207 return false; 207 return false;
@@ -326,8 +326,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
326 fcount++; 326 fcount++;
327 327
328 if (info->flags & XT_OSF_LOG) 328 if (info->flags & XT_OSF_LOG)
329 nf_log_packet(net, p->family, p->hooknum, skb, 329 nf_log_packet(net, xt_family(p), xt_hooknum(p), skb,
330 p->in, p->out, NULL, 330 xt_in(p), xt_out(p), NULL,
331 "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", 331 "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
332 f->genre, f->version, f->subtype, 332 f->genre, f->version, f->subtype,
333 &ip->saddr, ntohs(tcp->source), 333 &ip->saddr, ntohs(tcp->source),
@@ -341,8 +341,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
341 rcu_read_unlock(); 341 rcu_read_unlock();
342 342
343 if (!fcount && (info->flags & XT_OSF_LOG)) 343 if (!fcount && (info->flags & XT_OSF_LOG))
344 nf_log_packet(net, p->family, p->hooknum, skb, p->in, 344 nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),
345 p->out, NULL, 345 xt_out(p), NULL,
346 "Remote OS is not known: %pI4:%u -> %pI4:%u\n", 346 "Remote OS is not known: %pI4:%u -> %pI4:%u\n",
347 &ip->saddr, ntohs(tcp->source), 347 &ip->saddr, ntohs(tcp->source),
348 &ip->daddr, ntohs(tcp->dest)); 348 &ip->daddr, ntohs(tcp->dest));
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index a20e731b5b6c..16477df45b3b 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -63,7 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
63 const struct xt_owner_match_info *info = par->matchinfo; 63 const struct xt_owner_match_info *info = par->matchinfo;
64 const struct file *filp; 64 const struct file *filp;
65 struct sock *sk = skb_to_full_sk(skb); 65 struct sock *sk = skb_to_full_sk(skb);
66 struct net *net = par->net; 66 struct net *net = xt_net(par);
67 67
68 if (sk == NULL || sk->sk_socket == NULL) 68 if (sk == NULL || sk->sk_socket == NULL)
69 return (info->match ^ info->invert) == 0; 69 return (info->match ^ info->invert) == 0;
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 5b645cb598fc..57efb703ff18 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -30,10 +30,10 @@ pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par)
30 30
31 if (skb->pkt_type != PACKET_LOOPBACK) 31 if (skb->pkt_type != PACKET_LOOPBACK)
32 type = skb->pkt_type; 32 type = skb->pkt_type;
33 else if (par->family == NFPROTO_IPV4 && 33 else if (xt_family(par) == NFPROTO_IPV4 &&
34 ipv4_is_multicast(ip_hdr(skb)->daddr)) 34 ipv4_is_multicast(ip_hdr(skb)->daddr))
35 type = PACKET_MULTICAST; 35 type = PACKET_MULTICAST;
36 else if (par->family == NFPROTO_IPV6 && 36 else if (xt_family(par) == NFPROTO_IPV6 &&
37 ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) 37 ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
38 type = PACKET_MULTICAST; 38 type = PACKET_MULTICAST;
39 else 39 else
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index f23e97bb42d7..2b4ab189bba7 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -116,9 +116,9 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par)
116 int ret; 116 int ret;
117 117
118 if (info->flags & XT_POLICY_MATCH_IN) 118 if (info->flags & XT_POLICY_MATCH_IN)
119 ret = match_policy_in(skb, info, par->family); 119 ret = match_policy_in(skb, info, xt_family(par));
120 else 120 else
121 ret = match_policy_out(skb, info, par->family); 121 ret = match_policy_out(skb, info, xt_family(par));
122 122
123 if (ret < 0) 123 if (ret < 0)
124 ret = info->flags & XT_POLICY_MATCH_NONE ? true : false; 124 ret = info->flags & XT_POLICY_MATCH_NONE ? true : false;
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 7720b036d76a..1db02f6fca54 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -18,35 +18,33 @@ static bool
18xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par) 18xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
19{ 19{
20 const struct xt_rateest_match_info *info = par->matchinfo; 20 const struct xt_rateest_match_info *info = par->matchinfo;
21 struct gnet_stats_rate_est64 *r; 21 struct gnet_stats_rate_est64 sample = {0};
22 u_int32_t bps1, bps2, pps1, pps2; 22 u_int32_t bps1, bps2, pps1, pps2;
23 bool ret = true; 23 bool ret = true;
24 24
25 spin_lock_bh(&info->est1->lock); 25 gen_estimator_read(&info->est1->rate_est, &sample);
26 r = &info->est1->rstats; 26
27 if (info->flags & XT_RATEEST_MATCH_DELTA) { 27 if (info->flags & XT_RATEEST_MATCH_DELTA) {
28 bps1 = info->bps1 >= r->bps ? info->bps1 - r->bps : 0; 28 bps1 = info->bps1 >= sample.bps ? info->bps1 - sample.bps : 0;
29 pps1 = info->pps1 >= r->pps ? info->pps1 - r->pps : 0; 29 pps1 = info->pps1 >= sample.pps ? info->pps1 - sample.pps : 0;
30 } else { 30 } else {
31 bps1 = r->bps; 31 bps1 = sample.bps;
32 pps1 = r->pps; 32 pps1 = sample.pps;
33 } 33 }
34 spin_unlock_bh(&info->est1->lock);
35 34
36 if (info->flags & XT_RATEEST_MATCH_ABS) { 35 if (info->flags & XT_RATEEST_MATCH_ABS) {
37 bps2 = info->bps2; 36 bps2 = info->bps2;
38 pps2 = info->pps2; 37 pps2 = info->pps2;
39 } else { 38 } else {
40 spin_lock_bh(&info->est2->lock); 39 gen_estimator_read(&info->est2->rate_est, &sample);
41 r = &info->est2->rstats; 40
42 if (info->flags & XT_RATEEST_MATCH_DELTA) { 41 if (info->flags & XT_RATEEST_MATCH_DELTA) {
43 bps2 = info->bps2 >= r->bps ? info->bps2 - r->bps : 0; 42 bps2 = info->bps2 >= sample.bps ? info->bps2 - sample.bps : 0;
44 pps2 = info->pps2 >= r->pps ? info->pps2 - r->pps : 0; 43 pps2 = info->pps2 >= sample.pps ? info->pps2 - sample.pps : 0;
45 } else { 44 } else {
46 bps2 = r->bps; 45 bps2 = sample.bps;
47 pps2 = r->pps; 46 pps2 = sample.pps;
48 } 47 }
49 spin_unlock_bh(&info->est2->lock);
50 } 48 }
51 49
52 switch (info->mode) { 50 switch (info->mode) {
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index e3b7a09b103e..1d89a4eaf841 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -95,7 +95,7 @@ struct recent_net {
95#endif 95#endif
96}; 96};
97 97
98static int recent_net_id __read_mostly; 98static unsigned int recent_net_id __read_mostly;
99 99
100static inline struct recent_net *recent_pernet(struct net *net) 100static inline struct recent_net *recent_pernet(struct net *net)
101{ 101{
@@ -236,7 +236,7 @@ static void recent_table_flush(struct recent_table *t)
236static bool 236static bool
237recent_mt(const struct sk_buff *skb, struct xt_action_param *par) 237recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
238{ 238{
239 struct net *net = par->net; 239 struct net *net = xt_net(par);
240 struct recent_net *recent_net = recent_pernet(net); 240 struct recent_net *recent_net = recent_pernet(net);
241 const struct xt_recent_mtinfo_v1 *info = par->matchinfo; 241 const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
242 struct recent_table *t; 242 struct recent_table *t;
@@ -245,7 +245,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
245 u_int8_t ttl; 245 u_int8_t ttl;
246 bool ret = info->invert; 246 bool ret = info->invert;
247 247
248 if (par->family == NFPROTO_IPV4) { 248 if (xt_family(par) == NFPROTO_IPV4) {
249 const struct iphdr *iph = ip_hdr(skb); 249 const struct iphdr *iph = ip_hdr(skb);
250 250
251 if (info->side == XT_RECENT_DEST) 251 if (info->side == XT_RECENT_DEST)
@@ -266,7 +266,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
266 } 266 }
267 267
268 /* use TTL as seen before forwarding */ 268 /* use TTL as seen before forwarding */
269 if (par->out != NULL && skb->sk == NULL) 269 if (xt_out(par) != NULL && skb->sk == NULL)
270 ttl++; 270 ttl++;
271 271
272 spin_lock_bh(&recent_lock); 272 spin_lock_bh(&recent_lock);
@@ -274,12 +274,12 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
274 274
275 nf_inet_addr_mask(&addr, &addr_mask, &t->mask); 275 nf_inet_addr_mask(&addr, &addr_mask, &t->mask);
276 276
277 e = recent_entry_lookup(t, &addr_mask, par->family, 277 e = recent_entry_lookup(t, &addr_mask, xt_family(par),
278 (info->check_set & XT_RECENT_TTL) ? ttl : 0); 278 (info->check_set & XT_RECENT_TTL) ? ttl : 0);
279 if (e == NULL) { 279 if (e == NULL) {
280 if (!(info->check_set & XT_RECENT_SET)) 280 if (!(info->check_set & XT_RECENT_SET))
281 goto out; 281 goto out;
282 e = recent_entry_init(t, &addr_mask, par->family, ttl); 282 e = recent_entry_init(t, &addr_mask, xt_family(par), ttl);
283 if (e == NULL) 283 if (e == NULL)
284 par->hotdrop = true; 284 par->hotdrop = true;
285 ret = !ret; 285 ret = !ret;
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 5669e5b453f4..64285702afd5 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -55,7 +55,7 @@ set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
55{ 55{
56 const struct xt_set_info_match_v0 *info = par->matchinfo; 56 const struct xt_set_info_match_v0 *info = par->matchinfo;
57 57
58 ADT_OPT(opt, par->family, info->match_set.u.compat.dim, 58 ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim,
59 info->match_set.u.compat.flags, 0, UINT_MAX); 59 info->match_set.u.compat.flags, 0, UINT_MAX);
60 60
61 return match_set(info->match_set.index, skb, par, &opt, 61 return match_set(info->match_set.index, skb, par, &opt,
@@ -118,7 +118,7 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
118{ 118{
119 const struct xt_set_info_match_v1 *info = par->matchinfo; 119 const struct xt_set_info_match_v1 *info = par->matchinfo;
120 120
121 ADT_OPT(opt, par->family, info->match_set.dim, 121 ADT_OPT(opt, xt_family(par), info->match_set.dim,
122 info->match_set.flags, 0, UINT_MAX); 122 info->match_set.flags, 0, UINT_MAX);
123 123
124 if (opt.flags & IPSET_RETURN_NOMATCH) 124 if (opt.flags & IPSET_RETURN_NOMATCH)
@@ -184,7 +184,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
184 const struct xt_set_info_match_v3 *info = par->matchinfo; 184 const struct xt_set_info_match_v3 *info = par->matchinfo;
185 int ret; 185 int ret;
186 186
187 ADT_OPT(opt, par->family, info->match_set.dim, 187 ADT_OPT(opt, xt_family(par), info->match_set.dim,
188 info->match_set.flags, info->flags, UINT_MAX); 188 info->match_set.flags, info->flags, UINT_MAX);
189 189
190 if (info->packets.op != IPSET_COUNTER_NONE || 190 if (info->packets.op != IPSET_COUNTER_NONE ||
@@ -231,7 +231,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
231 const struct xt_set_info_match_v4 *info = par->matchinfo; 231 const struct xt_set_info_match_v4 *info = par->matchinfo;
232 int ret; 232 int ret;
233 233
234 ADT_OPT(opt, par->family, info->match_set.dim, 234 ADT_OPT(opt, xt_family(par), info->match_set.dim,
235 info->match_set.flags, info->flags, UINT_MAX); 235 info->match_set.flags, info->flags, UINT_MAX);
236 236
237 if (info->packets.op != IPSET_COUNTER_NONE || 237 if (info->packets.op != IPSET_COUNTER_NONE ||
@@ -259,9 +259,9 @@ set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
259{ 259{
260 const struct xt_set_info_target_v0 *info = par->targinfo; 260 const struct xt_set_info_target_v0 *info = par->targinfo;
261 261
262 ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim, 262 ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim,
263 info->add_set.u.compat.flags, 0, UINT_MAX); 263 info->add_set.u.compat.flags, 0, UINT_MAX);
264 ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim, 264 ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim,
265 info->del_set.u.compat.flags, 0, UINT_MAX); 265 info->del_set.u.compat.flags, 0, UINT_MAX);
266 266
267 if (info->add_set.index != IPSET_INVALID_ID) 267 if (info->add_set.index != IPSET_INVALID_ID)
@@ -332,9 +332,9 @@ set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
332{ 332{
333 const struct xt_set_info_target_v1 *info = par->targinfo; 333 const struct xt_set_info_target_v1 *info = par->targinfo;
334 334
335 ADT_OPT(add_opt, par->family, info->add_set.dim, 335 ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
336 info->add_set.flags, 0, UINT_MAX); 336 info->add_set.flags, 0, UINT_MAX);
337 ADT_OPT(del_opt, par->family, info->del_set.dim, 337 ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
338 info->del_set.flags, 0, UINT_MAX); 338 info->del_set.flags, 0, UINT_MAX);
339 339
340 if (info->add_set.index != IPSET_INVALID_ID) 340 if (info->add_set.index != IPSET_INVALID_ID)
@@ -401,9 +401,9 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
401{ 401{
402 const struct xt_set_info_target_v2 *info = par->targinfo; 402 const struct xt_set_info_target_v2 *info = par->targinfo;
403 403
404 ADT_OPT(add_opt, par->family, info->add_set.dim, 404 ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
405 info->add_set.flags, info->flags, info->timeout); 405 info->add_set.flags, info->flags, info->timeout);
406 ADT_OPT(del_opt, par->family, info->del_set.dim, 406 ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
407 info->del_set.flags, 0, UINT_MAX); 407 info->del_set.flags, 0, UINT_MAX);
408 408
409 /* Normalize to fit into jiffies */ 409 /* Normalize to fit into jiffies */
@@ -423,17 +423,19 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
423 423
424/* Revision 3 target */ 424/* Revision 3 target */
425 425
426#define MOPT(opt, member) ((opt).ext.skbinfo.member)
427
426static unsigned int 428static unsigned int
427set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) 429set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
428{ 430{
429 const struct xt_set_info_target_v3 *info = par->targinfo; 431 const struct xt_set_info_target_v3 *info = par->targinfo;
430 int ret; 432 int ret;
431 433
432 ADT_OPT(add_opt, par->family, info->add_set.dim, 434 ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
433 info->add_set.flags, info->flags, info->timeout); 435 info->add_set.flags, info->flags, info->timeout);
434 ADT_OPT(del_opt, par->family, info->del_set.dim, 436 ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
435 info->del_set.flags, 0, UINT_MAX); 437 info->del_set.flags, 0, UINT_MAX);
436 ADT_OPT(map_opt, par->family, info->map_set.dim, 438 ADT_OPT(map_opt, xt_family(par), info->map_set.dim,
437 info->map_set.flags, 0, UINT_MAX); 439 info->map_set.flags, 0, UINT_MAX);
438 440
439 /* Normalize to fit into jiffies */ 441 /* Normalize to fit into jiffies */
@@ -453,14 +455,14 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
453 if (!ret) 455 if (!ret)
454 return XT_CONTINUE; 456 return XT_CONTINUE;
455 if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK) 457 if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK)
456 skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask)) 458 skb->mark = (skb->mark & ~MOPT(map_opt,skbmarkmask))
457 ^ (map_opt.ext.skbmark); 459 ^ MOPT(map_opt, skbmark);
458 if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO) 460 if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO)
459 skb->priority = map_opt.ext.skbprio; 461 skb->priority = MOPT(map_opt, skbprio);
460 if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) && 462 if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) &&
461 skb->dev && 463 skb->dev &&
462 skb->dev->real_num_tx_queues > map_opt.ext.skbqueue) 464 skb->dev->real_num_tx_queues > MOPT(map_opt, skbqueue))
463 skb_set_queue_mapping(skb, map_opt.ext.skbqueue); 465 skb_set_queue_mapping(skb, MOPT(map_opt, skbqueue));
464 } 466 }
465 return XT_CONTINUE; 467 return XT_CONTINUE;
466} 468}
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index b10ade272b50..770bbec878f1 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -22,76 +22,14 @@
22#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 22#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
23 23
24#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 24#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
25#define XT_SOCKET_HAVE_IPV6 1
26#include <linux/netfilter_ipv6/ip6_tables.h> 25#include <linux/netfilter_ipv6/ip6_tables.h>
27#include <net/inet6_hashtables.h> 26#include <net/inet6_hashtables.h>
28#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 27#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
29#endif 28#endif
30 29
30#include <net/netfilter/nf_socket.h>
31#include <linux/netfilter/xt_socket.h> 31#include <linux/netfilter/xt_socket.h>
32 32
33#if IS_ENABLED(CONFIG_NF_CONNTRACK)
34#define XT_SOCKET_HAVE_CONNTRACK 1
35#include <net/netfilter/nf_conntrack.h>
36#endif
37
38static int
39extract_icmp4_fields(const struct sk_buff *skb,
40 u8 *protocol,
41 __be32 *raddr,
42 __be32 *laddr,
43 __be16 *rport,
44 __be16 *lport)
45{
46 unsigned int outside_hdrlen = ip_hdrlen(skb);
47 struct iphdr *inside_iph, _inside_iph;
48 struct icmphdr *icmph, _icmph;
49 __be16 *ports, _ports[2];
50
51 icmph = skb_header_pointer(skb, outside_hdrlen,
52 sizeof(_icmph), &_icmph);
53 if (icmph == NULL)
54 return 1;
55
56 switch (icmph->type) {
57 case ICMP_DEST_UNREACH:
58 case ICMP_SOURCE_QUENCH:
59 case ICMP_REDIRECT:
60 case ICMP_TIME_EXCEEDED:
61 case ICMP_PARAMETERPROB:
62 break;
63 default:
64 return 1;
65 }
66
67 inside_iph = skb_header_pointer(skb, outside_hdrlen +
68 sizeof(struct icmphdr),
69 sizeof(_inside_iph), &_inside_iph);
70 if (inside_iph == NULL)
71 return 1;
72
73 if (inside_iph->protocol != IPPROTO_TCP &&
74 inside_iph->protocol != IPPROTO_UDP)
75 return 1;
76
77 ports = skb_header_pointer(skb, outside_hdrlen +
78 sizeof(struct icmphdr) +
79 (inside_iph->ihl << 2),
80 sizeof(_ports), &_ports);
81 if (ports == NULL)
82 return 1;
83
84 /* the inside IP packet is the one quoted from our side, thus
85 * its saddr is the local address */
86 *protocol = inside_iph->protocol;
87 *laddr = inside_iph->saddr;
88 *lport = ports[0];
89 *raddr = inside_iph->daddr;
90 *rport = ports[1];
91
92 return 0;
93}
94
95/* "socket" match based redirection (no specific rule) 33/* "socket" match based redirection (no specific rule)
96 * =================================================== 34 * ===================================================
97 * 35 *
@@ -111,104 +49,6 @@ extract_icmp4_fields(const struct sk_buff *skb,
111 * then local services could intercept traffic going through the 49 * then local services could intercept traffic going through the
112 * box. 50 * box.
113 */ 51 */
114static struct sock *
115xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
116 const u8 protocol,
117 const __be32 saddr, const __be32 daddr,
118 const __be16 sport, const __be16 dport,
119 const struct net_device *in)
120{
121 switch (protocol) {
122 case IPPROTO_TCP:
123 return inet_lookup(net, &tcp_hashinfo, skb, doff,
124 saddr, sport, daddr, dport,
125 in->ifindex);
126 case IPPROTO_UDP:
127 return udp4_lib_lookup(net, saddr, sport, daddr, dport,
128 in->ifindex);
129 }
130 return NULL;
131}
132
133static bool xt_socket_sk_is_transparent(struct sock *sk)
134{
135 switch (sk->sk_state) {
136 case TCP_TIME_WAIT:
137 return inet_twsk(sk)->tw_transparent;
138
139 case TCP_NEW_SYN_RECV:
140 return inet_rsk(inet_reqsk(sk))->no_srccheck;
141
142 default:
143 return inet_sk(sk)->transparent;
144 }
145}
146
147static struct sock *xt_socket_lookup_slow_v4(struct net *net,
148 const struct sk_buff *skb,
149 const struct net_device *indev)
150{
151 const struct iphdr *iph = ip_hdr(skb);
152 struct sk_buff *data_skb = NULL;
153 int doff = 0;
154 __be32 uninitialized_var(daddr), uninitialized_var(saddr);
155 __be16 uninitialized_var(dport), uninitialized_var(sport);
156 u8 uninitialized_var(protocol);
157#ifdef XT_SOCKET_HAVE_CONNTRACK
158 struct nf_conn const *ct;
159 enum ip_conntrack_info ctinfo;
160#endif
161
162 if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
163 struct udphdr _hdr, *hp;
164
165 hp = skb_header_pointer(skb, ip_hdrlen(skb),
166 sizeof(_hdr), &_hdr);
167 if (hp == NULL)
168 return NULL;
169
170 protocol = iph->protocol;
171 saddr = iph->saddr;
172 sport = hp->source;
173 daddr = iph->daddr;
174 dport = hp->dest;
175 data_skb = (struct sk_buff *)skb;
176 doff = iph->protocol == IPPROTO_TCP ?
177 ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
178 ip_hdrlen(skb) + sizeof(*hp);
179
180 } else if (iph->protocol == IPPROTO_ICMP) {
181 if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
182 &sport, &dport))
183 return NULL;
184 } else {
185 return NULL;
186 }
187
188#ifdef XT_SOCKET_HAVE_CONNTRACK
189 /* Do the lookup with the original socket address in
190 * case this is a reply packet of an established
191 * SNAT-ted connection.
192 */
193 ct = nf_ct_get(skb, &ctinfo);
194 if (ct && !nf_ct_is_untracked(ct) &&
195 ((iph->protocol != IPPROTO_ICMP &&
196 ctinfo == IP_CT_ESTABLISHED_REPLY) ||
197 (iph->protocol == IPPROTO_ICMP &&
198 ctinfo == IP_CT_RELATED_REPLY)) &&
199 (ct->status & IPS_SRC_NAT_DONE)) {
200
201 daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
202 dport = (iph->protocol == IPPROTO_TCP) ?
203 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
204 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
205 }
206#endif
207
208 return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
209 daddr, sport, dport, indev);
210}
211
212static bool 52static bool
213socket_match(const struct sk_buff *skb, struct xt_action_param *par, 53socket_match(const struct sk_buff *skb, struct xt_action_param *par,
214 const struct xt_socket_mtinfo1 *info) 54 const struct xt_socket_mtinfo1 *info)
@@ -217,7 +57,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
217 struct sock *sk = skb->sk; 57 struct sock *sk = skb->sk;
218 58
219 if (!sk) 59 if (!sk)
220 sk = xt_socket_lookup_slow_v4(par->net, skb, par->in); 60 sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par));
221 if (sk) { 61 if (sk) {
222 bool wildcard; 62 bool wildcard;
223 bool transparent = true; 63 bool transparent = true;
@@ -233,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
233 * if XT_SOCKET_TRANSPARENT is used 73 * if XT_SOCKET_TRANSPARENT is used
234 */ 74 */
235 if (info->flags & XT_SOCKET_TRANSPARENT) 75 if (info->flags & XT_SOCKET_TRANSPARENT)
236 transparent = xt_socket_sk_is_transparent(sk); 76 transparent = nf_sk_is_transparent(sk);
237 77
238 if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && 78 if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
239 transparent) 79 transparent)
@@ -265,132 +105,7 @@ socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
265 return socket_match(skb, par, par->matchinfo); 105 return socket_match(skb, par, par->matchinfo);
266} 106}
267 107
268#ifdef XT_SOCKET_HAVE_IPV6 108#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
269
270static int
271extract_icmp6_fields(const struct sk_buff *skb,
272 unsigned int outside_hdrlen,
273 int *protocol,
274 const struct in6_addr **raddr,
275 const struct in6_addr **laddr,
276 __be16 *rport,
277 __be16 *lport,
278 struct ipv6hdr *ipv6_var)
279{
280 const struct ipv6hdr *inside_iph;
281 struct icmp6hdr *icmph, _icmph;
282 __be16 *ports, _ports[2];
283 u8 inside_nexthdr;
284 __be16 inside_fragoff;
285 int inside_hdrlen;
286
287 icmph = skb_header_pointer(skb, outside_hdrlen,
288 sizeof(_icmph), &_icmph);
289 if (icmph == NULL)
290 return 1;
291
292 if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
293 return 1;
294
295 inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph),
296 sizeof(*ipv6_var), ipv6_var);
297 if (inside_iph == NULL)
298 return 1;
299 inside_nexthdr = inside_iph->nexthdr;
300
301 inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) +
302 sizeof(*ipv6_var),
303 &inside_nexthdr, &inside_fragoff);
304 if (inside_hdrlen < 0)
305 return 1; /* hjm: Packet has no/incomplete transport layer headers. */
306
307 if (inside_nexthdr != IPPROTO_TCP &&
308 inside_nexthdr != IPPROTO_UDP)
309 return 1;
310
311 ports = skb_header_pointer(skb, inside_hdrlen,
312 sizeof(_ports), &_ports);
313 if (ports == NULL)
314 return 1;
315
316 /* the inside IP packet is the one quoted from our side, thus
317 * its saddr is the local address */
318 *protocol = inside_nexthdr;
319 *laddr = &inside_iph->saddr;
320 *lport = ports[0];
321 *raddr = &inside_iph->daddr;
322 *rport = ports[1];
323
324 return 0;
325}
326
327static struct sock *
328xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
329 const u8 protocol,
330 const struct in6_addr *saddr, const struct in6_addr *daddr,
331 const __be16 sport, const __be16 dport,
332 const struct net_device *in)
333{
334 switch (protocol) {
335 case IPPROTO_TCP:
336 return inet6_lookup(net, &tcp_hashinfo, skb, doff,
337 saddr, sport, daddr, dport,
338 in->ifindex);
339 case IPPROTO_UDP:
340 return udp6_lib_lookup(net, saddr, sport, daddr, dport,
341 in->ifindex);
342 }
343
344 return NULL;
345}
346
347static struct sock *xt_socket_lookup_slow_v6(struct net *net,
348 const struct sk_buff *skb,
349 const struct net_device *indev)
350{
351 __be16 uninitialized_var(dport), uninitialized_var(sport);
352 const struct in6_addr *daddr = NULL, *saddr = NULL;
353 struct ipv6hdr *iph = ipv6_hdr(skb);
354 struct sk_buff *data_skb = NULL;
355 int doff = 0;
356 int thoff = 0, tproto;
357
358 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
359 if (tproto < 0) {
360 pr_debug("unable to find transport header in IPv6 packet, dropping\n");
361 return NULL;
362 }
363
364 if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
365 struct udphdr _hdr, *hp;
366
367 hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
368 if (hp == NULL)
369 return NULL;
370
371 saddr = &iph->saddr;
372 sport = hp->source;
373 daddr = &iph->daddr;
374 dport = hp->dest;
375 data_skb = (struct sk_buff *)skb;
376 doff = tproto == IPPROTO_TCP ?
377 thoff + __tcp_hdrlen((struct tcphdr *)hp) :
378 thoff + sizeof(*hp);
379
380 } else if (tproto == IPPROTO_ICMPV6) {
381 struct ipv6hdr ipv6_var;
382
383 if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
384 &sport, &dport, &ipv6_var))
385 return NULL;
386 } else {
387 return NULL;
388 }
389
390 return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
391 sport, dport, indev);
392}
393
394static bool 109static bool
395socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) 110socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
396{ 111{
@@ -399,7 +114,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
399 struct sock *sk = skb->sk; 114 struct sock *sk = skb->sk;
400 115
401 if (!sk) 116 if (!sk)
402 sk = xt_socket_lookup_slow_v6(par->net, skb, par->in); 117 sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par));
403 if (sk) { 118 if (sk) {
404 bool wildcard; 119 bool wildcard;
405 bool transparent = true; 120 bool transparent = true;
@@ -415,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
415 * if XT_SOCKET_TRANSPARENT is used 130 * if XT_SOCKET_TRANSPARENT is used
416 */ 131 */
417 if (info->flags & XT_SOCKET_TRANSPARENT) 132 if (info->flags & XT_SOCKET_TRANSPARENT)
418 transparent = xt_socket_sk_is_transparent(sk); 133 transparent = nf_sk_is_transparent(sk);
419 134
420 if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && 135 if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
421 transparent) 136 transparent)
@@ -432,9 +147,28 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
432} 147}
433#endif 148#endif
434 149
150static int socket_mt_enable_defrag(struct net *net, int family)
151{
152 switch (family) {
153 case NFPROTO_IPV4:
154 return nf_defrag_ipv4_enable(net);
155#ifdef XT_SOCKET_HAVE_IPV6
156 case NFPROTO_IPV6:
157 return nf_defrag_ipv6_enable(net);
158#endif
159 }
160 WARN_ONCE(1, "Unknown family %d\n", family);
161 return 0;
162}
163
435static int socket_mt_v1_check(const struct xt_mtchk_param *par) 164static int socket_mt_v1_check(const struct xt_mtchk_param *par)
436{ 165{
437 const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; 166 const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
167 int err;
168
169 err = socket_mt_enable_defrag(par->net, par->family);
170 if (err)
171 return err;
438 172
439 if (info->flags & ~XT_SOCKET_FLAGS_V1) { 173 if (info->flags & ~XT_SOCKET_FLAGS_V1) {
440 pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); 174 pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1);
@@ -446,6 +180,11 @@ static int socket_mt_v1_check(const struct xt_mtchk_param *par)
446static int socket_mt_v2_check(const struct xt_mtchk_param *par) 180static int socket_mt_v2_check(const struct xt_mtchk_param *par)
447{ 181{
448 const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo; 182 const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo;
183 int err;
184
185 err = socket_mt_enable_defrag(par->net, par->family);
186 if (err)
187 return err;
449 188
450 if (info->flags & ~XT_SOCKET_FLAGS_V2) { 189 if (info->flags & ~XT_SOCKET_FLAGS_V2) {
451 pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); 190 pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2);
@@ -458,7 +197,11 @@ static int socket_mt_v3_check(const struct xt_mtchk_param *par)
458{ 197{
459 const struct xt_socket_mtinfo3 *info = 198 const struct xt_socket_mtinfo3 *info =
460 (struct xt_socket_mtinfo3 *)par->matchinfo; 199 (struct xt_socket_mtinfo3 *)par->matchinfo;
200 int err;
461 201
202 err = socket_mt_enable_defrag(par->net, par->family);
203 if (err)
204 return err;
462 if (info->flags & ~XT_SOCKET_FLAGS_V3) { 205 if (info->flags & ~XT_SOCKET_FLAGS_V3) {
463 pr_info("unknown flags 0x%x\n", 206 pr_info("unknown flags 0x%x\n",
464 info->flags & ~XT_SOCKET_FLAGS_V3); 207 info->flags & ~XT_SOCKET_FLAGS_V3);
@@ -488,7 +231,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
488 (1 << NF_INET_LOCAL_IN), 231 (1 << NF_INET_LOCAL_IN),
489 .me = THIS_MODULE, 232 .me = THIS_MODULE,
490 }, 233 },
491#ifdef XT_SOCKET_HAVE_IPV6 234#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
492 { 235 {
493 .name = "socket", 236 .name = "socket",
494 .revision = 1, 237 .revision = 1,
@@ -512,7 +255,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
512 (1 << NF_INET_LOCAL_IN), 255 (1 << NF_INET_LOCAL_IN),
513 .me = THIS_MODULE, 256 .me = THIS_MODULE,
514 }, 257 },
515#ifdef XT_SOCKET_HAVE_IPV6 258#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
516 { 259 {
517 .name = "socket", 260 .name = "socket",
518 .revision = 2, 261 .revision = 2,
@@ -536,7 +279,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
536 (1 << NF_INET_LOCAL_IN), 279 (1 << NF_INET_LOCAL_IN),
537 .me = THIS_MODULE, 280 .me = THIS_MODULE,
538 }, 281 },
539#ifdef XT_SOCKET_HAVE_IPV6 282#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
540 { 283 {
541 .name = "socket", 284 .name = "socket",
542 .revision = 3, 285 .revision = 3,
@@ -553,11 +296,6 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
553 296
554static int __init socket_mt_init(void) 297static int __init socket_mt_init(void)
555{ 298{
556 nf_defrag_ipv4_enable();
557#ifdef XT_SOCKET_HAVE_IPV6
558 nf_defrag_ipv6_enable();
559#endif
560
561 return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); 299 return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
562} 300}
563 301
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index a507922d80cd..5746a33789a5 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -43,7 +43,7 @@ static int state_mt_check(const struct xt_mtchk_param *par)
43{ 43{
44 int ret; 44 int ret;
45 45
46 ret = nf_ct_l3proto_try_module_get(par->family); 46 ret = nf_ct_netns_get(par->net, par->family);
47 if (ret < 0) 47 if (ret < 0)
48 pr_info("cannot load conntrack support for proto=%u\n", 48 pr_info("cannot load conntrack support for proto=%u\n",
49 par->family); 49 par->family);
@@ -52,7 +52,7 @@ static int state_mt_check(const struct xt_mtchk_param *par)
52 52
53static void state_mt_destroy(const struct xt_mtdtor_param *par) 53static void state_mt_destroy(const struct xt_mtdtor_param *par)
54{ 54{
55 nf_ct_l3proto_module_put(par->family); 55 nf_ct_netns_put(par->net, par->family);
56} 56}
57 57
58static struct xt_match state_mt_reg __read_mostly = { 58static struct xt_match state_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 0ae55a36f492..1b01eec1fbda 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -168,7 +168,7 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
168 * may happen that the same packet matches both rules if 168 * may happen that the same packet matches both rules if
169 * it arrived at the right moment before 13:00. 169 * it arrived at the right moment before 13:00.
170 */ 170 */
171 if (skb->tstamp.tv64 == 0) 171 if (skb->tstamp == 0)
172 __net_timestamp((struct sk_buff *)skb); 172 __net_timestamp((struct sk_buff *)skb);
173 173
174 stamp = ktime_to_ns(skb->tstamp); 174 stamp = ktime_to_ns(skb->tstamp);
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index 2ec93c5e77bb..d177dd066504 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -60,13 +60,7 @@ struct netlbl_domhsh_walk_arg {
60}; 60};
61 61
62/* NetLabel Generic NETLINK CALIPSO family */ 62/* NetLabel Generic NETLINK CALIPSO family */
63static struct genl_family netlbl_calipso_gnl_family = { 63static struct genl_family netlbl_calipso_gnl_family;
64 .id = GENL_ID_GENERATE,
65 .hdrsize = 0,
66 .name = NETLBL_NLTYPE_CALIPSO_NAME,
67 .version = NETLBL_PROTO_VERSION,
68 .maxattr = NLBL_CALIPSO_A_MAX,
69};
70 64
71/* NetLabel Netlink attribute policy */ 65/* NetLabel Netlink attribute policy */
72static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = { 66static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = {
@@ -355,6 +349,16 @@ static const struct genl_ops netlbl_calipso_ops[] = {
355 }, 349 },
356}; 350};
357 351
352static struct genl_family netlbl_calipso_gnl_family __ro_after_init = {
353 .hdrsize = 0,
354 .name = NETLBL_NLTYPE_CALIPSO_NAME,
355 .version = NETLBL_PROTO_VERSION,
356 .maxattr = NLBL_CALIPSO_A_MAX,
357 .module = THIS_MODULE,
358 .ops = netlbl_calipso_ops,
359 .n_ops = ARRAY_SIZE(netlbl_calipso_ops),
360};
361
358/* NetLabel Generic NETLINK Protocol Functions 362/* NetLabel Generic NETLINK Protocol Functions
359 */ 363 */
360 364
@@ -368,8 +372,7 @@ static const struct genl_ops netlbl_calipso_ops[] = {
368 */ 372 */
369int __init netlbl_calipso_genl_init(void) 373int __init netlbl_calipso_genl_init(void)
370{ 374{
371 return genl_register_family_with_ops(&netlbl_calipso_gnl_family, 375 return genl_register_family(&netlbl_calipso_gnl_family);
372 netlbl_calipso_ops);
373} 376}
374 377
375static const struct netlbl_calipso_ops *calipso_ops; 378static const struct netlbl_calipso_ops *calipso_ops;
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 7fd1104ba900..4149d3e63589 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -59,14 +59,7 @@ struct netlbl_domhsh_walk_arg {
59}; 59};
60 60
61/* NetLabel Generic NETLINK CIPSOv4 family */ 61/* NetLabel Generic NETLINK CIPSOv4 family */
62static struct genl_family netlbl_cipsov4_gnl_family = { 62static struct genl_family netlbl_cipsov4_gnl_family;
63 .id = GENL_ID_GENERATE,
64 .hdrsize = 0,
65 .name = NETLBL_NLTYPE_CIPSOV4_NAME,
66 .version = NETLBL_PROTO_VERSION,
67 .maxattr = NLBL_CIPSOV4_A_MAX,
68};
69
70/* NetLabel Netlink attribute policy */ 63/* NetLabel Netlink attribute policy */
71static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { 64static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = {
72 [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 }, 65 [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 },
@@ -767,6 +760,16 @@ static const struct genl_ops netlbl_cipsov4_ops[] = {
767 }, 760 },
768}; 761};
769 762
763static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = {
764 .hdrsize = 0,
765 .name = NETLBL_NLTYPE_CIPSOV4_NAME,
766 .version = NETLBL_PROTO_VERSION,
767 .maxattr = NLBL_CIPSOV4_A_MAX,
768 .module = THIS_MODULE,
769 .ops = netlbl_cipsov4_ops,
770 .n_ops = ARRAY_SIZE(netlbl_cipsov4_ops),
771};
772
770/* 773/*
771 * NetLabel Generic NETLINK Protocol Functions 774 * NetLabel Generic NETLINK Protocol Functions
772 */ 775 */
@@ -781,6 +784,5 @@ static const struct genl_ops netlbl_cipsov4_ops[] = {
781 */ 784 */
782int __init netlbl_cipsov4_genl_init(void) 785int __init netlbl_cipsov4_genl_init(void)
783{ 786{
784 return genl_register_family_with_ops(&netlbl_cipsov4_gnl_family, 787 return genl_register_family(&netlbl_cipsov4_gnl_family);
785 netlbl_cipsov4_ops);
786} 788}
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index f85d0e07af2d..21e0095b1d14 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -60,13 +60,7 @@ struct netlbl_domhsh_walk_arg {
60}; 60};
61 61
62/* NetLabel Generic NETLINK CIPSOv4 family */ 62/* NetLabel Generic NETLINK CIPSOv4 family */
63static struct genl_family netlbl_mgmt_gnl_family = { 63static struct genl_family netlbl_mgmt_gnl_family;
64 .id = GENL_ID_GENERATE,
65 .hdrsize = 0,
66 .name = NETLBL_NLTYPE_MGMT_NAME,
67 .version = NETLBL_PROTO_VERSION,
68 .maxattr = NLBL_MGMT_A_MAX,
69};
70 64
71/* NetLabel Netlink attribute policy */ 65/* NetLabel Netlink attribute policy */
72static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { 66static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
@@ -834,6 +828,16 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = {
834 }, 828 },
835}; 829};
836 830
831static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = {
832 .hdrsize = 0,
833 .name = NETLBL_NLTYPE_MGMT_NAME,
834 .version = NETLBL_PROTO_VERSION,
835 .maxattr = NLBL_MGMT_A_MAX,
836 .module = THIS_MODULE,
837 .ops = netlbl_mgmt_genl_ops,
838 .n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops),
839};
840
837/* 841/*
838 * NetLabel Generic NETLINK Protocol Functions 842 * NetLabel Generic NETLINK Protocol Functions
839 */ 843 */
@@ -848,6 +852,5 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = {
848 */ 852 */
849int __init netlbl_mgmt_genl_init(void) 853int __init netlbl_mgmt_genl_init(void)
850{ 854{
851 return genl_register_family_with_ops(&netlbl_mgmt_gnl_family, 855 return genl_register_family(&netlbl_mgmt_gnl_family);
852 netlbl_mgmt_genl_ops);
853} 856}
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 4528cff9138b..22dc1b9d6362 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -123,13 +123,7 @@ static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def;
123static u8 netlabel_unlabel_acceptflg; 123static u8 netlabel_unlabel_acceptflg;
124 124
125/* NetLabel Generic NETLINK unlabeled family */ 125/* NetLabel Generic NETLINK unlabeled family */
126static struct genl_family netlbl_unlabel_gnl_family = { 126static struct genl_family netlbl_unlabel_gnl_family;
127 .id = GENL_ID_GENERATE,
128 .hdrsize = 0,
129 .name = NETLBL_NLTYPE_UNLABELED_NAME,
130 .version = NETLBL_PROTO_VERSION,
131 .maxattr = NLBL_UNLABEL_A_MAX,
132};
133 127
134/* NetLabel Netlink attribute policy */ 128/* NetLabel Netlink attribute policy */
135static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { 129static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
@@ -1378,6 +1372,16 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = {
1378 }, 1372 },
1379}; 1373};
1380 1374
1375static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = {
1376 .hdrsize = 0,
1377 .name = NETLBL_NLTYPE_UNLABELED_NAME,
1378 .version = NETLBL_PROTO_VERSION,
1379 .maxattr = NLBL_UNLABEL_A_MAX,
1380 .module = THIS_MODULE,
1381 .ops = netlbl_unlabel_genl_ops,
1382 .n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops),
1383};
1384
1381/* 1385/*
1382 * NetLabel Generic NETLINK Protocol Functions 1386 * NetLabel Generic NETLINK Protocol Functions
1383 */ 1387 */
@@ -1392,8 +1396,7 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = {
1392 */ 1396 */
1393int __init netlbl_unlabel_genl_init(void) 1397int __init netlbl_unlabel_genl_init(void)
1394{ 1398{
1395 return genl_register_family_with_ops(&netlbl_unlabel_gnl_family, 1399 return genl_register_family(&netlbl_unlabel_gnl_family);
1396 netlbl_unlabel_genl_ops);
1397} 1400}
1398 1401
1399/* 1402/*
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 246f29d365c0..161b628ab2b0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -40,7 +40,7 @@
40#include <linux/net.h> 40#include <linux/net.h>
41#include <linux/fs.h> 41#include <linux/fs.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <asm/uaccess.h> 43#include <linux/uaccess.h>
44#include <linux/skbuff.h> 44#include <linux/skbuff.h>
45#include <linux/netdevice.h> 45#include <linux/netdevice.h>
46#include <linux/rtnetlink.h> 46#include <linux/rtnetlink.h>
@@ -113,7 +113,7 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
113 113
114#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); 114#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
115 115
116static ATOMIC_NOTIFIER_HEAD(netlink_chain); 116static BLOCKING_NOTIFIER_HEAD(netlink_chain);
117 117
118static DEFINE_SPINLOCK(netlink_tap_lock); 118static DEFINE_SPINLOCK(netlink_tap_lock);
119static struct list_head netlink_tap_all __read_mostly; 119static struct list_head netlink_tap_all __read_mostly;
@@ -711,7 +711,7 @@ static int netlink_release(struct socket *sock)
711 .protocol = sk->sk_protocol, 711 .protocol = sk->sk_protocol,
712 .portid = nlk->portid, 712 .portid = nlk->portid,
713 }; 713 };
714 atomic_notifier_call_chain(&netlink_chain, 714 blocking_notifier_call_chain(&netlink_chain,
715 NETLINK_URELEASE, &n); 715 NETLINK_URELEASE, &n);
716 } 716 }
717 717
@@ -2504,13 +2504,13 @@ static const struct file_operations netlink_seq_fops = {
2504 2504
2505int netlink_register_notifier(struct notifier_block *nb) 2505int netlink_register_notifier(struct notifier_block *nb)
2506{ 2506{
2507 return atomic_notifier_chain_register(&netlink_chain, nb); 2507 return blocking_notifier_chain_register(&netlink_chain, nb);
2508} 2508}
2509EXPORT_SYMBOL(netlink_register_notifier); 2509EXPORT_SYMBOL(netlink_register_notifier);
2510 2510
2511int netlink_unregister_notifier(struct notifier_block *nb) 2511int netlink_unregister_notifier(struct notifier_block *nb)
2512{ 2512{
2513 return atomic_notifier_chain_unregister(&netlink_chain, nb); 2513 return blocking_notifier_chain_unregister(&netlink_chain, nb);
2514} 2514}
2515EXPORT_SYMBOL(netlink_unregister_notifier); 2515EXPORT_SYMBOL(netlink_unregister_notifier);
2516 2516
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 49c28e8ef01b..fb6e10fdb217 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -17,6 +17,7 @@
17#include <linux/mutex.h> 17#include <linux/mutex.h>
18#include <linux/bitmap.h> 18#include <linux/bitmap.h>
19#include <linux/rwsem.h> 19#include <linux/rwsem.h>
20#include <linux/idr.h>
20#include <net/sock.h> 21#include <net/sock.h>
21#include <net/genetlink.h> 22#include <net/genetlink.h>
22 23
@@ -58,10 +59,8 @@ static void genl_unlock_all(void)
58 up_write(&cb_lock); 59 up_write(&cb_lock);
59} 60}
60 61
61#define GENL_FAM_TAB_SIZE 16 62static DEFINE_IDR(genl_fam_idr);
62#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1)
63 63
64static struct list_head family_ht[GENL_FAM_TAB_SIZE];
65/* 64/*
66 * Bitmap of multicast groups that are currently in use. 65 * Bitmap of multicast groups that are currently in use.
67 * 66 *
@@ -86,45 +85,29 @@ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) |
86static unsigned long *mc_groups = &mc_group_start; 85static unsigned long *mc_groups = &mc_group_start;
87static unsigned long mc_groups_longs = 1; 86static unsigned long mc_groups_longs = 1;
88 87
89static int genl_ctrl_event(int event, struct genl_family *family, 88static int genl_ctrl_event(int event, const struct genl_family *family,
90 const struct genl_multicast_group *grp, 89 const struct genl_multicast_group *grp,
91 int grp_id); 90 int grp_id);
92 91
93static inline unsigned int genl_family_hash(unsigned int id) 92static const struct genl_family *genl_family_find_byid(unsigned int id)
94{ 93{
95 return id & GENL_FAM_TAB_MASK; 94 return idr_find(&genl_fam_idr, id);
96} 95}
97 96
98static inline struct list_head *genl_family_chain(unsigned int id) 97static const struct genl_family *genl_family_find_byname(char *name)
99{ 98{
100 return &family_ht[genl_family_hash(id)]; 99 const struct genl_family *family;
101} 100 unsigned int id;
102
103static struct genl_family *genl_family_find_byid(unsigned int id)
104{
105 struct genl_family *f;
106
107 list_for_each_entry(f, genl_family_chain(id), family_list)
108 if (f->id == id)
109 return f;
110
111 return NULL;
112}
113
114static struct genl_family *genl_family_find_byname(char *name)
115{
116 struct genl_family *f;
117 int i;
118 101
119 for (i = 0; i < GENL_FAM_TAB_SIZE; i++) 102 idr_for_each_entry(&genl_fam_idr, family, id)
120 list_for_each_entry(f, genl_family_chain(i), family_list) 103 if (strcmp(family->name, name) == 0)
121 if (strcmp(f->name, name) == 0) 104 return family;
122 return f;
123 105
124 return NULL; 106 return NULL;
125} 107}
126 108
127static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) 109static const struct genl_ops *genl_get_cmd(u8 cmd,
110 const struct genl_family *family)
128{ 111{
129 int i; 112 int i;
130 113
@@ -135,26 +118,6 @@ static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family)
135 return NULL; 118 return NULL;
136} 119}
137 120
138/* Of course we are going to have problems once we hit
139 * 2^16 alive types, but that can only happen by year 2K
140*/
141static u16 genl_generate_id(void)
142{
143 static u16 id_gen_idx = GENL_MIN_ID;
144 int i;
145
146 for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) {
147 if (id_gen_idx != GENL_ID_VFS_DQUOT &&
148 id_gen_idx != GENL_ID_PMCRAID &&
149 !genl_family_find_byid(id_gen_idx))
150 return id_gen_idx;
151 if (++id_gen_idx > GENL_MAX_ID)
152 id_gen_idx = GENL_MIN_ID;
153 }
154
155 return 0;
156}
157
158static int genl_allocate_reserve_groups(int n_groups, int *first_id) 121static int genl_allocate_reserve_groups(int n_groups, int *first_id)
159{ 122{
160 unsigned long *new_groups; 123 unsigned long *new_groups;
@@ -295,7 +258,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family)
295 return err; 258 return err;
296} 259}
297 260
298static void genl_unregister_mc_groups(struct genl_family *family) 261static void genl_unregister_mc_groups(const struct genl_family *family)
299{ 262{
300 struct net *net; 263 struct net *net;
301 int i; 264 int i;
@@ -344,28 +307,21 @@ static int genl_validate_ops(const struct genl_family *family)
344} 307}
345 308
346/** 309/**
347 * __genl_register_family - register a generic netlink family 310 * genl_register_family - register a generic netlink family
348 * @family: generic netlink family 311 * @family: generic netlink family
349 * 312 *
350 * Registers the specified family after validating it first. Only one 313 * Registers the specified family after validating it first. Only one
351 * family may be registered with the same family name or identifier. 314 * family may be registered with the same family name or identifier.
352 * The family id may equal GENL_ID_GENERATE causing an unique id to
353 * be automatically generated and assigned.
354 * 315 *
355 * The family's ops array must already be assigned, you can use the 316 * The family's ops, multicast groups and module pointer must already
356 * genl_register_family_with_ops() helper function. 317 * be assigned.
357 * 318 *
358 * Return 0 on success or a negative error code. 319 * Return 0 on success or a negative error code.
359 */ 320 */
360int __genl_register_family(struct genl_family *family) 321int genl_register_family(struct genl_family *family)
361{ 322{
362 int err = -EINVAL, i; 323 int err, i;
363 324 int start = GENL_START_ALLOC, end = GENL_MAX_ID;
364 if (family->id && family->id < GENL_MIN_ID)
365 goto errout;
366
367 if (family->id > GENL_MAX_ID)
368 goto errout;
369 325
370 err = genl_validate_ops(family); 326 err = genl_validate_ops(family);
371 if (err) 327 if (err)
@@ -378,18 +334,20 @@ int __genl_register_family(struct genl_family *family)
378 goto errout_locked; 334 goto errout_locked;
379 } 335 }
380 336
381 if (family->id == GENL_ID_GENERATE) { 337 /*
382 u16 newid = genl_generate_id(); 338 * Sadly, a few cases need to be special-cased
383 339 * due to them having previously abused the API
384 if (!newid) { 340 * and having used their family ID also as their
385 err = -ENOMEM; 341 * multicast group ID, so we use reserved IDs
386 goto errout_locked; 342 * for both to be sure we can do that mapping.
387 } 343 */
388 344 if (family == &genl_ctrl) {
389 family->id = newid; 345 /* and this needs to be special for initial family lookups */
390 } else if (genl_family_find_byid(family->id)) { 346 start = end = GENL_ID_CTRL;
391 err = -EEXIST; 347 } else if (strcmp(family->name, "pmcraid") == 0) {
392 goto errout_locked; 348 start = end = GENL_ID_PMCRAID;
349 } else if (strcmp(family->name, "VFS_DQUOT") == 0) {
350 start = end = GENL_ID_VFS_DQUOT;
393 } 351 }
394 352
395 if (family->maxattr && !family->parallel_ops) { 353 if (family->maxattr && !family->parallel_ops) {
@@ -402,11 +360,17 @@ int __genl_register_family(struct genl_family *family)
402 } else 360 } else
403 family->attrbuf = NULL; 361 family->attrbuf = NULL;
404 362
363 family->id = idr_alloc(&genl_fam_idr, family,
364 start, end + 1, GFP_KERNEL);
365 if (family->id < 0) {
366 err = family->id;
367 goto errout_locked;
368 }
369
405 err = genl_validate_assign_mc_groups(family); 370 err = genl_validate_assign_mc_groups(family);
406 if (err) 371 if (err)
407 goto errout_free; 372 goto errout_remove;
408 373
409 list_add_tail(&family->family_list, genl_family_chain(family->id));
410 genl_unlock_all(); 374 genl_unlock_all();
411 375
412 /* send all events */ 376 /* send all events */
@@ -417,14 +381,14 @@ int __genl_register_family(struct genl_family *family)
417 381
418 return 0; 382 return 0;
419 383
420errout_free: 384errout_remove:
385 idr_remove(&genl_fam_idr, family->id);
421 kfree(family->attrbuf); 386 kfree(family->attrbuf);
422errout_locked: 387errout_locked:
423 genl_unlock_all(); 388 genl_unlock_all();
424errout:
425 return err; 389 return err;
426} 390}
427EXPORT_SYMBOL(__genl_register_family); 391EXPORT_SYMBOL(genl_register_family);
428 392
429/** 393/**
430 * genl_unregister_family - unregister generic netlink family 394 * genl_unregister_family - unregister generic netlink family
@@ -434,33 +398,29 @@ EXPORT_SYMBOL(__genl_register_family);
434 * 398 *
435 * Returns 0 on success or a negative error code. 399 * Returns 0 on success or a negative error code.
436 */ 400 */
437int genl_unregister_family(struct genl_family *family) 401int genl_unregister_family(const struct genl_family *family)
438{ 402{
439 struct genl_family *rc;
440
441 genl_lock_all(); 403 genl_lock_all();
442 404
443 list_for_each_entry(rc, genl_family_chain(family->id), family_list) { 405 if (!genl_family_find_byid(family->id)) {
444 if (family->id != rc->id || strcmp(rc->name, family->name)) 406 genl_unlock_all();
445 continue; 407 return -ENOENT;
408 }
446 409
447 genl_unregister_mc_groups(family); 410 genl_unregister_mc_groups(family);
448 411
449 list_del(&rc->family_list); 412 idr_remove(&genl_fam_idr, family->id);
450 family->n_ops = 0;
451 up_write(&cb_lock);
452 wait_event(genl_sk_destructing_waitq,
453 atomic_read(&genl_sk_destructing_cnt) == 0);
454 genl_unlock();
455 413
456 kfree(family->attrbuf); 414 up_write(&cb_lock);
457 genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); 415 wait_event(genl_sk_destructing_waitq,
458 return 0; 416 atomic_read(&genl_sk_destructing_cnt) == 0);
459 } 417 genl_unlock();
460 418
461 genl_unlock_all(); 419 kfree(family->attrbuf);
420
421 genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0);
462 422
463 return -ENOENT; 423 return 0;
464} 424}
465EXPORT_SYMBOL(genl_unregister_family); 425EXPORT_SYMBOL(genl_unregister_family);
466 426
@@ -476,7 +436,7 @@ EXPORT_SYMBOL(genl_unregister_family);
476 * Returns pointer to user specific header 436 * Returns pointer to user specific header
477 */ 437 */
478void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, 438void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
479 struct genl_family *family, int flags, u8 cmd) 439 const struct genl_family *family, int flags, u8 cmd)
480{ 440{
481 struct nlmsghdr *nlh; 441 struct nlmsghdr *nlh;
482 struct genlmsghdr *hdr; 442 struct genlmsghdr *hdr;
@@ -535,7 +495,7 @@ static int genl_lock_done(struct netlink_callback *cb)
535 return rc; 495 return rc;
536} 496}
537 497
538static int genl_family_rcv_msg(struct genl_family *family, 498static int genl_family_rcv_msg(const struct genl_family *family,
539 struct sk_buff *skb, 499 struct sk_buff *skb,
540 struct nlmsghdr *nlh) 500 struct nlmsghdr *nlh)
541{ 501{
@@ -647,7 +607,7 @@ out:
647 607
648static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) 608static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
649{ 609{
650 struct genl_family *family; 610 const struct genl_family *family;
651 int err; 611 int err;
652 612
653 family = genl_family_find_byid(nlh->nlmsg_type); 613 family = genl_family_find_byid(nlh->nlmsg_type);
@@ -676,15 +636,9 @@ static void genl_rcv(struct sk_buff *skb)
676 * Controller 636 * Controller
677 **************************************************************************/ 637 **************************************************************************/
678 638
679static struct genl_family genl_ctrl = { 639static struct genl_family genl_ctrl;
680 .id = GENL_ID_CTRL,
681 .name = "nlctrl",
682 .version = 0x2,
683 .maxattr = CTRL_ATTR_MAX,
684 .netnsok = true,
685};
686 640
687static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, 641static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq,
688 u32 flags, struct sk_buff *skb, u8 cmd) 642 u32 flags, struct sk_buff *skb, u8 cmd)
689{ 643{
690 void *hdr; 644 void *hdr;
@@ -771,7 +725,7 @@ nla_put_failure:
771 return -EMSGSIZE; 725 return -EMSGSIZE;
772} 726}
773 727
774static int ctrl_fill_mcgrp_info(struct genl_family *family, 728static int ctrl_fill_mcgrp_info(const struct genl_family *family,
775 const struct genl_multicast_group *grp, 729 const struct genl_multicast_group *grp,
776 int grp_id, u32 portid, u32 seq, u32 flags, 730 int grp_id, u32 portid, u32 seq, u32 flags,
777 struct sk_buff *skb, u8 cmd) 731 struct sk_buff *skb, u8 cmd)
@@ -814,37 +768,30 @@ nla_put_failure:
814 768
815static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) 769static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
816{ 770{
817 771 int n = 0;
818 int i, n = 0;
819 struct genl_family *rt; 772 struct genl_family *rt;
820 struct net *net = sock_net(skb->sk); 773 struct net *net = sock_net(skb->sk);
821 int chains_to_skip = cb->args[0]; 774 int fams_to_skip = cb->args[0];
822 int fams_to_skip = cb->args[1]; 775 unsigned int id;
823
824 for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) {
825 n = 0;
826 list_for_each_entry(rt, genl_family_chain(i), family_list) {
827 if (!rt->netnsok && !net_eq(net, &init_net))
828 continue;
829 if (++n < fams_to_skip)
830 continue;
831 if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
832 cb->nlh->nlmsg_seq, NLM_F_MULTI,
833 skb, CTRL_CMD_NEWFAMILY) < 0)
834 goto errout;
835 }
836 776
837 fams_to_skip = 0; 777 idr_for_each_entry(&genl_fam_idr, rt, id) {
838 } 778 if (!rt->netnsok && !net_eq(net, &init_net))
779 continue;
780
781 if (n++ < fams_to_skip)
782 continue;
839 783
840errout: 784 if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
841 cb->args[0] = i; 785 cb->nlh->nlmsg_seq, NLM_F_MULTI,
842 cb->args[1] = n; 786 skb, CTRL_CMD_NEWFAMILY) < 0)
787 break;
788 }
843 789
790 cb->args[0] = n;
844 return skb->len; 791 return skb->len;
845} 792}
846 793
847static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, 794static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family,
848 u32 portid, int seq, u8 cmd) 795 u32 portid, int seq, u8 cmd)
849{ 796{
850 struct sk_buff *skb; 797 struct sk_buff *skb;
@@ -864,7 +811,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
864} 811}
865 812
866static struct sk_buff * 813static struct sk_buff *
867ctrl_build_mcgrp_msg(struct genl_family *family, 814ctrl_build_mcgrp_msg(const struct genl_family *family,
868 const struct genl_multicast_group *grp, 815 const struct genl_multicast_group *grp,
869 int grp_id, u32 portid, int seq, u8 cmd) 816 int grp_id, u32 portid, int seq, u8 cmd)
870{ 817{
@@ -894,7 +841,7 @@ static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = {
894static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) 841static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
895{ 842{
896 struct sk_buff *msg; 843 struct sk_buff *msg;
897 struct genl_family *res = NULL; 844 const struct genl_family *res = NULL;
898 int err = -EINVAL; 845 int err = -EINVAL;
899 846
900 if (info->attrs[CTRL_ATTR_FAMILY_ID]) { 847 if (info->attrs[CTRL_ATTR_FAMILY_ID]) {
@@ -938,7 +885,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
938 return genlmsg_reply(msg, info); 885 return genlmsg_reply(msg, info);
939} 886}
940 887
941static int genl_ctrl_event(int event, struct genl_family *family, 888static int genl_ctrl_event(int event, const struct genl_family *family,
942 const struct genl_multicast_group *grp, 889 const struct genl_multicast_group *grp,
943 int grp_id) 890 int grp_id)
944{ 891{
@@ -992,27 +939,39 @@ static const struct genl_multicast_group genl_ctrl_groups[] = {
992 { .name = "notify", }, 939 { .name = "notify", },
993}; 940};
994 941
942static struct genl_family genl_ctrl __ro_after_init = {
943 .module = THIS_MODULE,
944 .ops = genl_ctrl_ops,
945 .n_ops = ARRAY_SIZE(genl_ctrl_ops),
946 .mcgrps = genl_ctrl_groups,
947 .n_mcgrps = ARRAY_SIZE(genl_ctrl_groups),
948 .id = GENL_ID_CTRL,
949 .name = "nlctrl",
950 .version = 0x2,
951 .maxattr = CTRL_ATTR_MAX,
952 .netnsok = true,
953};
954
995static int genl_bind(struct net *net, int group) 955static int genl_bind(struct net *net, int group)
996{ 956{
997 int i, err = -ENOENT; 957 struct genl_family *f;
958 int err = -ENOENT;
959 unsigned int id;
998 960
999 down_read(&cb_lock); 961 down_read(&cb_lock);
1000 for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { 962
1001 struct genl_family *f; 963 idr_for_each_entry(&genl_fam_idr, f, id) {
1002 964 if (group >= f->mcgrp_offset &&
1003 list_for_each_entry(f, genl_family_chain(i), family_list) { 965 group < f->mcgrp_offset + f->n_mcgrps) {
1004 if (group >= f->mcgrp_offset && 966 int fam_grp = group - f->mcgrp_offset;
1005 group < f->mcgrp_offset + f->n_mcgrps) { 967
1006 int fam_grp = group - f->mcgrp_offset; 968 if (!f->netnsok && net != &init_net)
1007 969 err = -ENOENT;
1008 if (!f->netnsok && net != &init_net) 970 else if (f->mcast_bind)
1009 err = -ENOENT; 971 err = f->mcast_bind(net, fam_grp);
1010 else if (f->mcast_bind) 972 else
1011 err = f->mcast_bind(net, fam_grp); 973 err = 0;
1012 else 974 break;
1013 err = 0;
1014 break;
1015 }
1016 } 975 }
1017 } 976 }
1018 up_read(&cb_lock); 977 up_read(&cb_lock);
@@ -1022,21 +981,19 @@ static int genl_bind(struct net *net, int group)
1022 981
1023static void genl_unbind(struct net *net, int group) 982static void genl_unbind(struct net *net, int group)
1024{ 983{
1025 int i; 984 struct genl_family *f;
985 unsigned int id;
1026 986
1027 down_read(&cb_lock); 987 down_read(&cb_lock);
1028 for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
1029 struct genl_family *f;
1030 988
1031 list_for_each_entry(f, genl_family_chain(i), family_list) { 989 idr_for_each_entry(&genl_fam_idr, f, id) {
1032 if (group >= f->mcgrp_offset && 990 if (group >= f->mcgrp_offset &&
1033 group < f->mcgrp_offset + f->n_mcgrps) { 991 group < f->mcgrp_offset + f->n_mcgrps) {
1034 int fam_grp = group - f->mcgrp_offset; 992 int fam_grp = group - f->mcgrp_offset;
1035 993
1036 if (f->mcast_unbind) 994 if (f->mcast_unbind)
1037 f->mcast_unbind(net, fam_grp); 995 f->mcast_unbind(net, fam_grp);
1038 break; 996 break;
1039 }
1040 } 997 }
1041 } 998 }
1042 up_read(&cb_lock); 999 up_read(&cb_lock);
@@ -1076,13 +1033,9 @@ static struct pernet_operations genl_pernet_ops = {
1076 1033
1077static int __init genl_init(void) 1034static int __init genl_init(void)
1078{ 1035{
1079 int i, err; 1036 int err;
1080
1081 for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
1082 INIT_LIST_HEAD(&family_ht[i]);
1083 1037
1084 err = genl_register_family_with_ops_groups(&genl_ctrl, genl_ctrl_ops, 1038 err = genl_register_family(&genl_ctrl);
1085 genl_ctrl_groups);
1086 if (err < 0) 1039 if (err < 0)
1087 goto problem; 1040 goto problem;
1088 1041
@@ -1098,6 +1051,25 @@ problem:
1098 1051
1099subsys_initcall(genl_init); 1052subsys_initcall(genl_init);
1100 1053
1054/**
1055 * genl_family_attrbuf - return family's attrbuf
1056 * @family: the family
1057 *
1058 * Return the family's attrbuf, while validating that it's
1059 * actually valid to access it.
1060 *
1061 * You cannot use this function with a family that has parallel_ops
1062 * and you can only use it within (pre/post) doit/dumpit callbacks.
1063 */
1064struct nlattr **genl_family_attrbuf(const struct genl_family *family)
1065{
1066 if (!WARN_ON(family->parallel_ops))
1067 lockdep_assert_held(&genl_mutex);
1068
1069 return family->attrbuf;
1070}
1071EXPORT_SYMBOL(genl_family_attrbuf);
1072
1101static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, 1073static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
1102 gfp_t flags) 1074 gfp_t flags)
1103{ 1075{
@@ -1127,8 +1099,9 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
1127 return err; 1099 return err;
1128} 1100}
1129 1101
1130int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, 1102int genlmsg_multicast_allns(const struct genl_family *family,
1131 u32 portid, unsigned int group, gfp_t flags) 1103 struct sk_buff *skb, u32 portid,
1104 unsigned int group, gfp_t flags)
1132{ 1105{
1133 if (WARN_ON_ONCE(group >= family->n_mcgrps)) 1106 if (WARN_ON_ONCE(group >= family->n_mcgrps))
1134 return -EINVAL; 1107 return -EINVAL;
@@ -1137,7 +1110,7 @@ int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb,
1137} 1110}
1138EXPORT_SYMBOL(genlmsg_multicast_allns); 1111EXPORT_SYMBOL(genlmsg_multicast_allns);
1139 1112
1140void genl_notify(struct genl_family *family, struct sk_buff *skb, 1113void genl_notify(const struct genl_family *family, struct sk_buff *skb,
1141 struct genl_info *info, u32 group, gfp_t flags) 1114 struct genl_info *info, u32 group, gfp_t flags)
1142{ 1115{
1143 struct net *net = genl_info_net(info); 1116 struct net *net = genl_info_net(info);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index ea023b35f1c2..03f3d5c7beb8 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -38,14 +38,7 @@ static const struct genl_multicast_group nfc_genl_mcgrps[] = {
38 { .name = NFC_GENL_MCAST_EVENT_NAME, }, 38 { .name = NFC_GENL_MCAST_EVENT_NAME, },
39}; 39};
40 40
41static struct genl_family nfc_genl_family = { 41static struct genl_family nfc_genl_family;
42 .id = GENL_ID_GENERATE,
43 .hdrsize = 0,
44 .name = NFC_GENL_NAME,
45 .version = NFC_GENL_VERSION,
46 .maxattr = NFC_ATTR_MAX,
47};
48
49static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { 42static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
50 [NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 }, 43 [NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 },
51 [NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING, 44 [NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING,
@@ -120,21 +113,20 @@ nla_put_failure:
120 113
121static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb) 114static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
122{ 115{
116 struct nlattr **attrbuf = genl_family_attrbuf(&nfc_genl_family);
123 struct nfc_dev *dev; 117 struct nfc_dev *dev;
124 int rc; 118 int rc;
125 u32 idx; 119 u32 idx;
126 120
127 rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize, 121 rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize,
128 nfc_genl_family.attrbuf, 122 attrbuf, nfc_genl_family.maxattr, nfc_genl_policy);
129 nfc_genl_family.maxattr,
130 nfc_genl_policy);
131 if (rc < 0) 123 if (rc < 0)
132 return ERR_PTR(rc); 124 return ERR_PTR(rc);
133 125
134 if (!nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX]) 126 if (!attrbuf[NFC_ATTR_DEVICE_INDEX])
135 return ERR_PTR(-EINVAL); 127 return ERR_PTR(-EINVAL);
136 128
137 idx = nla_get_u32(nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX]); 129 idx = nla_get_u32(attrbuf[NFC_ATTR_DEVICE_INDEX]);
138 130
139 dev = nfc_get_device(idx); 131 dev = nfc_get_device(idx);
140 if (!dev) 132 if (!dev)
@@ -1754,6 +1746,18 @@ static const struct genl_ops nfc_genl_ops[] = {
1754 }, 1746 },
1755}; 1747};
1756 1748
1749static struct genl_family nfc_genl_family __ro_after_init = {
1750 .hdrsize = 0,
1751 .name = NFC_GENL_NAME,
1752 .version = NFC_GENL_VERSION,
1753 .maxattr = NFC_ATTR_MAX,
1754 .module = THIS_MODULE,
1755 .ops = nfc_genl_ops,
1756 .n_ops = ARRAY_SIZE(nfc_genl_ops),
1757 .mcgrps = nfc_genl_mcgrps,
1758 .n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps),
1759};
1760
1757 1761
1758struct urelease_work { 1762struct urelease_work {
1759 struct work_struct w; 1763 struct work_struct w;
@@ -1839,9 +1843,7 @@ int __init nfc_genl_init(void)
1839{ 1843{
1840 int rc; 1844 int rc;
1841 1845
1842 rc = genl_register_family_with_ops_groups(&nfc_genl_family, 1846 rc = genl_register_family(&nfc_genl_family);
1843 nfc_genl_ops,
1844 nfc_genl_mcgrps);
1845 if (rc) 1847 if (rc)
1846 return rc; 1848 return rc;
1847 1849
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 4e03f64709bc..514f7bcf7c63 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -62,9 +62,11 @@ struct ovs_frag_data {
62 struct vport *vport; 62 struct vport *vport;
63 struct ovs_skb_cb cb; 63 struct ovs_skb_cb cb;
64 __be16 inner_protocol; 64 __be16 inner_protocol;
65 __u16 vlan_tci; 65 u16 network_offset; /* valid only for MPLS */
66 u16 vlan_tci;
66 __be16 vlan_proto; 67 __be16 vlan_proto;
67 unsigned int l2_len; 68 unsigned int l2_len;
69 u8 mac_proto;
68 u8 l2_data[MAX_L2_LEN]; 70 u8 l2_data[MAX_L2_LEN];
69}; 71};
70 72
@@ -136,12 +138,12 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
136 138
137static void invalidate_flow_key(struct sw_flow_key *key) 139static void invalidate_flow_key(struct sw_flow_key *key)
138{ 140{
139 key->eth.type = htons(0); 141 key->mac_proto |= SW_FLOW_KEY_INVALID;
140} 142}
141 143
142static bool is_flow_key_valid(const struct sw_flow_key *key) 144static bool is_flow_key_valid(const struct sw_flow_key *key)
143{ 145{
144 return !!key->eth.type; 146 return !(key->mac_proto & SW_FLOW_KEY_INVALID);
145} 147}
146 148
147static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, 149static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
@@ -185,7 +187,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
185 187
186 skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); 188 skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
187 189
188 update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype); 190 if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET)
191 update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
189 skb->protocol = mpls->mpls_ethertype; 192 skb->protocol = mpls->mpls_ethertype;
190 193
191 invalidate_flow_key(key); 194 invalidate_flow_key(key);
@@ -195,7 +198,6 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
195static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, 198static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
196 const __be16 ethertype) 199 const __be16 ethertype)
197{ 200{
198 struct ethhdr *hdr;
199 int err; 201 int err;
200 202
201 err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); 203 err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
@@ -211,11 +213,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
211 skb_reset_mac_header(skb); 213 skb_reset_mac_header(skb);
212 skb_set_network_header(skb, skb->mac_len); 214 skb_set_network_header(skb, skb->mac_len);
213 215
214 /* mpls_hdr() is used to locate the ethertype field correctly in the 216 if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) {
215 * presence of VLAN tags. 217 struct ethhdr *hdr;
216 */ 218
217 hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); 219 /* mpls_hdr() is used to locate the ethertype field correctly in the
218 update_ethertype(skb, hdr, ethertype); 220 * presence of VLAN tags.
221 */
222 hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
223 update_ethertype(skb, hdr, ethertype);
224 }
219 if (eth_p_mpls(skb->protocol)) 225 if (eth_p_mpls(skb->protocol))
220 skb->protocol = ethertype; 226 skb->protocol = ethertype;
221 227
@@ -311,6 +317,47 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
311 return 0; 317 return 0;
312} 318}
313 319
320/* pop_eth does not support VLAN packets as this action is never called
321 * for them.
322 */
323static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
324{
325 skb_pull_rcsum(skb, ETH_HLEN);
326 skb_reset_mac_header(skb);
327 skb_reset_mac_len(skb);
328
329 /* safe right before invalidate_flow_key */
330 key->mac_proto = MAC_PROTO_NONE;
331 invalidate_flow_key(key);
332 return 0;
333}
334
335static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
336 const struct ovs_action_push_eth *ethh)
337{
338 struct ethhdr *hdr;
339
340 /* Add the new Ethernet header */
341 if (skb_cow_head(skb, ETH_HLEN) < 0)
342 return -ENOMEM;
343
344 skb_push(skb, ETH_HLEN);
345 skb_reset_mac_header(skb);
346 skb_reset_mac_len(skb);
347
348 hdr = eth_hdr(skb);
349 ether_addr_copy(hdr->h_source, ethh->addresses.eth_src);
350 ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst);
351 hdr->h_proto = skb->protocol;
352
353 skb_postpush_rcsum(skb, hdr, ETH_HLEN);
354
355 /* safe right before invalidate_flow_key */
356 key->mac_proto = MAC_PROTO_ETHERNET;
357 invalidate_flow_key(key);
358 return 0;
359}
360
314static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, 361static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
315 __be32 addr, __be32 new_addr) 362 __be32 addr, __be32 new_addr)
316{ 363{
@@ -666,7 +713,13 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk
666 skb_postpush_rcsum(skb, skb->data, data->l2_len); 713 skb_postpush_rcsum(skb, skb->data, data->l2_len);
667 skb_reset_mac_header(skb); 714 skb_reset_mac_header(skb);
668 715
669 ovs_vport_send(vport, skb); 716 if (eth_p_mpls(skb->protocol)) {
717 skb->inner_network_header = skb->network_header;
718 skb_set_network_header(skb, data->network_offset);
719 skb_reset_mac_len(skb);
720 }
721
722 ovs_vport_send(vport, skb, data->mac_proto);
670 return 0; 723 return 0;
671} 724}
672 725
@@ -684,7 +737,8 @@ static struct dst_ops ovs_dst_ops = {
684/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is 737/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is
685 * ovs_vport_output(), which is called once per fragmented packet. 738 * ovs_vport_output(), which is called once per fragmented packet.
686 */ 739 */
687static void prepare_frag(struct vport *vport, struct sk_buff *skb) 740static void prepare_frag(struct vport *vport, struct sk_buff *skb,
741 u16 orig_network_offset, u8 mac_proto)
688{ 742{
689 unsigned int hlen = skb_network_offset(skb); 743 unsigned int hlen = skb_network_offset(skb);
690 struct ovs_frag_data *data; 744 struct ovs_frag_data *data;
@@ -694,8 +748,10 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)
694 data->vport = vport; 748 data->vport = vport;
695 data->cb = *OVS_CB(skb); 749 data->cb = *OVS_CB(skb);
696 data->inner_protocol = skb->inner_protocol; 750 data->inner_protocol = skb->inner_protocol;
751 data->network_offset = orig_network_offset;
697 data->vlan_tci = skb->vlan_tci; 752 data->vlan_tci = skb->vlan_tci;
698 data->vlan_proto = skb->vlan_proto; 753 data->vlan_proto = skb->vlan_proto;
754 data->mac_proto = mac_proto;
699 data->l2_len = hlen; 755 data->l2_len = hlen;
700 memcpy(&data->l2_data, skb->data, hlen); 756 memcpy(&data->l2_data, skb->data, hlen);
701 757
@@ -704,18 +760,27 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)
704} 760}
705 761
706static void ovs_fragment(struct net *net, struct vport *vport, 762static void ovs_fragment(struct net *net, struct vport *vport,
707 struct sk_buff *skb, u16 mru, __be16 ethertype) 763 struct sk_buff *skb, u16 mru,
764 struct sw_flow_key *key)
708{ 765{
766 u16 orig_network_offset = 0;
767
768 if (eth_p_mpls(skb->protocol)) {
769 orig_network_offset = skb_network_offset(skb);
770 skb->network_header = skb->inner_network_header;
771 }
772
709 if (skb_network_offset(skb) > MAX_L2_LEN) { 773 if (skb_network_offset(skb) > MAX_L2_LEN) {
710 OVS_NLERR(1, "L2 header too long to fragment"); 774 OVS_NLERR(1, "L2 header too long to fragment");
711 goto err; 775 goto err;
712 } 776 }
713 777
714 if (ethertype == htons(ETH_P_IP)) { 778 if (key->eth.type == htons(ETH_P_IP)) {
715 struct dst_entry ovs_dst; 779 struct dst_entry ovs_dst;
716 unsigned long orig_dst; 780 unsigned long orig_dst;
717 781
718 prepare_frag(vport, skb); 782 prepare_frag(vport, skb, orig_network_offset,
783 ovs_key_mac_proto(key));
719 dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1, 784 dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
720 DST_OBSOLETE_NONE, DST_NOCOUNT); 785 DST_OBSOLETE_NONE, DST_NOCOUNT);
721 ovs_dst.dev = vport->dev; 786 ovs_dst.dev = vport->dev;
@@ -726,7 +791,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
726 791
727 ip_do_fragment(net, skb->sk, skb, ovs_vport_output); 792 ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
728 refdst_drop(orig_dst); 793 refdst_drop(orig_dst);
729 } else if (ethertype == htons(ETH_P_IPV6)) { 794 } else if (key->eth.type == htons(ETH_P_IPV6)) {
730 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); 795 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
731 unsigned long orig_dst; 796 unsigned long orig_dst;
732 struct rt6_info ovs_rt; 797 struct rt6_info ovs_rt;
@@ -735,7 +800,8 @@ static void ovs_fragment(struct net *net, struct vport *vport,
735 goto err; 800 goto err;
736 } 801 }
737 802
738 prepare_frag(vport, skb); 803 prepare_frag(vport, skb, orig_network_offset,
804 ovs_key_mac_proto(key));
739 memset(&ovs_rt, 0, sizeof(ovs_rt)); 805 memset(&ovs_rt, 0, sizeof(ovs_rt));
740 dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, 806 dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
741 DST_OBSOLETE_NONE, DST_NOCOUNT); 807 DST_OBSOLETE_NONE, DST_NOCOUNT);
@@ -749,7 +815,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
749 refdst_drop(orig_dst); 815 refdst_drop(orig_dst);
750 } else { 816 } else {
751 WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", 817 WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
752 ovs_vport_name(vport), ntohs(ethertype), mru, 818 ovs_vport_name(vport), ntohs(key->eth.type), mru,
753 vport->dev->mtu); 819 vport->dev->mtu);
754 goto err; 820 goto err;
755 } 821 }
@@ -769,26 +835,19 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
769 u32 cutlen = OVS_CB(skb)->cutlen; 835 u32 cutlen = OVS_CB(skb)->cutlen;
770 836
771 if (unlikely(cutlen > 0)) { 837 if (unlikely(cutlen > 0)) {
772 if (skb->len - cutlen > ETH_HLEN) 838 if (skb->len - cutlen > ovs_mac_header_len(key))
773 pskb_trim(skb, skb->len - cutlen); 839 pskb_trim(skb, skb->len - cutlen);
774 else 840 else
775 pskb_trim(skb, ETH_HLEN); 841 pskb_trim(skb, ovs_mac_header_len(key));
776 } 842 }
777 843
778 if (likely(!mru || (skb->len <= mru + ETH_HLEN))) { 844 if (likely(!mru ||
779 ovs_vport_send(vport, skb); 845 (skb->len <= mru + vport->dev->hard_header_len))) {
846 ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
780 } else if (mru <= vport->dev->mtu) { 847 } else if (mru <= vport->dev->mtu) {
781 struct net *net = read_pnet(&dp->net); 848 struct net *net = read_pnet(&dp->net);
782 __be16 ethertype = key->eth.type;
783
784 if (!is_flow_key_valid(key)) {
785 if (eth_p_mpls(skb->protocol))
786 ethertype = skb->inner_protocol;
787 else
788 ethertype = vlan_get_protocol(skb);
789 }
790 849
791 ovs_fragment(net, vport, skb, mru, ethertype); 850 ovs_fragment(net, vport, skb, mru, key);
792 } else { 851 } else {
793 kfree_skb(skb); 852 kfree_skb(skb);
794 } 853 }
@@ -1182,6 +1241,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
1182 if (err) 1241 if (err)
1183 return err == -EINPROGRESS ? 0 : err; 1242 return err == -EINPROGRESS ? 0 : err;
1184 break; 1243 break;
1244
1245 case OVS_ACTION_ATTR_PUSH_ETH:
1246 err = push_eth(skb, key, nla_data(a));
1247 break;
1248
1249 case OVS_ACTION_ATTR_POP_ETH:
1250 err = pop_eth(skb, key);
1251 break;
1185 } 1252 }
1186 1253
1187 if (unlikely(err)) { 1254 if (unlikely(err)) {
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index fecefa2dc94e..6b78bab27755 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -728,12 +728,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
728 skb->nfctinfo = IP_CT_NEW; 728 skb->nfctinfo = IP_CT_NEW;
729 } 729 }
730 730
731 /* Repeat if requested, see nf_iterate(). */ 731 err = nf_conntrack_in(net, info->family,
732 do { 732 NF_INET_PRE_ROUTING, skb);
733 err = nf_conntrack_in(net, info->family,
734 NF_INET_PRE_ROUTING, skb);
735 } while (err == NF_REPEAT);
736
737 if (err != NF_ACCEPT) 733 if (err != NF_ACCEPT)
738 return -ENOENT; 734 return -ENOENT;
739 735
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 4d67ea856067..2d4c4d3911c0 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -58,8 +58,7 @@
58#include "vport-internal_dev.h" 58#include "vport-internal_dev.h"
59#include "vport-netdev.h" 59#include "vport-netdev.h"
60 60
61int ovs_net_id __read_mostly; 61unsigned int ovs_net_id __read_mostly;
62EXPORT_SYMBOL_GPL(ovs_net_id);
63 62
64static struct genl_family dp_packet_genl_family; 63static struct genl_family dp_packet_genl_family;
65static struct genl_family dp_flow_genl_family; 64static struct genl_family dp_flow_genl_family;
@@ -131,7 +130,6 @@ int lockdep_ovsl_is_held(void)
131 else 130 else
132 return 1; 131 return 1;
133} 132}
134EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
135#endif 133#endif
136 134
137static struct vport *new_vport(const struct vport_parms *); 135static struct vport *new_vport(const struct vport_parms *);
@@ -562,7 +560,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
562 struct sw_flow *flow; 560 struct sw_flow *flow;
563 struct sw_flow_actions *sf_acts; 561 struct sw_flow_actions *sf_acts;
564 struct datapath *dp; 562 struct datapath *dp;
565 struct ethhdr *eth;
566 struct vport *input_vport; 563 struct vport *input_vport;
567 u16 mru = 0; 564 u16 mru = 0;
568 int len; 565 int len;
@@ -583,17 +580,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
583 580
584 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); 581 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
585 582
586 skb_reset_mac_header(packet);
587 eth = eth_hdr(packet);
588
589 /* Normally, setting the skb 'protocol' field would be handled by a
590 * call to eth_type_trans(), but it assumes there's a sending
591 * device, which we may not have. */
592 if (eth_proto_is_802_3(eth->h_proto))
593 packet->protocol = eth->h_proto;
594 else
595 packet->protocol = htons(ETH_P_802_2);
596
597 /* Set packet's mru */ 583 /* Set packet's mru */
598 if (a[OVS_PACKET_ATTR_MRU]) { 584 if (a[OVS_PACKET_ATTR_MRU]) {
599 mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]); 585 mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
@@ -620,6 +606,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
620 rcu_assign_pointer(flow->sf_acts, acts); 606 rcu_assign_pointer(flow->sf_acts, acts);
621 packet->priority = flow->key.phy.priority; 607 packet->priority = flow->key.phy.priority;
622 packet->mark = flow->key.phy.skb_mark; 608 packet->mark = flow->key.phy.skb_mark;
609 packet->protocol = flow->key.eth.type;
623 610
624 rcu_read_lock(); 611 rcu_read_lock();
625 dp = get_dp_rcu(net, ovs_header->dp_ifindex); 612 dp = get_dp_rcu(net, ovs_header->dp_ifindex);
@@ -672,8 +659,7 @@ static const struct genl_ops dp_packet_genl_ops[] = {
672 } 659 }
673}; 660};
674 661
675static struct genl_family dp_packet_genl_family = { 662static struct genl_family dp_packet_genl_family __ro_after_init = {
676 .id = GENL_ID_GENERATE,
677 .hdrsize = sizeof(struct ovs_header), 663 .hdrsize = sizeof(struct ovs_header),
678 .name = OVS_PACKET_FAMILY, 664 .name = OVS_PACKET_FAMILY,
679 .version = OVS_PACKET_VERSION, 665 .version = OVS_PACKET_VERSION,
@@ -682,6 +668,7 @@ static struct genl_family dp_packet_genl_family = {
682 .parallel_ops = true, 668 .parallel_ops = true,
683 .ops = dp_packet_genl_ops, 669 .ops = dp_packet_genl_ops,
684 .n_ops = ARRAY_SIZE(dp_packet_genl_ops), 670 .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
671 .module = THIS_MODULE,
685}; 672};
686 673
687static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, 674static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
@@ -1437,8 +1424,7 @@ static const struct genl_ops dp_flow_genl_ops[] = {
1437 }, 1424 },
1438}; 1425};
1439 1426
1440static struct genl_family dp_flow_genl_family = { 1427static struct genl_family dp_flow_genl_family __ro_after_init = {
1441 .id = GENL_ID_GENERATE,
1442 .hdrsize = sizeof(struct ovs_header), 1428 .hdrsize = sizeof(struct ovs_header),
1443 .name = OVS_FLOW_FAMILY, 1429 .name = OVS_FLOW_FAMILY,
1444 .version = OVS_FLOW_VERSION, 1430 .version = OVS_FLOW_VERSION,
@@ -1449,6 +1435,7 @@ static struct genl_family dp_flow_genl_family = {
1449 .n_ops = ARRAY_SIZE(dp_flow_genl_ops), 1435 .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
1450 .mcgrps = &ovs_dp_flow_multicast_group, 1436 .mcgrps = &ovs_dp_flow_multicast_group,
1451 .n_mcgrps = 1, 1437 .n_mcgrps = 1,
1438 .module = THIS_MODULE,
1452}; 1439};
1453 1440
1454static size_t ovs_dp_cmd_msg_size(void) 1441static size_t ovs_dp_cmd_msg_size(void)
@@ -1823,8 +1810,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
1823 }, 1810 },
1824}; 1811};
1825 1812
1826static struct genl_family dp_datapath_genl_family = { 1813static struct genl_family dp_datapath_genl_family __ro_after_init = {
1827 .id = GENL_ID_GENERATE,
1828 .hdrsize = sizeof(struct ovs_header), 1814 .hdrsize = sizeof(struct ovs_header),
1829 .name = OVS_DATAPATH_FAMILY, 1815 .name = OVS_DATAPATH_FAMILY,
1830 .version = OVS_DATAPATH_VERSION, 1816 .version = OVS_DATAPATH_VERSION,
@@ -1835,6 +1821,7 @@ static struct genl_family dp_datapath_genl_family = {
1835 .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), 1821 .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1836 .mcgrps = &ovs_dp_datapath_multicast_group, 1822 .mcgrps = &ovs_dp_datapath_multicast_group,
1837 .n_mcgrps = 1, 1823 .n_mcgrps = 1,
1824 .module = THIS_MODULE,
1838}; 1825};
1839 1826
1840/* Called with ovs_mutex or RCU read lock. */ 1827/* Called with ovs_mutex or RCU read lock. */
@@ -2245,8 +2232,7 @@ static const struct genl_ops dp_vport_genl_ops[] = {
2245 }, 2232 },
2246}; 2233};
2247 2234
2248struct genl_family dp_vport_genl_family = { 2235struct genl_family dp_vport_genl_family __ro_after_init = {
2249 .id = GENL_ID_GENERATE,
2250 .hdrsize = sizeof(struct ovs_header), 2236 .hdrsize = sizeof(struct ovs_header),
2251 .name = OVS_VPORT_FAMILY, 2237 .name = OVS_VPORT_FAMILY,
2252 .version = OVS_VPORT_VERSION, 2238 .version = OVS_VPORT_VERSION,
@@ -2257,6 +2243,7 @@ struct genl_family dp_vport_genl_family = {
2257 .n_ops = ARRAY_SIZE(dp_vport_genl_ops), 2243 .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
2258 .mcgrps = &ovs_dp_vport_multicast_group, 2244 .mcgrps = &ovs_dp_vport_multicast_group,
2259 .n_mcgrps = 1, 2245 .n_mcgrps = 1,
2246 .module = THIS_MODULE,
2260}; 2247};
2261 2248
2262static struct genl_family * const dp_genl_families[] = { 2249static struct genl_family * const dp_genl_families[] = {
@@ -2274,7 +2261,7 @@ static void dp_unregister_genl(int n_families)
2274 genl_unregister_family(dp_genl_families[i]); 2261 genl_unregister_family(dp_genl_families[i]);
2275} 2262}
2276 2263
2277static int dp_register_genl(void) 2264static int __init dp_register_genl(void)
2278{ 2265{
2279 int err; 2266 int err;
2280 int i; 2267 int i;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index ab85c1cae255..1c6e9377436d 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -144,7 +144,7 @@ struct ovs_net {
144 bool xt_label; 144 bool xt_label;
145}; 145};
146 146
147extern int ovs_net_id; 147extern unsigned int ovs_net_id;
148void ovs_lock(void); 148void ovs_lock(void);
149void ovs_unlock(void); 149void ovs_unlock(void);
150 150
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 22087062bd10..08aa926cd5cf 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -334,14 +334,17 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
334 return 1; 334 return 1;
335} 335}
336 336
337static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) 337static void clear_vlan(struct sw_flow_key *key)
338{ 338{
339 int res;
340
341 key->eth.vlan.tci = 0; 339 key->eth.vlan.tci = 0;
342 key->eth.vlan.tpid = 0; 340 key->eth.vlan.tpid = 0;
343 key->eth.cvlan.tci = 0; 341 key->eth.cvlan.tci = 0;
344 key->eth.cvlan.tpid = 0; 342 key->eth.cvlan.tpid = 0;
343}
344
345static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
346{
347 int res;
345 348
346 if (skb_vlan_tag_present(skb)) { 349 if (skb_vlan_tag_present(skb)) {
347 key->eth.vlan.tci = htons(skb->vlan_tci); 350 key->eth.vlan.tci = htons(skb->vlan_tci);
@@ -483,17 +486,20 @@ invalid:
483 * 486 *
484 * Returns 0 if successful, otherwise a negative errno value. 487 * Returns 0 if successful, otherwise a negative errno value.
485 * 488 *
486 * Initializes @skb header pointers as follows: 489 * Initializes @skb header fields as follows:
487 * 490 *
488 * - skb->mac_header: the Ethernet header. 491 * - skb->mac_header: the L2 header.
489 * 492 *
490 * - skb->network_header: just past the Ethernet header, or just past the 493 * - skb->network_header: just past the L2 header, or just past the
491 * VLAN header, to the first byte of the Ethernet payload. 494 * VLAN header, to the first byte of the L2 payload.
492 * 495 *
493 * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 496 * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
494 * on output, then just past the IP header, if one is present and 497 * on output, then just past the IP header, if one is present and
495 * of a correct length, otherwise the same as skb->network_header. 498 * of a correct length, otherwise the same as skb->network_header.
496 * For other key->eth.type values it is left untouched. 499 * For other key->eth.type values it is left untouched.
500 *
501 * - skb->protocol: the type of the data starting at skb->network_header.
502 * Equals to key->eth.type.
497 */ 503 */
498static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) 504static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
499{ 505{
@@ -505,28 +511,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
505 511
506 skb_reset_mac_header(skb); 512 skb_reset_mac_header(skb);
507 513
508 /* Link layer. We are guaranteed to have at least the 14 byte Ethernet 514 /* Link layer. */
509 * header in the linear data area. 515 clear_vlan(key);
510 */ 516 if (key->mac_proto == MAC_PROTO_NONE) {
511 eth = eth_hdr(skb); 517 if (unlikely(eth_type_vlan(skb->protocol)))
512 ether_addr_copy(key->eth.src, eth->h_source); 518 return -EINVAL;
513 ether_addr_copy(key->eth.dst, eth->h_dest);
514 519
515 __skb_pull(skb, 2 * ETH_ALEN); 520 skb_reset_network_header(skb);
516 /* We are going to push all headers that we pull, so no need to 521 } else {
517 * update skb->csum here. 522 eth = eth_hdr(skb);
518 */ 523 ether_addr_copy(key->eth.src, eth->h_source);
524 ether_addr_copy(key->eth.dst, eth->h_dest);
519 525
520 if (unlikely(parse_vlan(skb, key))) 526 __skb_pull(skb, 2 * ETH_ALEN);
521 return -ENOMEM; 527 /* We are going to push all headers that we pull, so no need to
528 * update skb->csum here.
529 */
522 530
523 key->eth.type = parse_ethertype(skb); 531 if (unlikely(parse_vlan(skb, key)))
524 if (unlikely(key->eth.type == htons(0))) 532 return -ENOMEM;
525 return -ENOMEM; 533
534 skb->protocol = parse_ethertype(skb);
535 if (unlikely(skb->protocol == htons(0)))
536 return -ENOMEM;
526 537
527 skb_reset_network_header(skb); 538 skb_reset_network_header(skb);
539 __skb_push(skb, skb->data - skb_mac_header(skb));
540 }
528 skb_reset_mac_len(skb); 541 skb_reset_mac_len(skb);
529 __skb_push(skb, skb->data - skb_mac_header(skb)); 542 key->eth.type = skb->protocol;
530 543
531 /* Network layer. */ 544 /* Network layer. */
532 if (key->eth.type == htons(ETH_P_IP)) { 545 if (key->eth.type == htons(ETH_P_IP)) {
@@ -721,9 +734,25 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
721 return key_extract(skb, key); 734 return key_extract(skb, key);
722} 735}
723 736
737static int key_extract_mac_proto(struct sk_buff *skb)
738{
739 switch (skb->dev->type) {
740 case ARPHRD_ETHER:
741 return MAC_PROTO_ETHERNET;
742 case ARPHRD_NONE:
743 if (skb->protocol == htons(ETH_P_TEB))
744 return MAC_PROTO_ETHERNET;
745 return MAC_PROTO_NONE;
746 }
747 WARN_ON_ONCE(1);
748 return -EINVAL;
749}
750
724int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, 751int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
725 struct sk_buff *skb, struct sw_flow_key *key) 752 struct sk_buff *skb, struct sw_flow_key *key)
726{ 753{
754 int res;
755
727 /* Extract metadata from packet. */ 756 /* Extract metadata from packet. */
728 if (tun_info) { 757 if (tun_info) {
729 key->tun_proto = ip_tunnel_info_af(tun_info); 758 key->tun_proto = ip_tunnel_info_af(tun_info);
@@ -751,6 +780,10 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
751 key->phy.skb_mark = skb->mark; 780 key->phy.skb_mark = skb->mark;
752 ovs_ct_fill_key(skb, key); 781 ovs_ct_fill_key(skb, key);
753 key->ovs_flow_hash = 0; 782 key->ovs_flow_hash = 0;
783 res = key_extract_mac_proto(skb);
784 if (res < 0)
785 return res;
786 key->mac_proto = res;
754 key->recirc_id = 0; 787 key->recirc_id = 0;
755 788
756 return key_extract(skb, key); 789 return key_extract(skb, key);
@@ -767,5 +800,29 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
767 if (err) 800 if (err)
768 return err; 801 return err;
769 802
803 if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
804 /* key_extract assumes that skb->protocol is set-up for
805 * layer 3 packets which is the case for other callers,
806 * in particular packets recieved from the network stack.
807 * Here the correct value can be set from the metadata
808 * extracted above.
809 */
810 skb->protocol = key->eth.type;
811 } else {
812 struct ethhdr *eth;
813
814 skb_reset_mac_header(skb);
815 eth = eth_hdr(skb);
816
817 /* Normally, setting the skb 'protocol' field would be
818 * handled by a call to eth_type_trans(), but it assumes
819 * there's a sending device, which we may not have.
820 */
821 if (eth_proto_is_802_3(eth->h_proto))
822 skb->protocol = eth->h_proto;
823 else
824 skb->protocol = htons(ETH_P_802_2);
825 }
826
770 return key_extract(skb, key); 827 return key_extract(skb, key);
771} 828}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index ae783f5c6695..f61cae7f9030 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -37,6 +37,12 @@
37 37
38struct sk_buff; 38struct sk_buff;
39 39
40enum sw_flow_mac_proto {
41 MAC_PROTO_NONE = 0,
42 MAC_PROTO_ETHERNET,
43};
44#define SW_FLOW_KEY_INVALID 0x80
45
40/* Store options at the end of the array if they are less than the 46/* Store options at the end of the array if they are less than the
41 * maximum size. This allows us to get the benefits of variable length 47 * maximum size. This allows us to get the benefits of variable length
42 * matching for small options. 48 * matching for small options.
@@ -68,6 +74,7 @@ struct sw_flow_key {
68 u32 skb_mark; /* SKB mark. */ 74 u32 skb_mark; /* SKB mark. */
69 u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ 75 u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
70 } __packed phy; /* Safe when right after 'tun_key'. */ 76 } __packed phy; /* Safe when right after 'tun_key'. */
77 u8 mac_proto; /* MAC layer protocol (e.g. Ethernet). */
71 u8 tun_proto; /* Protocol of encapsulating tunnel. */ 78 u8 tun_proto; /* Protocol of encapsulating tunnel. */
72 u32 ovs_flow_hash; /* Datapath computed hash value. */ 79 u32 ovs_flow_hash; /* Datapath computed hash value. */
73 u32 recirc_id; /* Recirculation ID. */ 80 u32 recirc_id; /* Recirculation ID. */
@@ -206,6 +213,21 @@ struct arp_eth_header {
206 unsigned char ar_tip[4]; /* target IP address */ 213 unsigned char ar_tip[4]; /* target IP address */
207} __packed; 214} __packed;
208 215
216static inline u8 ovs_key_mac_proto(const struct sw_flow_key *key)
217{
218 return key->mac_proto & ~SW_FLOW_KEY_INVALID;
219}
220
221static inline u16 __ovs_mac_header_len(u8 mac_proto)
222{
223 return mac_proto == MAC_PROTO_ETHERNET ? ETH_HLEN : 0;
224}
225
226static inline u16 ovs_mac_header_len(const struct sw_flow_key *key)
227{
228 return __ovs_mac_header_len(ovs_key_mac_proto(key));
229}
230
209static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid) 231static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
210{ 232{
211 return sfid->ufid_len; 233 return sfid->ufid_len;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index ae25ded82b3b..c87d359b9b37 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -123,7 +123,7 @@ static void update_range(struct sw_flow_match *match,
123static bool match_validate(const struct sw_flow_match *match, 123static bool match_validate(const struct sw_flow_match *match,
124 u64 key_attrs, u64 mask_attrs, bool log) 124 u64 key_attrs, u64 mask_attrs, bool log)
125{ 125{
126 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; 126 u64 key_expected = 0;
127 u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 127 u64 mask_allowed = key_attrs; /* At most allow all key attributes */
128 128
129 /* The following mask attributes allowed only if they 129 /* The following mask attributes allowed only if they
@@ -969,10 +969,33 @@ static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
969 return 0; 969 return 0;
970} 970}
971 971
972static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
973 u64 *attrs, const struct nlattr **a,
974 bool is_mask, bool log)
975{
976 __be16 eth_type;
977
978 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
979 if (is_mask) {
980 /* Always exact match EtherType. */
981 eth_type = htons(0xffff);
982 } else if (!eth_proto_is_802_3(eth_type)) {
983 OVS_NLERR(log, "EtherType %x is less than min %x",
984 ntohs(eth_type), ETH_P_802_3_MIN);
985 return -EINVAL;
986 }
987
988 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
989 *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
990 return 0;
991}
992
972static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, 993static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
973 u64 *attrs, const struct nlattr **a, 994 u64 *attrs, const struct nlattr **a,
974 bool is_mask, bool log) 995 bool is_mask, bool log)
975{ 996{
997 u8 mac_proto = MAC_PROTO_ETHERNET;
998
976 if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { 999 if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
977 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); 1000 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
978 1001
@@ -1059,6 +1082,21 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
1059 sizeof(*cl), is_mask); 1082 sizeof(*cl), is_mask);
1060 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); 1083 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
1061 } 1084 }
1085
1086 /* For layer 3 packets the Ethernet type is provided
1087 * and treated as metadata but no MAC addresses are provided.
1088 */
1089 if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
1090 (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
1091 mac_proto = MAC_PROTO_NONE;
1092
1093 /* Always exact match mac_proto */
1094 SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
1095
1096 if (mac_proto == MAC_PROTO_NONE)
1097 return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
1098 log);
1099
1062 return 0; 1100 return 0;
1063} 1101}
1064 1102
@@ -1081,33 +1119,26 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
1081 SW_FLOW_KEY_MEMCPY(match, eth.dst, 1119 SW_FLOW_KEY_MEMCPY(match, eth.dst,
1082 eth_key->eth_dst, ETH_ALEN, is_mask); 1120 eth_key->eth_dst, ETH_ALEN, is_mask);
1083 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); 1121 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1084 }
1085 1122
1086 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { 1123 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
1087 /* VLAN attribute is always parsed before getting here since it 1124 /* VLAN attribute is always parsed before getting here since it
1088 * may occur multiple times. 1125 * may occur multiple times.
1089 */ 1126 */
1090 OVS_NLERR(log, "VLAN attribute unexpected."); 1127 OVS_NLERR(log, "VLAN attribute unexpected.");
1091 return -EINVAL;
1092 }
1093
1094 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1095 __be16 eth_type;
1096
1097 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1098 if (is_mask) {
1099 /* Always exact match EtherType. */
1100 eth_type = htons(0xffff);
1101 } else if (!eth_proto_is_802_3(eth_type)) {
1102 OVS_NLERR(log, "EtherType %x is less than min %x",
1103 ntohs(eth_type), ETH_P_802_3_MIN);
1104 return -EINVAL; 1128 return -EINVAL;
1105 } 1129 }
1106 1130
1107 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); 1131 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1108 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1132 err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
1109 } else if (!is_mask) { 1133 log);
1110 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 1134 if (err)
1135 return err;
1136 } else if (!is_mask) {
1137 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
1138 }
1139 } else if (!match->key->eth.type) {
1140 OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
1141 return -EINVAL;
1111 } 1142 }
1112 1143
1113 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 1144 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
@@ -1556,42 +1587,44 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1556 if (ovs_ct_put_key(output, skb)) 1587 if (ovs_ct_put_key(output, skb))
1557 goto nla_put_failure; 1588 goto nla_put_failure;
1558 1589
1559 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1590 if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
1560 if (!nla) 1591 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
1561 goto nla_put_failure; 1592 if (!nla)
1562
1563 eth_key = nla_data(nla);
1564 ether_addr_copy(eth_key->eth_src, output->eth.src);
1565 ether_addr_copy(eth_key->eth_dst, output->eth.dst);
1566
1567 if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
1568 if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
1569 goto nla_put_failure; 1593 goto nla_put_failure;
1570 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1571 if (!swkey->eth.vlan.tci)
1572 goto unencap;
1573 1594
1574 if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { 1595 eth_key = nla_data(nla);
1575 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) 1596 ether_addr_copy(eth_key->eth_src, output->eth.src);
1597 ether_addr_copy(eth_key->eth_dst, output->eth.dst);
1598
1599 if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
1600 if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
1576 goto nla_put_failure; 1601 goto nla_put_failure;
1577 in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1602 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1578 if (!swkey->eth.cvlan.tci) 1603 if (!swkey->eth.vlan.tci)
1579 goto unencap; 1604 goto unencap;
1605
1606 if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
1607 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
1608 goto nla_put_failure;
1609 in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1610 if (!swkey->eth.cvlan.tci)
1611 goto unencap;
1612 }
1580 } 1613 }
1581 }
1582 1614
1583 if (swkey->eth.type == htons(ETH_P_802_2)) { 1615 if (swkey->eth.type == htons(ETH_P_802_2)) {
1584 /* 1616 /*
1585 * Ethertype 802.2 is represented in the netlink with omitted 1617 * Ethertype 802.2 is represented in the netlink with omitted
1586 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 1618 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
1587 * 0xffff in the mask attribute. Ethertype can also 1619 * 0xffff in the mask attribute. Ethertype can also
1588 * be wildcarded. 1620 * be wildcarded.
1589 */ 1621 */
1590 if (is_mask && output->eth.type) 1622 if (is_mask && output->eth.type)
1591 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 1623 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
1592 output->eth.type)) 1624 output->eth.type))
1593 goto nla_put_failure; 1625 goto nla_put_failure;
1594 goto unencap; 1626 goto unencap;
1627 }
1595 } 1628 }
1596 1629
1597 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 1630 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
@@ -2126,8 +2159,8 @@ static bool validate_masked(u8 *data, int len)
2126 2159
2127static int validate_set(const struct nlattr *a, 2160static int validate_set(const struct nlattr *a,
2128 const struct sw_flow_key *flow_key, 2161 const struct sw_flow_key *flow_key,
2129 struct sw_flow_actions **sfa, 2162 struct sw_flow_actions **sfa, bool *skip_copy,
2130 bool *skip_copy, __be16 eth_type, bool masked, bool log) 2163 u8 mac_proto, __be16 eth_type, bool masked, bool log)
2131{ 2164{
2132 const struct nlattr *ovs_key = nla_data(a); 2165 const struct nlattr *ovs_key = nla_data(a);
2133 int key_type = nla_type(ovs_key); 2166 int key_type = nla_type(ovs_key);
@@ -2157,7 +2190,11 @@ static int validate_set(const struct nlattr *a,
2157 case OVS_KEY_ATTR_SKB_MARK: 2190 case OVS_KEY_ATTR_SKB_MARK:
2158 case OVS_KEY_ATTR_CT_MARK: 2191 case OVS_KEY_ATTR_CT_MARK:
2159 case OVS_KEY_ATTR_CT_LABELS: 2192 case OVS_KEY_ATTR_CT_LABELS:
2193 break;
2194
2160 case OVS_KEY_ATTR_ETHERNET: 2195 case OVS_KEY_ATTR_ETHERNET:
2196 if (mac_proto != MAC_PROTO_ETHERNET)
2197 return -EINVAL;
2161 break; 2198 break;
2162 2199
2163 case OVS_KEY_ATTR_TUNNEL: 2200 case OVS_KEY_ATTR_TUNNEL:
@@ -2324,6 +2361,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2324 int depth, struct sw_flow_actions **sfa, 2361 int depth, struct sw_flow_actions **sfa,
2325 __be16 eth_type, __be16 vlan_tci, bool log) 2362 __be16 eth_type, __be16 vlan_tci, bool log)
2326{ 2363{
2364 u8 mac_proto = ovs_key_mac_proto(key);
2327 const struct nlattr *a; 2365 const struct nlattr *a;
2328 int rem, err; 2366 int rem, err;
2329 2367
@@ -2346,6 +2384,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2346 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), 2384 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
2347 [OVS_ACTION_ATTR_CT] = (u32)-1, 2385 [OVS_ACTION_ATTR_CT] = (u32)-1,
2348 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), 2386 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
2387 [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
2388 [OVS_ACTION_ATTR_POP_ETH] = 0,
2349 }; 2389 };
2350 const struct ovs_action_push_vlan *vlan; 2390 const struct ovs_action_push_vlan *vlan;
2351 int type = nla_type(a); 2391 int type = nla_type(a);
@@ -2394,10 +2434,14 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2394 } 2434 }
2395 2435
2396 case OVS_ACTION_ATTR_POP_VLAN: 2436 case OVS_ACTION_ATTR_POP_VLAN:
2437 if (mac_proto != MAC_PROTO_ETHERNET)
2438 return -EINVAL;
2397 vlan_tci = htons(0); 2439 vlan_tci = htons(0);
2398 break; 2440 break;
2399 2441
2400 case OVS_ACTION_ATTR_PUSH_VLAN: 2442 case OVS_ACTION_ATTR_PUSH_VLAN:
2443 if (mac_proto != MAC_PROTO_ETHERNET)
2444 return -EINVAL;
2401 vlan = nla_data(a); 2445 vlan = nla_data(a);
2402 if (!eth_type_vlan(vlan->vlan_tpid)) 2446 if (!eth_type_vlan(vlan->vlan_tpid))
2403 return -EINVAL; 2447 return -EINVAL;
@@ -2447,14 +2491,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2447 2491
2448 case OVS_ACTION_ATTR_SET: 2492 case OVS_ACTION_ATTR_SET:
2449 err = validate_set(a, key, sfa, 2493 err = validate_set(a, key, sfa,
2450 &skip_copy, eth_type, false, log); 2494 &skip_copy, mac_proto, eth_type,
2495 false, log);
2451 if (err) 2496 if (err)
2452 return err; 2497 return err;
2453 break; 2498 break;
2454 2499
2455 case OVS_ACTION_ATTR_SET_MASKED: 2500 case OVS_ACTION_ATTR_SET_MASKED:
2456 err = validate_set(a, key, sfa, 2501 err = validate_set(a, key, sfa,
2457 &skip_copy, eth_type, true, log); 2502 &skip_copy, mac_proto, eth_type,
2503 true, log);
2458 if (err) 2504 if (err)
2459 return err; 2505 return err;
2460 break; 2506 break;
@@ -2474,6 +2520,22 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2474 skip_copy = true; 2520 skip_copy = true;
2475 break; 2521 break;
2476 2522
2523 case OVS_ACTION_ATTR_PUSH_ETH:
2524 /* Disallow pushing an Ethernet header if one
2525 * is already present */
2526 if (mac_proto != MAC_PROTO_NONE)
2527 return -EINVAL;
2528 mac_proto = MAC_PROTO_NONE;
2529 break;
2530
2531 case OVS_ACTION_ATTR_POP_ETH:
2532 if (mac_proto != MAC_PROTO_ETHERNET)
2533 return -EINVAL;
2534 if (vlan_tci & htons(VLAN_TAG_PRESENT))
2535 return -EINVAL;
2536 mac_proto = MAC_PROTO_ETHERNET;
2537 break;
2538
2477 default: 2539 default:
2478 OVS_NLERR(log, "Unknown Action type %d", type); 2540 OVS_NLERR(log, "Unknown Action type %d", type);
2479 return -EINVAL; 2541 return -EINVAL;
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index e7da29021b38..d5d6caecd072 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -89,15 +89,6 @@ static const struct ethtool_ops internal_dev_ethtool_ops = {
89 .get_link = ethtool_op_get_link, 89 .get_link = ethtool_op_get_link,
90}; 90};
91 91
92static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
93{
94 if (new_mtu < 68)
95 return -EINVAL;
96
97 netdev->mtu = new_mtu;
98 return 0;
99}
100
101static void internal_dev_destructor(struct net_device *dev) 92static void internal_dev_destructor(struct net_device *dev)
102{ 93{
103 struct vport *vport = ovs_internal_dev_get_vport(dev); 94 struct vport *vport = ovs_internal_dev_get_vport(dev);
@@ -148,7 +139,6 @@ static const struct net_device_ops internal_dev_netdev_ops = {
148 .ndo_stop = internal_dev_stop, 139 .ndo_stop = internal_dev_stop,
149 .ndo_start_xmit = internal_dev_xmit, 140 .ndo_start_xmit = internal_dev_xmit,
150 .ndo_set_mac_address = eth_mac_addr, 141 .ndo_set_mac_address = eth_mac_addr,
151 .ndo_change_mtu = internal_dev_change_mtu,
152 .ndo_get_stats64 = internal_get_stats, 142 .ndo_get_stats64 = internal_get_stats,
153 .ndo_set_rx_headroom = internal_set_rx_headroom, 143 .ndo_set_rx_headroom = internal_set_rx_headroom,
154}; 144};
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 4e3972344aa6..0389398fa4ab 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -57,8 +57,10 @@ static void netdev_port_receive(struct sk_buff *skb)
57 if (unlikely(!skb)) 57 if (unlikely(!skb))
58 return; 58 return;
59 59
60 skb_push(skb, ETH_HLEN); 60 if (skb->dev->type == ARPHRD_ETHER) {
61 skb_postpush_rcsum(skb, skb->data, ETH_HLEN); 61 skb_push(skb, ETH_HLEN);
62 skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
63 }
62 ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); 64 ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
63 return; 65 return;
64error: 66error:
@@ -97,7 +99,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
97 } 99 }
98 100
99 if (vport->dev->flags & IFF_LOOPBACK || 101 if (vport->dev->flags & IFF_LOOPBACK ||
100 vport->dev->type != ARPHRD_ETHER || 102 (vport->dev->type != ARPHRD_ETHER &&
103 vport->dev->type != ARPHRD_NONE) ||
101 ovs_is_internal_dev(vport->dev)) { 104 ovs_is_internal_dev(vport->dev)) {
102 err = -EINVAL; 105 err = -EINVAL;
103 goto error_put; 106 goto error_put;
@@ -162,7 +165,6 @@ void ovs_netdev_detach_dev(struct vport *vport)
162 netdev_master_upper_dev_get(vport->dev)); 165 netdev_master_upper_dev_get(vport->dev));
163 dev_set_promiscuity(vport->dev, -1); 166 dev_set_promiscuity(vport->dev, -1);
164} 167}
165EXPORT_SYMBOL_GPL(ovs_netdev_detach_dev);
166 168
167static void netdev_destroy(struct vport *vport) 169static void netdev_destroy(struct vport *vport)
168{ 170{
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 7387418ac514..b6c8524032a0 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -463,27 +463,11 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
463 ovs_dp_process_packet(skb, &key); 463 ovs_dp_process_packet(skb, &key);
464 return 0; 464 return 0;
465} 465}
466EXPORT_SYMBOL_GPL(ovs_vport_receive);
467 466
468static void free_vport_rcu(struct rcu_head *rcu) 467static unsigned int packet_length(const struct sk_buff *skb,
468 struct net_device *dev)
469{ 469{
470 struct vport *vport = container_of(rcu, struct vport, rcu); 470 unsigned int length = skb->len - dev->hard_header_len;
471
472 ovs_vport_free(vport);
473}
474
475void ovs_vport_deferred_free(struct vport *vport)
476{
477 if (!vport)
478 return;
479
480 call_rcu(&vport->rcu, free_vport_rcu);
481}
482EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
483
484static unsigned int packet_length(const struct sk_buff *skb)
485{
486 unsigned int length = skb->len - ETH_HLEN;
487 471
488 if (!skb_vlan_tag_present(skb) && 472 if (!skb_vlan_tag_present(skb) &&
489 eth_type_vlan(skb->protocol)) 473 eth_type_vlan(skb->protocol))
@@ -497,14 +481,34 @@ static unsigned int packet_length(const struct sk_buff *skb)
497 return length; 481 return length;
498} 482}
499 483
500void ovs_vport_send(struct vport *vport, struct sk_buff *skb) 484void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
501{ 485{
502 int mtu = vport->dev->mtu; 486 int mtu = vport->dev->mtu;
503 487
504 if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { 488 switch (vport->dev->type) {
489 case ARPHRD_NONE:
490 if (mac_proto == MAC_PROTO_ETHERNET) {
491 skb_reset_network_header(skb);
492 skb_reset_mac_len(skb);
493 skb->protocol = htons(ETH_P_TEB);
494 } else if (mac_proto != MAC_PROTO_NONE) {
495 WARN_ON_ONCE(1);
496 goto drop;
497 }
498 break;
499 case ARPHRD_ETHER:
500 if (mac_proto != MAC_PROTO_ETHERNET)
501 goto drop;
502 break;
503 default:
504 goto drop;
505 }
506
507 if (unlikely(packet_length(skb, vport->dev) > mtu &&
508 !skb_is_gso(skb))) {
505 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", 509 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
506 vport->dev->name, 510 vport->dev->name,
507 packet_length(skb), mtu); 511 packet_length(skb, vport->dev), mtu);
508 vport->dev->stats.tx_errors++; 512 vport->dev->stats.tx_errors++;
509 goto drop; 513 goto drop;
510 } 514 }
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index f01f28a567ad..cda66c26ad08 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -149,7 +149,6 @@ struct vport_ops {
149struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, 149struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
150 const struct vport_parms *); 150 const struct vport_parms *);
151void ovs_vport_free(struct vport *); 151void ovs_vport_free(struct vport *);
152void ovs_vport_deferred_free(struct vport *vport);
153 152
154#define VPORT_ALIGN 8 153#define VPORT_ALIGN 8
155 154
@@ -198,6 +197,6 @@ int __ovs_vport_ops_register(struct vport_ops *ops);
198 }) 197 })
199 198
200void ovs_vport_ops_unregister(struct vport_ops *ops); 199void ovs_vport_ops_unregister(struct vport_ops *ops);
201void ovs_vport_send(struct vport *vport, struct sk_buff *skb); 200void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto);
202 201
203#endif /* vport.h */ 202#endif /* vport.h */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index dd2332390c45..b9e1a13b4ba3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -73,7 +73,7 @@
73#include <net/sock.h> 73#include <net/sock.h>
74#include <linux/errno.h> 74#include <linux/errno.h>
75#include <linux/timer.h> 75#include <linux/timer.h>
76#include <asm/uaccess.h> 76#include <linux/uaccess.h>
77#include <asm/ioctls.h> 77#include <asm/ioctls.h>
78#include <asm/page.h> 78#include <asm/page.h>
79#include <asm/cacheflush.h> 79#include <asm/cacheflush.h>
@@ -1967,17 +1967,6 @@ static unsigned int run_filter(struct sk_buff *skb,
1967 return res; 1967 return res;
1968} 1968}
1969 1969
1970static int __packet_rcv_vnet(const struct sk_buff *skb,
1971 struct virtio_net_hdr *vnet_hdr)
1972{
1973 *vnet_hdr = (const struct virtio_net_hdr) { 0 };
1974
1975 if (virtio_net_hdr_from_skb(skb, vnet_hdr, vio_le()))
1976 BUG();
1977
1978 return 0;
1979}
1980
1981static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb, 1970static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
1982 size_t *len) 1971 size_t *len)
1983{ 1972{
@@ -1987,7 +1976,7 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
1987 return -EINVAL; 1976 return -EINVAL;
1988 *len -= sizeof(vnet_hdr); 1977 *len -= sizeof(vnet_hdr);
1989 1978
1990 if (__packet_rcv_vnet(skb, &vnet_hdr)) 1979 if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le()))
1991 return -EINVAL; 1980 return -EINVAL;
1992 1981
1993 return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr)); 1982 return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
@@ -2246,8 +2235,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2246 spin_unlock(&sk->sk_receive_queue.lock); 2235 spin_unlock(&sk->sk_receive_queue.lock);
2247 2236
2248 if (po->has_vnet_hdr) { 2237 if (po->has_vnet_hdr) {
2249 if (__packet_rcv_vnet(skb, h.raw + macoff - 2238 if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
2250 sizeof(struct virtio_net_hdr))) { 2239 sizeof(struct virtio_net_hdr),
2240 vio_le())) {
2251 spin_lock(&sk->sk_receive_queue.lock); 2241 spin_lock(&sk->sk_receive_queue.lock);
2252 goto drop_n_account; 2242 goto drop_n_account;
2253 } 2243 }
@@ -2390,8 +2380,6 @@ static void tpacket_set_protocol(const struct net_device *dev,
2390 2380
2391static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len) 2381static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
2392{ 2382{
2393 unsigned short gso_type = 0;
2394
2395 if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && 2383 if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2396 (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + 2384 (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2397 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 > 2385 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
@@ -2403,69 +2391,22 @@ static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
2403 if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len) 2391 if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
2404 return -EINVAL; 2392 return -EINVAL;
2405 2393
2406 if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2407 switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2408 case VIRTIO_NET_HDR_GSO_TCPV4:
2409 gso_type = SKB_GSO_TCPV4;
2410 break;
2411 case VIRTIO_NET_HDR_GSO_TCPV6:
2412 gso_type = SKB_GSO_TCPV6;
2413 break;
2414 case VIRTIO_NET_HDR_GSO_UDP:
2415 gso_type = SKB_GSO_UDP;
2416 break;
2417 default:
2418 return -EINVAL;
2419 }
2420
2421 if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
2422 gso_type |= SKB_GSO_TCP_ECN;
2423
2424 if (vnet_hdr->gso_size == 0)
2425 return -EINVAL;
2426 }
2427
2428 vnet_hdr->gso_type = gso_type; /* changes type, temporary storage */
2429 return 0; 2394 return 0;
2430} 2395}
2431 2396
2432static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len, 2397static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
2433 struct virtio_net_hdr *vnet_hdr) 2398 struct virtio_net_hdr *vnet_hdr)
2434{ 2399{
2435 int n;
2436
2437 if (*len < sizeof(*vnet_hdr)) 2400 if (*len < sizeof(*vnet_hdr))
2438 return -EINVAL; 2401 return -EINVAL;
2439 *len -= sizeof(*vnet_hdr); 2402 *len -= sizeof(*vnet_hdr);
2440 2403
2441 n = copy_from_iter(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter); 2404 if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
2442 if (n != sizeof(*vnet_hdr))
2443 return -EFAULT; 2405 return -EFAULT;
2444 2406
2445 return __packet_snd_vnet_parse(vnet_hdr, *len); 2407 return __packet_snd_vnet_parse(vnet_hdr, *len);
2446} 2408}
2447 2409
2448static int packet_snd_vnet_gso(struct sk_buff *skb,
2449 struct virtio_net_hdr *vnet_hdr)
2450{
2451 if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2452 u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start);
2453 u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset);
2454
2455 if (!skb_partial_csum_set(skb, s, o))
2456 return -EINVAL;
2457 }
2458
2459 skb_shinfo(skb)->gso_size =
2460 __virtio16_to_cpu(vio_le(), vnet_hdr->gso_size);
2461 skb_shinfo(skb)->gso_type = vnet_hdr->gso_type;
2462
2463 /* Header must be checked, and gso_segs computed. */
2464 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2465 skb_shinfo(skb)->gso_segs = 0;
2466 return 0;
2467}
2468
2469static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, 2410static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2470 void *frame, struct net_device *dev, void *data, int tp_len, 2411 void *frame, struct net_device *dev, void *data, int tp_len,
2471 __be16 proto, unsigned char *addr, int hlen, int copylen, 2412 __be16 proto, unsigned char *addr, int hlen, int copylen,
@@ -2725,7 +2666,8 @@ tpacket_error:
2725 } 2666 }
2726 } 2667 }
2727 2668
2728 if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) { 2669 if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr,
2670 vio_le())) {
2729 tp_len = -EINVAL; 2671 tp_len = -EINVAL;
2730 goto tpacket_error; 2672 goto tpacket_error;
2731 } 2673 }
@@ -2916,7 +2858,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2916 packet_pick_tx_queue(dev, skb); 2858 packet_pick_tx_queue(dev, skb);
2917 2859
2918 if (po->has_vnet_hdr) { 2860 if (po->has_vnet_hdr) {
2919 err = packet_snd_vnet_gso(skb, &vnet_hdr); 2861 err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
2920 if (err) 2862 if (err)
2921 goto out_free; 2863 goto out_free;
2922 len += sizeof(vnet_hdr); 2864 len += sizeof(vnet_hdr);
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index fa8237fdc57b..21c28b51be94 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -217,20 +217,10 @@ static netdev_tx_t gprs_xmit(struct sk_buff *skb, struct net_device *dev)
217 return NETDEV_TX_OK; 217 return NETDEV_TX_OK;
218} 218}
219 219
220static int gprs_set_mtu(struct net_device *dev, int new_mtu)
221{
222 if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11)))
223 return -EINVAL;
224
225 dev->mtu = new_mtu;
226 return 0;
227}
228
229static const struct net_device_ops gprs_netdev_ops = { 220static const struct net_device_ops gprs_netdev_ops = {
230 .ndo_open = gprs_open, 221 .ndo_open = gprs_open,
231 .ndo_stop = gprs_close, 222 .ndo_stop = gprs_close,
232 .ndo_start_xmit = gprs_xmit, 223 .ndo_start_xmit = gprs_xmit,
233 .ndo_change_mtu = gprs_set_mtu,
234}; 224};
235 225
236static void gprs_setup(struct net_device *dev) 226static void gprs_setup(struct net_device *dev)
@@ -239,6 +229,8 @@ static void gprs_setup(struct net_device *dev)
239 dev->type = ARPHRD_PHONET_PIPE; 229 dev->type = ARPHRD_PHONET_PIPE;
240 dev->flags = IFF_POINTOPOINT | IFF_NOARP; 230 dev->flags = IFF_POINTOPOINT | IFF_NOARP;
241 dev->mtu = GPRS_DEFAULT_MTU; 231 dev->mtu = GPRS_DEFAULT_MTU;
232 dev->min_mtu = 576;
233 dev->max_mtu = (PHONET_MAX_MTU - 11);
242 dev->hard_header_len = 0; 234 dev->hard_header_len = 0;
243 dev->addr_len = 0; 235 dev->addr_len = 0;
244 dev->tx_queue_len = 10; 236 dev->tx_queue_len = 10;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 850a86cde0b3..8bad5624a27a 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1167,7 +1167,7 @@ disabled:
1167 /* Wait until flow control allows TX */ 1167 /* Wait until flow control allows TX */
1168 done = atomic_read(&pn->tx_credits); 1168 done = atomic_read(&pn->tx_credits);
1169 while (!done) { 1169 while (!done) {
1170 DEFINE_WAIT(wait); 1170 DEFINE_WAIT_FUNC(wait, woken_wake_function);
1171 1171
1172 if (!timeo) { 1172 if (!timeo) {
1173 err = -EAGAIN; 1173 err = -EAGAIN;
@@ -1178,10 +1178,9 @@ disabled:
1178 goto out; 1178 goto out;
1179 } 1179 }
1180 1180
1181 prepare_to_wait(sk_sleep(sk), &wait, 1181 add_wait_queue(sk_sleep(sk), &wait);
1182 TASK_INTERRUPTIBLE); 1182 done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits), &wait);
1183 done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits)); 1183 remove_wait_queue(sk_sleep(sk), &wait);
1184 finish_wait(sk_sleep(sk), &wait);
1185 1184
1186 if (sk->sk_state != TCP_ESTABLISHED) 1185 if (sk->sk_state != TCP_ESTABLISHED)
1187 goto disabled; 1186 goto disabled;
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index a58680016472..2cb4c5dfad6f 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -44,7 +44,7 @@ struct phonet_net {
44 struct phonet_routes routes; 44 struct phonet_routes routes;
45}; 45};
46 46
47static int phonet_net_id __read_mostly; 47static unsigned int phonet_net_id __read_mostly;
48 48
49static struct phonet_net *phonet_pernet(struct net *net) 49static struct phonet_net *phonet_pernet(struct net *net)
50{ 50{
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 6beaeb1138f3..2ac1e6194be3 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -605,10 +605,14 @@ static void rds_exit(void)
605} 605}
606module_exit(rds_exit); 606module_exit(rds_exit);
607 607
608u32 rds_gen_num;
609
608static int rds_init(void) 610static int rds_init(void)
609{ 611{
610 int ret; 612 int ret;
611 613
614 net_get_random_once(&rds_gen_num, sizeof(rds_gen_num));
615
612 ret = rds_bind_lock_init(); 616 ret = rds_bind_lock_init();
613 if (ret) 617 if (ret)
614 goto out; 618 goto out;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index f5058559bb08..fe9d31c0b22d 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -269,6 +269,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
269 kmem_cache_free(rds_conn_slab, conn); 269 kmem_cache_free(rds_conn_slab, conn);
270 conn = found; 270 conn = found;
271 } else { 271 } else {
272 conn->c_my_gen_num = rds_gen_num;
273 conn->c_peer_gen_num = 0;
272 hlist_add_head_rcu(&conn->c_hash_node, head); 274 hlist_add_head_rcu(&conn->c_hash_node, head);
273 rds_cong_add_conn(conn); 275 rds_cong_add_conn(conn);
274 rds_conn_count++; 276 rds_conn_count++;
@@ -681,6 +683,7 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
681 !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) 683 !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
682 queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); 684 queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
683} 685}
686EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
684 687
685void rds_conn_connect_if_down(struct rds_connection *conn) 688void rds_conn_connect_if_down(struct rds_connection *conn)
686{ 689{
@@ -689,21 +692,6 @@ void rds_conn_connect_if_down(struct rds_connection *conn)
689} 692}
690EXPORT_SYMBOL_GPL(rds_conn_connect_if_down); 693EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);
691 694
692/*
693 * An error occurred on the connection
694 */
695void
696__rds_conn_error(struct rds_connection *conn, const char *fmt, ...)
697{
698 va_list ap;
699
700 va_start(ap, fmt);
701 vprintk(fmt, ap);
702 va_end(ap);
703
704 rds_conn_drop(conn);
705}
706
707void 695void
708__rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...) 696__rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
709{ 697{
diff --git a/net/rds/message.c b/net/rds/message.c
index 6cb91061556a..49bfb512d808 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -42,6 +42,7 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
42[RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma), 42[RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma),
43[RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest), 43[RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest),
44[RDS_EXTHDR_NPATHS] = sizeof(u16), 44[RDS_EXTHDR_NPATHS] = sizeof(u16),
45[RDS_EXTHDR_GEN_NUM] = sizeof(u32),
45}; 46};
46 47
47 48
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 4c93badeabf2..ea961144084f 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -135,7 +135,7 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
135 /* Release any MRs associated with this socket */ 135 /* Release any MRs associated with this socket */
136 spin_lock_irqsave(&rs->rs_rdma_lock, flags); 136 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
137 while ((node = rb_first(&rs->rs_rdma_keys))) { 137 while ((node = rb_first(&rs->rs_rdma_keys))) {
138 mr = container_of(node, struct rds_mr, r_rb_node); 138 mr = rb_entry(node, struct rds_mr, r_rb_node);
139 if (mr->r_trans == rs->rs_transport) 139 if (mr->r_trans == rs->rs_transport)
140 mr->r_invalidate = 0; 140 mr->r_invalidate = 0;
141 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); 141 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 345f09059e9f..d5f311767157 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -100,11 +100,14 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
100 trans->cm_connect_complete(conn, event); 100 trans->cm_connect_complete(conn, event);
101 break; 101 break;
102 102
103 case RDMA_CM_EVENT_REJECTED:
104 rdsdebug("Connection rejected: %s\n",
105 rdma_reject_msg(cm_id, event->status));
106 /* FALLTHROUGH */
103 case RDMA_CM_EVENT_ADDR_ERROR: 107 case RDMA_CM_EVENT_ADDR_ERROR:
104 case RDMA_CM_EVENT_ROUTE_ERROR: 108 case RDMA_CM_EVENT_ROUTE_ERROR:
105 case RDMA_CM_EVENT_CONNECT_ERROR: 109 case RDMA_CM_EVENT_CONNECT_ERROR:
106 case RDMA_CM_EVENT_UNREACHABLE: 110 case RDMA_CM_EVENT_UNREACHABLE:
107 case RDMA_CM_EVENT_REJECTED:
108 case RDMA_CM_EVENT_DEVICE_REMOVAL: 111 case RDMA_CM_EVENT_DEVICE_REMOVAL:
109 case RDMA_CM_EVENT_ADDR_CHANGE: 112 case RDMA_CM_EVENT_ADDR_CHANGE:
110 if (conn) 113 if (conn)
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 67ba67c058b1..ebbf909b87ec 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -151,6 +151,9 @@ struct rds_connection {
151 151
152 struct rds_conn_path c_path[RDS_MPATH_WORKERS]; 152 struct rds_conn_path c_path[RDS_MPATH_WORKERS];
153 wait_queue_head_t c_hs_waitq; /* handshake waitq */ 153 wait_queue_head_t c_hs_waitq; /* handshake waitq */
154
155 u32 c_my_gen_num;
156 u32 c_peer_gen_num;
154}; 157};
155 158
156static inline 159static inline
@@ -243,7 +246,8 @@ struct rds_ext_header_rdma_dest {
243/* Extension header announcing number of paths. 246/* Extension header announcing number of paths.
244 * Implicit length = 2 bytes. 247 * Implicit length = 2 bytes.
245 */ 248 */
246#define RDS_EXTHDR_NPATHS 4 249#define RDS_EXTHDR_NPATHS 5
250#define RDS_EXTHDR_GEN_NUM 6
247 251
248#define __RDS_EXTHDR_MAX 16 /* for now */ 252#define __RDS_EXTHDR_MAX 16 /* for now */
249 253
@@ -338,6 +342,7 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
338#define RDS_MSG_RETRANSMITTED 5 342#define RDS_MSG_RETRANSMITTED 5
339#define RDS_MSG_MAPPED 6 343#define RDS_MSG_MAPPED 6
340#define RDS_MSG_PAGEVEC 7 344#define RDS_MSG_PAGEVEC 7
345#define RDS_MSG_FLUSH 8
341 346
342struct rds_message { 347struct rds_message {
343 atomic_t m_refcount; 348 atomic_t m_refcount;
@@ -664,6 +669,7 @@ void rds_cong_exit(void);
664struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); 669struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
665 670
666/* conn.c */ 671/* conn.c */
672extern u32 rds_gen_num;
667int rds_conn_init(void); 673int rds_conn_init(void);
668void rds_conn_exit(void); 674void rds_conn_exit(void);
669struct rds_connection *rds_conn_create(struct net *net, 675struct rds_connection *rds_conn_create(struct net *net,
@@ -683,10 +689,6 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
683 struct rds_info_lengths *lens, 689 struct rds_info_lengths *lens,
684 int (*visitor)(struct rds_connection *, void *), 690 int (*visitor)(struct rds_connection *, void *),
685 size_t item_len); 691 size_t item_len);
686__printf(2, 3)
687void __rds_conn_error(struct rds_connection *conn, const char *, ...);
688#define rds_conn_error(conn, fmt...) \
689 __rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
690 692
691__printf(2, 3) 693__printf(2, 3)
692void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...); 694void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index cbfabdf3ff48..9d0666e5fe35 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -120,6 +120,36 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
120 /* do nothing if no change in cong state */ 120 /* do nothing if no change in cong state */
121} 121}
122 122
123static void rds_conn_peer_gen_update(struct rds_connection *conn,
124 u32 peer_gen_num)
125{
126 int i;
127 struct rds_message *rm, *tmp;
128 unsigned long flags;
129
130 WARN_ON(conn->c_trans->t_type != RDS_TRANS_TCP);
131 if (peer_gen_num != 0) {
132 if (conn->c_peer_gen_num != 0 &&
133 peer_gen_num != conn->c_peer_gen_num) {
134 for (i = 0; i < RDS_MPATH_WORKERS; i++) {
135 struct rds_conn_path *cp;
136
137 cp = &conn->c_path[i];
138 spin_lock_irqsave(&cp->cp_lock, flags);
139 cp->cp_next_tx_seq = 1;
140 cp->cp_next_rx_seq = 0;
141 list_for_each_entry_safe(rm, tmp,
142 &cp->cp_retrans,
143 m_conn_item) {
144 set_bit(RDS_MSG_FLUSH, &rm->m_flags);
145 }
146 spin_unlock_irqrestore(&cp->cp_lock, flags);
147 }
148 }
149 conn->c_peer_gen_num = peer_gen_num;
150 }
151}
152
123/* 153/*
124 * Process all extension headers that come with this message. 154 * Process all extension headers that come with this message.
125 */ 155 */
@@ -163,7 +193,9 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
163 union { 193 union {
164 struct rds_ext_header_version version; 194 struct rds_ext_header_version version;
165 u16 rds_npaths; 195 u16 rds_npaths;
196 u32 rds_gen_num;
166 } buffer; 197 } buffer;
198 u32 new_peer_gen_num = 0;
167 199
168 while (1) { 200 while (1) {
169 len = sizeof(buffer); 201 len = sizeof(buffer);
@@ -176,6 +208,9 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
176 conn->c_npaths = min_t(int, RDS_MPATH_WORKERS, 208 conn->c_npaths = min_t(int, RDS_MPATH_WORKERS,
177 buffer.rds_npaths); 209 buffer.rds_npaths);
178 break; 210 break;
211 case RDS_EXTHDR_GEN_NUM:
212 new_peer_gen_num = buffer.rds_gen_num;
213 break;
179 default: 214 default:
180 pr_warn_ratelimited("ignoring unknown exthdr type " 215 pr_warn_ratelimited("ignoring unknown exthdr type "
181 "0x%x\n", type); 216 "0x%x\n", type);
@@ -183,6 +218,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
183 } 218 }
184 /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */ 219 /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */
185 conn->c_npaths = max_t(int, conn->c_npaths, 1); 220 conn->c_npaths = max_t(int, conn->c_npaths, 1);
221 rds_conn_peer_gen_update(conn, new_peer_gen_num);
186} 222}
187 223
188/* rds_start_mprds() will synchronously start multiple paths when appropriate. 224/* rds_start_mprds() will synchronously start multiple paths when appropriate.
diff --git a/net/rds/send.c b/net/rds/send.c
index 896626b9a0ef..77c8c6e613ad 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -259,8 +259,9 @@ restart:
259 * connection. 259 * connection.
260 * Therefore, we never retransmit messages with RDMA ops. 260 * Therefore, we never retransmit messages with RDMA ops.
261 */ 261 */
262 if (rm->rdma.op_active && 262 if (test_bit(RDS_MSG_FLUSH, &rm->m_flags) ||
263 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { 263 (rm->rdma.op_active &&
264 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))) {
264 spin_lock_irqsave(&cp->cp_lock, flags); 265 spin_lock_irqsave(&cp->cp_lock, flags);
265 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 266 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
266 list_move(&rm->m_conn_item, &to_be_dropped); 267 list_move(&rm->m_conn_item, &to_be_dropped);
@@ -1209,6 +1210,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
1209 rds_message_add_extension(&rm->m_inc.i_hdr, 1210 rds_message_add_extension(&rm->m_inc.i_hdr,
1210 RDS_EXTHDR_NPATHS, &npaths, 1211 RDS_EXTHDR_NPATHS, &npaths,
1211 sizeof(npaths)); 1212 sizeof(npaths));
1213 rds_message_add_extension(&rm->m_inc.i_hdr,
1214 RDS_EXTHDR_GEN_NUM,
1215 &cp->cp_conn->c_my_gen_num,
1216 sizeof(u32));
1212 } 1217 }
1213 spin_unlock_irqrestore(&cp->cp_lock, flags); 1218 spin_unlock_irqrestore(&cp->cp_lock, flags);
1214 1219
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 20e2923dc827..57bb52361e0f 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -220,7 +220,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
220 write_unlock_bh(&sock->sk->sk_callback_lock); 220 write_unlock_bh(&sock->sk->sk_callback_lock);
221} 221}
222 222
223static void rds_tcp_tc_info(struct socket *sock, unsigned int len, 223static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
224 struct rds_info_iterator *iter, 224 struct rds_info_iterator *iter,
225 struct rds_info_lengths *lens) 225 struct rds_info_lengths *lens)
226{ 226{
@@ -229,6 +229,7 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
229 unsigned long flags; 229 unsigned long flags;
230 struct sockaddr_in sin; 230 struct sockaddr_in sin;
231 int sinlen; 231 int sinlen;
232 struct socket *sock;
232 233
233 spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); 234 spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
234 235
@@ -237,12 +238,17 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
237 238
238 list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { 239 list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
239 240
240 sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0); 241 sock = tc->t_sock;
241 tsinfo.local_addr = sin.sin_addr.s_addr; 242 if (sock) {
242 tsinfo.local_port = sin.sin_port; 243 sock->ops->getname(sock, (struct sockaddr *)&sin,
243 sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 1); 244 &sinlen, 0);
244 tsinfo.peer_addr = sin.sin_addr.s_addr; 245 tsinfo.local_addr = sin.sin_addr.s_addr;
245 tsinfo.peer_port = sin.sin_port; 246 tsinfo.local_port = sin.sin_port;
247 sock->ops->getname(sock, (struct sockaddr *)&sin,
248 &sinlen, 1);
249 tsinfo.peer_addr = sin.sin_addr.s_addr;
250 tsinfo.peer_port = sin.sin_port;
251 }
246 252
247 tsinfo.hdr_rem = tc->t_tinc_hdr_rem; 253 tsinfo.hdr_rem = tc->t_tinc_hdr_rem;
248 tsinfo.data_rem = tc->t_tinc_data_rem; 254 tsinfo.data_rem = tc->t_tinc_data_rem;
@@ -360,7 +366,7 @@ struct rds_transport rds_tcp_transport = {
360 .t_mp_capable = 1, 366 .t_mp_capable = 1,
361}; 367};
362 368
363static int rds_tcp_netid; 369static unsigned int rds_tcp_netid;
364 370
365/* per-network namespace private data for this module */ 371/* per-network namespace private data for this module */
366struct rds_tcp_net { 372struct rds_tcp_net {
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 05f61c533ed3..d6839d96d539 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -60,7 +60,19 @@ void rds_tcp_state_change(struct sock *sk)
60 case TCP_SYN_RECV: 60 case TCP_SYN_RECV:
61 break; 61 break;
62 case TCP_ESTABLISHED: 62 case TCP_ESTABLISHED:
63 rds_connect_path_complete(cp, RDS_CONN_CONNECTING); 63 /* Force the peer to reconnect so that we have the
64 * TCP ports going from <smaller-ip>.<transient> to
65 * <larger-ip>.<RDS_TCP_PORT>. We avoid marking the
66 * RDS connection as RDS_CONN_UP until the reconnect,
67 * to avoid RDS datagram loss.
68 */
69 if (cp->cp_conn->c_laddr > cp->cp_conn->c_faddr &&
70 rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
71 RDS_CONN_ERROR)) {
72 rds_conn_path_drop(cp);
73 } else {
74 rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
75 }
64 break; 76 break;
65 case TCP_CLOSE_WAIT: 77 case TCP_CLOSE_WAIT:
66 case TCP_CLOSE: 78 case TCP_CLOSE:
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index e0b23fb5b8d5..f74bab3ecdca 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -83,27 +83,22 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
83{ 83{
84 int i; 84 int i;
85 bool peer_is_smaller = (conn->c_faddr < conn->c_laddr); 85 bool peer_is_smaller = (conn->c_faddr < conn->c_laddr);
86 int npaths = conn->c_npaths; 86 int npaths = max_t(int, 1, conn->c_npaths);
87
88 if (npaths <= 1) {
89 struct rds_conn_path *cp = &conn->c_path[0];
90 int ret;
91
92 ret = rds_conn_path_transition(cp, RDS_CONN_DOWN,
93 RDS_CONN_CONNECTING);
94 if (!ret)
95 rds_conn_path_transition(cp, RDS_CONN_ERROR,
96 RDS_CONN_CONNECTING);
97 return cp->cp_transport_data;
98 }
99 87
100 /* for mprds, paths with cp_index > 0 MUST be initiated by the peer 88 /* for mprds, all paths MUST be initiated by the peer
101 * with the smaller address. 89 * with the smaller address.
102 */ 90 */
103 if (!peer_is_smaller) 91 if (!peer_is_smaller) {
92 /* Make sure we initiate at least one path if this
93 * has not already been done; rds_start_mprds() will
94 * take care of additional paths, if necessary.
95 */
96 if (npaths == 1)
97 rds_conn_path_connect_if_down(&conn->c_path[0]);
104 return NULL; 98 return NULL;
99 }
105 100
106 for (i = 1; i < npaths; i++) { 101 for (i = 0; i < npaths; i++) {
107 struct rds_conn_path *cp = &conn->c_path[i]; 102 struct rds_conn_path *cp = &conn->c_path[i];
108 103
109 if (rds_conn_path_transition(cp, RDS_CONN_DOWN, 104 if (rds_conn_path_transition(cp, RDS_CONN_DOWN,
@@ -171,8 +166,8 @@ int rds_tcp_accept_one(struct socket *sock)
171 mutex_lock(&rs_tcp->t_conn_path_lock); 166 mutex_lock(&rs_tcp->t_conn_path_lock);
172 cp = rs_tcp->t_cpath; 167 cp = rs_tcp->t_cpath;
173 conn_state = rds_conn_path_state(cp); 168 conn_state = rds_conn_path_state(cp);
174 if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP && 169 WARN_ON(conn_state == RDS_CONN_UP);
175 conn_state != RDS_CONN_ERROR) 170 if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_ERROR)
176 goto rst_nsk; 171 goto rst_nsk;
177 if (rs_tcp->t_sock) { 172 if (rs_tcp->t_sock) {
178 /* Need to resolve a duelling SYN between peers. 173 /* Need to resolve a duelling SYN between peers.
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 89d09b481f47..dcf4742083ea 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -100,6 +100,9 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
100 set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags); 100 set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags);
101 tc->t_last_expected_una = rm->m_ack_seq + 1; 101 tc->t_last_expected_una = rm->m_ack_seq + 1;
102 102
103 if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
104 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
105
103 rdsdebug("rm %p tcp nxt %u ack_seq %llu\n", 106 rdsdebug("rm %p tcp nxt %u ack_seq %llu\n",
104 rm, rds_tcp_snd_nxt(tc), 107 rm, rds_tcp_snd_nxt(tc),
105 (unsigned long long)rm->m_ack_seq); 108 (unsigned long long)rm->m_ack_seq);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index e42df11bf30a..e36e333a0aa0 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -171,8 +171,7 @@ void rds_connect_worker(struct work_struct *work)
171 RDS_CONN_DOWN)) 171 RDS_CONN_DOWN))
172 rds_queue_reconnect(cp); 172 rds_queue_reconnect(cp);
173 else 173 else
174 rds_conn_path_error(cp, 174 rds_conn_path_error(cp, "connect failed\n");
175 "RDS: connect failed\n");
176 } 175 }
177 } 176 }
178} 177}
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 129d357d2722..9ad301c46b88 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -34,7 +34,7 @@
34#include <linux/if_arp.h> 34#include <linux/if_arp.h>
35#include <linux/skbuff.h> 35#include <linux/skbuff.h>
36#include <net/sock.h> 36#include <net/sock.h>
37#include <asm/uaccess.h> 37#include <linux/uaccess.h>
38#include <linux/fcntl.h> 38#include <linux/fcntl.h>
39#include <linux/termios.h> 39#include <linux/termios.h>
40#include <linux/mm.h> 40#include <linux/mm.h>
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 0fc76d845103..452bbb38d943 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -25,7 +25,7 @@
25#include <linux/skbuff.h> 25#include <linux/skbuff.h>
26#include <net/sock.h> 26#include <net/sock.h>
27#include <net/tcp_states.h> 27#include <net/tcp_states.h>
28#include <asm/uaccess.h> 28#include <linux/uaccess.h>
29#include <linux/fcntl.h> 29#include <linux/fcntl.h>
30#include <linux/termios.h> /* For TIOCINQ/OUTQ */ 30#include <linux/termios.h> /* For TIOCINQ/OUTQ */
31#include <linux/mm.h> 31#include <linux/mm.h>
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 2d59c9be40e1..5f63f6dcaabb 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -762,16 +762,17 @@ static const struct net_proto_family rxrpc_family_ops = {
762static int __init af_rxrpc_init(void) 762static int __init af_rxrpc_init(void)
763{ 763{
764 int ret = -1; 764 int ret = -1;
765 unsigned int tmp;
765 766
766 BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb)); 767 BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
767 768
768 get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch)); 769 get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch));
769 rxrpc_epoch |= RXRPC_RANDOM_EPOCH; 770 rxrpc_epoch |= RXRPC_RANDOM_EPOCH;
770 get_random_bytes(&rxrpc_client_conn_ids.cur, 771 get_random_bytes(&tmp, sizeof(tmp));
771 sizeof(rxrpc_client_conn_ids.cur)); 772 tmp &= 0x3fffffff;
772 rxrpc_client_conn_ids.cur &= 0x3fffffff; 773 if (tmp == 0)
773 if (rxrpc_client_conn_ids.cur == 0) 774 tmp = 1;
774 rxrpc_client_conn_ids.cur = 1; 775 idr_set_cursor(&rxrpc_client_conn_ids, tmp);
775 776
776 ret = -ENOMEM; 777 ret = -ENOMEM;
777 rxrpc_call_jar = kmem_cache_create( 778 rxrpc_call_jar = kmem_cache_create(
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 60ef9605167e..6cbcdcc29853 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -263,12 +263,12 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
263 * times the maximum number of client conns away from the current 263 * times the maximum number of client conns away from the current
264 * allocation point to try and keep the IDs concentrated. 264 * allocation point to try and keep the IDs concentrated.
265 */ 265 */
266 id_cursor = READ_ONCE(rxrpc_client_conn_ids.cur); 266 id_cursor = idr_get_cursor(&rxrpc_client_conn_ids);
267 id = conn->proto.cid >> RXRPC_CIDSHIFT; 267 id = conn->proto.cid >> RXRPC_CIDSHIFT;
268 distance = id - id_cursor; 268 distance = id - id_cursor;
269 if (distance < 0) 269 if (distance < 0)
270 distance = -distance; 270 distance = -distance;
271 limit = round_up(rxrpc_max_client_connections, IDR_SIZE) * 4; 271 limit = max(rxrpc_max_client_connections * 4, 1024U);
272 if (distance > limit) 272 if (distance > limit)
273 goto mark_dont_reuse; 273 goto mark_dont_reuse;
274 274
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 44fb8d893c7d..1d87b5453ef7 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1053,7 +1053,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
1053 1053
1054 ASSERT(!irqs_disabled()); 1054 ASSERT(!irqs_disabled());
1055 1055
1056 skb = skb_recv_datagram(udp_sk, 0, 1, &ret); 1056 skb = skb_recv_udp(udp_sk, 0, 1, &ret);
1057 if (!skb) { 1057 if (!skb) {
1058 if (ret == -EAGAIN) 1058 if (ret == -EAGAIN)
1059 return; 1059 return;
@@ -1075,10 +1075,9 @@ void rxrpc_data_ready(struct sock *udp_sk)
1075 1075
1076 __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); 1076 __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0);
1077 1077
1078 /* The socket buffer we have is owned by UDP, with UDP's data all over 1078 /* The UDP protocol already released all skb resources;
1079 * it, but we really want our own data there. 1079 * we are free to add our own data there.
1080 */ 1080 */
1081 skb_orphan(skb);
1082 sp = rxrpc_skb(skb); 1081 sp = rxrpc_skb(skb);
1083 1082
1084 /* dig out the RxRPC connection details */ 1083 /* dig out the RxRPC connection details */
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f893d180da1c..2095c83ce773 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -41,8 +41,7 @@ static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p)
41 spin_lock_bh(&hinfo->lock); 41 spin_lock_bh(&hinfo->lock);
42 hlist_del(&p->tcfa_head); 42 hlist_del(&p->tcfa_head);
43 spin_unlock_bh(&hinfo->lock); 43 spin_unlock_bh(&hinfo->lock);
44 gen_kill_estimator(&p->tcfa_bstats, 44 gen_kill_estimator(&p->tcfa_rate_est);
45 &p->tcfa_rate_est);
46 /* 45 /*
47 * gen_estimator est_timer() might access p->tcfa_lock 46 * gen_estimator est_timer() might access p->tcfa_lock
48 * or bstats, wait a RCU grace period before freeing p 47 * or bstats, wait a RCU grace period before freeing p
@@ -237,8 +236,7 @@ EXPORT_SYMBOL(tcf_hash_check);
237void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est) 236void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
238{ 237{
239 if (est) 238 if (est)
240 gen_kill_estimator(&a->tcfa_bstats, 239 gen_kill_estimator(&a->tcfa_rate_est);
241 &a->tcfa_rate_est);
242 call_rcu(&a->tcfa_rcu, free_tcf); 240 call_rcu(&a->tcfa_rcu, free_tcf);
243} 241}
244EXPORT_SYMBOL(tcf_hash_cleanup); 242EXPORT_SYMBOL(tcf_hash_cleanup);
@@ -670,8 +668,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
670 goto errout; 668 goto errout;
671 669
672 if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 || 670 if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
673 gnet_stats_copy_rate_est(&d, &p->tcfa_bstats, 671 gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
674 &p->tcfa_rate_est) < 0 ||
675 gnet_stats_copy_queue(&d, p->cpu_qstats, 672 gnet_stats_copy_queue(&d, p->cpu_qstats,
676 &p->tcfa_qstats, 673 &p->tcfa_qstats,
677 p->tcfa_qstats.qlen) < 0) 674 p->tcfa_qstats.qlen) < 0)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 1d3960033f61..1c60317f0121 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -28,12 +28,11 @@ struct tcf_bpf_cfg {
28 struct bpf_prog *filter; 28 struct bpf_prog *filter;
29 struct sock_filter *bpf_ops; 29 struct sock_filter *bpf_ops;
30 const char *bpf_name; 30 const char *bpf_name;
31 u32 bpf_fd;
32 u16 bpf_num_ops; 31 u16 bpf_num_ops;
33 bool is_ebpf; 32 bool is_ebpf;
34}; 33};
35 34
36static int bpf_net_id; 35static unsigned int bpf_net_id;
37static struct tc_action_ops act_bpf_ops; 36static struct tc_action_ops act_bpf_ops;
38 37
39static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, 38static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
@@ -118,13 +117,19 @@ static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
118static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, 117static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
119 struct sk_buff *skb) 118 struct sk_buff *skb)
120{ 119{
121 if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd)) 120 struct nlattr *nla;
122 return -EMSGSIZE;
123 121
124 if (prog->bpf_name && 122 if (prog->bpf_name &&
125 nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) 123 nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
126 return -EMSGSIZE; 124 return -EMSGSIZE;
127 125
126 nla = nla_reserve(skb, TCA_ACT_BPF_DIGEST,
127 sizeof(prog->filter->digest));
128 if (nla == NULL)
129 return -EMSGSIZE;
130
131 memcpy(nla_data(nla), prog->filter->digest, nla_len(nla));
132
128 return 0; 133 return 0;
129} 134}
130 135
@@ -226,16 +231,13 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
226 return PTR_ERR(fp); 231 return PTR_ERR(fp);
227 232
228 if (tb[TCA_ACT_BPF_NAME]) { 233 if (tb[TCA_ACT_BPF_NAME]) {
229 name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]), 234 name = nla_memdup(tb[TCA_ACT_BPF_NAME], GFP_KERNEL);
230 nla_len(tb[TCA_ACT_BPF_NAME]),
231 GFP_KERNEL);
232 if (!name) { 235 if (!name) {
233 bpf_prog_put(fp); 236 bpf_prog_put(fp);
234 return -ENOMEM; 237 return -ENOMEM;
235 } 238 }
236 } 239 }
237 240
238 cfg->bpf_fd = bpf_fd;
239 cfg->bpf_name = name; 241 cfg->bpf_name = name;
240 cfg->filter = fp; 242 cfg->filter = fp;
241 cfg->is_ebpf = true; 243 cfg->is_ebpf = true;
@@ -334,8 +336,6 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
334 336
335 if (cfg.bpf_num_ops) 337 if (cfg.bpf_num_ops)
336 prog->bpf_num_ops = cfg.bpf_num_ops; 338 prog->bpf_num_ops = cfg.bpf_num_ops;
337 if (cfg.bpf_fd)
338 prog->bpf_fd = cfg.bpf_fd;
339 339
340 prog->tcf_action = parm->action; 340 prog->tcf_action = parm->action;
341 rcu_assign_pointer(prog->filter, cfg.filter); 341 rcu_assign_pointer(prog->filter, cfg.filter);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index eae07a2e774d..ab8062909962 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -30,7 +30,7 @@
30 30
31#define CONNMARK_TAB_MASK 3 31#define CONNMARK_TAB_MASK 3
32 32
33static int connmark_net_id; 33static unsigned int connmark_net_id;
34static struct tc_action_ops act_connmark_ops; 34static struct tc_action_ops act_connmark_ops;
35 35
36static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, 36static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index e0defcef376d..a0edd80a44db 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -42,7 +42,7 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
42 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, 42 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
43}; 43};
44 44
45static int csum_net_id; 45static unsigned int csum_net_id;
46static struct tc_action_ops act_csum_ops; 46static struct tc_action_ops act_csum_ops;
47 47
48static int tcf_csum_init(struct net *net, struct nlattr *nla, 48static int tcf_csum_init(struct net *net, struct nlattr *nla,
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e0aa30f83c6c..e6c874a2b283 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -25,7 +25,7 @@
25 25
26#define GACT_TAB_MASK 15 26#define GACT_TAB_MASK 15
27 27
28static int gact_net_id; 28static unsigned int gact_net_id;
29static struct tc_action_ops act_gact_ops; 29static struct tc_action_ops act_gact_ops;
30 30
31#ifdef CONFIG_GACT_PROB 31#ifdef CONFIG_GACT_PROB
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 95c463cbb9a6..80b848d3f096 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -35,7 +35,7 @@
35 35
36#define IFE_TAB_MASK 15 36#define IFE_TAB_MASK 15
37 37
38static int ife_net_id; 38static unsigned int ife_net_id;
39static int max_metacnt = IFE_META_MAX + 1; 39static int max_metacnt = IFE_META_MAX + 1;
40static struct tc_action_ops act_ife_ops; 40static struct tc_action_ops act_ife_ops;
41 41
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 378c1c976058..992ef8d624f1 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -30,10 +30,10 @@
30 30
31#define IPT_TAB_MASK 15 31#define IPT_TAB_MASK 15
32 32
33static int ipt_net_id; 33static unsigned int ipt_net_id;
34static struct tc_action_ops act_ipt_ops; 34static struct tc_action_ops act_ipt_ops;
35 35
36static int xt_net_id; 36static unsigned int xt_net_id;
37static struct tc_action_ops act_xt_ops; 37static struct tc_action_ops act_xt_ops;
38 38
39static int ipt_init_target(struct xt_entry_target *t, char *table, 39static int ipt_init_target(struct xt_entry_target *t, char *table,
@@ -213,6 +213,12 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
213 int ret = 0, result = 0; 213 int ret = 0, result = 0;
214 struct tcf_ipt *ipt = to_ipt(a); 214 struct tcf_ipt *ipt = to_ipt(a);
215 struct xt_action_param par; 215 struct xt_action_param par;
216 struct nf_hook_state state = {
217 .net = dev_net(skb->dev),
218 .in = skb->dev,
219 .hook = ipt->tcfi_hook,
220 .pf = NFPROTO_IPV4,
221 };
216 222
217 if (skb_unclone(skb, GFP_ATOMIC)) 223 if (skb_unclone(skb, GFP_ATOMIC))
218 return TC_ACT_UNSPEC; 224 return TC_ACT_UNSPEC;
@@ -226,13 +232,9 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
226 * worry later - danger - this API seems to have changed 232 * worry later - danger - this API seems to have changed
227 * from earlier kernels 233 * from earlier kernels
228 */ 234 */
229 par.net = dev_net(skb->dev); 235 par.state = &state;
230 par.in = skb->dev;
231 par.out = NULL;
232 par.hooknum = ipt->tcfi_hook;
233 par.target = ipt->tcfi_t->u.kernel.target; 236 par.target = ipt->tcfi_t->u.kernel.target;
234 par.targinfo = ipt->tcfi_t->data; 237 par.targinfo = ipt->tcfi_t->data;
235 par.family = NFPROTO_IPV4;
236 ret = par.target->target(skb, &par); 238 ret = par.target->target(skb, &par);
237 239
238 switch (ret) { 240 switch (ret) {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6b07fba5770b..2d9fa6e0a1b4 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -21,6 +21,7 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/gfp.h> 23#include <linux/gfp.h>
24#include <linux/if_arp.h>
24#include <net/net_namespace.h> 25#include <net/net_namespace.h>
25#include <net/netlink.h> 26#include <net/netlink.h>
26#include <net/pkt_sched.h> 27#include <net/pkt_sched.h>
@@ -33,6 +34,25 @@
33static LIST_HEAD(mirred_list); 34static LIST_HEAD(mirred_list);
34static DEFINE_SPINLOCK(mirred_list_lock); 35static DEFINE_SPINLOCK(mirred_list_lock);
35 36
37static bool tcf_mirred_is_act_redirect(int action)
38{
39 return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR;
40}
41
42static u32 tcf_mirred_act_direction(int action)
43{
44 switch (action) {
45 case TCA_EGRESS_REDIR:
46 case TCA_EGRESS_MIRROR:
47 return AT_EGRESS;
48 case TCA_INGRESS_REDIR:
49 case TCA_INGRESS_MIRROR:
50 return AT_INGRESS;
51 default:
52 BUG();
53 }
54}
55
36static void tcf_mirred_release(struct tc_action *a, int bind) 56static void tcf_mirred_release(struct tc_action *a, int bind)
37{ 57{
38 struct tcf_mirred *m = to_mirred(a); 58 struct tcf_mirred *m = to_mirred(a);
@@ -51,7 +71,7 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
51 [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, 71 [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
52}; 72};
53 73
54static int mirred_net_id; 74static unsigned int mirred_net_id;
55static struct tc_action_ops act_mirred_ops; 75static struct tc_action_ops act_mirred_ops;
56 76
57static int tcf_mirred_init(struct net *net, struct nlattr *nla, 77static int tcf_mirred_init(struct net *net, struct nlattr *nla,
@@ -60,11 +80,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
60{ 80{
61 struct tc_action_net *tn = net_generic(net, mirred_net_id); 81 struct tc_action_net *tn = net_generic(net, mirred_net_id);
62 struct nlattr *tb[TCA_MIRRED_MAX + 1]; 82 struct nlattr *tb[TCA_MIRRED_MAX + 1];
83 bool mac_header_xmit = false;
63 struct tc_mirred *parm; 84 struct tc_mirred *parm;
64 struct tcf_mirred *m; 85 struct tcf_mirred *m;
65 struct net_device *dev; 86 struct net_device *dev;
66 int ret, ok_push = 0;
67 bool exists = false; 87 bool exists = false;
88 int ret;
68 89
69 if (nla == NULL) 90 if (nla == NULL)
70 return -EINVAL; 91 return -EINVAL;
@@ -82,6 +103,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
82 switch (parm->eaction) { 103 switch (parm->eaction) {
83 case TCA_EGRESS_MIRROR: 104 case TCA_EGRESS_MIRROR:
84 case TCA_EGRESS_REDIR: 105 case TCA_EGRESS_REDIR:
106 case TCA_INGRESS_REDIR:
107 case TCA_INGRESS_MIRROR:
85 break; 108 break;
86 default: 109 default:
87 if (exists) 110 if (exists)
@@ -95,19 +118,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
95 tcf_hash_release(*a, bind); 118 tcf_hash_release(*a, bind);
96 return -ENODEV; 119 return -ENODEV;
97 } 120 }
98 switch (dev->type) { 121 mac_header_xmit = dev_is_mac_header_xmit(dev);
99 case ARPHRD_TUNNEL:
100 case ARPHRD_TUNNEL6:
101 case ARPHRD_SIT:
102 case ARPHRD_IPGRE:
103 case ARPHRD_VOID:
104 case ARPHRD_NONE:
105 ok_push = 0;
106 break;
107 default:
108 ok_push = 1;
109 break;
110 }
111 } else { 122 } else {
112 dev = NULL; 123 dev = NULL;
113 } 124 }
@@ -136,7 +147,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
136 dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); 147 dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
137 dev_hold(dev); 148 dev_hold(dev);
138 rcu_assign_pointer(m->tcfm_dev, dev); 149 rcu_assign_pointer(m->tcfm_dev, dev);
139 m->tcfm_ok_push = ok_push; 150 m->tcfm_mac_header_xmit = mac_header_xmit;
140 } 151 }
141 152
142 if (ret == ACT_P_CREATED) { 153 if (ret == ACT_P_CREATED) {
@@ -153,15 +164,20 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
153 struct tcf_result *res) 164 struct tcf_result *res)
154{ 165{
155 struct tcf_mirred *m = to_mirred(a); 166 struct tcf_mirred *m = to_mirred(a);
167 bool m_mac_header_xmit;
156 struct net_device *dev; 168 struct net_device *dev;
157 struct sk_buff *skb2; 169 struct sk_buff *skb2;
158 int retval, err; 170 int retval, err = 0;
171 int m_eaction;
172 int mac_len;
159 u32 at; 173 u32 at;
160 174
161 tcf_lastuse_update(&m->tcf_tm); 175 tcf_lastuse_update(&m->tcf_tm);
162 bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); 176 bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
163 177
164 rcu_read_lock(); 178 rcu_read_lock();
179 m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
180 m_eaction = READ_ONCE(m->tcfm_eaction);
165 retval = READ_ONCE(m->tcf_action); 181 retval = READ_ONCE(m->tcf_action);
166 dev = rcu_dereference(m->tcfm_dev); 182 dev = rcu_dereference(m->tcfm_dev);
167 if (unlikely(!dev)) { 183 if (unlikely(!dev)) {
@@ -180,23 +196,36 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
180 if (!skb2) 196 if (!skb2)
181 goto out; 197 goto out;
182 198
183 if (!(at & AT_EGRESS)) { 199 /* If action's target direction differs than filter's direction,
184 if (m->tcfm_ok_push) 200 * and devices expect a mac header on xmit, then mac push/pull is
201 * needed.
202 */
203 if (at != tcf_mirred_act_direction(m_eaction) && m_mac_header_xmit) {
204 if (at & AT_EGRESS) {
205 /* caught at egress, act ingress: pull mac */
206 mac_len = skb_network_header(skb) - skb_mac_header(skb);
207 skb_pull_rcsum(skb2, mac_len);
208 } else {
209 /* caught at ingress, act egress: push mac */
185 skb_push_rcsum(skb2, skb->mac_len); 210 skb_push_rcsum(skb2, skb->mac_len);
211 }
186 } 212 }
187 213
188 /* mirror is always swallowed */ 214 /* mirror is always swallowed */
189 if (m->tcfm_eaction != TCA_EGRESS_MIRROR) 215 if (tcf_mirred_is_act_redirect(m_eaction))
190 skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); 216 skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
191 217
192 skb2->skb_iif = skb->dev->ifindex; 218 skb2->skb_iif = skb->dev->ifindex;
193 skb2->dev = dev; 219 skb2->dev = dev;
194 err = dev_queue_xmit(skb2); 220 if (tcf_mirred_act_direction(m_eaction) & AT_EGRESS)
221 err = dev_queue_xmit(skb2);
222 else
223 err = netif_receive_skb(skb2);
195 224
196 if (err) { 225 if (err) {
197out: 226out:
198 qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats)); 227 qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
199 if (m->tcfm_eaction != TCA_EGRESS_MIRROR) 228 if (tcf_mirred_is_act_redirect(m_eaction))
200 retval = TC_ACT_SHOT; 229 retval = TC_ACT_SHOT;
201 } 230 }
202 rcu_read_unlock(); 231 rcu_read_unlock();
@@ -286,6 +315,17 @@ static struct notifier_block mirred_device_notifier = {
286 .notifier_call = mirred_device_event, 315 .notifier_call = mirred_device_event,
287}; 316};
288 317
318static int tcf_mirred_device(const struct tc_action *a, struct net *net,
319 struct net_device **mirred_dev)
320{
321 int ifindex = tcf_mirred_ifindex(a);
322
323 *mirred_dev = __dev_get_by_index(net, ifindex);
324 if (!*mirred_dev)
325 return -EINVAL;
326 return 0;
327}
328
289static struct tc_action_ops act_mirred_ops = { 329static struct tc_action_ops act_mirred_ops = {
290 .kind = "mirred", 330 .kind = "mirred",
291 .type = TCA_ACT_MIRRED, 331 .type = TCA_ACT_MIRRED,
@@ -298,6 +338,7 @@ static struct tc_action_ops act_mirred_ops = {
298 .walk = tcf_mirred_walker, 338 .walk = tcf_mirred_walker,
299 .lookup = tcf_mirred_search, 339 .lookup = tcf_mirred_search,
300 .size = sizeof(struct tcf_mirred), 340 .size = sizeof(struct tcf_mirred),
341 .get_dev = tcf_mirred_device,
301}; 342};
302 343
303static __net_init int mirred_init_net(struct net *net) 344static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 8e8b0cc30704..9b6aec665495 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -31,7 +31,7 @@
31 31
32#define NAT_TAB_MASK 15 32#define NAT_TAB_MASK 15
33 33
34static int nat_net_id; 34static unsigned int nat_net_id;
35static struct tc_action_ops act_nat_ops; 35static struct tc_action_ops act_nat_ops;
36 36
37static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { 37static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index cf9b2fe8eac6..b27c4daec88f 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -25,7 +25,7 @@
25 25
26#define PEDIT_TAB_MASK 15 26#define PEDIT_TAB_MASK 15
27 27
28static int pedit_net_id; 28static unsigned int pedit_net_id;
29static struct tc_action_ops act_pedit_ops; 29static struct tc_action_ops act_pedit_ops;
30 30
31static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { 31static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index d1bd248fe146..0ba91d1ce994 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -55,7 +55,7 @@ struct tc_police_compat {
55 55
56/* Each policer is serialized by its individual spinlock */ 56/* Each policer is serialized by its individual spinlock */
57 57
58static int police_net_id; 58static unsigned int police_net_id;
59static struct tc_action_ops act_police_ops; 59static struct tc_action_ops act_police_ops;
60 60
61static int tcf_act_police_walker(struct net *net, struct sk_buff *skb, 61static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
@@ -142,8 +142,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
142 goto failure_unlock; 142 goto failure_unlock;
143 } else if (tb[TCA_POLICE_AVRATE] && 143 } else if (tb[TCA_POLICE_AVRATE] &&
144 (ret == ACT_P_CREATED || 144 (ret == ACT_P_CREATED ||
145 !gen_estimator_active(&police->tcf_bstats, 145 !gen_estimator_active(&police->tcf_rate_est))) {
146 &police->tcf_rate_est))) {
147 err = -EINVAL; 146 err = -EINVAL;
148 goto failure_unlock; 147 goto failure_unlock;
149 } 148 }
@@ -216,13 +215,17 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
216 bstats_update(&police->tcf_bstats, skb); 215 bstats_update(&police->tcf_bstats, skb);
217 tcf_lastuse_update(&police->tcf_tm); 216 tcf_lastuse_update(&police->tcf_tm);
218 217
219 if (police->tcfp_ewma_rate && 218 if (police->tcfp_ewma_rate) {
220 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { 219 struct gnet_stats_rate_est64 sample;
221 police->tcf_qstats.overlimits++; 220
222 if (police->tcf_action == TC_ACT_SHOT) 221 if (!gen_estimator_read(&police->tcf_rate_est, &sample) ||
223 police->tcf_qstats.drops++; 222 sample.bps >= police->tcfp_ewma_rate) {
224 spin_unlock(&police->tcf_lock); 223 police->tcf_qstats.overlimits++;
225 return police->tcf_action; 224 if (police->tcf_action == TC_ACT_SHOT)
225 police->tcf_qstats.drops++;
226 spin_unlock(&police->tcf_lock);
227 return police->tcf_action;
228 }
226 } 229 }
227 230
228 if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { 231 if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 289af6f9bb3b..823a73ad0c60 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -26,7 +26,7 @@
26 26
27#define SIMP_TAB_MASK 7 27#define SIMP_TAB_MASK 7
28 28
29static int simp_net_id; 29static unsigned int simp_net_id;
30static struct tc_action_ops act_simp_ops; 30static struct tc_action_ops act_simp_ops;
31 31
32#define SIMP_MAX_DATA 32 32#define SIMP_MAX_DATA 32
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index a133dcb82132..06ccae3c12ee 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -29,7 +29,7 @@
29 29
30#define SKBEDIT_TAB_MASK 15 30#define SKBEDIT_TAB_MASK 15
31 31
32static int skbedit_net_id; 32static unsigned int skbedit_net_id;
33static struct tc_action_ops act_skbedit_ops; 33static struct tc_action_ops act_skbedit_ops;
34 34
35static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, 35static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
@@ -46,8 +46,10 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
46 if (d->flags & SKBEDIT_F_QUEUE_MAPPING && 46 if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
47 skb->dev->real_num_tx_queues > d->queue_mapping) 47 skb->dev->real_num_tx_queues > d->queue_mapping)
48 skb_set_queue_mapping(skb, d->queue_mapping); 48 skb_set_queue_mapping(skb, d->queue_mapping);
49 if (d->flags & SKBEDIT_F_MARK) 49 if (d->flags & SKBEDIT_F_MARK) {
50 skb->mark = d->mark; 50 skb->mark &= ~d->mask;
51 skb->mark |= d->mark & d->mask;
52 }
51 if (d->flags & SKBEDIT_F_PTYPE) 53 if (d->flags & SKBEDIT_F_PTYPE)
52 skb->pkt_type = d->ptype; 54 skb->pkt_type = d->ptype;
53 55
@@ -61,6 +63,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
61 [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, 63 [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) },
62 [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) }, 64 [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) },
63 [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) }, 65 [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) },
66 [TCA_SKBEDIT_MASK] = { .len = sizeof(u32) },
64}; 67};
65 68
66static int tcf_skbedit_init(struct net *net, struct nlattr *nla, 69static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -71,7 +74,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
71 struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; 74 struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
72 struct tc_skbedit *parm; 75 struct tc_skbedit *parm;
73 struct tcf_skbedit *d; 76 struct tcf_skbedit *d;
74 u32 flags = 0, *priority = NULL, *mark = NULL; 77 u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
75 u16 *queue_mapping = NULL, *ptype = NULL; 78 u16 *queue_mapping = NULL, *ptype = NULL;
76 bool exists = false; 79 bool exists = false;
77 int ret = 0, err; 80 int ret = 0, err;
@@ -108,6 +111,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
108 mark = nla_data(tb[TCA_SKBEDIT_MARK]); 111 mark = nla_data(tb[TCA_SKBEDIT_MARK]);
109 } 112 }
110 113
114 if (tb[TCA_SKBEDIT_MASK] != NULL) {
115 flags |= SKBEDIT_F_MASK;
116 mask = nla_data(tb[TCA_SKBEDIT_MASK]);
117 }
118
111 parm = nla_data(tb[TCA_SKBEDIT_PARMS]); 119 parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
112 120
113 exists = tcf_hash_check(tn, parm->index, a, bind); 121 exists = tcf_hash_check(tn, parm->index, a, bind);
@@ -145,6 +153,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
145 d->mark = *mark; 153 d->mark = *mark;
146 if (flags & SKBEDIT_F_PTYPE) 154 if (flags & SKBEDIT_F_PTYPE)
147 d->ptype = *ptype; 155 d->ptype = *ptype;
156 /* default behaviour is to use all the bits */
157 d->mask = 0xffffffff;
158 if (flags & SKBEDIT_F_MASK)
159 d->mask = *mask;
148 160
149 d->tcf_action = parm->action; 161 d->tcf_action = parm->action;
150 162
@@ -182,6 +194,9 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
182 if ((d->flags & SKBEDIT_F_PTYPE) && 194 if ((d->flags & SKBEDIT_F_PTYPE) &&
183 nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype)) 195 nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
184 goto nla_put_failure; 196 goto nla_put_failure;
197 if ((d->flags & SKBEDIT_F_MASK) &&
198 nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask))
199 goto nla_put_failure;
185 200
186 tcf_tm_dump(&t, &d->tcf_tm); 201 tcf_tm_dump(&t, &d->tcf_tm);
187 if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD)) 202 if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index e7d96381c908..3b7074e23024 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -22,7 +22,7 @@
22 22
23#define SKBMOD_TAB_MASK 15 23#define SKBMOD_TAB_MASK 15
24 24
25static int skbmod_net_id; 25static unsigned int skbmod_net_id;
26static struct tc_action_ops act_skbmod_ops; 26static struct tc_action_ops act_skbmod_ops;
27 27
28#define MAX_EDIT_LEN ETH_HLEN 28#define MAX_EDIT_LEN ETH_HLEN
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index af47bdf2f483..e3a58e021198 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -16,14 +16,13 @@
16#include <net/netlink.h> 16#include <net/netlink.h>
17#include <net/pkt_sched.h> 17#include <net/pkt_sched.h>
18#include <net/dst.h> 18#include <net/dst.h>
19#include <net/dst_metadata.h>
20 19
21#include <linux/tc_act/tc_tunnel_key.h> 20#include <linux/tc_act/tc_tunnel_key.h>
22#include <net/tc_act/tc_tunnel_key.h> 21#include <net/tc_act/tc_tunnel_key.h>
23 22
24#define TUNNEL_KEY_TAB_MASK 15 23#define TUNNEL_KEY_TAB_MASK 15
25 24
26static int tunnel_key_net_id; 25static unsigned int tunnel_key_net_id;
27static struct tc_action_ops act_tunnel_key_ops; 26static struct tc_action_ops act_tunnel_key_ops;
28 27
29static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, 28static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
@@ -67,6 +66,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
67 [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, 66 [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
68 [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, 67 [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) },
69 [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, 68 [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 },
69 [TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16},
70}; 70};
71 71
72static int tunnel_key_init(struct net *net, struct nlattr *nla, 72static int tunnel_key_init(struct net *net, struct nlattr *nla,
@@ -81,6 +81,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
81 struct tc_tunnel_key *parm; 81 struct tc_tunnel_key *parm;
82 struct tcf_tunnel_key *t; 82 struct tcf_tunnel_key *t;
83 bool exists = false; 83 bool exists = false;
84 __be16 dst_port = 0;
84 __be64 key_id; 85 __be64 key_id;
85 int ret = 0; 86 int ret = 0;
86 int err; 87 int err;
@@ -111,6 +112,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
111 112
112 key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); 113 key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]));
113 114
115 if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
116 dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);
117
114 if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && 118 if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
115 tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { 119 tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
116 __be32 saddr; 120 __be32 saddr;
@@ -120,7 +124,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
120 daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); 124 daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
121 125
122 metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, 126 metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
123 TUNNEL_KEY, key_id, 0); 127 dst_port, TUNNEL_KEY,
128 key_id, 0);
124 } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && 129 } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
125 tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { 130 tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
126 struct in6_addr saddr; 131 struct in6_addr saddr;
@@ -129,8 +134,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
129 saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]); 134 saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
130 daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); 135 daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
131 136
132 metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, 137 metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, dst_port,
133 TUNNEL_KEY, key_id, 0); 138 0, TUNNEL_KEY,
139 key_id, 0);
134 } 140 }
135 141
136 if (!metadata) { 142 if (!metadata) {
@@ -258,7 +264,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
258 264
259 if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || 265 if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
260 tunnel_key_dump_addresses(skb, 266 tunnel_key_dump_addresses(skb,
261 &params->tcft_enc_metadata->u.tun_info)) 267 &params->tcft_enc_metadata->u.tun_info) ||
268 nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst))
262 goto nla_put_failure; 269 goto nla_put_failure;
263 } 270 }
264 271
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index b57fcbcefea1..19e0dba305ce 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -21,7 +21,7 @@
21 21
22#define VLAN_TAB_MASK 15 22#define VLAN_TAB_MASK 15
23 23
24static int vlan_net_id; 24static unsigned int vlan_net_id;
25static struct tc_action_ops act_vlan_ops; 25static struct tc_action_ops act_vlan_ops;
26 26
27static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, 27static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b05d4a2155b0..3fbba79a4ef0 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -682,6 +682,30 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
682} 682}
683EXPORT_SYMBOL(tcf_exts_dump_stats); 683EXPORT_SYMBOL(tcf_exts_dump_stats);
684 684
685int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
686 struct net_device **hw_dev)
687{
688#ifdef CONFIG_NET_CLS_ACT
689 const struct tc_action *a;
690 LIST_HEAD(actions);
691
692 if (tc_no_actions(exts))
693 return -EINVAL;
694
695 tcf_exts_to_list(exts, &actions);
696 list_for_each_entry(a, &actions, list) {
697 if (a->ops->get_dev) {
698 a->ops->get_dev(a, dev_net(dev), hw_dev);
699 break;
700 }
701 }
702 if (*hw_dev)
703 return 0;
704#endif
705 return -EOPNOTSUPP;
706}
707EXPORT_SYMBOL(tcf_exts_get_dev);
708
685static int __init tc_filter_init(void) 709static int __init tc_filter_init(void)
686{ 710{
687 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL); 711 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 0a47ba5e6109..adc776048d1a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -45,10 +45,7 @@ struct cls_bpf_prog {
45 u32 gen_flags; 45 u32 gen_flags;
46 struct tcf_exts exts; 46 struct tcf_exts exts;
47 u32 handle; 47 u32 handle;
48 union { 48 u16 bpf_num_ops;
49 u32 bpf_fd;
50 u16 bpf_num_ops;
51 };
52 struct sock_filter *bpf_ops; 49 struct sock_filter *bpf_ops;
53 const char *bpf_name; 50 const char *bpf_name;
54 struct tcf_proto *tp; 51 struct tcf_proto *tp;
@@ -244,7 +241,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
244 return 0; 241 return 0;
245} 242}
246 243
247static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) 244static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
248{ 245{
249 tcf_exts_destroy(&prog->exts); 246 tcf_exts_destroy(&prog->exts);
250 247
@@ -258,22 +255,22 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
258 kfree(prog); 255 kfree(prog);
259} 256}
260 257
261static void __cls_bpf_delete_prog(struct rcu_head *rcu) 258static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
262{ 259{
263 struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu); 260 __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu));
264
265 cls_bpf_delete_prog(prog->tp, prog);
266} 261}
267 262
268static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) 263static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
269{ 264{
270 struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;
271
272 cls_bpf_stop_offload(tp, prog); 265 cls_bpf_stop_offload(tp, prog);
273 list_del_rcu(&prog->link); 266 list_del_rcu(&prog->link);
274 tcf_unbind_filter(tp, &prog->res); 267 tcf_unbind_filter(tp, &prog->res);
275 call_rcu(&prog->rcu, __cls_bpf_delete_prog); 268 call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
269}
276 270
271static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
272{
273 __cls_bpf_delete(tp, (struct cls_bpf_prog *) arg);
277 return 0; 274 return 0;
278} 275}
279 276
@@ -285,12 +282,8 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
285 if (!force && !list_empty(&head->plist)) 282 if (!force && !list_empty(&head->plist))
286 return false; 283 return false;
287 284
288 list_for_each_entry_safe(prog, tmp, &head->plist, link) { 285 list_for_each_entry_safe(prog, tmp, &head->plist, link)
289 cls_bpf_stop_offload(tp, prog); 286 __cls_bpf_delete(tp, prog);
290 list_del_rcu(&prog->link);
291 tcf_unbind_filter(tp, &prog->res);
292 call_rcu(&prog->rcu, __cls_bpf_delete_prog);
293 }
294 287
295 kfree_rcu(head, rcu); 288 kfree_rcu(head, rcu);
296 return true; 289 return true;
@@ -365,9 +358,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
365 return PTR_ERR(fp); 358 return PTR_ERR(fp);
366 359
367 if (tb[TCA_BPF_NAME]) { 360 if (tb[TCA_BPF_NAME]) {
368 name = kmemdup(nla_data(tb[TCA_BPF_NAME]), 361 name = nla_memdup(tb[TCA_BPF_NAME], GFP_KERNEL);
369 nla_len(tb[TCA_BPF_NAME]),
370 GFP_KERNEL);
371 if (!name) { 362 if (!name) {
372 bpf_prog_put(fp); 363 bpf_prog_put(fp);
373 return -ENOMEM; 364 return -ENOMEM;
@@ -375,7 +366,6 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
375 } 366 }
376 367
377 prog->bpf_ops = NULL; 368 prog->bpf_ops = NULL;
378 prog->bpf_fd = bpf_fd;
379 prog->bpf_name = name; 369 prog->bpf_name = name;
380 prog->filter = fp; 370 prog->filter = fp;
381 371
@@ -517,14 +507,14 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
517 507
518 ret = cls_bpf_offload(tp, prog, oldprog); 508 ret = cls_bpf_offload(tp, prog, oldprog);
519 if (ret) { 509 if (ret) {
520 cls_bpf_delete_prog(tp, prog); 510 __cls_bpf_delete_prog(prog);
521 return ret; 511 return ret;
522 } 512 }
523 513
524 if (oldprog) { 514 if (oldprog) {
525 list_replace_rcu(&oldprog->link, &prog->link); 515 list_replace_rcu(&oldprog->link, &prog->link);
526 tcf_unbind_filter(tp, &oldprog->res); 516 tcf_unbind_filter(tp, &oldprog->res);
527 call_rcu(&oldprog->rcu, __cls_bpf_delete_prog); 517 call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
528 } else { 518 } else {
529 list_add_rcu(&prog->link, &head->plist); 519 list_add_rcu(&prog->link, &head->plist);
530 } 520 }
@@ -559,13 +549,18 @@ static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
559static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, 549static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
560 struct sk_buff *skb) 550 struct sk_buff *skb)
561{ 551{
562 if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd)) 552 struct nlattr *nla;
563 return -EMSGSIZE;
564 553
565 if (prog->bpf_name && 554 if (prog->bpf_name &&
566 nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name)) 555 nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
567 return -EMSGSIZE; 556 return -EMSGSIZE;
568 557
558 nla = nla_reserve(skb, TCA_BPF_DIGEST, sizeof(prog->filter->digest));
559 if (nla == NULL)
560 return -EMSGSIZE;
561
562 memcpy(nla_data(nla), prog->filter->digest, nla_len(nla));
563
569 return 0; 564 return 0;
570} 565}
571 566
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 904442421db3..333f8e268431 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -39,11 +39,13 @@ struct fl_flow_key {
39 struct flow_dissector_key_ipv6_addrs ipv6; 39 struct flow_dissector_key_ipv6_addrs ipv6;
40 }; 40 };
41 struct flow_dissector_key_ports tp; 41 struct flow_dissector_key_ports tp;
42 struct flow_dissector_key_icmp icmp;
42 struct flow_dissector_key_keyid enc_key_id; 43 struct flow_dissector_key_keyid enc_key_id;
43 union { 44 union {
44 struct flow_dissector_key_ipv4_addrs enc_ipv4; 45 struct flow_dissector_key_ipv4_addrs enc_ipv4;
45 struct flow_dissector_key_ipv6_addrs enc_ipv6; 46 struct flow_dissector_key_ipv6_addrs enc_ipv6;
46 }; 47 };
48 struct flow_dissector_key_ports enc_tp;
47} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ 49} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
48 50
49struct fl_flow_mask_range { 51struct fl_flow_mask_range {
@@ -81,6 +83,8 @@ struct cls_fl_filter {
81 u32 handle; 83 u32 handle;
82 u32 flags; 84 u32 flags;
83 struct rcu_head rcu; 85 struct rcu_head rcu;
86 struct tc_to_netdev tc;
87 struct net_device *hw_dev;
84}; 88};
85 89
86static unsigned short int fl_mask_range(const struct fl_flow_mask *mask) 90static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
@@ -159,6 +163,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
159 } 163 }
160 164
161 skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); 165 skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
166 skb_key.enc_tp.src = key->tp_src;
167 skb_key.enc_tp.dst = key->tp_dst;
162 } 168 }
163 169
164 skb_key.indev_ifindex = skb->skb_iif; 170 skb_key.indev_ifindex = skb->skb_iif;
@@ -202,75 +208,90 @@ static void fl_destroy_filter(struct rcu_head *head)
202 kfree(f); 208 kfree(f);
203} 209}
204 210
205static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie) 211static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
206{ 212{
207 struct net_device *dev = tp->q->dev_queue->dev;
208 struct tc_cls_flower_offload offload = {0}; 213 struct tc_cls_flower_offload offload = {0};
209 struct tc_to_netdev tc; 214 struct net_device *dev = f->hw_dev;
215 struct tc_to_netdev *tc = &f->tc;
210 216
211 if (!tc_should_offload(dev, tp, 0)) 217 if (!tc_can_offload(dev, tp))
212 return; 218 return;
213 219
214 offload.command = TC_CLSFLOWER_DESTROY; 220 offload.command = TC_CLSFLOWER_DESTROY;
215 offload.cookie = cookie; 221 offload.cookie = (unsigned long)f;
216 222
217 tc.type = TC_SETUP_CLSFLOWER; 223 tc->type = TC_SETUP_CLSFLOWER;
218 tc.cls_flower = &offload; 224 tc->cls_flower = &offload;
219 225
220 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); 226 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
221} 227}
222 228
223static int fl_hw_replace_filter(struct tcf_proto *tp, 229static int fl_hw_replace_filter(struct tcf_proto *tp,
224 struct flow_dissector *dissector, 230 struct flow_dissector *dissector,
225 struct fl_flow_key *mask, 231 struct fl_flow_key *mask,
226 struct fl_flow_key *key, 232 struct cls_fl_filter *f)
227 struct tcf_exts *actions,
228 unsigned long cookie, u32 flags)
229{ 233{
230 struct net_device *dev = tp->q->dev_queue->dev; 234 struct net_device *dev = tp->q->dev_queue->dev;
231 struct tc_cls_flower_offload offload = {0}; 235 struct tc_cls_flower_offload offload = {0};
232 struct tc_to_netdev tc; 236 struct tc_to_netdev *tc = &f->tc;
233 int err; 237 int err;
234 238
235 if (!tc_should_offload(dev, tp, flags)) 239 if (!tc_can_offload(dev, tp)) {
236 return tc_skip_sw(flags) ? -EINVAL : 0; 240 if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
241 (f->hw_dev && !tc_can_offload(f->hw_dev, tp))) {
242 f->hw_dev = dev;
243 return tc_skip_sw(f->flags) ? -EINVAL : 0;
244 }
245 dev = f->hw_dev;
246 tc->egress_dev = true;
247 } else {
248 f->hw_dev = dev;
249 }
237 250
238 offload.command = TC_CLSFLOWER_REPLACE; 251 offload.command = TC_CLSFLOWER_REPLACE;
239 offload.cookie = cookie; 252 offload.cookie = (unsigned long)f;
240 offload.dissector = dissector; 253 offload.dissector = dissector;
241 offload.mask = mask; 254 offload.mask = mask;
242 offload.key = key; 255 offload.key = &f->mkey;
243 offload.exts = actions; 256 offload.exts = &f->exts;
244 257
245 tc.type = TC_SETUP_CLSFLOWER; 258 tc->type = TC_SETUP_CLSFLOWER;
246 tc.cls_flower = &offload; 259 tc->cls_flower = &offload;
247 260
248 err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, 261 err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
249 &tc); 262 tc);
250 263
251 if (tc_skip_sw(flags)) 264 if (tc_skip_sw(f->flags))
252 return err; 265 return err;
253
254 return 0; 266 return 0;
255} 267}
256 268
257static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) 269static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
258{ 270{
259 struct net_device *dev = tp->q->dev_queue->dev;
260 struct tc_cls_flower_offload offload = {0}; 271 struct tc_cls_flower_offload offload = {0};
261 struct tc_to_netdev tc; 272 struct net_device *dev = f->hw_dev;
273 struct tc_to_netdev *tc = &f->tc;
262 274
263 if (!tc_should_offload(dev, tp, 0)) 275 if (!tc_can_offload(dev, tp))
264 return; 276 return;
265 277
266 offload.command = TC_CLSFLOWER_STATS; 278 offload.command = TC_CLSFLOWER_STATS;
267 offload.cookie = (unsigned long)f; 279 offload.cookie = (unsigned long)f;
268 offload.exts = &f->exts; 280 offload.exts = &f->exts;
269 281
270 tc.type = TC_SETUP_CLSFLOWER; 282 tc->type = TC_SETUP_CLSFLOWER;
271 tc.cls_flower = &offload; 283 tc->cls_flower = &offload;
284
285 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
286}
272 287
273 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); 288static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
289{
290 list_del_rcu(&f->list);
291 if (!tc_skip_hw(f->flags))
292 fl_hw_destroy_filter(tp, f);
293 tcf_unbind_filter(tp, &f->res);
294 call_rcu(&f->rcu, fl_destroy_filter);
274} 295}
275 296
276static void fl_destroy_sleepable(struct work_struct *work) 297static void fl_destroy_sleepable(struct work_struct *work)
@@ -299,14 +320,12 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
299 if (!force && !list_empty(&head->filters)) 320 if (!force && !list_empty(&head->filters))
300 return false; 321 return false;
301 322
302 list_for_each_entry_safe(f, next, &head->filters, list) { 323 list_for_each_entry_safe(f, next, &head->filters, list)
303 fl_hw_destroy_filter(tp, (unsigned long)f); 324 __fl_delete(tp, f);
304 list_del_rcu(&f->list);
305 call_rcu(&f->rcu, fl_destroy_filter);
306 }
307 325
308 __module_get(THIS_MODULE); 326 __module_get(THIS_MODULE);
309 call_rcu(&head->rcu, fl_destroy_rcu); 327 call_rcu(&head->rcu, fl_destroy_rcu);
328
310 return true; 329 return true;
311} 330}
312 331
@@ -360,6 +379,24 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
360 [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 }, 379 [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 },
361 [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 }, 380 [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 },
362 [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 }, 381 [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 },
382 [TCA_FLOWER_KEY_SCTP_SRC_MASK] = { .type = NLA_U16 },
383 [TCA_FLOWER_KEY_SCTP_DST_MASK] = { .type = NLA_U16 },
384 [TCA_FLOWER_KEY_SCTP_SRC] = { .type = NLA_U16 },
385 [TCA_FLOWER_KEY_SCTP_DST] = { .type = NLA_U16 },
386 [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT] = { .type = NLA_U16 },
387 [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NLA_U16 },
388 [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NLA_U16 },
389 [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NLA_U16 },
390 [TCA_FLOWER_KEY_FLAGS] = { .type = NLA_U32 },
391 [TCA_FLOWER_KEY_FLAGS_MASK] = { .type = NLA_U32 },
392 [TCA_FLOWER_KEY_ICMPV4_TYPE] = { .type = NLA_U8 },
393 [TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 },
394 [TCA_FLOWER_KEY_ICMPV4_CODE] = { .type = NLA_U8 },
395 [TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NLA_U8 },
396 [TCA_FLOWER_KEY_ICMPV6_TYPE] = { .type = NLA_U8 },
397 [TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 },
398 [TCA_FLOWER_KEY_ICMPV6_CODE] = { .type = NLA_U8 },
399 [TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 },
363}; 400};
364 401
365static void fl_set_key_val(struct nlattr **tb, 402static void fl_set_key_val(struct nlattr **tb,
@@ -394,10 +431,43 @@ static void fl_set_key_vlan(struct nlattr **tb,
394 } 431 }
395} 432}
396 433
434static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
435 u32 *dissector_key, u32 *dissector_mask,
436 u32 flower_flag_bit, u32 dissector_flag_bit)
437{
438 if (flower_mask & flower_flag_bit) {
439 *dissector_mask |= dissector_flag_bit;
440 if (flower_key & flower_flag_bit)
441 *dissector_key |= dissector_flag_bit;
442 }
443}
444
445static int fl_set_key_flags(struct nlattr **tb,
446 u32 *flags_key, u32 *flags_mask)
447{
448 u32 key, mask;
449
450 /* mask is mandatory for flags */
451 if (!tb[TCA_FLOWER_KEY_FLAGS_MASK])
452 return -EINVAL;
453
454 key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
455 mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
456
457 *flags_key = 0;
458 *flags_mask = 0;
459
460 fl_set_key_flag(key, mask, flags_key, flags_mask,
461 TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
462
463 return 0;
464}
465
397static int fl_set_key(struct net *net, struct nlattr **tb, 466static int fl_set_key(struct net *net, struct nlattr **tb,
398 struct fl_flow_key *key, struct fl_flow_key *mask) 467 struct fl_flow_key *key, struct fl_flow_key *mask)
399{ 468{
400 __be16 ethertype; 469 __be16 ethertype;
470 int ret = 0;
401#ifdef CONFIG_NET_CLS_IND 471#ifdef CONFIG_NET_CLS_IND
402 if (tb[TCA_FLOWER_INDEV]) { 472 if (tb[TCA_FLOWER_INDEV]) {
403 int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]); 473 int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
@@ -439,6 +509,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
439 509
440 if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) { 510 if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
441 key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 511 key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
512 mask->control.addr_type = ~0;
442 fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, 513 fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
443 &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, 514 &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
444 sizeof(key->ipv4.src)); 515 sizeof(key->ipv4.src));
@@ -447,6 +518,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
447 sizeof(key->ipv4.dst)); 518 sizeof(key->ipv4.dst));
448 } else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) { 519 } else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
449 key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 520 key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
521 mask->control.addr_type = ~0;
450 fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, 522 fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
451 &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, 523 &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
452 sizeof(key->ipv6.src)); 524 sizeof(key->ipv6.src));
@@ -469,11 +541,39 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
469 fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, 541 fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
470 &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, 542 &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
471 sizeof(key->tp.dst)); 543 sizeof(key->tp.dst));
544 } else if (key->basic.ip_proto == IPPROTO_SCTP) {
545 fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
546 &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
547 sizeof(key->tp.src));
548 fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
549 &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
550 sizeof(key->tp.dst));
551 } else if (key->basic.n_proto == htons(ETH_P_IP) &&
552 key->basic.ip_proto == IPPROTO_ICMP) {
553 fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV4_TYPE,
554 &mask->icmp.type,
555 TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
556 sizeof(key->icmp.type));
557 fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
558 &mask->icmp.code,
559 TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
560 sizeof(key->icmp.code));
561 } else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
562 key->basic.ip_proto == IPPROTO_ICMPV6) {
563 fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV6_TYPE,
564 &mask->icmp.type,
565 TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
566 sizeof(key->icmp.type));
567 fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
568 &mask->icmp.code,
569 TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
570 sizeof(key->icmp.code));
472 } 571 }
473 572
474 if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || 573 if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
475 tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) { 574 tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
476 key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 575 key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
576 mask->enc_control.addr_type = ~0;
477 fl_set_key_val(tb, &key->enc_ipv4.src, 577 fl_set_key_val(tb, &key->enc_ipv4.src,
478 TCA_FLOWER_KEY_ENC_IPV4_SRC, 578 TCA_FLOWER_KEY_ENC_IPV4_SRC,
479 &mask->enc_ipv4.src, 579 &mask->enc_ipv4.src,
@@ -489,6 +589,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
489 if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] || 589 if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
490 tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) { 590 tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
491 key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 591 key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
592 mask->enc_control.addr_type = ~0;
492 fl_set_key_val(tb, &key->enc_ipv6.src, 593 fl_set_key_val(tb, &key->enc_ipv6.src,
493 TCA_FLOWER_KEY_ENC_IPV6_SRC, 594 TCA_FLOWER_KEY_ENC_IPV6_SRC,
494 &mask->enc_ipv6.src, 595 &mask->enc_ipv6.src,
@@ -505,7 +606,18 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
505 &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC, 606 &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC,
506 sizeof(key->enc_key_id.keyid)); 607 sizeof(key->enc_key_id.keyid));
507 608
508 return 0; 609 fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
610 &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
611 sizeof(key->enc_tp.src));
612
613 fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
614 &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
615 sizeof(key->enc_tp.dst));
616
617 if (tb[TCA_FLOWER_KEY_FLAGS])
618 ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
619
620 return ret;
509} 621}
510 622
511static bool fl_mask_eq(struct fl_flow_mask *mask1, 623static bool fl_mask_eq(struct fl_flow_mask *mask1,
@@ -571,7 +683,21 @@ static void fl_init_dissector(struct cls_fl_head *head,
571 FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, 683 FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
572 FLOW_DISSECTOR_KEY_PORTS, tp); 684 FLOW_DISSECTOR_KEY_PORTS, tp);
573 FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, 685 FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
686 FLOW_DISSECTOR_KEY_ICMP, icmp);
687 FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
574 FLOW_DISSECTOR_KEY_VLAN, vlan); 688 FLOW_DISSECTOR_KEY_VLAN, vlan);
689 FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
690 FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
691 FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
692 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
693 FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
694 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6);
695 if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) ||
696 FL_KEY_IS_MASKED(&mask->key, enc_ipv6))
697 FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL,
698 enc_control);
699 FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
700 FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
575 701
576 skb_flow_dissector_init(&head->dissector, keys, cnt); 702 skb_flow_dissector_init(&head->dissector, keys, cnt);
577} 703}
@@ -721,21 +847,21 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
721 goto errout; 847 goto errout;
722 } 848 }
723 849
724 err = fl_hw_replace_filter(tp, 850 if (!tc_skip_hw(fnew->flags)) {
725 &head->dissector, 851 err = fl_hw_replace_filter(tp,
726 &mask.key, 852 &head->dissector,
727 &fnew->key, 853 &mask.key,
728 &fnew->exts, 854 fnew);
729 (unsigned long)fnew, 855 if (err)
730 fnew->flags); 856 goto errout;
731 if (err) 857 }
732 goto errout;
733 858
734 if (fold) { 859 if (fold) {
735 if (!tc_skip_sw(fold->flags)) 860 if (!tc_skip_sw(fold->flags))
736 rhashtable_remove_fast(&head->ht, &fold->ht_node, 861 rhashtable_remove_fast(&head->ht, &fold->ht_node,
737 head->ht_params); 862 head->ht_params);
738 fl_hw_destroy_filter(tp, (unsigned long)fold); 863 if (!tc_skip_hw(fold->flags))
864 fl_hw_destroy_filter(tp, fold);
739 } 865 }
740 866
741 *arg = (unsigned long) fnew; 867 *arg = (unsigned long) fnew;
@@ -764,10 +890,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg)
764 if (!tc_skip_sw(f->flags)) 890 if (!tc_skip_sw(f->flags))
765 rhashtable_remove_fast(&head->ht, &f->ht_node, 891 rhashtable_remove_fast(&head->ht, &f->ht_node,
766 head->ht_params); 892 head->ht_params);
767 list_del_rcu(&f->list); 893 __fl_delete(tp, f);
768 fl_hw_destroy_filter(tp, (unsigned long)f);
769 tcf_unbind_filter(tp, &f->res);
770 call_rcu(&f->rcu, fl_destroy_filter);
771 return 0; 894 return 0;
772} 895}
773 896
@@ -830,6 +953,42 @@ static int fl_dump_key_vlan(struct sk_buff *skb,
830 return 0; 953 return 0;
831} 954}
832 955
956static void fl_get_key_flag(u32 dissector_key, u32 dissector_mask,
957 u32 *flower_key, u32 *flower_mask,
958 u32 flower_flag_bit, u32 dissector_flag_bit)
959{
960 if (dissector_mask & dissector_flag_bit) {
961 *flower_mask |= flower_flag_bit;
962 if (dissector_key & dissector_flag_bit)
963 *flower_key |= flower_flag_bit;
964 }
965}
966
967static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
968{
969 u32 key, mask;
970 __be32 _key, _mask;
971 int err;
972
973 if (!memchr_inv(&flags_mask, 0, sizeof(flags_mask)))
974 return 0;
975
976 key = 0;
977 mask = 0;
978
979 fl_get_key_flag(flags_key, flags_mask, &key, &mask,
980 TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
981
982 _key = cpu_to_be32(key);
983 _mask = cpu_to_be32(mask);
984
985 err = nla_put(skb, TCA_FLOWER_KEY_FLAGS, 4, &_key);
986 if (err)
987 return err;
988
989 return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
990}
991
833static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, 992static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
834 struct sk_buff *skb, struct tcmsg *t) 993 struct sk_buff *skb, struct tcmsg *t)
835{ 994{
@@ -862,7 +1021,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
862 goto nla_put_failure; 1021 goto nla_put_failure;
863 } 1022 }
864 1023
865 fl_hw_update_stats(tp, f); 1024 if (!tc_skip_hw(f->flags))
1025 fl_hw_update_stats(tp, f);
866 1026
867 if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, 1027 if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
868 mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, 1028 mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
@@ -918,6 +1078,36 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
918 &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, 1078 &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
919 sizeof(key->tp.dst)))) 1079 sizeof(key->tp.dst))))
920 goto nla_put_failure; 1080 goto nla_put_failure;
1081 else if (key->basic.ip_proto == IPPROTO_SCTP &&
1082 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
1083 &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
1084 sizeof(key->tp.src)) ||
1085 fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
1086 &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
1087 sizeof(key->tp.dst))))
1088 goto nla_put_failure;
1089 else if (key->basic.n_proto == htons(ETH_P_IP) &&
1090 key->basic.ip_proto == IPPROTO_ICMP &&
1091 (fl_dump_key_val(skb, &key->icmp.type,
1092 TCA_FLOWER_KEY_ICMPV4_TYPE, &mask->icmp.type,
1093 TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
1094 sizeof(key->icmp.type)) ||
1095 fl_dump_key_val(skb, &key->icmp.code,
1096 TCA_FLOWER_KEY_ICMPV4_CODE, &mask->icmp.code,
1097 TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
1098 sizeof(key->icmp.code))))
1099 goto nla_put_failure;
1100 else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
1101 key->basic.ip_proto == IPPROTO_ICMPV6 &&
1102 (fl_dump_key_val(skb, &key->icmp.type,
1103 TCA_FLOWER_KEY_ICMPV6_TYPE, &mask->icmp.type,
1104 TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
1105 sizeof(key->icmp.type)) ||
1106 fl_dump_key_val(skb, &key->icmp.code,
1107 TCA_FLOWER_KEY_ICMPV6_CODE, &mask->icmp.code,
1108 TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
1109 sizeof(key->icmp.code))))
1110 goto nla_put_failure;
921 1111
922 if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && 1112 if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
923 (fl_dump_key_val(skb, &key->enc_ipv4.src, 1113 (fl_dump_key_val(skb, &key->enc_ipv4.src,
@@ -943,7 +1133,20 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
943 1133
944 if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, 1134 if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
945 &mask->enc_key_id, TCA_FLOWER_UNSPEC, 1135 &mask->enc_key_id, TCA_FLOWER_UNSPEC,
946 sizeof(key->enc_key_id))) 1136 sizeof(key->enc_key_id)) ||
1137 fl_dump_key_val(skb, &key->enc_tp.src,
1138 TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
1139 &mask->enc_tp.src,
1140 TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
1141 sizeof(key->enc_tp.src)) ||
1142 fl_dump_key_val(skb, &key->enc_tp.dst,
1143 TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
1144 &mask->enc_tp.dst,
1145 TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
1146 sizeof(key->enc_tp.dst)))
1147 goto nla_put_failure;
1148
1149 if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
947 goto nla_put_failure; 1150 goto nla_put_failure;
948 1151
949 nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); 1152 nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index c66ca9400ab4..c1b23e3060b8 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -57,17 +57,20 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
57 struct xt_action_param acpar; 57 struct xt_action_param acpar;
58 const struct xt_set_info *set = (const void *) em->data; 58 const struct xt_set_info *set = (const void *) em->data;
59 struct net_device *dev, *indev = NULL; 59 struct net_device *dev, *indev = NULL;
60 struct nf_hook_state state = {
61 .net = em->net,
62 };
60 int ret, network_offset; 63 int ret, network_offset;
61 64
62 switch (tc_skb_protocol(skb)) { 65 switch (tc_skb_protocol(skb)) {
63 case htons(ETH_P_IP): 66 case htons(ETH_P_IP):
64 acpar.family = NFPROTO_IPV4; 67 state.pf = NFPROTO_IPV4;
65 if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) 68 if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
66 return 0; 69 return 0;
67 acpar.thoff = ip_hdrlen(skb); 70 acpar.thoff = ip_hdrlen(skb);
68 break; 71 break;
69 case htons(ETH_P_IPV6): 72 case htons(ETH_P_IPV6):
70 acpar.family = NFPROTO_IPV6; 73 state.pf = NFPROTO_IPV6;
71 if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) 74 if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
72 return 0; 75 return 0;
73 /* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */ 76 /* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */
@@ -77,9 +80,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
77 return 0; 80 return 0;
78 } 81 }
79 82
80 acpar.hooknum = 0; 83 opt.family = state.pf;
81
82 opt.family = acpar.family;
83 opt.dim = set->dim; 84 opt.dim = set->dim;
84 opt.flags = set->flags; 85 opt.flags = set->flags;
85 opt.cmdflags = 0; 86 opt.cmdflags = 0;
@@ -95,9 +96,9 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
95 if (skb->skb_iif) 96 if (skb->skb_iif)
96 indev = dev_get_by_index_rcu(em->net, skb->skb_iif); 97 indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
97 98
98 acpar.net = em->net; 99 state.in = indev ? indev : dev;
99 acpar.in = indev ? indev : dev; 100 state.out = dev;
100 acpar.out = dev; 101 acpar.state = &state;
101 102
102 ret = ip_set_test(set->index, skb, &acpar, &opt); 103 ret = ip_set_test(set->index, skb, &acpar, &opt);
103 104
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index a309a07ccb35..41c80b6c3906 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -176,11 +176,12 @@ META_COLLECTOR(int_vlan_tag)
176{ 176{
177 unsigned short tag; 177 unsigned short tag;
178 178
179 tag = skb_vlan_tag_get(skb); 179 if (skb_vlan_tag_present(skb))
180 if (!tag && __vlan_get_tag(skb, &tag)) 180 dst->value = skb_vlan_tag_get(skb);
181 *err = -1; 181 else if (!__vlan_get_tag(skb, &tag))
182 else
183 dst->value = tag; 182 dst->value = tag;
183 else
184 *err = -1;
184} 185}
185 186
186 187
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 206dc24add3a..d7b93429f0cc 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -960,6 +960,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
960 960
961 sch->handle = handle; 961 sch->handle = handle;
962 962
963 /* This exist to keep backward compatible with a userspace
964 * loophole, what allowed userspace to get IFF_NO_QUEUE
965 * facility on older kernels by setting tx_queue_len=0 (prior
966 * to qdisc init), and then forgot to reinit tx_queue_len
967 * before again attaching a qdisc.
968 */
969 if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
970 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
971 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
972 }
973
963 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { 974 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
964 if (qdisc_is_percpu_stats(sch)) { 975 if (qdisc_is_percpu_stats(sch)) {
965 sch->cpu_bstats = 976 sch->cpu_bstats =
@@ -1384,7 +1395,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1384 1395
1385 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), 1396 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
1386 &d, cpu_bstats, &q->bstats) < 0 || 1397 &d, cpu_bstats, &q->bstats) < 0 ||
1387 gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 || 1398 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
1388 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) 1399 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
1389 goto nla_put_failure; 1400 goto nla_put_failure;
1390 1401
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index beb554aa8cfb..f1207582cbf3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -122,7 +122,7 @@ struct cbq_class {
122 psched_time_t penalized; 122 psched_time_t penalized;
123 struct gnet_stats_basic_packed bstats; 123 struct gnet_stats_basic_packed bstats;
124 struct gnet_stats_queue qstats; 124 struct gnet_stats_queue qstats;
125 struct gnet_stats_rate_est64 rate_est; 125 struct net_rate_estimator __rcu *rate_est;
126 struct tc_cbq_xstats xstats; 126 struct tc_cbq_xstats xstats;
127 127
128 struct tcf_proto __rcu *filter_list; 128 struct tcf_proto __rcu *filter_list;
@@ -509,7 +509,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
509 if (delay) { 509 if (delay) {
510 ktime_t time; 510 ktime_t time;
511 511
512 time = ktime_set(0, 0); 512 time = 0;
513 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay)); 513 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
514 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED); 514 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
515 } 515 }
@@ -1346,7 +1346,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1346 1346
1347 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), 1347 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1348 d, NULL, &cl->bstats) < 0 || 1348 d, NULL, &cl->bstats) < 0 ||
1349 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || 1349 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1350 gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0) 1350 gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
1351 return -1; 1351 return -1;
1352 1352
@@ -1405,7 +1405,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1405 tcf_destroy_chain(&cl->filter_list); 1405 tcf_destroy_chain(&cl->filter_list);
1406 qdisc_destroy(cl->q); 1406 qdisc_destroy(cl->q);
1407 qdisc_put_rtab(cl->R_tab); 1407 qdisc_put_rtab(cl->R_tab);
1408 gen_kill_estimator(&cl->bstats, &cl->rate_est); 1408 gen_kill_estimator(&cl->rate_est);
1409 if (cl != &q->link) 1409 if (cl != &q->link)
1410 kfree(cl); 1410 kfree(cl);
1411} 1411}
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 8af5c59eef84..bb4cbdf75004 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -25,7 +25,7 @@ struct drr_class {
25 25
26 struct gnet_stats_basic_packed bstats; 26 struct gnet_stats_basic_packed bstats;
27 struct gnet_stats_queue qstats; 27 struct gnet_stats_queue qstats;
28 struct gnet_stats_rate_est64 rate_est; 28 struct net_rate_estimator __rcu *rate_est;
29 struct list_head alist; 29 struct list_head alist;
30 struct Qdisc *qdisc; 30 struct Qdisc *qdisc;
31 31
@@ -142,7 +142,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
142 142
143static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) 143static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
144{ 144{
145 gen_kill_estimator(&cl->bstats, &cl->rate_est); 145 gen_kill_estimator(&cl->rate_est);
146 qdisc_destroy(cl->qdisc); 146 qdisc_destroy(cl->qdisc);
147 kfree(cl); 147 kfree(cl);
148} 148}
@@ -283,7 +283,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
283 283
284 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), 284 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
285 d, NULL, &cl->bstats) < 0 || 285 d, NULL, &cl->bstats) < 0 ||
286 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || 286 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
287 gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0) 287 gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0)
288 return -1; 288 return -1;
289 289
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 18e752439f6f..a4f738ac7728 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -136,7 +136,7 @@ static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
136 struct fq_flow *aux; 136 struct fq_flow *aux;
137 137
138 parent = *p; 138 parent = *p;
139 aux = container_of(parent, struct fq_flow, rate_node); 139 aux = rb_entry(parent, struct fq_flow, rate_node);
140 if (f->time_next_packet >= aux->time_next_packet) 140 if (f->time_next_packet >= aux->time_next_packet)
141 p = &parent->rb_right; 141 p = &parent->rb_right;
142 else 142 else
@@ -188,7 +188,7 @@ static void fq_gc(struct fq_sched_data *q,
188 while (*p) { 188 while (*p) {
189 parent = *p; 189 parent = *p;
190 190
191 f = container_of(parent, struct fq_flow, fq_node); 191 f = rb_entry(parent, struct fq_flow, fq_node);
192 if (f->sk == sk) 192 if (f->sk == sk)
193 break; 193 break;
194 194
@@ -245,7 +245,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
245 skb_orphan(skb); 245 skb_orphan(skb);
246 } 246 }
247 247
248 root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)]; 248 root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)];
249 249
250 if (q->flows >= (2U << q->fq_trees_log) && 250 if (q->flows >= (2U << q->fq_trees_log) &&
251 q->inactive_flows > q->flows/2) 251 q->inactive_flows > q->flows/2)
@@ -256,7 +256,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
256 while (*p) { 256 while (*p) {
257 parent = *p; 257 parent = *p;
258 258
259 f = container_of(parent, struct fq_flow, fq_node); 259 f = rb_entry(parent, struct fq_flow, fq_node);
260 if (f->sk == sk) { 260 if (f->sk == sk) {
261 /* socket might have been reallocated, so check 261 /* socket might have been reallocated, so check
262 * if its sk_hash is the same. 262 * if its sk_hash is the same.
@@ -424,7 +424,7 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now)
424 424
425 q->time_next_delayed_flow = ~0ULL; 425 q->time_next_delayed_flow = ~0ULL;
426 while ((p = rb_first(&q->delayed)) != NULL) { 426 while ((p = rb_first(&q->delayed)) != NULL) {
427 struct fq_flow *f = container_of(p, struct fq_flow, rate_node); 427 struct fq_flow *f = rb_entry(p, struct fq_flow, rate_node);
428 428
429 if (f->time_next_packet > now) { 429 if (f->time_next_packet > now) {
430 q->time_next_delayed_flow = f->time_next_packet; 430 q->time_next_delayed_flow = f->time_next_packet;
@@ -563,7 +563,7 @@ static void fq_reset(struct Qdisc *sch)
563 for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { 563 for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
564 root = &q->fq_root[idx]; 564 root = &q->fq_root[idx];
565 while ((p = rb_first(root)) != NULL) { 565 while ((p = rb_first(root)) != NULL) {
566 f = container_of(p, struct fq_flow, fq_node); 566 f = rb_entry(p, struct fq_flow, fq_node);
567 rb_erase(p, root); 567 rb_erase(p, root);
568 568
569 fq_flow_purge(f); 569 fq_flow_purge(f);
@@ -593,20 +593,20 @@ static void fq_rehash(struct fq_sched_data *q,
593 oroot = &old_array[idx]; 593 oroot = &old_array[idx];
594 while ((op = rb_first(oroot)) != NULL) { 594 while ((op = rb_first(oroot)) != NULL) {
595 rb_erase(op, oroot); 595 rb_erase(op, oroot);
596 of = container_of(op, struct fq_flow, fq_node); 596 of = rb_entry(op, struct fq_flow, fq_node);
597 if (fq_gc_candidate(of)) { 597 if (fq_gc_candidate(of)) {
598 fcnt++; 598 fcnt++;
599 kmem_cache_free(fq_flow_cachep, of); 599 kmem_cache_free(fq_flow_cachep, of);
600 continue; 600 continue;
601 } 601 }
602 nroot = &new_array[hash_32((u32)(long)of->sk, new_log)]; 602 nroot = &new_array[hash_ptr(of->sk, new_log)];
603 603
604 np = &nroot->rb_node; 604 np = &nroot->rb_node;
605 parent = NULL; 605 parent = NULL;
606 while (*np) { 606 while (*np) {
607 parent = *np; 607 parent = *np;
608 608
609 nf = container_of(parent, struct fq_flow, fq_node); 609 nf = rb_entry(parent, struct fq_flow, fq_node);
610 BUG_ON(nf->sk == of->sk); 610 BUG_ON(nf->sk == of->sk);
611 611
612 if (nf->sk > of->sk) 612 if (nf->sk > of->sk)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6cfb6e9038c2..6eb9c8e88519 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -709,7 +709,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
709 709
710 qdisc_put_stab(rtnl_dereference(qdisc->stab)); 710 qdisc_put_stab(rtnl_dereference(qdisc->stab));
711#endif 711#endif
712 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); 712 gen_kill_estimator(&qdisc->rate_est);
713 if (ops->reset) 713 if (ops->reset)
714 ops->reset(qdisc); 714 ops->reset(qdisc);
715 if (ops->destroy) 715 if (ops->destroy)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 000f1d36128e..3ffaa6fb0990 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -114,7 +114,7 @@ struct hfsc_class {
114 114
115 struct gnet_stats_basic_packed bstats; 115 struct gnet_stats_basic_packed bstats;
116 struct gnet_stats_queue qstats; 116 struct gnet_stats_queue qstats;
117 struct gnet_stats_rate_est64 rate_est; 117 struct net_rate_estimator __rcu *rate_est;
118 struct tcf_proto __rcu *filter_list; /* filter list */ 118 struct tcf_proto __rcu *filter_list; /* filter list */
119 unsigned int filter_cnt; /* filter count */ 119 unsigned int filter_cnt; /* filter count */
120 unsigned int level; /* class level in hierarchy */ 120 unsigned int level; /* class level in hierarchy */
@@ -1091,7 +1091,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
1091 1091
1092 tcf_destroy_chain(&cl->filter_list); 1092 tcf_destroy_chain(&cl->filter_list);
1093 qdisc_destroy(cl->qdisc); 1093 qdisc_destroy(cl->qdisc);
1094 gen_kill_estimator(&cl->bstats, &cl->rate_est); 1094 gen_kill_estimator(&cl->rate_est);
1095 if (cl != &q->root) 1095 if (cl != &q->root)
1096 kfree(cl); 1096 kfree(cl);
1097} 1097}
@@ -1348,7 +1348,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1348 xstats.rtwork = cl->cl_cumul; 1348 xstats.rtwork = cl->cl_cumul;
1349 1349
1350 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || 1350 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
1351 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || 1351 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1352 gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0) 1352 gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0)
1353 return -1; 1353 return -1;
1354 1354
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c798d0de8a9d..760f39e7caee 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -111,7 +111,7 @@ struct htb_class {
111 unsigned int children; 111 unsigned int children;
112 struct htb_class *parent; /* parent class */ 112 struct htb_class *parent; /* parent class */
113 113
114 struct gnet_stats_rate_est64 rate_est; 114 struct net_rate_estimator __rcu *rate_est;
115 115
116 /* 116 /*
117 * Written often fields 117 * Written often fields
@@ -1145,7 +1145,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1145 1145
1146 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), 1146 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1147 d, NULL, &cl->bstats) < 0 || 1147 d, NULL, &cl->bstats) < 0 ||
1148 gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 || 1148 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1149 gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0) 1149 gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1150 return -1; 1150 return -1;
1151 1151
@@ -1228,7 +1228,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1228 WARN_ON(!cl->un.leaf.q); 1228 WARN_ON(!cl->un.leaf.q);
1229 qdisc_destroy(cl->un.leaf.q); 1229 qdisc_destroy(cl->un.leaf.q);
1230 } 1230 }
1231 gen_kill_estimator(&cl->bstats, &cl->rate_est); 1231 gen_kill_estimator(&cl->rate_est);
1232 tcf_destroy_chain(&cl->filter_list); 1232 tcf_destroy_chain(&cl->filter_list);
1233 kfree(cl); 1233 kfree(cl);
1234} 1234}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 9f7b380cf0a3..bcfadfdea8e0 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -152,7 +152,7 @@ struct netem_skb_cb {
152 152
153static struct sk_buff *netem_rb_to_skb(struct rb_node *rb) 153static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
154{ 154{
155 return container_of(rb, struct sk_buff, rbnode); 155 return rb_entry(rb, struct sk_buff, rbnode);
156} 156}
157 157
158static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) 158static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
@@ -627,7 +627,7 @@ deliver:
627 * from the network (tstamp will be updated). 627 * from the network (tstamp will be updated).
628 */ 628 */
629 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) 629 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
630 skb->tstamp.tv64 = 0; 630 skb->tstamp = 0;
631#endif 631#endif
632 632
633 if (q->qdisc) { 633 if (q->qdisc) {
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index ca0516e6f743..f9e712ce2d15 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -137,7 +137,7 @@ struct qfq_class {
137 137
138 struct gnet_stats_basic_packed bstats; 138 struct gnet_stats_basic_packed bstats;
139 struct gnet_stats_queue qstats; 139 struct gnet_stats_queue qstats;
140 struct gnet_stats_rate_est64 rate_est; 140 struct net_rate_estimator __rcu *rate_est;
141 struct Qdisc *qdisc; 141 struct Qdisc *qdisc;
142 struct list_head alist; /* Link for active-classes list. */ 142 struct list_head alist; /* Link for active-classes list. */
143 struct qfq_aggregate *agg; /* Parent aggregate. */ 143 struct qfq_aggregate *agg; /* Parent aggregate. */
@@ -508,7 +508,7 @@ set_change_agg:
508 new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL); 508 new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
509 if (new_agg == NULL) { 509 if (new_agg == NULL) {
510 err = -ENOBUFS; 510 err = -ENOBUFS;
511 gen_kill_estimator(&cl->bstats, &cl->rate_est); 511 gen_kill_estimator(&cl->rate_est);
512 goto destroy_class; 512 goto destroy_class;
513 } 513 }
514 sch_tree_lock(sch); 514 sch_tree_lock(sch);
@@ -533,7 +533,7 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
533 struct qfq_sched *q = qdisc_priv(sch); 533 struct qfq_sched *q = qdisc_priv(sch);
534 534
535 qfq_rm_from_agg(q, cl); 535 qfq_rm_from_agg(q, cl);
536 gen_kill_estimator(&cl->bstats, &cl->rate_est); 536 gen_kill_estimator(&cl->rate_est);
537 qdisc_destroy(cl->qdisc); 537 qdisc_destroy(cl->qdisc);
538 kfree(cl); 538 kfree(cl);
539} 539}
@@ -667,7 +667,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
667 667
668 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), 668 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
669 d, NULL, &cl->bstats) < 0 || 669 d, NULL, &cl->bstats) < 0 ||
670 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || 670 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
671 gnet_stats_copy_queue(d, NULL, 671 gnet_stats_copy_queue(d, NULL,
672 &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0) 672 &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
673 return -1; 673 return -1;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 2cd9b4478b92..b0196366d58d 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -418,9 +418,6 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
418 struct teql_master *m = netdev_priv(dev); 418 struct teql_master *m = netdev_priv(dev);
419 struct Qdisc *q; 419 struct Qdisc *q;
420 420
421 if (new_mtu < 68)
422 return -EINVAL;
423
424 q = m->slaves; 421 q = m->slaves;
425 if (q) { 422 if (q) {
426 do { 423 do {
@@ -460,6 +457,8 @@ static __init void teql_master_setup(struct net_device *dev)
460 dev->netdev_ops = &teql_netdev_ops; 457 dev->netdev_ops = &teql_netdev_ops;
461 dev->type = ARPHRD_VOID; 458 dev->type = ARPHRD_VOID;
462 dev->mtu = 1500; 459 dev->mtu = 1500;
460 dev->min_mtu = 68;
461 dev->max_mtu = 65535;
463 dev->tx_queue_len = 100; 462 dev->tx_queue_len = 100;
464 dev->flags = IFF_NOARP; 463 dev->flags = IFF_NOARP;
465 dev->hard_header_len = LL_MAX_HEADER; 464 dev->hard_header_len = LL_MAX_HEADER;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index f10d3397f917..d3cc30c25c41 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -700,11 +700,15 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
700 /* Set the peer's active state. */ 700 /* Set the peer's active state. */
701 peer->state = peer_state; 701 peer->state = peer_state;
702 702
703 /* Add this peer into the transport hashtable */
704 if (sctp_hash_transport(peer)) {
705 sctp_transport_free(peer);
706 return NULL;
707 }
708
703 /* Attach the remote transport to our asoc. */ 709 /* Attach the remote transport to our asoc. */
704 list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list); 710 list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
705 asoc->peer.transport_count++; 711 asoc->peer.transport_count++;
706 /* Add this peer into the transport hashtable */
707 sctp_hash_transport(peer);
708 712
709 /* If we do not yet have a primary path, set one. */ 713 /* If we do not yet have a primary path, set one. */
710 if (!asoc->peer.primary_path) { 714 if (!asoc->peer.primary_path) {
@@ -1467,7 +1471,7 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
1467 * threshold. The idea is to recover slowly, but up 1471 * threshold. The idea is to recover slowly, but up
1468 * to the initial advertised window. 1472 * to the initial advertised window.
1469 */ 1473 */
1470 if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) { 1474 if (asoc->rwnd_press) {
1471 int change = min(asoc->pathmtu, asoc->rwnd_press); 1475 int change = min(asoc->pathmtu, asoc->rwnd_press);
1472 asoc->rwnd += change; 1476 asoc->rwnd += change;
1473 asoc->rwnd_press -= change; 1477 asoc->rwnd_press -= change;
@@ -1535,7 +1539,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
1535 asoc->rwnd = 0; 1539 asoc->rwnd = 0;
1536 } 1540 }
1537 } else { 1541 } else {
1538 asoc->rwnd_over = len - asoc->rwnd; 1542 asoc->rwnd_over += len - asoc->rwnd;
1539 asoc->rwnd = 0; 1543 asoc->rwnd = 0;
1540 } 1544 }
1541 1545
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 401c60750b20..1ebc184a0e23 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -292,6 +292,8 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
292 } 292 }
293 293
294 af->from_addr_param(&addr, rawaddr, htons(port), 0); 294 af->from_addr_param(&addr, rawaddr, htons(port), 0);
295 if (sctp_bind_addr_state(bp, &addr) != -1)
296 goto next;
295 retval = sctp_add_bind_addr(bp, &addr, sizeof(addr), 297 retval = sctp_add_bind_addr(bp, &addr, sizeof(addr),
296 SCTP_ADDR_SRC, gfp); 298 SCTP_ADDR_SRC, gfp);
297 if (retval) { 299 if (retval) {
@@ -300,6 +302,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
300 break; 302 break;
301 } 303 }
302 304
305next:
303 len = ntohs(param->length); 306 len = ntohs(param->length);
304 addrs_len -= len; 307 addrs_len -= len;
305 raw_addr_list += len; 308 raw_addr_list += len;
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 7a1cdf43e49d..615f0ddd41df 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -52,7 +52,6 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
52 atomic_set(&msg->refcnt, 1); 52 atomic_set(&msg->refcnt, 1);
53 msg->send_failed = 0; 53 msg->send_failed = 0;
54 msg->send_error = 0; 54 msg->send_error = 0;
55 msg->can_abandon = 0;
56 msg->can_delay = 1; 55 msg->can_delay = 1;
57 msg->expires_at = 0; 56 msg->expires_at = 0;
58 INIT_LIST_HEAD(&msg->chunks); 57 INIT_LIST_HEAD(&msg->chunks);
@@ -182,20 +181,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
182 /* Note: Calculate this outside of the loop, so that all fragments 181 /* Note: Calculate this outside of the loop, so that all fragments
183 * have the same expiration. 182 * have the same expiration.
184 */ 183 */
185 if (sinfo->sinfo_timetolive) { 184 if (asoc->peer.prsctp_capable && sinfo->sinfo_timetolive &&
186 /* sinfo_timetolive is in milliseconds */ 185 (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags) ||
186 !SCTP_PR_POLICY(sinfo->sinfo_flags)))
187 msg->expires_at = jiffies + 187 msg->expires_at = jiffies +
188 msecs_to_jiffies(sinfo->sinfo_timetolive); 188 msecs_to_jiffies(sinfo->sinfo_timetolive);
189 msg->can_abandon = 1;
190
191 pr_debug("%s: msg:%p expires_at:%ld jiffies:%ld\n", __func__,
192 msg, msg->expires_at, jiffies);
193 }
194
195 if (asoc->peer.prsctp_capable &&
196 SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
197 msg->expires_at =
198 jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
199 189
200 /* This is the biggest possible DATA chunk that can fit into 190 /* This is the biggest possible DATA chunk that can fit into
201 * the packet 191 * the packet
@@ -354,18 +344,8 @@ errout:
354/* Check whether this message has expired. */ 344/* Check whether this message has expired. */
355int sctp_chunk_abandoned(struct sctp_chunk *chunk) 345int sctp_chunk_abandoned(struct sctp_chunk *chunk)
356{ 346{
357 if (!chunk->asoc->peer.prsctp_capable || 347 if (!chunk->asoc->peer.prsctp_capable)
358 !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) {
359 struct sctp_datamsg *msg = chunk->msg;
360
361 if (!msg->can_abandon)
362 return 0;
363
364 if (time_after(jiffies, msg->expires_at))
365 return 1;
366
367 return 0; 348 return 0;
368 }
369 349
370 if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && 350 if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
371 time_after(jiffies, chunk->msg->expires_at)) { 351 time_after(jiffies, chunk->msg->expires_at)) {
@@ -378,6 +358,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
378 chunk->sent_count > chunk->sinfo.sinfo_timetolive) { 358 chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
379 chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; 359 chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
380 return 1; 360 return 1;
361 } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
362 chunk->msg->expires_at &&
363 time_after(jiffies, chunk->msg->expires_at)) {
364 return 1;
381 } 365 }
382 /* PRIO policy is processed by sendmsg, not here */ 366 /* PRIO policy is processed by sendmsg, not here */
383 367
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 1f03065686fe..410ddc1e3443 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -331,7 +331,9 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
331 * on this endpoint. 331 * on this endpoint.
332 */ 332 */
333 if (!ep->base.bind_addr.port) 333 if (!ep->base.bind_addr.port)
334 goto out; 334 return NULL;
335
336 rcu_read_lock();
335 t = sctp_epaddr_lookup_transport(ep, paddr); 337 t = sctp_epaddr_lookup_transport(ep, paddr);
336 if (!t) 338 if (!t)
337 goto out; 339 goto out;
@@ -339,6 +341,7 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
339 *transport = t; 341 *transport = t;
340 asoc = t->asoc; 342 asoc = t->asoc;
341out: 343out:
344 rcu_read_unlock();
342 return asoc; 345 return asoc;
343} 346}
344 347
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a01a56ec8b8c..458e506ef84b 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -790,10 +790,9 @@ hit:
790 790
791/* rhashtable for transport */ 791/* rhashtable for transport */
792struct sctp_hash_cmp_arg { 792struct sctp_hash_cmp_arg {
793 const struct sctp_endpoint *ep; 793 const union sctp_addr *paddr;
794 const union sctp_addr *laddr; 794 const struct net *net;
795 const union sctp_addr *paddr; 795 u16 lport;
796 const struct net *net;
797}; 796};
798 797
799static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, 798static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -801,7 +800,6 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
801{ 800{
802 struct sctp_transport *t = (struct sctp_transport *)ptr; 801 struct sctp_transport *t = (struct sctp_transport *)ptr;
803 const struct sctp_hash_cmp_arg *x = arg->key; 802 const struct sctp_hash_cmp_arg *x = arg->key;
804 struct sctp_association *asoc;
805 int err = 1; 803 int err = 1;
806 804
807 if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr)) 805 if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
@@ -809,19 +807,10 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
809 if (!sctp_transport_hold(t)) 807 if (!sctp_transport_hold(t))
810 return err; 808 return err;
811 809
812 asoc = t->asoc; 810 if (!net_eq(sock_net(t->asoc->base.sk), x->net))
813 if (!net_eq(sock_net(asoc->base.sk), x->net)) 811 goto out;
812 if (x->lport != htons(t->asoc->base.bind_addr.port))
814 goto out; 813 goto out;
815 if (x->ep) {
816 if (x->ep != asoc->ep)
817 goto out;
818 } else {
819 if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
820 goto out;
821 if (!sctp_bind_addr_match(&asoc->base.bind_addr,
822 x->laddr, sctp_sk(asoc->base.sk)))
823 goto out;
824 }
825 814
826 err = 0; 815 err = 0;
827out: 816out:
@@ -851,11 +840,9 @@ static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
851 const struct sctp_hash_cmp_arg *x = data; 840 const struct sctp_hash_cmp_arg *x = data;
852 const union sctp_addr *paddr = x->paddr; 841 const union sctp_addr *paddr = x->paddr;
853 const struct net *net = x->net; 842 const struct net *net = x->net;
854 u16 lport; 843 u16 lport = x->lport;
855 u32 addr; 844 u32 addr;
856 845
857 lport = x->ep ? htons(x->ep->base.bind_addr.port) :
858 x->laddr->v4.sin_port;
859 if (paddr->sa.sa_family == AF_INET6) 846 if (paddr->sa.sa_family == AF_INET6)
860 addr = jhash(&paddr->v6.sin6_addr, 16, seed); 847 addr = jhash(&paddr->v6.sin6_addr, 16, seed);
861 else 848 else
@@ -875,29 +862,32 @@ static const struct rhashtable_params sctp_hash_params = {
875 862
876int sctp_transport_hashtable_init(void) 863int sctp_transport_hashtable_init(void)
877{ 864{
878 return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params); 865 return rhltable_init(&sctp_transport_hashtable, &sctp_hash_params);
879} 866}
880 867
881void sctp_transport_hashtable_destroy(void) 868void sctp_transport_hashtable_destroy(void)
882{ 869{
883 rhashtable_destroy(&sctp_transport_hashtable); 870 rhltable_destroy(&sctp_transport_hashtable);
884} 871}
885 872
886void sctp_hash_transport(struct sctp_transport *t) 873int sctp_hash_transport(struct sctp_transport *t)
887{ 874{
888 struct sctp_hash_cmp_arg arg; 875 struct sctp_hash_cmp_arg arg;
876 int err;
889 877
890 if (t->asoc->temp) 878 if (t->asoc->temp)
891 return; 879 return 0;
892 880
893 arg.ep = t->asoc->ep;
894 arg.paddr = &t->ipaddr;
895 arg.net = sock_net(t->asoc->base.sk); 881 arg.net = sock_net(t->asoc->base.sk);
882 arg.paddr = &t->ipaddr;
883 arg.lport = htons(t->asoc->base.bind_addr.port);
896 884
897reinsert: 885 err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
898 if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg, 886 &t->node, sctp_hash_params);
899 &t->node, sctp_hash_params) == -EBUSY) 887 if (err)
900 goto reinsert; 888 pr_err_once("insert transport fail, errno %d\n", err);
889
890 return err;
901} 891}
902 892
903void sctp_unhash_transport(struct sctp_transport *t) 893void sctp_unhash_transport(struct sctp_transport *t)
@@ -905,39 +895,62 @@ void sctp_unhash_transport(struct sctp_transport *t)
905 if (t->asoc->temp) 895 if (t->asoc->temp)
906 return; 896 return;
907 897
908 rhashtable_remove_fast(&sctp_transport_hashtable, &t->node, 898 rhltable_remove(&sctp_transport_hashtable, &t->node,
909 sctp_hash_params); 899 sctp_hash_params);
910} 900}
911 901
902/* return a transport with holding it */
912struct sctp_transport *sctp_addrs_lookup_transport( 903struct sctp_transport *sctp_addrs_lookup_transport(
913 struct net *net, 904 struct net *net,
914 const union sctp_addr *laddr, 905 const union sctp_addr *laddr,
915 const union sctp_addr *paddr) 906 const union sctp_addr *paddr)
916{ 907{
908 struct rhlist_head *tmp, *list;
909 struct sctp_transport *t;
917 struct sctp_hash_cmp_arg arg = { 910 struct sctp_hash_cmp_arg arg = {
918 .ep = NULL,
919 .laddr = laddr,
920 .paddr = paddr, 911 .paddr = paddr,
921 .net = net, 912 .net = net,
913 .lport = laddr->v4.sin_port,
922 }; 914 };
923 915
924 return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg, 916 list = rhltable_lookup(&sctp_transport_hashtable, &arg,
925 sctp_hash_params); 917 sctp_hash_params);
918
919 rhl_for_each_entry_rcu(t, tmp, list, node) {
920 if (!sctp_transport_hold(t))
921 continue;
922
923 if (sctp_bind_addr_match(&t->asoc->base.bind_addr,
924 laddr, sctp_sk(t->asoc->base.sk)))
925 return t;
926 sctp_transport_put(t);
927 }
928
929 return NULL;
926} 930}
927 931
932/* return a transport without holding it, as it's only used under sock lock */
928struct sctp_transport *sctp_epaddr_lookup_transport( 933struct sctp_transport *sctp_epaddr_lookup_transport(
929 const struct sctp_endpoint *ep, 934 const struct sctp_endpoint *ep,
930 const union sctp_addr *paddr) 935 const union sctp_addr *paddr)
931{ 936{
932 struct net *net = sock_net(ep->base.sk); 937 struct net *net = sock_net(ep->base.sk);
938 struct rhlist_head *tmp, *list;
939 struct sctp_transport *t;
933 struct sctp_hash_cmp_arg arg = { 940 struct sctp_hash_cmp_arg arg = {
934 .ep = ep,
935 .paddr = paddr, 941 .paddr = paddr,
936 .net = net, 942 .net = net,
943 .lport = htons(ep->base.bind_addr.port),
937 }; 944 };
938 945
939 return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg, 946 list = rhltable_lookup(&sctp_transport_hashtable, &arg,
940 sctp_hash_params); 947 sctp_hash_params);
948
949 rhl_for_each_entry_rcu(t, tmp, list, node)
950 if (ep == t->asoc->ep)
951 return t;
952
953 return NULL;
941} 954}
942 955
943/* Look up an association. */ 956/* Look up an association. */
@@ -951,7 +964,7 @@ static struct sctp_association *__sctp_lookup_association(
951 struct sctp_association *asoc = NULL; 964 struct sctp_association *asoc = NULL;
952 965
953 t = sctp_addrs_lookup_transport(net, local, peer); 966 t = sctp_addrs_lookup_transport(net, local, peer);
954 if (!t || !sctp_transport_hold(t)) 967 if (!t)
955 goto out; 968 goto out;
956 969
957 asoc = t->asoc; 970 asoc = t->asoc;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 176af3080a2b..5ed8e79bf102 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -71,7 +71,7 @@
71#include <net/inet_ecn.h> 71#include <net/inet_ecn.h>
72#include <net/sctp/sctp.h> 72#include <net/sctp/sctp.h>
73 73
74#include <asm/uaccess.h> 74#include <linux/uaccess.h>
75 75
76static inline int sctp_v6_addr_match_len(union sctp_addr *s1, 76static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
77 union sctp_addr *s2); 77 union sctp_addr *s2);
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 6cb0df859195..f5320a87341e 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -399,186 +399,72 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
399 atomic_inc(&sk->sk_wmem_alloc); 399 atomic_inc(&sk->sk_wmem_alloc);
400} 400}
401 401
402/* All packets are sent to the network through this function from 402static int sctp_packet_pack(struct sctp_packet *packet,
403 * sctp_outq_tail(). 403 struct sk_buff *head, int gso, gfp_t gfp)
404 *
405 * The return value is a normal kernel error return value.
406 */
407int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
408{ 404{
409 struct sctp_transport *tp = packet->transport; 405 struct sctp_transport *tp = packet->transport;
410 struct sctp_association *asoc = tp->asoc; 406 struct sctp_auth_chunk *auth = NULL;
411 struct sctphdr *sh;
412 struct sk_buff *nskb = NULL, *head = NULL;
413 struct sctp_chunk *chunk, *tmp; 407 struct sctp_chunk *chunk, *tmp;
414 struct sock *sk; 408 int pkt_count = 0, pkt_size;
415 int err = 0; 409 struct sock *sk = head->sk;
416 int padding; /* How much padding do we need? */ 410 struct sk_buff *nskb;
417 int pkt_size;
418 __u8 has_data = 0;
419 int gso = 0;
420 int pktcount = 0;
421 int auth_len = 0; 411 int auth_len = 0;
422 struct dst_entry *dst;
423 unsigned char *auth = NULL; /* pointer to auth in skb data */
424
425 pr_debug("%s: packet:%p\n", __func__, packet);
426
427 /* Do NOT generate a chunkless packet. */
428 if (list_empty(&packet->chunk_list))
429 return err;
430
431 /* Set up convenience variables... */
432 chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
433 sk = chunk->skb->sk;
434 412
435 /* Allocate the head skb, or main one if not in GSO */
436 if (packet->size > tp->pathmtu && !packet->ipfragok) {
437 if (sk_can_gso(sk)) {
438 gso = 1;
439 pkt_size = packet->overhead;
440 } else {
441 /* If this happens, we trash this packet and try
442 * to build a new one, hopefully correct this
443 * time. Application may notice this error.
444 */
445 pr_err_once("Trying to GSO but underlying device doesn't support it.");
446 goto err;
447 }
448 } else {
449 pkt_size = packet->size;
450 }
451 head = alloc_skb(pkt_size + MAX_HEADER, gfp);
452 if (!head)
453 goto err;
454 if (gso) { 413 if (gso) {
455 NAPI_GRO_CB(head)->last = head;
456 skb_shinfo(head)->gso_type = sk->sk_gso_type; 414 skb_shinfo(head)->gso_type = sk->sk_gso_type;
415 NAPI_GRO_CB(head)->last = head;
416 } else {
417 nskb = head;
418 pkt_size = packet->size;
419 goto merge;
457 } 420 }
458 421
459 /* Make sure the outbound skb has enough header room reserved. */
460 skb_reserve(head, packet->overhead + MAX_HEADER);
461
462 /* Set the owning socket so that we know where to get the
463 * destination IP address.
464 */
465 sctp_packet_set_owner_w(head, sk);
466
467 if (!sctp_transport_dst_check(tp)) {
468 sctp_transport_route(tp, NULL, sctp_sk(sk));
469 if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) {
470 sctp_assoc_sync_pmtu(sk, asoc);
471 }
472 }
473 dst = dst_clone(tp->dst);
474 if (!dst) {
475 if (asoc)
476 IP_INC_STATS(sock_net(asoc->base.sk),
477 IPSTATS_MIB_OUTNOROUTES);
478 goto nodst;
479 }
480 skb_dst_set(head, dst);
481
482 /* Build the SCTP header. */
483 sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
484 skb_reset_transport_header(head);
485 sh->source = htons(packet->source_port);
486 sh->dest = htons(packet->destination_port);
487
488 /* From 6.8 Adler-32 Checksum Calculation:
489 * After the packet is constructed (containing the SCTP common
490 * header and one or more control or DATA chunks), the
491 * transmitter shall:
492 *
493 * 1) Fill in the proper Verification Tag in the SCTP common
494 * header and initialize the checksum field to 0's.
495 */
496 sh->vtag = htonl(packet->vtag);
497 sh->checksum = 0;
498
499 pr_debug("***sctp_transmit_packet***\n");
500
501 do { 422 do {
502 /* Set up convenience variables... */ 423 /* calculate the pkt_size and alloc nskb */
503 chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); 424 pkt_size = packet->overhead;
504 pktcount++; 425 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list,
505 426 list) {
506 /* Calculate packet size, so it fits in PMTU. Leave 427 int padded = SCTP_PAD4(chunk->skb->len);
507 * other chunks for the next packets.
508 */
509 if (gso) {
510 pkt_size = packet->overhead;
511 list_for_each_entry(chunk, &packet->chunk_list, list) {
512 int padded = SCTP_PAD4(chunk->skb->len);
513
514 if (chunk == packet->auth)
515 auth_len = padded;
516 else if (auth_len + padded + packet->overhead >
517 tp->pathmtu)
518 goto nomem;
519 else if (pkt_size + padded > tp->pathmtu)
520 break;
521 pkt_size += padded;
522 }
523 428
524 /* Allocate a new skb. */ 429 if (chunk == packet->auth)
525 nskb = alloc_skb(pkt_size + MAX_HEADER, gfp); 430 auth_len = padded;
526 if (!nskb) 431 else if (auth_len + padded + packet->overhead >
527 goto nomem; 432 tp->pathmtu)
528 433 return 0;
529 /* Make sure the outbound skb has enough header 434 else if (pkt_size + padded > tp->pathmtu)
530 * room reserved. 435 break;
531 */ 436 pkt_size += padded;
532 skb_reserve(nskb, packet->overhead + MAX_HEADER);
533 } else {
534 nskb = head;
535 } 437 }
438 nskb = alloc_skb(pkt_size + MAX_HEADER, gfp);
439 if (!nskb)
440 return 0;
441 skb_reserve(nskb, packet->overhead + MAX_HEADER);
536 442
537 /** 443merge:
538 * 3.2 Chunk Field Descriptions 444 /* merge chunks into nskb and append nskb into head list */
539 *
540 * The total length of a chunk (including Type, Length and
541 * Value fields) MUST be a multiple of 4 bytes. If the length
542 * of the chunk is not a multiple of 4 bytes, the sender MUST
543 * pad the chunk with all zero bytes and this padding is not
544 * included in the chunk length field. The sender should
545 * never pad with more than 3 bytes.
546 *
547 * [This whole comment explains SCTP_PAD4() below.]
548 */
549
550 pkt_size -= packet->overhead; 445 pkt_size -= packet->overhead;
551 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { 446 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
447 int padding;
448
552 list_del_init(&chunk->list); 449 list_del_init(&chunk->list);
553 if (sctp_chunk_is_data(chunk)) { 450 if (sctp_chunk_is_data(chunk)) {
554 /* 6.3.1 C4) When data is in flight and when allowed 451 if (!sctp_chunk_retransmitted(chunk) &&
555 * by rule C5, a new RTT measurement MUST be made each 452 !tp->rto_pending) {
556 * round trip. Furthermore, new RTT measurements
557 * SHOULD be made no more than once per round-trip
558 * for a given destination transport address.
559 */
560
561 if (!chunk->resent && !tp->rto_pending) {
562 chunk->rtt_in_progress = 1; 453 chunk->rtt_in_progress = 1;
563 tp->rto_pending = 1; 454 tp->rto_pending = 1;
564 } 455 }
565
566 has_data = 1;
567 } 456 }
568 457
569 padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len; 458 padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len;
570 if (padding) 459 if (padding)
571 memset(skb_put(chunk->skb, padding), 0, padding); 460 memset(skb_put(chunk->skb, padding), 0, padding);
572 461
573 /* if this is the auth chunk that we are adding,
574 * store pointer where it will be added and put
575 * the auth into the packet.
576 */
577 if (chunk == packet->auth) 462 if (chunk == packet->auth)
578 auth = skb_tail_pointer(nskb); 463 auth = (struct sctp_auth_chunk *)
464 skb_tail_pointer(nskb);
579 465
580 memcpy(skb_put(nskb, chunk->skb->len), 466 memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data,
581 chunk->skb->data, chunk->skb->len); 467 chunk->skb->len);
582 468
583 pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n", 469 pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n",
584 chunk, 470 chunk,
@@ -588,11 +474,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
588 ntohs(chunk->chunk_hdr->length), chunk->skb->len, 474 ntohs(chunk->chunk_hdr->length), chunk->skb->len,
589 chunk->rtt_in_progress); 475 chunk->rtt_in_progress);
590 476
591 /* If this is a control chunk, this is our last
592 * reference. Free data chunks after they've been
593 * acknowledged or have failed.
594 * Re-queue auth chunks if needed.
595 */
596 pkt_size -= SCTP_PAD4(chunk->skb->len); 477 pkt_size -= SCTP_PAD4(chunk->skb->len);
597 478
598 if (!sctp_chunk_is_data(chunk) && chunk != packet->auth) 479 if (!sctp_chunk_is_data(chunk) && chunk != packet->auth)
@@ -602,160 +483,161 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
602 break; 483 break;
603 } 484 }
604 485
605 /* SCTP-AUTH, Section 6.2 486 if (auth) {
606 * The sender MUST calculate the MAC as described in RFC2104 [2] 487 sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp);
607 * using the hash function H as described by the MAC Identifier and 488 /* free auth if no more chunks, or add it back */
608 * the shared association key K based on the endpoint pair shared key 489 if (list_empty(&packet->chunk_list))
609 * described by the shared key identifier. The 'data' used for the 490 sctp_chunk_free(packet->auth);
610 * computation of the AUTH-chunk is given by the AUTH chunk with its 491 else
611 * HMAC field set to zero (as shown in Figure 6) followed by all
612 * chunks that are placed after the AUTH chunk in the SCTP packet.
613 */
614 if (auth)
615 sctp_auth_calculate_hmac(asoc, nskb,
616 (struct sctp_auth_chunk *)auth,
617 gfp);
618
619 if (packet->auth) {
620 if (!list_empty(&packet->chunk_list)) {
621 /* We will generate more packets, so re-queue
622 * auth chunk.
623 */
624 list_add(&packet->auth->list, 492 list_add(&packet->auth->list,
625 &packet->chunk_list); 493 &packet->chunk_list);
626 } else {
627 sctp_chunk_free(packet->auth);
628 packet->auth = NULL;
629 }
630 } 494 }
631 495
632 if (!gso) 496 if (gso) {
633 break; 497 if (skb_gro_receive(&head, nskb)) {
634 498 kfree_skb(nskb);
635 if (skb_gro_receive(&head, nskb)) { 499 return 0;
636 kfree_skb(nskb); 500 }
637 goto nomem; 501 if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
502 sk->sk_gso_max_segs))
503 return 0;
638 } 504 }
639 nskb = NULL; 505
640 if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >= 506 pkt_count++;
641 sk->sk_gso_max_segs))
642 goto nomem;
643 } while (!list_empty(&packet->chunk_list)); 507 } while (!list_empty(&packet->chunk_list));
644 508
645 /* 2) Calculate the Adler-32 checksum of the whole packet, 509 if (gso) {
646 * including the SCTP common header and all the 510 memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
647 * chunks. 511 sizeof(struct inet6_skb_parm)));
648 * 512 skb_shinfo(head)->gso_segs = pkt_count;
649 * Note: Adler-32 is no longer applicable, as has been replaced 513 skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
650 * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. 514 rcu_read_lock();
651 * 515 if (skb_dst(head) != tp->dst) {
652 * If it's a GSO packet, it's postponed to sctp_skb_segment. 516 dst_hold(tp->dst);
653 */ 517 sk_setup_caps(sk, tp->dst);
654 if (!sctp_checksum_disable || gso) {
655 if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
656 dst_xfrm(dst) || packet->ipfragok)) {
657 sh->checksum = sctp_compute_cksum(head, 0);
658 } else {
659 /* no need to seed pseudo checksum for SCTP */
660 head->ip_summed = CHECKSUM_PARTIAL;
661 head->csum_start = skb_transport_header(head) - head->head;
662 head->csum_offset = offsetof(struct sctphdr, checksum);
663 } 518 }
519 rcu_read_unlock();
520 goto chksum;
664 } 521 }
665 522
666 /* IP layer ECN support 523 if (sctp_checksum_disable)
667 * From RFC 2481 524 return 1;
668 * "The ECN-Capable Transport (ECT) bit would be set by the
669 * data sender to indicate that the end-points of the
670 * transport protocol are ECN-capable."
671 *
672 * Now setting the ECT bit all the time, as it should not cause
673 * any problems protocol-wise even if our peer ignores it.
674 *
675 * Note: The works for IPv6 layer checks this bit too later
676 * in transmission. See IP6_ECN_flow_xmit().
677 */
678 tp->af_specific->ecn_capable(sk);
679 525
680 /* Set up the IP options. */ 526 if (!(skb_dst(head)->dev->features & NETIF_F_SCTP_CRC) ||
681 /* BUG: not implemented 527 dst_xfrm(skb_dst(head)) || packet->ipfragok) {
682 * For v4 this all lives somewhere in sk->sk_opt... 528 struct sctphdr *sh =
683 */ 529 (struct sctphdr *)skb_transport_header(head);
684 530
685 /* Dump that on IP! */ 531 sh->checksum = sctp_compute_cksum(head, 0);
686 if (asoc) { 532 } else {
687 asoc->stats.opackets += pktcount; 533chksum:
688 if (asoc->peer.last_sent_to != tp) 534 head->ip_summed = CHECKSUM_PARTIAL;
689 /* Considering the multiple CPU scenario, this is a 535 head->csum_start = skb_transport_header(head) - head->head;
690 * "correcter" place for last_sent_to. --xguo 536 head->csum_offset = offsetof(struct sctphdr, checksum);
691 */
692 asoc->peer.last_sent_to = tp;
693 } 537 }
694 538
695 if (has_data) { 539 return pkt_count;
696 struct timer_list *timer; 540}
697 unsigned long timeout; 541
542/* All packets are sent to the network through this function from
543 * sctp_outq_tail().
544 *
545 * The return value is always 0 for now.
546 */
547int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
548{
549 struct sctp_transport *tp = packet->transport;
550 struct sctp_association *asoc = tp->asoc;
551 struct sctp_chunk *chunk, *tmp;
552 int pkt_count, gso = 0;
553 struct dst_entry *dst;
554 struct sk_buff *head;
555 struct sctphdr *sh;
556 struct sock *sk;
698 557
699 /* Restart the AUTOCLOSE timer when sending data. */ 558 pr_debug("%s: packet:%p\n", __func__, packet);
700 if (sctp_state(asoc, ESTABLISHED) && 559 if (list_empty(&packet->chunk_list))
701 asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { 560 return 0;
702 timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; 561 chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
703 timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; 562 sk = chunk->skb->sk;
704 563
705 if (!mod_timer(timer, jiffies + timeout)) 564 /* check gso */
706 sctp_association_hold(asoc); 565 if (packet->size > tp->pathmtu && !packet->ipfragok) {
566 if (!sk_can_gso(sk)) {
567 pr_err_once("Trying to GSO but underlying device doesn't support it.");
568 goto out;
707 } 569 }
570 gso = 1;
571 }
572
573 /* alloc head skb */
574 head = alloc_skb((gso ? packet->overhead : packet->size) +
575 MAX_HEADER, gfp);
576 if (!head)
577 goto out;
578 skb_reserve(head, packet->overhead + MAX_HEADER);
579 sctp_packet_set_owner_w(head, sk);
580
581 /* set sctp header */
582 sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
583 skb_reset_transport_header(head);
584 sh->source = htons(packet->source_port);
585 sh->dest = htons(packet->destination_port);
586 sh->vtag = htonl(packet->vtag);
587 sh->checksum = 0;
588
589 /* update dst if in need */
590 if (!sctp_transport_dst_check(tp)) {
591 sctp_transport_route(tp, NULL, sctp_sk(sk));
592 if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE)
593 sctp_assoc_sync_pmtu(sk, asoc);
708 } 594 }
595 dst = dst_clone(tp->dst);
596 if (!dst) {
597 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
598 kfree_skb(head);
599 goto out;
600 }
601 skb_dst_set(head, dst);
709 602
603 /* pack up chunks */
604 pkt_count = sctp_packet_pack(packet, head, gso, gfp);
605 if (!pkt_count) {
606 kfree_skb(head);
607 goto out;
608 }
710 pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len); 609 pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len);
711 610
712 if (gso) { 611 /* start autoclose timer */
713 /* Cleanup our debris for IP stacks */ 612 if (packet->has_data && sctp_state(asoc, ESTABLISHED) &&
714 memset(head->cb, 0, max(sizeof(struct inet_skb_parm), 613 asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) {
715 sizeof(struct inet6_skb_parm))); 614 struct timer_list *timer =
615 &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
616 unsigned long timeout =
617 asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
716 618
717 skb_shinfo(head)->gso_segs = pktcount; 619 if (!mod_timer(timer, jiffies + timeout))
718 skb_shinfo(head)->gso_size = GSO_BY_FRAGS; 620 sctp_association_hold(asoc);
621 }
719 622
720 /* We have to refresh this in case we are xmiting to 623 /* sctp xmit */
721 * more than one transport at a time 624 tp->af_specific->ecn_capable(sk);
722 */ 625 if (asoc) {
723 rcu_read_lock(); 626 asoc->stats.opackets += pkt_count;
724 if (__sk_dst_get(sk) != tp->dst) { 627 if (asoc->peer.last_sent_to != tp)
725 dst_hold(tp->dst); 628 asoc->peer.last_sent_to = tp;
726 sk_setup_caps(sk, tp->dst);
727 }
728 rcu_read_unlock();
729 } 629 }
730 head->ignore_df = packet->ipfragok; 630 head->ignore_df = packet->ipfragok;
731 tp->af_specific->sctp_xmit(head, tp); 631 tp->af_specific->sctp_xmit(head, tp);
732 goto out;
733
734nomem:
735 if (packet->auth && list_empty(&packet->auth->list))
736 sctp_chunk_free(packet->auth);
737
738nodst:
739 /* FIXME: Returning the 'err' will effect all the associations
740 * associated with a socket, although only one of the paths of the
741 * association is unreachable.
742 * The real failure of a transport or association can be passed on
743 * to the user via notifications. So setting this error may not be
744 * required.
745 */
746 /* err = -EHOSTUNREACH; */
747 kfree_skb(head);
748 632
749err: 633out:
750 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { 634 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
751 list_del_init(&chunk->list); 635 list_del_init(&chunk->list);
752 if (!sctp_chunk_is_data(chunk)) 636 if (!sctp_chunk_is_data(chunk))
753 sctp_chunk_free(chunk); 637 sctp_chunk_free(chunk);
754 } 638 }
755
756out:
757 sctp_packet_reset(packet); 639 sctp_packet_reset(packet);
758 return err; 640 return 0;
759} 641}
760 642
761/******************************************************************** 643/********************************************************************
@@ -871,9 +753,6 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
871 rwnd = 0; 753 rwnd = 0;
872 754
873 asoc->peer.rwnd = rwnd; 755 asoc->peer.rwnd = rwnd;
874 /* Has been accepted for transmission. */
875 if (!asoc->peer.prsctp_capable)
876 chunk->msg->can_abandon = 0;
877 sctp_chunk_assign_tsn(chunk); 756 sctp_chunk_assign_tsn(chunk);
878 sctp_chunk_assign_ssn(chunk); 757 sctp_chunk_assign_ssn(chunk);
879} 758}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 582585393d35..e54082699520 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -507,8 +507,6 @@ void sctp_retransmit_mark(struct sctp_outq *q,
507 transport->rto_pending = 0; 507 transport->rto_pending = 0;
508 } 508 }
509 509
510 chunk->resent = 1;
511
512 /* Move the chunk to the retransmit queue. The chunks 510 /* Move the chunk to the retransmit queue. The chunks
513 * on the retransmit queue are always kept in order. 511 * on the retransmit queue are always kept in order.
514 */ 512 */
@@ -1439,7 +1437,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1439 * instance). 1437 * instance).
1440 */ 1438 */
1441 if (!tchunk->tsn_gap_acked && 1439 if (!tchunk->tsn_gap_acked &&
1442 !tchunk->resent && 1440 !sctp_chunk_retransmitted(tchunk) &&
1443 tchunk->rtt_in_progress) { 1441 tchunk->rtt_in_progress) {
1444 tchunk->rtt_in_progress = 0; 1442 tchunk->rtt_in_progress = 0;
1445 rtt = jiffies - tchunk->sent_at; 1443 rtt = jiffies - tchunk->sent_at;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 7b523e3f551f..616a9428e0c4 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -205,26 +205,30 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
205 list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { 205 list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) {
206 if (!addr->valid) 206 if (!addr->valid)
207 continue; 207 continue;
208 if (sctp_in_scope(net, &addr->a, scope)) { 208 if (!sctp_in_scope(net, &addr->a, scope))
209 /* Now that the address is in scope, check to see if 209 continue;
210 * the address type is really supported by the local 210
211 * sock as well as the remote peer. 211 /* Now that the address is in scope, check to see if
212 */ 212 * the address type is really supported by the local
213 if ((((AF_INET == addr->a.sa.sa_family) && 213 * sock as well as the remote peer.
214 (copy_flags & SCTP_ADDR4_PEERSUPP))) || 214 */
215 (((AF_INET6 == addr->a.sa.sa_family) && 215 if (addr->a.sa.sa_family == AF_INET &&
216 (copy_flags & SCTP_ADDR6_ALLOWED) && 216 !(copy_flags & SCTP_ADDR4_PEERSUPP))
217 (copy_flags & SCTP_ADDR6_PEERSUPP)))) { 217 continue;
218 error = sctp_add_bind_addr(bp, &addr->a, 218 if (addr->a.sa.sa_family == AF_INET6 &&
219 sizeof(addr->a), 219 (!(copy_flags & SCTP_ADDR6_ALLOWED) ||
220 SCTP_ADDR_SRC, GFP_ATOMIC); 220 !(copy_flags & SCTP_ADDR6_PEERSUPP)))
221 if (error) 221 continue;
222 goto end_copy; 222
223 } 223 if (sctp_bind_addr_state(bp, &addr->a) != -1)
224 } 224 continue;
225
226 error = sctp_add_bind_addr(bp, &addr->a, sizeof(addr->a),
227 SCTP_ADDR_SRC, GFP_ATOMIC);
228 if (error)
229 break;
225 } 230 }
226 231
227end_copy:
228 rcu_read_unlock(); 232 rcu_read_unlock();
229 return error; 233 return error;
230} 234}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f23ad913dc7a..318c6786d653 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4392,10 +4392,7 @@ int sctp_transport_walk_start(struct rhashtable_iter *iter)
4392{ 4392{
4393 int err; 4393 int err;
4394 4394
4395 err = rhashtable_walk_init(&sctp_transport_hashtable, iter, 4395 rhltable_walk_enter(&sctp_transport_hashtable, iter);
4396 GFP_KERNEL);
4397 if (err)
4398 return err;
4399 4396
4400 err = rhashtable_walk_start(iter); 4397 err = rhashtable_walk_start(iter);
4401 if (err && err != -EAGAIN) { 4398 if (err && err != -EAGAIN) {
@@ -4475,18 +4472,17 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
4475 const union sctp_addr *paddr, void *p) 4472 const union sctp_addr *paddr, void *p)
4476{ 4473{
4477 struct sctp_transport *transport; 4474 struct sctp_transport *transport;
4478 int err = -ENOENT; 4475 int err;
4479 4476
4480 rcu_read_lock(); 4477 rcu_read_lock();
4481 transport = sctp_addrs_lookup_transport(net, laddr, paddr); 4478 transport = sctp_addrs_lookup_transport(net, laddr, paddr);
4482 if (!transport || !sctp_transport_hold(transport))
4483 goto out;
4484
4485 rcu_read_unlock(); 4479 rcu_read_unlock();
4480 if (!transport)
4481 return -ENOENT;
4482
4486 err = cb(transport, p); 4483 err = cb(transport, p);
4487 sctp_transport_put(transport); 4484 sctp_transport_put(transport);
4488 4485
4489out:
4490 return err; 4486 return err;
4491} 4487}
4492EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); 4488EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ce54dce13ddb..a1652ab63918 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -72,7 +72,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
72 */ 72 */
73 peer->rto = msecs_to_jiffies(net->sctp.rto_initial); 73 peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
74 74
75 peer->last_time_heard = ktime_set(0, 0); 75 peer->last_time_heard = 0;
76 peer->last_time_ecne_reduced = jiffies; 76 peer->last_time_ecne_reduced = jiffies;
77 77
78 peer->param_flags = SPP_HB_DISABLE | 78 peer->param_flags = SPP_HB_DISABLE |
diff --git a/net/socket.c b/net/socket.c
index 73dc69f9681e..8487bf136e5c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -90,7 +90,7 @@
90#include <linux/slab.h> 90#include <linux/slab.h>
91#include <linux/xattr.h> 91#include <linux/xattr.h>
92 92
93#include <asm/uaccess.h> 93#include <linux/uaccess.h>
94#include <asm/unistd.h> 94#include <asm/unistd.h>
95 95
96#include <net/compat.h> 96#include <net/compat.h>
@@ -533,8 +533,22 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
533 return used; 533 return used;
534} 534}
535 535
536int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
537{
538 int err = simple_setattr(dentry, iattr);
539
540 if (!err) {
541 struct socket *sock = SOCKET_I(d_inode(dentry));
542
543 sock->sk->sk_uid = iattr->ia_uid;
544 }
545
546 return err;
547}
548
536static const struct inode_operations sockfs_inode_ops = { 549static const struct inode_operations sockfs_inode_ops = {
537 .listxattr = sockfs_listxattr, 550 .listxattr = sockfs_listxattr,
551 .setattr = sockfs_setattr,
538}; 552};
539 553
540/** 554/**
@@ -654,7 +668,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
654 668
655 /* Race occurred between timestamp enabling and packet 669 /* Race occurred between timestamp enabling and packet
656 receiving. Fill in the current time for now. */ 670 receiving. Fill in the current time for now. */
657 if (need_software_tstamp && skb->tstamp.tv64 == 0) 671 if (need_software_tstamp && skb->tstamp == 0)
658 __net_timestamp(skb); 672 __net_timestamp(skb);
659 673
660 if (need_software_tstamp) { 674 if (need_software_tstamp) {
@@ -679,9 +693,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
679 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && 693 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
680 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) 694 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
681 empty = 0; 695 empty = 0;
682 if (!empty) 696 if (!empty) {
683 put_cmsg(msg, SOL_SOCKET, 697 put_cmsg(msg, SOL_SOCKET,
684 SCM_TIMESTAMPING, sizeof(tss), &tss); 698 SCM_TIMESTAMPING, sizeof(tss), &tss);
699
700 if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
701 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
702 skb->len, skb->data);
703 }
685} 704}
686EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 705EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
687 706
@@ -892,6 +911,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
892 * what to do with it - that's up to the protocol still. 911 * what to do with it - that's up to the protocol still.
893 */ 912 */
894 913
914static struct ns_common *get_net_ns(struct ns_common *ns)
915{
916 return &get_net(container_of(ns, struct net, ns))->ns;
917}
918
895static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) 919static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
896{ 920{
897 struct socket *sock; 921 struct socket *sock;
@@ -960,6 +984,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
960 err = dlci_ioctl_hook(cmd, argp); 984 err = dlci_ioctl_hook(cmd, argp);
961 mutex_unlock(&dlci_ioctl_mutex); 985 mutex_unlock(&dlci_ioctl_mutex);
962 break; 986 break;
987 case SIOCGSKNS:
988 err = -EPERM;
989 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
990 break;
991
992 err = open_related_ns(&net->ns, get_net_ns);
993 break;
963 default: 994 default:
964 err = sock_do_ioctl(net, sock, cmd, arg); 995 err = sock_do_ioctl(net, sock, cmd, arg);
965 break; 996 break;
@@ -1887,7 +1918,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
1887 struct sockaddr_storage address; 1918 struct sockaddr_storage address;
1888 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1919 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1889 unsigned char ctl[sizeof(struct cmsghdr) + 20] 1920 unsigned char ctl[sizeof(struct cmsghdr) + 20]
1890 __attribute__ ((aligned(sizeof(__kernel_size_t)))); 1921 __aligned(sizeof(__kernel_size_t));
1891 /* 20 is size of ipv6_pktinfo */ 1922 /* 20 is size of ipv6_pktinfo */
1892 unsigned char *ctl_buf = ctl; 1923 unsigned char *ctl_buf = ctl;
1893 int ctl_len; 1924 int ctl_len;
@@ -3110,6 +3141,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3110 case SIOCSIFVLAN: 3141 case SIOCSIFVLAN:
3111 case SIOCADDDLCI: 3142 case SIOCADDDLCI:
3112 case SIOCDELDLCI: 3143 case SIOCDELDLCI:
3144 case SIOCGSKNS:
3113 return sock_ioctl(file, cmd, arg); 3145 return sock_ioctl(file, cmd, arg);
3114 3146
3115 case SIOCGIFFLAGS: 3147 case SIOCGIFFLAGS:
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 3dfd769dc5b5..cdeb1d814833 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -50,7 +50,7 @@
50#include <linux/workqueue.h> 50#include <linux/workqueue.h>
51#include <linux/sunrpc/rpc_pipe_fs.h> 51#include <linux/sunrpc/rpc_pipe_fs.h>
52#include <linux/sunrpc/gss_api.h> 52#include <linux/sunrpc/gss_api.h>
53#include <asm/uaccess.h> 53#include <linux/uaccess.h>
54#include <linux/hashtable.h> 54#include <linux/hashtable.h>
55 55
56#include "../netns.h" 56#include "../netns.h"
@@ -541,9 +541,13 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred)
541 return gss_new; 541 return gss_new;
542 gss_msg = gss_add_msg(gss_new); 542 gss_msg = gss_add_msg(gss_new);
543 if (gss_msg == gss_new) { 543 if (gss_msg == gss_new) {
544 int res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); 544 int res;
545 atomic_inc(&gss_msg->count);
546 res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg);
545 if (res) { 547 if (res) {
546 gss_unhash_msg(gss_new); 548 gss_unhash_msg(gss_new);
549 atomic_dec(&gss_msg->count);
550 gss_release_msg(gss_new);
547 gss_msg = ERR_PTR(res); 551 gss_msg = ERR_PTR(res);
548 } 552 }
549 } else 553 } else
@@ -836,6 +840,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
836 warn_gssd(); 840 warn_gssd();
837 gss_release_msg(gss_msg); 841 gss_release_msg(gss_msg);
838 } 842 }
843 gss_release_msg(gss_msg);
839} 844}
840 845
841static void gss_pipe_dentry_destroy(struct dentry *dir, 846static void gss_pipe_dentry_destroy(struct dentry *dir,
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 90115ceefd49..fb39284ec174 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -200,7 +200,7 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
200 if (IS_ERR(hmac_md5)) 200 if (IS_ERR(hmac_md5))
201 goto out_free_md5; 201 goto out_free_md5;
202 202
203 req = ahash_request_alloc(md5, GFP_KERNEL); 203 req = ahash_request_alloc(md5, GFP_NOFS);
204 if (!req) 204 if (!req)
205 goto out_free_hmac_md5; 205 goto out_free_hmac_md5;
206 206
@@ -230,7 +230,7 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
230 goto out; 230 goto out;
231 231
232 ahash_request_free(req); 232 ahash_request_free(req);
233 req = ahash_request_alloc(hmac_md5, GFP_KERNEL); 233 req = ahash_request_alloc(hmac_md5, GFP_NOFS);
234 if (!req) 234 if (!req)
235 goto out_free_hmac_md5; 235 goto out_free_hmac_md5;
236 236
@@ -299,7 +299,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
299 if (IS_ERR(tfm)) 299 if (IS_ERR(tfm))
300 goto out_free_cksum; 300 goto out_free_cksum;
301 301
302 req = ahash_request_alloc(tfm, GFP_KERNEL); 302 req = ahash_request_alloc(tfm, GFP_NOFS);
303 if (!req) 303 if (!req)
304 goto out_free_ahash; 304 goto out_free_ahash;
305 305
@@ -397,7 +397,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
397 goto out_free_cksum; 397 goto out_free_cksum;
398 checksumlen = crypto_ahash_digestsize(tfm); 398 checksumlen = crypto_ahash_digestsize(tfm);
399 399
400 req = ahash_request_alloc(tfm, GFP_KERNEL); 400 req = ahash_request_alloc(tfm, GFP_NOFS);
401 if (!req) 401 if (!req)
402 goto out_free_ahash; 402 goto out_free_ahash;
403 403
@@ -963,7 +963,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
963 } 963 }
964 964
965 desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), 965 desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac),
966 GFP_KERNEL); 966 GFP_NOFS);
967 if (!desc) { 967 if (!desc) {
968 dprintk("%s: failed to allocate shash descriptor for '%s'\n", 968 dprintk("%s: failed to allocate shash descriptor for '%s'\n",
969 __func__, kctx->gk5e->cksum_name); 969 __func__, kctx->gk5e->cksum_name);
@@ -1030,7 +1030,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
1030 } 1030 }
1031 1031
1032 desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), 1032 desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac),
1033 GFP_KERNEL); 1033 GFP_NOFS);
1034 if (!desc) { 1034 if (!desc) {
1035 dprintk("%s: failed to allocate shash descriptor for '%s'\n", 1035 dprintk("%s: failed to allocate shash descriptor for '%s'\n",
1036 __func__, kctx->gk5e->cksum_name); 1036 __func__, kctx->gk5e->cksum_name);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 60595835317a..7bb2514aadd9 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -451,8 +451,7 @@ context_derive_keys_rc4(struct krb5_ctx *ctx)
451 goto out_err_free_hmac; 451 goto out_err_free_hmac;
452 452
453 453
454 desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), 454 desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), GFP_NOFS);
455 GFP_KERNEL);
456 if (!desc) { 455 if (!desc) {
457 dprintk("%s: failed to allocate hash descriptor for '%s'\n", 456 dprintk("%s: failed to allocate hash descriptor for '%s'\n",
458 __func__, ctx->gk5e->cksum_name); 457 __func__, ctx->gk5e->cksum_name);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 45662d7f0943..886e9d381771 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1548,7 +1548,7 @@ complete:
1548 ret = SVC_COMPLETE; 1548 ret = SVC_COMPLETE;
1549 goto out; 1549 goto out;
1550drop: 1550drop:
1551 ret = SVC_DROP; 1551 ret = SVC_CLOSE;
1552out: 1552out:
1553 if (rsci) 1553 if (rsci)
1554 cache_put(&rsci->h, sn->rsc_cache); 1554 cache_put(&rsci->h, sn->rsc_cache);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8aabe12201f8..8147e8d56eb2 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -21,7 +21,7 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/ctype.h> 22#include <linux/ctype.h>
23#include <linux/string_helpers.h> 23#include <linux/string_helpers.h>
24#include <asm/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/poll.h> 25#include <linux/poll.h>
26#include <linux/seq_file.h> 26#include <linux/seq_file.h>
27#include <linux/proc_fs.h> 27#include <linux/proc_fs.h>
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 62a482790937..1efbe48e794f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1926,6 +1926,8 @@ call_connect_status(struct rpc_task *task)
1926 case -EADDRINUSE: 1926 case -EADDRINUSE:
1927 case -ENOBUFS: 1927 case -ENOBUFS:
1928 case -EPIPE: 1928 case -EPIPE:
1929 xprt_conditional_disconnect(task->tk_rqstp->rq_xprt,
1930 task->tk_rqstp->rq_connect_cookie);
1929 if (RPC_IS_SOFTCONN(task)) 1931 if (RPC_IS_SOFTCONN(task))
1930 break; 1932 break;
1931 /* retry with existing socket, after a delay */ 1933 /* retry with existing socket, after a delay */
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index df5826876535..394ce523174c 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -34,7 +34,7 @@ struct sunrpc_net {
34 struct proc_dir_entry *use_gssp_proc; 34 struct proc_dir_entry *use_gssp_proc;
35}; 35};
36 36
37extern int sunrpc_net_id; 37extern unsigned int sunrpc_net_id;
38 38
39int ip_map_cache_create(struct net *); 39int ip_map_cache_create(struct net *);
40void ip_map_cache_destroy(struct net *); 40void ip_map_cache_destroy(struct net *);
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 2ecb994314c1..caeb01ad2b5a 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -157,15 +157,17 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
157 spin_lock(&op_metrics->om_lock); 157 spin_lock(&op_metrics->om_lock);
158 158
159 op_metrics->om_ops++; 159 op_metrics->om_ops++;
160 op_metrics->om_ntrans += req->rq_ntrans; 160 /* kernel API: om_ops must never become larger than om_ntrans */
161 op_metrics->om_ntrans += max(req->rq_ntrans, 1);
161 op_metrics->om_timeouts += task->tk_timeouts; 162 op_metrics->om_timeouts += task->tk_timeouts;
162 163
163 op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent; 164 op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
164 op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; 165 op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
165 166
166 delta = ktime_sub(req->rq_xtime, task->tk_start); 167 if (ktime_to_ns(req->rq_xtime)) {
167 op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta); 168 delta = ktime_sub(req->rq_xtime, task->tk_start);
168 169 op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
170 }
169 op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt); 171 op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
170 172
171 delta = ktime_sub(now, task->tk_start); 173 delta = ktime_sub(now, task->tk_start);
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index ee5d3d253102..d1c330a7953a 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -24,7 +24,7 @@
24 24
25#include "netns.h" 25#include "netns.h"
26 26
27int sunrpc_net_id; 27unsigned int sunrpc_net_id;
28EXPORT_SYMBOL_GPL(sunrpc_net_id); 28EXPORT_SYMBOL_GPL(sunrpc_net_id);
29 29
30static __net_init int sunrpc_init_net(struct net *net) 30static __net_init int sunrpc_init_net(struct net *net)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 7c8070ec93c8..75f290bddca1 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1155,8 +1155,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
1155 case SVC_DENIED: 1155 case SVC_DENIED:
1156 goto err_bad_auth; 1156 goto err_bad_auth;
1157 case SVC_CLOSE: 1157 case SVC_CLOSE:
1158 if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) 1158 goto close;
1159 svc_close_xprt(rqstp->rq_xprt);
1160 case SVC_DROP: 1159 case SVC_DROP:
1161 goto dropit; 1160 goto dropit;
1162 case SVC_COMPLETE: 1161 case SVC_COMPLETE:
@@ -1246,7 +1245,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
1246 1245
1247 sendit: 1246 sendit:
1248 if (svc_authorise(rqstp)) 1247 if (svc_authorise(rqstp))
1249 goto dropit; 1248 goto close;
1250 return 1; /* Caller can now send it */ 1249 return 1; /* Caller can now send it */
1251 1250
1252 dropit: 1251 dropit:
@@ -1254,11 +1253,16 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
1254 dprintk("svc: svc_process dropit\n"); 1253 dprintk("svc: svc_process dropit\n");
1255 return 0; 1254 return 0;
1256 1255
1256 close:
1257 if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
1258 svc_close_xprt(rqstp->rq_xprt);
1259 dprintk("svc: svc_process close\n");
1260 return 0;
1261
1257err_short_len: 1262err_short_len:
1258 svc_printk(rqstp, "short len %Zd, dropping request\n", 1263 svc_printk(rqstp, "short len %Zd, dropping request\n",
1259 argv->iov_len); 1264 argv->iov_len);
1260 1265 goto close;
1261 goto dropit; /* drop request */
1262 1266
1263err_bad_rpc: 1267err_bad_rpc:
1264 serv->sv_stats->rpcbadfmt++; 1268 serv->sv_stats->rpcbadfmt++;
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 69841db1f533..e112da8005b5 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -124,8 +124,7 @@ EXPORT_SYMBOL_GPL(svc_auth_unregister);
124#define DN_HASHMAX (1<<DN_HASHBITS) 124#define DN_HASHMAX (1<<DN_HASHBITS)
125 125
126static struct hlist_head auth_domain_table[DN_HASHMAX]; 126static struct hlist_head auth_domain_table[DN_HASHMAX];
127static spinlock_t auth_domain_lock = 127static DEFINE_SPINLOCK(auth_domain_lock);
128 __SPIN_LOCK_UNLOCKED(auth_domain_lock);
129 128
130void auth_domain_put(struct auth_domain *dom) 129void auth_domain_put(struct auth_domain *dom)
131{ 130{
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a4bc98265d88..de066acdb34e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -39,9 +39,10 @@
39#include <net/checksum.h> 39#include <net/checksum.h>
40#include <net/ip.h> 40#include <net/ip.h>
41#include <net/ipv6.h> 41#include <net/ipv6.h>
42#include <net/udp.h>
42#include <net/tcp.h> 43#include <net/tcp.h>
43#include <net/tcp_states.h> 44#include <net/tcp_states.h>
44#include <asm/uaccess.h> 45#include <linux/uaccess.h>
45#include <asm/ioctls.h> 46#include <asm/ioctls.h>
46#include <trace/events/skb.h> 47#include <trace/events/skb.h>
47 48
@@ -129,6 +130,18 @@ static void svc_release_skb(struct svc_rqst *rqstp)
129 } 130 }
130} 131}
131 132
133static void svc_release_udp_skb(struct svc_rqst *rqstp)
134{
135 struct sk_buff *skb = rqstp->rq_xprt_ctxt;
136
137 if (skb) {
138 rqstp->rq_xprt_ctxt = NULL;
139
140 dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
141 consume_skb(skb);
142 }
143}
144
132union svc_pktinfo_u { 145union svc_pktinfo_u {
133 struct in_pktinfo pkti; 146 struct in_pktinfo pkti;
134 struct in6_pktinfo pkti6; 147 struct in6_pktinfo pkti6;
@@ -549,7 +562,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
549 err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 562 err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
550 0, 0, MSG_PEEK | MSG_DONTWAIT); 563 0, 0, MSG_PEEK | MSG_DONTWAIT);
551 if (err >= 0) 564 if (err >= 0)
552 skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err); 565 skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
553 566
554 if (skb == NULL) { 567 if (skb == NULL) {
555 if (err != -EAGAIN) { 568 if (err != -EAGAIN) {
@@ -561,7 +574,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
561 } 574 }
562 len = svc_addr_len(svc_addr(rqstp)); 575 len = svc_addr_len(svc_addr(rqstp));
563 rqstp->rq_addrlen = len; 576 rqstp->rq_addrlen = len;
564 if (skb->tstamp.tv64 == 0) { 577 if (skb->tstamp == 0) {
565 skb->tstamp = ktime_get_real(); 578 skb->tstamp = ktime_get_real();
566 /* Don't enable netstamp, sunrpc doesn't 579 /* Don't enable netstamp, sunrpc doesn't
567 need that much accuracy */ 580 need that much accuracy */
@@ -590,7 +603,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
590 goto out_free; 603 goto out_free;
591 } 604 }
592 local_bh_enable(); 605 local_bh_enable();
593 skb_free_datagram_locked(svsk->sk_sk, skb); 606 consume_skb(skb);
594 } else { 607 } else {
595 /* we can use it in-place */ 608 /* we can use it in-place */
596 rqstp->rq_arg.head[0].iov_base = skb->data; 609 rqstp->rq_arg.head[0].iov_base = skb->data;
@@ -617,8 +630,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
617 630
618 return len; 631 return len;
619out_free: 632out_free:
620 trace_kfree_skb(skb, svc_udp_recvfrom); 633 kfree_skb(skb);
621 skb_free_datagram_locked(svsk->sk_sk, skb);
622 return 0; 634 return 0;
623} 635}
624 636
@@ -679,7 +691,7 @@ static struct svc_xprt_ops svc_udp_ops = {
679 .xpo_create = svc_udp_create, 691 .xpo_create = svc_udp_create,
680 .xpo_recvfrom = svc_udp_recvfrom, 692 .xpo_recvfrom = svc_udp_recvfrom,
681 .xpo_sendto = svc_udp_sendto, 693 .xpo_sendto = svc_udp_sendto,
682 .xpo_release_rqst = svc_release_skb, 694 .xpo_release_rqst = svc_release_udp_skb,
683 .xpo_detach = svc_sock_detach, 695 .xpo_detach = svc_sock_detach,
684 .xpo_free = svc_sock_free, 696 .xpo_free = svc_sock_free,
685 .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, 697 .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index c88d9bc06f5c..8c3936403fea 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -14,7 +14,7 @@
14#include <linux/sysctl.h> 14#include <linux/sysctl.h>
15#include <linux/module.h> 15#include <linux/module.h>
16 16
17#include <asm/uaccess.h> 17#include <linux/uaccess.h>
18#include <linux/sunrpc/types.h> 18#include <linux/sunrpc/types.h>
19#include <linux/sunrpc/sched.h> 19#include <linux/sunrpc/sched.h>
20#include <linux/sunrpc/stats.h> 20#include <linux/sunrpc/stats.h>
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 685e6d225414..9a6be030ca7d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -669,7 +669,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
669 spin_lock_bh(&xprt->transport_lock); 669 spin_lock_bh(&xprt->transport_lock);
670 if (cookie != xprt->connect_cookie) 670 if (cookie != xprt->connect_cookie)
671 goto out; 671 goto out;
672 if (test_bit(XPRT_CLOSING, &xprt->state) || !xprt_connected(xprt)) 672 if (test_bit(XPRT_CLOSING, &xprt->state))
673 goto out; 673 goto out;
674 set_bit(XPRT_CLOSE_WAIT, &xprt->state); 674 set_bit(XPRT_CLOSE_WAIT, &xprt->state);
675 /* Try to schedule an autoclose RPC call */ 675 /* Try to schedule an autoclose RPC call */
@@ -772,6 +772,7 @@ void xprt_connect(struct rpc_task *task)
772 if (!xprt_connected(xprt)) { 772 if (!xprt_connected(xprt)) {
773 task->tk_rqstp->rq_bytes_sent = 0; 773 task->tk_rqstp->rq_bytes_sent = 0;
774 task->tk_timeout = task->tk_rqstp->rq_timeout; 774 task->tk_timeout = task->tk_rqstp->rq_timeout;
775 task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
775 rpc_sleep_on(&xprt->pending, task, xprt_connect_status); 776 rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
776 777
777 if (test_bit(XPRT_CLOSING, &xprt->state)) 778 if (test_bit(XPRT_CLOSING, &xprt->state))
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 2c472e1b4827..24fedd4b117e 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -55,7 +55,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
55 if (IS_ERR(rb)) 55 if (IS_ERR(rb))
56 goto out_fail; 56 goto out_fail;
57 req->rl_sendbuf = rb; 57 req->rl_sendbuf = rb;
58 xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, size); 58 xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
59 min_t(size_t, size, PAGE_SIZE));
59 rpcrdma_set_xprtdata(rqst, req); 60 rpcrdma_set_xprtdata(rqst, req);
60 return 0; 61 return 0;
61 62
@@ -191,6 +192,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
191 size_t maxmsg; 192 size_t maxmsg;
192 193
193 maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize); 194 maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize);
195 maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE);
194 return maxmsg - RPCRDMA_HDRLEN_MIN; 196 return maxmsg - RPCRDMA_HDRLEN_MIN;
195} 197}
196 198
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 26b26beef2d4..47bed5333c7f 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -101,7 +101,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
101 struct rpcrdma_frmr *f = &r->frmr; 101 struct rpcrdma_frmr *f = &r->frmr;
102 int rc; 102 int rc;
103 103
104 f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, depth); 104 f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
105 if (IS_ERR(f->fr_mr)) 105 if (IS_ERR(f->fr_mr))
106 goto out_mr_err; 106 goto out_mr_err;
107 107
@@ -157,7 +157,7 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
157 return rc; 157 return rc;
158 } 158 }
159 159
160 f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, 160 f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
161 ia->ri_max_frmr_depth); 161 ia->ri_max_frmr_depth);
162 if (IS_ERR(f->fr_mr)) { 162 if (IS_ERR(f->fr_mr)) {
163 pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", 163 pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
@@ -171,10 +171,6 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
171} 171}
172 172
173/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR. 173/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
174 *
175 * There's no recovery if this fails. The FRMR is abandoned, but
176 * remains in rb_all. It will be cleaned up when the transport is
177 * destroyed.
178 */ 174 */
179static void 175static void
180frwr_op_recover_mr(struct rpcrdma_mw *mw) 176frwr_op_recover_mr(struct rpcrdma_mw *mw)
@@ -210,11 +206,16 @@ static int
210frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, 206frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
211 struct rpcrdma_create_data_internal *cdata) 207 struct rpcrdma_create_data_internal *cdata)
212{ 208{
209 struct ib_device_attr *attrs = &ia->ri_device->attrs;
213 int depth, delta; 210 int depth, delta;
214 211
212 ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
213 if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
214 ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
215
215 ia->ri_max_frmr_depth = 216 ia->ri_max_frmr_depth =
216 min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, 217 min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
217 ia->ri_device->attrs.max_fast_reg_page_list_len); 218 attrs->max_fast_reg_page_list_len);
218 dprintk("RPC: %s: device's max FR page list len = %u\n", 219 dprintk("RPC: %s: device's max FR page list len = %u\n",
219 __func__, ia->ri_max_frmr_depth); 220 __func__, ia->ri_max_frmr_depth);
220 221
@@ -241,8 +242,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
241 } 242 }
242 243
243 ep->rep_attr.cap.max_send_wr *= depth; 244 ep->rep_attr.cap.max_send_wr *= depth;
244 if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) { 245 if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) {
245 cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth; 246 cdata->max_requests = attrs->max_qp_wr / depth;
246 if (!cdata->max_requests) 247 if (!cdata->max_requests)
247 return -EINVAL; 248 return -EINVAL;
248 ep->rep_attr.cap.max_send_wr = cdata->max_requests * 249 ep->rep_attr.cap.max_send_wr = cdata->max_requests *
@@ -348,6 +349,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
348 int nsegs, bool writing, struct rpcrdma_mw **out) 349 int nsegs, bool writing, struct rpcrdma_mw **out)
349{ 350{
350 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 351 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
352 bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
351 struct rpcrdma_mw *mw; 353 struct rpcrdma_mw *mw;
352 struct rpcrdma_frmr *frmr; 354 struct rpcrdma_frmr *frmr;
353 struct ib_mr *mr; 355 struct ib_mr *mr;
@@ -383,8 +385,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
383 385
384 ++seg; 386 ++seg;
385 ++i; 387 ++i;
386 388 if (holes_ok)
387 /* Check for holes */ 389 continue;
388 if ((i < nsegs && offset_in_page(seg->mr_offset)) || 390 if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
389 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) 391 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
390 break; 392 break;
@@ -421,7 +423,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
421 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 423 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
422 IB_ACCESS_REMOTE_READ; 424 IB_ACCESS_REMOTE_READ;
423 425
424 DECR_CQCOUNT(&r_xprt->rx_ep); 426 rpcrdma_set_signaled(&r_xprt->rx_ep, &reg_wr->wr);
425 rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr); 427 rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
426 if (rc) 428 if (rc)
427 goto out_senderr; 429 goto out_senderr;
@@ -451,26 +453,6 @@ out_senderr:
451 return -ENOTCONN; 453 return -ENOTCONN;
452} 454}
453 455
454static struct ib_send_wr *
455__frwr_prepare_linv_wr(struct rpcrdma_mw *mw)
456{
457 struct rpcrdma_frmr *f = &mw->frmr;
458 struct ib_send_wr *invalidate_wr;
459
460 dprintk("RPC: %s: invalidating frmr %p\n", __func__, f);
461
462 f->fr_state = FRMR_IS_INVALID;
463 invalidate_wr = &f->fr_invwr;
464
465 memset(invalidate_wr, 0, sizeof(*invalidate_wr));
466 f->fr_cqe.done = frwr_wc_localinv;
467 invalidate_wr->wr_cqe = &f->fr_cqe;
468 invalidate_wr->opcode = IB_WR_LOCAL_INV;
469 invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
470
471 return invalidate_wr;
472}
473
474/* Invalidate all memory regions that were registered for "req". 456/* Invalidate all memory regions that were registered for "req".
475 * 457 *
476 * Sleeps until it is safe for the host CPU to access the 458 * Sleeps until it is safe for the host CPU to access the
@@ -481,12 +463,12 @@ __frwr_prepare_linv_wr(struct rpcrdma_mw *mw)
481static void 463static void
482frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) 464frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
483{ 465{
484 struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr; 466 struct ib_send_wr *first, **prev, *last, *bad_wr;
485 struct rpcrdma_rep *rep = req->rl_reply; 467 struct rpcrdma_rep *rep = req->rl_reply;
486 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 468 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
487 struct rpcrdma_mw *mw, *tmp; 469 struct rpcrdma_mw *mw, *tmp;
488 struct rpcrdma_frmr *f; 470 struct rpcrdma_frmr *f;
489 int rc; 471 int count, rc;
490 472
491 dprintk("RPC: %s: req %p\n", __func__, req); 473 dprintk("RPC: %s: req %p\n", __func__, req);
492 474
@@ -496,22 +478,29 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
496 * a single ib_post_send() call. 478 * a single ib_post_send() call.
497 */ 479 */
498 f = NULL; 480 f = NULL;
499 invalidate_wrs = pos = prev = NULL; 481 count = 0;
482 prev = &first;
500 list_for_each_entry(mw, &req->rl_registered, mw_list) { 483 list_for_each_entry(mw, &req->rl_registered, mw_list) {
484 mw->frmr.fr_state = FRMR_IS_INVALID;
485
501 if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) && 486 if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) &&
502 (mw->mw_handle == rep->rr_inv_rkey)) { 487 (mw->mw_handle == rep->rr_inv_rkey))
503 mw->frmr.fr_state = FRMR_IS_INVALID;
504 continue; 488 continue;
505 }
506
507 pos = __frwr_prepare_linv_wr(mw);
508 489
509 if (!invalidate_wrs)
510 invalidate_wrs = pos;
511 else
512 prev->next = pos;
513 prev = pos;
514 f = &mw->frmr; 490 f = &mw->frmr;
491 dprintk("RPC: %s: invalidating frmr %p\n",
492 __func__, f);
493
494 f->fr_cqe.done = frwr_wc_localinv;
495 last = &f->fr_invwr;
496 memset(last, 0, sizeof(*last));
497 last->wr_cqe = &f->fr_cqe;
498 last->opcode = IB_WR_LOCAL_INV;
499 last->ex.invalidate_rkey = mw->mw_handle;
500 count++;
501
502 *prev = last;
503 prev = &last->next;
515 } 504 }
516 if (!f) 505 if (!f)
517 goto unmap; 506 goto unmap;
@@ -520,17 +509,22 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
520 * last WR in the chain completes, all WRs in the chain 509 * last WR in the chain completes, all WRs in the chain
521 * are complete. 510 * are complete.
522 */ 511 */
523 f->fr_invwr.send_flags = IB_SEND_SIGNALED; 512 last->send_flags = IB_SEND_SIGNALED;
524 f->fr_cqe.done = frwr_wc_localinv_wake; 513 f->fr_cqe.done = frwr_wc_localinv_wake;
525 reinit_completion(&f->fr_linv_done); 514 reinit_completion(&f->fr_linv_done);
526 INIT_CQCOUNT(&r_xprt->rx_ep); 515
516 /* Initialize CQ count, since there is always a signaled
517 * WR being posted here. The new cqcount depends on how
518 * many SQEs are about to be consumed.
519 */
520 rpcrdma_init_cqcount(&r_xprt->rx_ep, count);
527 521
528 /* Transport disconnect drains the receive CQ before it 522 /* Transport disconnect drains the receive CQ before it
529 * replaces the QP. The RPC reply handler won't call us 523 * replaces the QP. The RPC reply handler won't call us
530 * unless ri_id->qp is a valid pointer. 524 * unless ri_id->qp is a valid pointer.
531 */ 525 */
532 r_xprt->rx_stats.local_inv_needed++; 526 r_xprt->rx_stats.local_inv_needed++;
533 rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr); 527 rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
534 if (rc) 528 if (rc)
535 goto reset_mrs; 529 goto reset_mrs;
536 530
@@ -541,7 +535,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
541 */ 535 */
542unmap: 536unmap:
543 list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) { 537 list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
544 dprintk("RPC: %s: unmapping frmr %p\n", 538 dprintk("RPC: %s: DMA unmapping frmr %p\n",
545 __func__, &mw->frmr); 539 __func__, &mw->frmr);
546 list_del_init(&mw->mw_list); 540 list_del_init(&mw->mw_list);
547 ib_dma_unmap_sg(ia->ri_device, 541 ib_dma_unmap_sg(ia->ri_device,
@@ -559,7 +553,7 @@ reset_mrs:
559 */ 553 */
560 list_for_each_entry(mw, &req->rl_registered, mw_list) { 554 list_for_each_entry(mw, &req->rl_registered, mw_list) {
561 f = &mw->frmr; 555 f = &mw->frmr;
562 if (mw->frmr.fr_mr->rkey == bad_wr->ex.invalidate_rkey) { 556 if (mw->mw_handle == bad_wr->ex.invalidate_rkey) {
563 __frwr_reset_mr(ia, mw); 557 __frwr_reset_mr(ia, mw);
564 bad_wr = bad_wr->next; 558 bad_wr = bad_wr->next;
565 } 559 }
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index d987c2d3dd6e..c52e0f2ffe52 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -786,7 +786,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, int wrchunk, __be32 **iptrp)
786 ifdebug(FACILITY) { 786 ifdebug(FACILITY) {
787 u64 off; 787 u64 off;
788 xdr_decode_hyper((__be32 *)&seg->rs_offset, &off); 788 xdr_decode_hyper((__be32 *)&seg->rs_offset, &off);
789 dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n", 789 dprintk("RPC: %s: chunk %d@0x%016llx:0x%08x\n",
790 __func__, 790 __func__,
791 be32_to_cpu(seg->rs_length), 791 be32_to_cpu(seg->rs_length),
792 (unsigned long long)off, 792 (unsigned long long)off,
@@ -906,28 +906,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
906 return fixup_copy_count; 906 return fixup_copy_count;
907} 907}
908 908
909void
910rpcrdma_connect_worker(struct work_struct *work)
911{
912 struct rpcrdma_ep *ep =
913 container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
914 struct rpcrdma_xprt *r_xprt =
915 container_of(ep, struct rpcrdma_xprt, rx_ep);
916 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
917
918 spin_lock_bh(&xprt->transport_lock);
919 if (++xprt->connect_cookie == 0) /* maintain a reserved value */
920 ++xprt->connect_cookie;
921 if (ep->rep_connected > 0) {
922 if (!xprt_test_and_set_connected(xprt))
923 xprt_wake_pending_tasks(xprt, 0);
924 } else {
925 if (xprt_test_and_clear_connected(xprt))
926 xprt_wake_pending_tasks(xprt, -ENOTCONN);
927 }
928 spin_unlock_bh(&xprt->transport_lock);
929}
930
931#if defined(CONFIG_SUNRPC_BACKCHANNEL) 909#if defined(CONFIG_SUNRPC_BACKCHANNEL)
932/* By convention, backchannel calls arrive via rdma_msg type 910/* By convention, backchannel calls arrive via rdma_msg type
933 * messages, and never populate the chunk lists. This makes 911 * messages, and never populate the chunk lists. This makes
@@ -959,18 +937,6 @@ rpcrdma_is_bcall(struct rpcrdma_msg *headerp)
959} 937}
960#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 938#endif /* CONFIG_SUNRPC_BACKCHANNEL */
961 939
962/*
963 * This function is called when an async event is posted to
964 * the connection which changes the connection state. All it
965 * does at this point is mark the connection up/down, the rpc
966 * timers do the rest.
967 */
968void
969rpcrdma_conn_func(struct rpcrdma_ep *ep)
970{
971 schedule_delayed_work(&ep->rep_connect_worker, 0);
972}
973
974/* Process received RPC/RDMA messages. 940/* Process received RPC/RDMA messages.
975 * 941 *
976 * Errors must result in the RPC task either being awakened, or 942 * Errors must result in the RPC task either being awakened, or
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 20027f8de129..288e35c2d8f4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -164,13 +164,9 @@ static int
164xprt_rdma_bc_allocate(struct rpc_task *task) 164xprt_rdma_bc_allocate(struct rpc_task *task)
165{ 165{
166 struct rpc_rqst *rqst = task->tk_rqstp; 166 struct rpc_rqst *rqst = task->tk_rqstp;
167 struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
168 size_t size = rqst->rq_callsize; 167 size_t size = rqst->rq_callsize;
169 struct svcxprt_rdma *rdma;
170 struct page *page; 168 struct page *page;
171 169
172 rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
173
174 if (size > PAGE_SIZE) { 170 if (size > PAGE_SIZE) {
175 WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n", 171 WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n",
176 size); 172 size);
@@ -359,6 +355,7 @@ xprt_setup_rdma_bc(struct xprt_create *args)
359out_fail: 355out_fail:
360 xprt_rdma_free_addresses(xprt); 356 xprt_rdma_free_addresses(xprt);
361 args->bc_xprt->xpt_bc_xprt = NULL; 357 args->bc_xprt->xpt_bc_xprt = NULL;
358 args->bc_xprt->xpt_bc_xps = NULL;
362 xprt_put(xprt); 359 xprt_put(xprt);
363 xprt_free(xprt); 360 xprt_free(xprt);
364 return ERR_PTR(-EINVAL); 361 return ERR_PTR(-EINVAL);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index ad1df979b3f0..57d35fbb1c28 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -279,7 +279,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
279 frmr->sg); 279 frmr->sg);
280 return -ENOMEM; 280 return -ENOMEM;
281 } 281 }
282 atomic_inc(&xprt->sc_dma_used);
283 282
284 n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE); 283 n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
285 if (unlikely(n != frmr->sg_nents)) { 284 if (unlikely(n != frmr->sg_nents)) {
@@ -374,9 +373,7 @@ rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head,
374 u32 position, u32 byte_count, u32 page_offset, int page_no) 373 u32 position, u32 byte_count, u32 page_offset, int page_no)
375{ 374{
376 char *srcp, *destp; 375 char *srcp, *destp;
377 int ret;
378 376
379 ret = 0;
380 srcp = head->arg.head[0].iov_base + position; 377 srcp = head->arg.head[0].iov_base + position;
381 byte_count = head->arg.head[0].iov_len - position; 378 byte_count = head->arg.head[0].iov_len - position;
382 if (byte_count > PAGE_SIZE) { 379 if (byte_count > PAGE_SIZE) {
@@ -415,6 +412,20 @@ done:
415 return 1; 412 return 1;
416} 413}
417 414
415/* Returns the address of the first read chunk or <nul> if no read chunk
416 * is present
417 */
418static struct rpcrdma_read_chunk *
419svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
420{
421 struct rpcrdma_read_chunk *ch =
422 (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
423
424 if (ch->rc_discrim == xdr_zero)
425 return NULL;
426 return ch;
427}
428
418static int rdma_read_chunks(struct svcxprt_rdma *xprt, 429static int rdma_read_chunks(struct svcxprt_rdma *xprt,
419 struct rpcrdma_msg *rmsgp, 430 struct rpcrdma_msg *rmsgp,
420 struct svc_rqst *rqstp, 431 struct svc_rqst *rqstp,
@@ -627,8 +638,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
627 goto defer; 638 goto defer;
628 goto out; 639 goto out;
629 } 640 }
630 dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", 641 dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p\n",
631 ctxt, rdma_xprt, rqstp, ctxt->wc_status); 642 ctxt, rdma_xprt, rqstp);
632 atomic_inc(&rdma_stat_recv); 643 atomic_inc(&rdma_stat_recv);
633 644
634 /* Build up the XDR from the receive buffers. */ 645 /* Build up the XDR from the receive buffers. */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index f5a91edcd233..ad4d286a83c5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -153,76 +153,35 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
153 return dma_addr; 153 return dma_addr;
154} 154}
155 155
156/* Returns the address of the first read chunk or <nul> if no read chunk 156/* Parse the RPC Call's transport header.
157 * is present
158 */ 157 */
159struct rpcrdma_read_chunk * 158static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp,
160svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) 159 struct rpcrdma_write_array **write,
160 struct rpcrdma_write_array **reply)
161{ 161{
162 struct rpcrdma_read_chunk *ch = 162 __be32 *p;
163 (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
164 163
165 if (ch->rc_discrim == xdr_zero) 164 p = (__be32 *)&rmsgp->rm_body.rm_chunks[0];
166 return NULL;
167 return ch;
168}
169 165
170/* Returns the address of the first read write array element or <nul> 166 /* Read list */
171 * if no write array list is present 167 while (*p++ != xdr_zero)
172 */ 168 p += 5;
173static struct rpcrdma_write_array *
174svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
175{
176 if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
177 rmsgp->rm_body.rm_chunks[1] == xdr_zero)
178 return NULL;
179 return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
180}
181 169
182/* Returns the address of the first reply array element or <nul> if no 170 /* Write list */
183 * reply array is present 171 if (*p != xdr_zero) {
184 */ 172 *write = (struct rpcrdma_write_array *)p;
185static struct rpcrdma_write_array * 173 while (*p++ != xdr_zero)
186svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp, 174 p += 1 + be32_to_cpu(*p) * 4;
187 struct rpcrdma_write_array *wr_ary) 175 } else {
188{ 176 *write = NULL;
189 struct rpcrdma_read_chunk *rch; 177 p++;
190 struct rpcrdma_write_array *rp_ary;
191
192 /* XXX: Need to fix when reply chunk may occur with read list
193 * and/or write list.
194 */
195 if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
196 rmsgp->rm_body.rm_chunks[1] != xdr_zero)
197 return NULL;
198
199 rch = svc_rdma_get_read_chunk(rmsgp);
200 if (rch) {
201 while (rch->rc_discrim != xdr_zero)
202 rch++;
203
204 /* The reply chunk follows an empty write array located
205 * at 'rc_position' here. The reply array is at rc_target.
206 */
207 rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
208 goto found_it;
209 }
210
211 if (wr_ary) {
212 int chunk = be32_to_cpu(wr_ary->wc_nchunks);
213
214 rp_ary = (struct rpcrdma_write_array *)
215 &wr_ary->wc_array[chunk].wc_target.rs_length;
216 goto found_it;
217 } 178 }
218 179
219 /* No read list, no write list */ 180 /* Reply chunk */
220 rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2]; 181 if (*p != xdr_zero)
221 182 *reply = (struct rpcrdma_write_array *)p;
222 found_it: 183 else
223 if (rp_ary->wc_discrim == xdr_zero) 184 *reply = NULL;
224 return NULL;
225 return rp_ary;
226} 185}
227 186
228/* RPC-over-RDMA Version One private extension: Remote Invalidation. 187/* RPC-over-RDMA Version One private extension: Remote Invalidation.
@@ -240,31 +199,22 @@ static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
240{ 199{
241 struct rpcrdma_read_chunk *rd_ary; 200 struct rpcrdma_read_chunk *rd_ary;
242 struct rpcrdma_segment *arg_ch; 201 struct rpcrdma_segment *arg_ch;
243 u32 inv_rkey;
244 202
245 inv_rkey = 0; 203 rd_ary = (struct rpcrdma_read_chunk *)&rdma_argp->rm_body.rm_chunks[0];
246 204 if (rd_ary->rc_discrim != xdr_zero)
247 rd_ary = svc_rdma_get_read_chunk(rdma_argp); 205 return be32_to_cpu(rd_ary->rc_target.rs_handle);
248 if (rd_ary) {
249 inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle);
250 goto out;
251 }
252 206
253 if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) { 207 if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
254 arg_ch = &wr_ary->wc_array[0].wc_target; 208 arg_ch = &wr_ary->wc_array[0].wc_target;
255 inv_rkey = be32_to_cpu(arg_ch->rs_handle); 209 return be32_to_cpu(arg_ch->rs_handle);
256 goto out;
257 } 210 }
258 211
259 if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) { 212 if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
260 arg_ch = &rp_ary->wc_array[0].wc_target; 213 arg_ch = &rp_ary->wc_array[0].wc_target;
261 inv_rkey = be32_to_cpu(arg_ch->rs_handle); 214 return be32_to_cpu(arg_ch->rs_handle);
262 goto out;
263 } 215 }
264 216
265out: 217 return 0;
266 dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey);
267 return inv_rkey;
268} 218}
269 219
270/* Assumptions: 220/* Assumptions:
@@ -622,8 +572,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
622 * places this at the start of page 0. 572 * places this at the start of page 0.
623 */ 573 */
624 rdma_argp = page_address(rqstp->rq_pages[0]); 574 rdma_argp = page_address(rqstp->rq_pages[0]);
625 wr_ary = svc_rdma_get_write_array(rdma_argp); 575 svc_rdma_get_write_arrays(rdma_argp, &wr_ary, &rp_ary);
626 rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
627 576
628 inv_rkey = 0; 577 inv_rkey = 0;
629 if (rdma->sc_snd_w_inv) 578 if (rdma->sc_snd_w_inv)
@@ -636,7 +585,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
636 goto err0; 585 goto err0;
637 inline_bytes = rqstp->rq_res.len; 586 inline_bytes = rqstp->rq_res.len;
638 587
639 /* Create the RDMA response header */ 588 /* Create the RDMA response header. xprt->xpt_mutex,
589 * acquired in svc_send(), serializes RPC replies. The
590 * code path below that inserts the credit grant value
591 * into each transport header runs only inside this
592 * critical section.
593 */
640 ret = -ENOMEM; 594 ret = -ENOMEM;
641 res_page = alloc_page(GFP_KERNEL); 595 res_page = alloc_page(GFP_KERNEL);
642 if (!res_page) 596 if (!res_page)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 1334de2715c2..ca2799af05a6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -41,6 +41,7 @@
41 */ 41 */
42 42
43#include <linux/sunrpc/svc_xprt.h> 43#include <linux/sunrpc/svc_xprt.h>
44#include <linux/sunrpc/addr.h>
44#include <linux/sunrpc/debug.h> 45#include <linux/sunrpc/debug.h>
45#include <linux/sunrpc/rpc_rdma.h> 46#include <linux/sunrpc/rpc_rdma.h>
46#include <linux/interrupt.h> 47#include <linux/interrupt.h>
@@ -226,25 +227,22 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
226 struct svcxprt_rdma *xprt = ctxt->xprt; 227 struct svcxprt_rdma *xprt = ctxt->xprt;
227 struct ib_device *device = xprt->sc_cm_id->device; 228 struct ib_device *device = xprt->sc_cm_id->device;
228 u32 lkey = xprt->sc_pd->local_dma_lkey; 229 u32 lkey = xprt->sc_pd->local_dma_lkey;
229 unsigned int i, count; 230 unsigned int i;
230 231
231 for (count = 0, i = 0; i < ctxt->mapped_sges; i++) { 232 for (i = 0; i < ctxt->mapped_sges; i++) {
232 /* 233 /*
233 * Unmap the DMA addr in the SGE if the lkey matches 234 * Unmap the DMA addr in the SGE if the lkey matches
234 * the local_dma_lkey, otherwise, ignore it since it is 235 * the local_dma_lkey, otherwise, ignore it since it is
235 * an FRMR lkey and will be unmapped later when the 236 * an FRMR lkey and will be unmapped later when the
236 * last WR that uses it completes. 237 * last WR that uses it completes.
237 */ 238 */
238 if (ctxt->sge[i].lkey == lkey) { 239 if (ctxt->sge[i].lkey == lkey)
239 count++;
240 ib_dma_unmap_page(device, 240 ib_dma_unmap_page(device,
241 ctxt->sge[i].addr, 241 ctxt->sge[i].addr,
242 ctxt->sge[i].length, 242 ctxt->sge[i].length,
243 ctxt->direction); 243 ctxt->direction);
244 }
245 } 244 }
246 ctxt->mapped_sges = 0; 245 ctxt->mapped_sges = 0;
247 atomic_sub(count, &xprt->sc_dma_used);
248} 246}
249 247
250void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) 248void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
@@ -398,7 +396,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
398 396
399 /* WARNING: Only wc->wr_cqe and wc->status are reliable */ 397 /* WARNING: Only wc->wr_cqe and wc->status are reliable */
400 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe); 398 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
401 ctxt->wc_status = wc->status;
402 svc_rdma_unmap_dma(ctxt); 399 svc_rdma_unmap_dma(ctxt);
403 400
404 if (wc->status != IB_WC_SUCCESS) 401 if (wc->status != IB_WC_SUCCESS)
@@ -436,7 +433,7 @@ static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt,
436 goto err; 433 goto err;
437 434
438out: 435out:
439 atomic_dec(&xprt->sc_sq_count); 436 atomic_inc(&xprt->sc_sq_avail);
440 wake_up(&xprt->sc_send_wait); 437 wake_up(&xprt->sc_send_wait);
441 return; 438 return;
442 439
@@ -946,7 +943,6 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
946 if (frmr) { 943 if (frmr) {
947 ib_dma_unmap_sg(rdma->sc_cm_id->device, 944 ib_dma_unmap_sg(rdma->sc_cm_id->device,
948 frmr->sg, frmr->sg_nents, frmr->direction); 945 frmr->sg, frmr->sg_nents, frmr->direction);
949 atomic_dec(&rdma->sc_dma_used);
950 spin_lock_bh(&rdma->sc_frmr_q_lock); 946 spin_lock_bh(&rdma->sc_frmr_q_lock);
951 WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); 947 WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
952 list_add(&frmr->frmr_list, &rdma->sc_frmr_q); 948 list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
@@ -973,6 +969,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
973 struct rpcrdma_connect_private pmsg; 969 struct rpcrdma_connect_private pmsg;
974 struct ib_qp_init_attr qp_attr; 970 struct ib_qp_init_attr qp_attr;
975 struct ib_device *dev; 971 struct ib_device *dev;
972 struct sockaddr *sap;
976 unsigned int i; 973 unsigned int i;
977 int ret = 0; 974 int ret = 0;
978 975
@@ -1010,6 +1007,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1010 newxprt->sc_rq_depth = newxprt->sc_max_requests + 1007 newxprt->sc_rq_depth = newxprt->sc_max_requests +
1011 newxprt->sc_max_bc_requests; 1008 newxprt->sc_max_bc_requests;
1012 newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth; 1009 newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
1010 atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
1013 1011
1014 if (!svc_rdma_prealloc_ctxts(newxprt)) 1012 if (!svc_rdma_prealloc_ctxts(newxprt))
1015 goto errout; 1013 goto errout;
@@ -1052,18 +1050,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1052 qp_attr.qp_type = IB_QPT_RC; 1050 qp_attr.qp_type = IB_QPT_RC;
1053 qp_attr.send_cq = newxprt->sc_sq_cq; 1051 qp_attr.send_cq = newxprt->sc_sq_cq;
1054 qp_attr.recv_cq = newxprt->sc_rq_cq; 1052 qp_attr.recv_cq = newxprt->sc_rq_cq;
1055 dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n" 1053 dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n",
1056 " cm_id->device=%p, sc_pd->device=%p\n" 1054 newxprt->sc_cm_id, newxprt->sc_pd);
1057 " cap.max_send_wr = %d\n" 1055 dprintk(" cap.max_send_wr = %d, cap.max_recv_wr = %d\n",
1058 " cap.max_recv_wr = %d\n" 1056 qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
1059 " cap.max_send_sge = %d\n" 1057 dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
1060 " cap.max_recv_sge = %d\n", 1058 qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge);
1061 newxprt->sc_cm_id, newxprt->sc_pd,
1062 dev, newxprt->sc_pd->device,
1063 qp_attr.cap.max_send_wr,
1064 qp_attr.cap.max_recv_wr,
1065 qp_attr.cap.max_send_sge,
1066 qp_attr.cap.max_recv_sge);
1067 1059
1068 ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); 1060 ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
1069 if (ret) { 1061 if (ret) {
@@ -1146,31 +1138,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1146 goto errout; 1138 goto errout;
1147 } 1139 }
1148 1140
1149 dprintk("svcrdma: new connection %p accepted with the following " 1141 dprintk("svcrdma: new connection %p accepted:\n", newxprt);
1150 "attributes:\n" 1142 sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
1151 " local_ip : %pI4\n" 1143 dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
1152 " local_port : %d\n" 1144 sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
1153 " remote_ip : %pI4\n" 1145 dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap));
1154 " remote_port : %d\n" 1146 dprintk(" max_sge : %d\n", newxprt->sc_max_sge);
1155 " max_sge : %d\n" 1147 dprintk(" max_sge_rd : %d\n", newxprt->sc_max_sge_rd);
1156 " max_sge_rd : %d\n" 1148 dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
1157 " sq_depth : %d\n" 1149 dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
1158 " max_requests : %d\n" 1150 dprintk(" ord : %d\n", newxprt->sc_ord);
1159 " ord : %d\n",
1160 newxprt,
1161 &((struct sockaddr_in *)&newxprt->sc_cm_id->
1162 route.addr.src_addr)->sin_addr.s_addr,
1163 ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
1164 route.addr.src_addr)->sin_port),
1165 &((struct sockaddr_in *)&newxprt->sc_cm_id->
1166 route.addr.dst_addr)->sin_addr.s_addr,
1167 ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
1168 route.addr.dst_addr)->sin_port),
1169 newxprt->sc_max_sge,
1170 newxprt->sc_max_sge_rd,
1171 newxprt->sc_sq_depth,
1172 newxprt->sc_max_requests,
1173 newxprt->sc_ord);
1174 1151
1175 return &newxprt->sc_xprt; 1152 return &newxprt->sc_xprt;
1176 1153
@@ -1257,9 +1234,6 @@ static void __svc_rdma_free(struct work_struct *work)
1257 if (rdma->sc_ctxt_used != 0) 1234 if (rdma->sc_ctxt_used != 0)
1258 pr_err("svcrdma: ctxt still in use? (%d)\n", 1235 pr_err("svcrdma: ctxt still in use? (%d)\n",
1259 rdma->sc_ctxt_used); 1236 rdma->sc_ctxt_used);
1260 if (atomic_read(&rdma->sc_dma_used) != 0)
1261 pr_err("svcrdma: dma still in use? (%d)\n",
1262 atomic_read(&rdma->sc_dma_used));
1263 1237
1264 /* Final put of backchannel client transport */ 1238 /* Final put of backchannel client transport */
1265 if (xprt->xpt_bc_xprt) { 1239 if (xprt->xpt_bc_xprt) {
@@ -1339,15 +1313,13 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1339 1313
1340 /* If the SQ is full, wait until an SQ entry is available */ 1314 /* If the SQ is full, wait until an SQ entry is available */
1341 while (1) { 1315 while (1) {
1342 spin_lock_bh(&xprt->sc_lock); 1316 if ((atomic_sub_return(wr_count, &xprt->sc_sq_avail) < 0)) {
1343 if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
1344 spin_unlock_bh(&xprt->sc_lock);
1345 atomic_inc(&rdma_stat_sq_starve); 1317 atomic_inc(&rdma_stat_sq_starve);
1346 1318
1347 /* Wait until SQ WR available if SQ still full */ 1319 /* Wait until SQ WR available if SQ still full */
1320 atomic_add(wr_count, &xprt->sc_sq_avail);
1348 wait_event(xprt->sc_send_wait, 1321 wait_event(xprt->sc_send_wait,
1349 atomic_read(&xprt->sc_sq_count) < 1322 atomic_read(&xprt->sc_sq_avail) > wr_count);
1350 xprt->sc_sq_depth);
1351 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 1323 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1352 return -ENOTCONN; 1324 return -ENOTCONN;
1353 continue; 1325 continue;
@@ -1357,21 +1329,17 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1357 svc_xprt_get(&xprt->sc_xprt); 1329 svc_xprt_get(&xprt->sc_xprt);
1358 1330
1359 /* Bump used SQ WR count and post */ 1331 /* Bump used SQ WR count and post */
1360 atomic_add(wr_count, &xprt->sc_sq_count);
1361 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); 1332 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
1362 if (ret) { 1333 if (ret) {
1363 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 1334 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
1364 atomic_sub(wr_count, &xprt->sc_sq_count);
1365 for (i = 0; i < wr_count; i ++) 1335 for (i = 0; i < wr_count; i ++)
1366 svc_xprt_put(&xprt->sc_xprt); 1336 svc_xprt_put(&xprt->sc_xprt);
1367 dprintk("svcrdma: failed to post SQ WR rc=%d, " 1337 dprintk("svcrdma: failed to post SQ WR rc=%d\n", ret);
1368 "sc_sq_count=%d, sc_sq_depth=%d\n", 1338 dprintk(" sc_sq_avail=%d, sc_sq_depth=%d\n",
1369 ret, atomic_read(&xprt->sc_sq_count), 1339 atomic_read(&xprt->sc_sq_avail),
1370 xprt->sc_sq_depth); 1340 xprt->sc_sq_depth);
1371 }
1372 spin_unlock_bh(&xprt->sc_lock);
1373 if (ret)
1374 wake_up(&xprt->sc_send_wait); 1341 wake_up(&xprt->sc_send_wait);
1342 }
1375 break; 1343 break;
1376 } 1344 }
1377 return ret; 1345 return ret;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ed5e285fd2ea..534c178d2a7e 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -219,6 +219,34 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt)
219 } 219 }
220} 220}
221 221
222void
223rpcrdma_conn_func(struct rpcrdma_ep *ep)
224{
225 schedule_delayed_work(&ep->rep_connect_worker, 0);
226}
227
228void
229rpcrdma_connect_worker(struct work_struct *work)
230{
231 struct rpcrdma_ep *ep =
232 container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
233 struct rpcrdma_xprt *r_xprt =
234 container_of(ep, struct rpcrdma_xprt, rx_ep);
235 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
236
237 spin_lock_bh(&xprt->transport_lock);
238 if (++xprt->connect_cookie == 0) /* maintain a reserved value */
239 ++xprt->connect_cookie;
240 if (ep->rep_connected > 0) {
241 if (!xprt_test_and_set_connected(xprt))
242 xprt_wake_pending_tasks(xprt, 0);
243 } else {
244 if (xprt_test_and_clear_connected(xprt))
245 xprt_wake_pending_tasks(xprt, -ENOTCONN);
246 }
247 spin_unlock_bh(&xprt->transport_lock);
248}
249
222static void 250static void
223xprt_rdma_connect_worker(struct work_struct *work) 251xprt_rdma_connect_worker(struct work_struct *work)
224{ 252{
@@ -621,7 +649,8 @@ xprt_rdma_free(struct rpc_task *task)
621 649
622 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); 650 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
623 651
624 ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task)); 652 if (unlikely(!list_empty(&req->rl_registered)))
653 ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
625 rpcrdma_unmap_sges(ia, req); 654 rpcrdma_unmap_sges(ia, req);
626 rpcrdma_buffer_put(req); 655 rpcrdma_buffer_put(req);
627} 656}
@@ -657,7 +686,8 @@ xprt_rdma_send_request(struct rpc_task *task)
657 int rc = 0; 686 int rc = 0;
658 687
659 /* On retransmit, remove any previously registered chunks */ 688 /* On retransmit, remove any previously registered chunks */
660 r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); 689 if (unlikely(!list_empty(&req->rl_registered)))
690 r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
661 691
662 rc = rpcrdma_marshal_req(rqst); 692 rc = rpcrdma_marshal_req(rqst);
663 if (rc < 0) 693 if (rc < 0)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index ec74289af7ec..11d07748f699 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -103,9 +103,9 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
103{ 103{
104 struct rpcrdma_ep *ep = context; 104 struct rpcrdma_ep *ep = context;
105 105
106 pr_err("RPC: %s: %s on device %s ep %p\n", 106 pr_err("rpcrdma: %s on device %s ep %p\n",
107 __func__, ib_event_msg(event->event), 107 ib_event_msg(event->event), event->device->name, context);
108 event->device->name, context); 108
109 if (ep->rep_connected == 1) { 109 if (ep->rep_connected == 1) {
110 ep->rep_connected = -EIO; 110 ep->rep_connected = -EIO;
111 rpcrdma_conn_func(ep); 111 rpcrdma_conn_func(ep);
@@ -223,8 +223,8 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
223 cdata->inline_rsize = rsize; 223 cdata->inline_rsize = rsize;
224 if (wsize < cdata->inline_wsize) 224 if (wsize < cdata->inline_wsize)
225 cdata->inline_wsize = wsize; 225 cdata->inline_wsize = wsize;
226 pr_info("rpcrdma: max send %u, max recv %u\n", 226 dprintk("RPC: %s: max send %u, max recv %u\n",
227 cdata->inline_wsize, cdata->inline_rsize); 227 __func__, cdata->inline_wsize, cdata->inline_rsize);
228 rpcrdma_set_max_header_sizes(r_xprt); 228 rpcrdma_set_max_header_sizes(r_xprt);
229} 229}
230 230
@@ -331,6 +331,7 @@ static struct rdma_cm_id *
331rpcrdma_create_id(struct rpcrdma_xprt *xprt, 331rpcrdma_create_id(struct rpcrdma_xprt *xprt,
332 struct rpcrdma_ia *ia, struct sockaddr *addr) 332 struct rpcrdma_ia *ia, struct sockaddr *addr)
333{ 333{
334 unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
334 struct rdma_cm_id *id; 335 struct rdma_cm_id *id;
335 int rc; 336 int rc;
336 337
@@ -352,8 +353,12 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
352 __func__, rc); 353 __func__, rc);
353 goto out; 354 goto out;
354 } 355 }
355 wait_for_completion_interruptible_timeout(&ia->ri_done, 356 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
356 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); 357 if (rc < 0) {
358 dprintk("RPC: %s: wait() exited: %i\n",
359 __func__, rc);
360 goto out;
361 }
357 362
358 /* FIXME: 363 /* FIXME:
359 * Until xprtrdma supports DEVICE_REMOVAL, the provider must 364 * Until xprtrdma supports DEVICE_REMOVAL, the provider must
@@ -376,8 +381,12 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
376 __func__, rc); 381 __func__, rc);
377 goto put; 382 goto put;
378 } 383 }
379 wait_for_completion_interruptible_timeout(&ia->ri_done, 384 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
380 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); 385 if (rc < 0) {
386 dprintk("RPC: %s: wait() exited: %i\n",
387 __func__, rc);
388 goto put;
389 }
381 rc = ia->ri_async_rc; 390 rc = ia->ri_async_rc;
382 if (rc) 391 if (rc)
383 goto put; 392 goto put;
@@ -532,7 +541,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
532 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; 541 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
533 if (ep->rep_cqinit <= 2) 542 if (ep->rep_cqinit <= 2)
534 ep->rep_cqinit = 0; /* always signal? */ 543 ep->rep_cqinit = 0; /* always signal? */
535 INIT_CQCOUNT(ep); 544 rpcrdma_init_cqcount(ep, 0);
536 init_waitqueue_head(&ep->rep_connect_wait); 545 init_waitqueue_head(&ep->rep_connect_wait);
537 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); 546 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
538 547
@@ -1311,13 +1320,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1311 dprintk("RPC: %s: posting %d s/g entries\n", 1320 dprintk("RPC: %s: posting %d s/g entries\n",
1312 __func__, send_wr->num_sge); 1321 __func__, send_wr->num_sge);
1313 1322
1314 if (DECR_CQCOUNT(ep) > 0) 1323 rpcrdma_set_signaled(ep, send_wr);
1315 send_wr->send_flags = 0;
1316 else { /* Provider must take a send completion every now and then */
1317 INIT_CQCOUNT(ep);
1318 send_wr->send_flags = IB_SEND_SIGNALED;
1319 }
1320
1321 rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); 1324 rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
1322 if (rc) 1325 if (rc)
1323 goto out_postsend_err; 1326 goto out_postsend_err;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 6e1bba358203..e35efd4ac1e4 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -75,6 +75,7 @@ struct rpcrdma_ia {
75 unsigned int ri_max_inline_write; 75 unsigned int ri_max_inline_write;
76 unsigned int ri_max_inline_read; 76 unsigned int ri_max_inline_read;
77 bool ri_reminv_expected; 77 bool ri_reminv_expected;
78 enum ib_mr_type ri_mrtype;
78 struct ib_qp_attr ri_qp_attr; 79 struct ib_qp_attr ri_qp_attr;
79 struct ib_qp_init_attr ri_qp_init_attr; 80 struct ib_qp_init_attr ri_qp_init_attr;
80}; 81};
@@ -95,8 +96,24 @@ struct rpcrdma_ep {
95 struct delayed_work rep_connect_worker; 96 struct delayed_work rep_connect_worker;
96}; 97};
97 98
98#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) 99static inline void
99#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) 100rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count)
101{
102 atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count);
103}
104
105/* To update send queue accounting, provider must take a
106 * send completion every now and then.
107 */
108static inline void
109rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr)
110{
111 send_wr->send_flags = 0;
112 if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) {
113 rpcrdma_init_cqcount(ep, 0);
114 send_wr->send_flags = IB_SEND_SIGNALED;
115 }
116}
100 117
101/* Pre-allocate extra Work Requests for handling backward receives 118/* Pre-allocate extra Work Requests for handling backward receives
102 * and sends. This is a fixed value because the Work Queues are 119 * and sends. This is a fixed value because the Work Queues are
@@ -473,6 +490,7 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
473 struct rpcrdma_create_data_internal *); 490 struct rpcrdma_create_data_internal *);
474void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); 491void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
475int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); 492int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
493void rpcrdma_conn_func(struct rpcrdma_ep *ep);
476void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); 494void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
477 495
478int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, 496int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
@@ -532,13 +550,6 @@ rpcrdma_data_dir(bool writing)
532} 550}
533 551
534/* 552/*
535 * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
536 */
537void rpcrdma_connect_worker(struct work_struct *);
538void rpcrdma_conn_func(struct rpcrdma_ep *);
539void rpcrdma_reply_handler(struct work_struct *);
540
541/*
542 * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c 553 * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
543 */ 554 */
544 555
@@ -555,12 +566,14 @@ bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *,
555void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *); 566void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *);
556int rpcrdma_marshal_req(struct rpc_rqst *); 567int rpcrdma_marshal_req(struct rpc_rqst *);
557void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); 568void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
569void rpcrdma_reply_handler(struct work_struct *work);
558 570
559/* RPC/RDMA module init - xprtrdma/transport.c 571/* RPC/RDMA module init - xprtrdma/transport.c
560 */ 572 */
561extern unsigned int xprt_rdma_max_inline_read; 573extern unsigned int xprt_rdma_max_inline_read;
562void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); 574void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
563void xprt_rdma_free_addresses(struct rpc_xprt *xprt); 575void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
576void rpcrdma_connect_worker(struct work_struct *work);
564void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); 577void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
565int xprt_rdma_init(void); 578int xprt_rdma_init(void);
566void xprt_rdma_cleanup(void); 579void xprt_rdma_cleanup(void);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index e01c825bc683..af392d9b9cec 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1080,10 +1080,10 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
1080 if (sk == NULL) 1080 if (sk == NULL)
1081 goto out; 1081 goto out;
1082 for (;;) { 1082 for (;;) {
1083 skb = skb_recv_datagram(sk, 0, 1, &err); 1083 skb = skb_recv_udp(sk, 0, 1, &err);
1084 if (skb != NULL) { 1084 if (skb != NULL) {
1085 xs_udp_data_read_skb(&transport->xprt, sk, skb); 1085 xs_udp_data_read_skb(&transport->xprt, sk, skb);
1086 skb_free_datagram_locked(sk, skb); 1086 consume_skb(skb);
1087 continue; 1087 continue;
1088 } 1088 }
1089 if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) 1089 if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 3b95fe980fa2..017801f9dbaa 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -624,13 +624,10 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
624int call_switchdev_notifiers(unsigned long val, struct net_device *dev, 624int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
625 struct switchdev_notifier_info *info) 625 struct switchdev_notifier_info *info)
626{ 626{
627 int err;
628
629 ASSERT_RTNL(); 627 ASSERT_RTNL();
630 628
631 info->dev = dev; 629 info->dev = dev;
632 err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); 630 return raw_notifier_call_chain(&switchdev_notif_chain, val, info);
633 return err;
634} 631}
635EXPORT_SYMBOL_GPL(call_switchdev_notifiers); 632EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
636 633
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 236b043a4156..0b982d048fb9 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -47,7 +47,7 @@
47#include <linux/module.h> 47#include <linux/module.h>
48 48
49/* configurable TIPC parameters */ 49/* configurable TIPC parameters */
50int tipc_net_id __read_mostly; 50unsigned int tipc_net_id __read_mostly;
51int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ 51int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */
52 52
53static int __net_init tipc_init_net(struct net *net) 53static int __net_init tipc_init_net(struct net *net)
diff --git a/net/tipc/core.h b/net/tipc/core.h
index a1845fb27d80..5cc5398be722 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -74,7 +74,7 @@ struct tipc_monitor;
74#define MAX_BEARERS 3 74#define MAX_BEARERS 3
75#define TIPC_DEF_MON_THRESHOLD 32 75#define TIPC_DEF_MON_THRESHOLD 32
76 76
77extern int tipc_net_id __read_mostly; 77extern unsigned int tipc_net_id __read_mostly;
78extern int sysctl_tipc_rmem[3] __read_mostly; 78extern int sysctl_tipc_rmem[3] __read_mostly;
79extern int sysctl_tipc_named_timeout __read_mostly; 79extern int sysctl_tipc_named_timeout __read_mostly;
80 80
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 17201aa8423d..a22be502f1bd 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -268,7 +268,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
268 __skb_queue_tail(list, skb); 268 __skb_queue_tail(list, skb);
269 skb_copy_to_linear_data(skb, mhdr, mhsz); 269 skb_copy_to_linear_data(skb, mhdr, mhsz);
270 pktpos = skb->data + mhsz; 270 pktpos = skb->data + mhsz;
271 if (copy_from_iter(pktpos, dsz, &m->msg_iter) == dsz) 271 if (copy_from_iter_full(pktpos, dsz, &m->msg_iter))
272 return dsz; 272 return dsz;
273 rc = -EFAULT; 273 rc = -EFAULT;
274 goto error; 274 goto error;
@@ -299,7 +299,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
299 if (drem < pktrem) 299 if (drem < pktrem)
300 pktrem = drem; 300 pktrem = drem;
301 301
302 if (copy_from_iter(pktpos, pktrem, &m->msg_iter) != pktrem) { 302 if (!copy_from_iter_full(pktpos, pktrem, &m->msg_iter)) {
303 rc = -EFAULT; 303 rc = -EFAULT;
304 goto error; 304 goto error;
305 } 305 }
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 50a739860d37..8d408612ffa4 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -95,7 +95,7 @@ struct plist;
95#define TIPC_MEDIA_INFO_OFFSET 5 95#define TIPC_MEDIA_INFO_OFFSET 5
96 96
97struct tipc_skb_cb { 97struct tipc_skb_cb {
98 void *handle; 98 u32 bytes_read;
99 struct sk_buff *tail; 99 struct sk_buff *tail;
100 bool validated; 100 bool validated;
101 bool wakeup_pending; 101 bool wakeup_pending;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 3200059d14b2..26ca8dd64ded 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -135,15 +135,6 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
135/* Users of the legacy API (tipc-config) can't handle that we add operations, 135/* Users of the legacy API (tipc-config) can't handle that we add operations,
136 * so we have a separate genl handling for the new API. 136 * so we have a separate genl handling for the new API.
137 */ 137 */
138struct genl_family tipc_genl_family = {
139 .id = GENL_ID_GENERATE,
140 .name = TIPC_GENL_V2_NAME,
141 .version = TIPC_GENL_V2_VERSION,
142 .hdrsize = 0,
143 .maxattr = TIPC_NLA_MAX,
144 .netnsok = true,
145};
146
147static const struct genl_ops tipc_genl_v2_ops[] = { 138static const struct genl_ops tipc_genl_v2_ops[] = {
148 { 139 {
149 .cmd = TIPC_NL_BEARER_DISABLE, 140 .cmd = TIPC_NL_BEARER_DISABLE,
@@ -258,23 +249,33 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
258#endif 249#endif
259}; 250};
260 251
252struct genl_family tipc_genl_family __ro_after_init = {
253 .name = TIPC_GENL_V2_NAME,
254 .version = TIPC_GENL_V2_VERSION,
255 .hdrsize = 0,
256 .maxattr = TIPC_NLA_MAX,
257 .netnsok = true,
258 .module = THIS_MODULE,
259 .ops = tipc_genl_v2_ops,
260 .n_ops = ARRAY_SIZE(tipc_genl_v2_ops),
261};
262
261int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) 263int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
262{ 264{
263 u32 maxattr = tipc_genl_family.maxattr; 265 u32 maxattr = tipc_genl_family.maxattr;
264 266
265 *attr = tipc_genl_family.attrbuf; 267 *attr = genl_family_attrbuf(&tipc_genl_family);
266 if (!*attr) 268 if (!*attr)
267 return -EOPNOTSUPP; 269 return -EOPNOTSUPP;
268 270
269 return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy); 271 return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy);
270} 272}
271 273
272int tipc_netlink_start(void) 274int __init tipc_netlink_start(void)
273{ 275{
274 int res; 276 int res;
275 277
276 res = genl_register_family_with_ops(&tipc_genl_family, 278 res = genl_register_family(&tipc_genl_family);
277 tipc_genl_v2_ops);
278 if (res) { 279 if (res) {
279 pr_err("Failed to register netlink interface\n"); 280 pr_err("Failed to register netlink interface\n");
280 return res; 281 return res;
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 1fd464764765..e1ae8a8a2b8e 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1215,15 +1215,6 @@ send:
1215 return err; 1215 return err;
1216} 1216}
1217 1217
1218static struct genl_family tipc_genl_compat_family = {
1219 .id = GENL_ID_GENERATE,
1220 .name = TIPC_GENL_NAME,
1221 .version = TIPC_GENL_VERSION,
1222 .hdrsize = TIPC_GENL_HDRLEN,
1223 .maxattr = 0,
1224 .netnsok = true,
1225};
1226
1227static struct genl_ops tipc_genl_compat_ops[] = { 1218static struct genl_ops tipc_genl_compat_ops[] = {
1228 { 1219 {
1229 .cmd = TIPC_GENL_CMD, 1220 .cmd = TIPC_GENL_CMD,
@@ -1231,12 +1222,22 @@ static struct genl_ops tipc_genl_compat_ops[] = {
1231 }, 1222 },
1232}; 1223};
1233 1224
1234int tipc_netlink_compat_start(void) 1225static struct genl_family tipc_genl_compat_family __ro_after_init = {
1226 .name = TIPC_GENL_NAME,
1227 .version = TIPC_GENL_VERSION,
1228 .hdrsize = TIPC_GENL_HDRLEN,
1229 .maxattr = 0,
1230 .netnsok = true,
1231 .module = THIS_MODULE,
1232 .ops = tipc_genl_compat_ops,
1233 .n_ops = ARRAY_SIZE(tipc_genl_compat_ops),
1234};
1235
1236int __init tipc_netlink_compat_start(void)
1235{ 1237{
1236 int res; 1238 int res;
1237 1239
1238 res = genl_register_family_with_ops(&tipc_genl_compat_family, 1240 res = genl_register_family(&tipc_genl_compat_family);
1239 tipc_genl_compat_ops);
1240 if (res) { 1241 if (res) {
1241 pr_err("Failed to register legacy compat interface\n"); 1242 pr_err("Failed to register legacy compat interface\n");
1242 return res; 1243 return res;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 41f013888f07..333c5dae0072 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -44,44 +44,43 @@
44#include "bcast.h" 44#include "bcast.h"
45#include "netlink.h" 45#include "netlink.h"
46 46
47#define SS_LISTENING -1 /* socket is listening */
48#define SS_READY -2 /* socket is connectionless */
49
50#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ 47#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
51#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ 48#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */
52#define TIPC_FWD_MSG 1 49#define TIPC_FWD_MSG 1
53#define TIPC_CONN_OK 0
54#define TIPC_CONN_PROBING 1
55#define TIPC_MAX_PORT 0xffffffff 50#define TIPC_MAX_PORT 0xffffffff
56#define TIPC_MIN_PORT 1 51#define TIPC_MIN_PORT 1
57 52
53enum {
54 TIPC_LISTEN = TCP_LISTEN,
55 TIPC_ESTABLISHED = TCP_ESTABLISHED,
56 TIPC_OPEN = TCP_CLOSE,
57 TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
58 TIPC_CONNECTING = TCP_SYN_SENT,
59};
60
58/** 61/**
59 * struct tipc_sock - TIPC socket structure 62 * struct tipc_sock - TIPC socket structure
60 * @sk: socket - interacts with 'port' and with user via the socket API 63 * @sk: socket - interacts with 'port' and with user via the socket API
61 * @connected: non-zero if port is currently connected to a peer port
62 * @conn_type: TIPC type used when connection was established 64 * @conn_type: TIPC type used when connection was established
63 * @conn_instance: TIPC instance used when connection was established 65 * @conn_instance: TIPC instance used when connection was established
64 * @published: non-zero if port has one or more associated names 66 * @published: non-zero if port has one or more associated names
65 * @max_pkt: maximum packet size "hint" used when building messages sent by port 67 * @max_pkt: maximum packet size "hint" used when building messages sent by port
66 * @portid: unique port identity in TIPC socket hash table 68 * @portid: unique port identity in TIPC socket hash table
67 * @phdr: preformatted message header used when sending messages 69 * @phdr: preformatted message header used when sending messages
68 * @port_list: adjacent ports in TIPC's global list of ports
69 * @publications: list of publications for port 70 * @publications: list of publications for port
70 * @pub_count: total # of publications port has made during its lifetime 71 * @pub_count: total # of publications port has made during its lifetime
71 * @probing_state: 72 * @probing_state:
72 * @probing_intv:
73 * @conn_timeout: the time we can wait for an unresponded setup request 73 * @conn_timeout: the time we can wait for an unresponded setup request
74 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue 74 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
75 * @link_cong: non-zero if owner must sleep because of link congestion 75 * @link_cong: non-zero if owner must sleep because of link congestion
76 * @sent_unacked: # messages sent by socket, and not yet acked by peer 76 * @sent_unacked: # messages sent by socket, and not yet acked by peer
77 * @rcv_unacked: # messages read by user, but not yet acked back to peer 77 * @rcv_unacked: # messages read by user, but not yet acked back to peer
78 * @remote: 'connected' peer for dgram/rdm 78 * @peer: 'connected' peer for dgram/rdm
79 * @node: hash table node 79 * @node: hash table node
80 * @rcu: rcu struct for tipc_sock 80 * @rcu: rcu struct for tipc_sock
81 */ 81 */
82struct tipc_sock { 82struct tipc_sock {
83 struct sock sk; 83 struct sock sk;
84 int connected;
85 u32 conn_type; 84 u32 conn_type;
86 u32 conn_instance; 85 u32 conn_instance;
87 int published; 86 int published;
@@ -91,17 +90,16 @@ struct tipc_sock {
91 struct list_head sock_list; 90 struct list_head sock_list;
92 struct list_head publications; 91 struct list_head publications;
93 u32 pub_count; 92 u32 pub_count;
94 u32 probing_state;
95 unsigned long probing_intv;
96 uint conn_timeout; 93 uint conn_timeout;
97 atomic_t dupl_rcvcnt; 94 atomic_t dupl_rcvcnt;
95 bool probe_unacked;
98 bool link_cong; 96 bool link_cong;
99 u16 snt_unacked; 97 u16 snt_unacked;
100 u16 snd_win; 98 u16 snd_win;
101 u16 peer_caps; 99 u16 peer_caps;
102 u16 rcv_unacked; 100 u16 rcv_unacked;
103 u16 rcv_win; 101 u16 rcv_win;
104 struct sockaddr_tipc remote; 102 struct sockaddr_tipc peer;
105 struct rhash_head node; 103 struct rhash_head node;
106 struct rcu_head rcu; 104 struct rcu_head rcu;
107}; 105};
@@ -248,6 +246,21 @@ static void tsk_rej_rx_queue(struct sock *sk)
248 tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); 246 tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
249} 247}
250 248
249static bool tipc_sk_connected(struct sock *sk)
250{
251 return sk->sk_state == TIPC_ESTABLISHED;
252}
253
254/* tipc_sk_type_connectionless - check if the socket is datagram socket
255 * @sk: socket
256 *
257 * Returns true if connection less, false otherwise
258 */
259static bool tipc_sk_type_connectionless(struct sock *sk)
260{
261 return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM;
262}
263
251/* tsk_peer_msg - verify if message was sent by connected port's peer 264/* tsk_peer_msg - verify if message was sent by connected port's peer
252 * 265 *
253 * Handles cases where the node's network address has changed from 266 * Handles cases where the node's network address has changed from
@@ -255,12 +268,13 @@ static void tsk_rej_rx_queue(struct sock *sk)
255 */ 268 */
256static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) 269static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
257{ 270{
258 struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id); 271 struct sock *sk = &tsk->sk;
272 struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
259 u32 peer_port = tsk_peer_port(tsk); 273 u32 peer_port = tsk_peer_port(tsk);
260 u32 orig_node; 274 u32 orig_node;
261 u32 peer_node; 275 u32 peer_node;
262 276
263 if (unlikely(!tsk->connected)) 277 if (unlikely(!tipc_sk_connected(sk)))
264 return false; 278 return false;
265 279
266 if (unlikely(msg_origport(msg) != peer_port)) 280 if (unlikely(msg_origport(msg) != peer_port))
@@ -281,6 +295,45 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
281 return false; 295 return false;
282} 296}
283 297
298/* tipc_set_sk_state - set the sk_state of the socket
299 * @sk: socket
300 *
301 * Caller must hold socket lock
302 *
303 * Returns 0 on success, errno otherwise
304 */
305static int tipc_set_sk_state(struct sock *sk, int state)
306{
307 int oldsk_state = sk->sk_state;
308 int res = -EINVAL;
309
310 switch (state) {
311 case TIPC_OPEN:
312 res = 0;
313 break;
314 case TIPC_LISTEN:
315 case TIPC_CONNECTING:
316 if (oldsk_state == TIPC_OPEN)
317 res = 0;
318 break;
319 case TIPC_ESTABLISHED:
320 if (oldsk_state == TIPC_CONNECTING ||
321 oldsk_state == TIPC_OPEN)
322 res = 0;
323 break;
324 case TIPC_DISCONNECTING:
325 if (oldsk_state == TIPC_CONNECTING ||
326 oldsk_state == TIPC_ESTABLISHED)
327 res = 0;
328 break;
329 }
330
331 if (!res)
332 sk->sk_state = state;
333
334 return res;
335}
336
284/** 337/**
285 * tipc_sk_create - create a TIPC socket 338 * tipc_sk_create - create a TIPC socket
286 * @net: network namespace (must be default network) 339 * @net: network namespace (must be default network)
@@ -298,7 +351,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
298{ 351{
299 struct tipc_net *tn; 352 struct tipc_net *tn;
300 const struct proto_ops *ops; 353 const struct proto_ops *ops;
301 socket_state state;
302 struct sock *sk; 354 struct sock *sk;
303 struct tipc_sock *tsk; 355 struct tipc_sock *tsk;
304 struct tipc_msg *msg; 356 struct tipc_msg *msg;
@@ -310,16 +362,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
310 switch (sock->type) { 362 switch (sock->type) {
311 case SOCK_STREAM: 363 case SOCK_STREAM:
312 ops = &stream_ops; 364 ops = &stream_ops;
313 state = SS_UNCONNECTED;
314 break; 365 break;
315 case SOCK_SEQPACKET: 366 case SOCK_SEQPACKET:
316 ops = &packet_ops; 367 ops = &packet_ops;
317 state = SS_UNCONNECTED;
318 break; 368 break;
319 case SOCK_DGRAM: 369 case SOCK_DGRAM:
320 case SOCK_RDM: 370 case SOCK_RDM:
321 ops = &msg_ops; 371 ops = &msg_ops;
322 state = SS_READY;
323 break; 372 break;
324 default: 373 default:
325 return -EPROTOTYPE; 374 return -EPROTOTYPE;
@@ -340,14 +389,15 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
340 389
341 /* Finish initializing socket data structures */ 390 /* Finish initializing socket data structures */
342 sock->ops = ops; 391 sock->ops = ops;
343 sock->state = state;
344 sock_init_data(sock, sk); 392 sock_init_data(sock, sk);
393 tipc_set_sk_state(sk, TIPC_OPEN);
345 if (tipc_sk_insert(tsk)) { 394 if (tipc_sk_insert(tsk)) {
346 pr_warn("Socket create failed; port number exhausted\n"); 395 pr_warn("Socket create failed; port number exhausted\n");
347 return -EINVAL; 396 return -EINVAL;
348 } 397 }
349 msg_set_origport(msg, tsk->portid); 398 msg_set_origport(msg, tsk->portid);
350 setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); 399 setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
400 sk->sk_shutdown = 0;
351 sk->sk_backlog_rcv = tipc_backlog_rcv; 401 sk->sk_backlog_rcv = tipc_backlog_rcv;
352 sk->sk_rcvbuf = sysctl_tipc_rmem[1]; 402 sk->sk_rcvbuf = sysctl_tipc_rmem[1];
353 sk->sk_data_ready = tipc_data_ready; 403 sk->sk_data_ready = tipc_data_ready;
@@ -360,11 +410,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
360 tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN); 410 tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
361 tsk->rcv_win = tsk->snd_win; 411 tsk->rcv_win = tsk->snd_win;
362 412
363 if (sock->state == SS_READY) { 413 if (tipc_sk_type_connectionless(sk)) {
364 tsk_set_unreturnable(tsk, true); 414 tsk_set_unreturnable(tsk, true);
365 if (sock->type == SOCK_DGRAM) 415 if (sock->type == SOCK_DGRAM)
366 tsk_set_unreliable(tsk, true); 416 tsk_set_unreliable(tsk, true);
367 } 417 }
418
368 return 0; 419 return 0;
369} 420}
370 421
@@ -375,6 +426,44 @@ static void tipc_sk_callback(struct rcu_head *head)
375 sock_put(&tsk->sk); 426 sock_put(&tsk->sk);
376} 427}
377 428
429/* Caller should hold socket lock for the socket. */
430static void __tipc_shutdown(struct socket *sock, int error)
431{
432 struct sock *sk = sock->sk;
433 struct tipc_sock *tsk = tipc_sk(sk);
434 struct net *net = sock_net(sk);
435 u32 dnode = tsk_peer_node(tsk);
436 struct sk_buff *skb;
437
438 /* Reject all unreceived messages, except on an active connection
439 * (which disconnects locally & sends a 'FIN+' to peer).
440 */
441 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
442 if (TIPC_SKB_CB(skb)->bytes_read) {
443 kfree_skb(skb);
444 } else {
445 if (!tipc_sk_type_connectionless(sk) &&
446 sk->sk_state != TIPC_DISCONNECTING) {
447 tipc_set_sk_state(sk, TIPC_DISCONNECTING);
448 tipc_node_remove_conn(net, dnode, tsk->portid);
449 }
450 tipc_sk_respond(sk, skb, error);
451 }
452 }
453 if (sk->sk_state != TIPC_DISCONNECTING) {
454 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
455 TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
456 tsk_own_node(tsk), tsk_peer_port(tsk),
457 tsk->portid, error);
458 if (skb)
459 tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
460 if (!tipc_sk_type_connectionless(sk)) {
461 tipc_node_remove_conn(net, dnode, tsk->portid);
462 tipc_set_sk_state(sk, TIPC_DISCONNECTING);
463 }
464 }
465}
466
378/** 467/**
379 * tipc_release - destroy a TIPC socket 468 * tipc_release - destroy a TIPC socket
380 * @sock: socket to destroy 469 * @sock: socket to destroy
@@ -394,10 +483,7 @@ static void tipc_sk_callback(struct rcu_head *head)
394static int tipc_release(struct socket *sock) 483static int tipc_release(struct socket *sock)
395{ 484{
396 struct sock *sk = sock->sk; 485 struct sock *sk = sock->sk;
397 struct net *net;
398 struct tipc_sock *tsk; 486 struct tipc_sock *tsk;
399 struct sk_buff *skb;
400 u32 dnode;
401 487
402 /* 488 /*
403 * Exit if socket isn't fully initialized (occurs when a failed accept() 489 * Exit if socket isn't fully initialized (occurs when a failed accept()
@@ -406,47 +492,16 @@ static int tipc_release(struct socket *sock)
406 if (sk == NULL) 492 if (sk == NULL)
407 return 0; 493 return 0;
408 494
409 net = sock_net(sk);
410 tsk = tipc_sk(sk); 495 tsk = tipc_sk(sk);
411 lock_sock(sk); 496 lock_sock(sk);
412 497
413 /* 498 __tipc_shutdown(sock, TIPC_ERR_NO_PORT);
414 * Reject all unreceived messages, except on an active connection 499 sk->sk_shutdown = SHUTDOWN_MASK;
415 * (which disconnects locally & sends a 'FIN+' to peer)
416 */
417 dnode = tsk_peer_node(tsk);
418 while (sock->state != SS_DISCONNECTING) {
419 skb = __skb_dequeue(&sk->sk_receive_queue);
420 if (skb == NULL)
421 break;
422 if (TIPC_SKB_CB(skb)->handle != NULL)
423 kfree_skb(skb);
424 else {
425 if ((sock->state == SS_CONNECTING) ||
426 (sock->state == SS_CONNECTED)) {
427 sock->state = SS_DISCONNECTING;
428 tsk->connected = 0;
429 tipc_node_remove_conn(net, dnode, tsk->portid);
430 }
431 tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
432 }
433 }
434
435 tipc_sk_withdraw(tsk, 0, NULL); 500 tipc_sk_withdraw(tsk, 0, NULL);
436 sk_stop_timer(sk, &sk->sk_timer); 501 sk_stop_timer(sk, &sk->sk_timer);
437 tipc_sk_remove(tsk); 502 tipc_sk_remove(tsk);
438 if (tsk->connected) {
439 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
440 TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
441 tsk_own_node(tsk), tsk_peer_port(tsk),
442 tsk->portid, TIPC_ERR_NO_PORT);
443 if (skb)
444 tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
445 tipc_node_remove_conn(net, dnode, tsk->portid);
446 }
447 503
448 /* Reject any messages that accumulated in backlog queue */ 504 /* Reject any messages that accumulated in backlog queue */
449 sock->state = SS_DISCONNECTING;
450 release_sock(sk); 505 release_sock(sk);
451 506
452 call_rcu(&tsk->rcu, tipc_sk_callback); 507 call_rcu(&tsk->rcu, tipc_sk_callback);
@@ -532,13 +587,14 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
532 int *uaddr_len, int peer) 587 int *uaddr_len, int peer)
533{ 588{
534 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; 589 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
535 struct tipc_sock *tsk = tipc_sk(sock->sk); 590 struct sock *sk = sock->sk;
591 struct tipc_sock *tsk = tipc_sk(sk);
536 struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); 592 struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
537 593
538 memset(addr, 0, sizeof(*addr)); 594 memset(addr, 0, sizeof(*addr));
539 if (peer) { 595 if (peer) {
540 if ((sock->state != SS_CONNECTED) && 596 if ((!tipc_sk_connected(sk)) &&
541 ((peer != 2) || (sock->state != SS_DISCONNECTING))) 597 ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
542 return -ENOTCONN; 598 return -ENOTCONN;
543 addr->addr.id.ref = tsk_peer_port(tsk); 599 addr->addr.id.ref = tsk_peer_port(tsk);
544 addr->addr.id.node = tsk_peer_node(tsk); 600 addr->addr.id.node = tsk_peer_node(tsk);
@@ -570,28 +626,6 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
570 * exits. TCP and other protocols seem to rely on higher level poll routines 626 * exits. TCP and other protocols seem to rely on higher level poll routines
571 * to handle any preventable race conditions, so TIPC will do the same ... 627 * to handle any preventable race conditions, so TIPC will do the same ...
572 * 628 *
573 * TIPC sets the returned events as follows:
574 *
575 * socket state flags set
576 * ------------ ---------
577 * unconnected no read flags
578 * POLLOUT if port is not congested
579 *
580 * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue
581 * no write flags
582 *
583 * connected POLLIN/POLLRDNORM if data in rx queue
584 * POLLOUT if port is not congested
585 *
586 * disconnecting POLLIN/POLLRDNORM/POLLHUP
587 * no write flags
588 *
589 * listening POLLIN if SYN in rx queue
590 * no write flags
591 *
592 * ready POLLIN/POLLRDNORM if data in rx queue
593 * [connectionless] POLLOUT (since port cannot be congested)
594 *
595 * IMPORTANT: The fact that a read or write operation is indicated does NOT 629 * IMPORTANT: The fact that a read or write operation is indicated does NOT
596 * imply that the operation will succeed, merely that it should be performed 630 * imply that the operation will succeed, merely that it should be performed
597 * and will not block. 631 * and will not block.
@@ -605,22 +639,29 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
605 639
606 sock_poll_wait(file, sk_sleep(sk), wait); 640 sock_poll_wait(file, sk_sleep(sk), wait);
607 641
608 switch ((int)sock->state) { 642 if (sk->sk_shutdown & RCV_SHUTDOWN)
609 case SS_UNCONNECTED: 643 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
610 if (!tsk->link_cong) 644 if (sk->sk_shutdown == SHUTDOWN_MASK)
611 mask |= POLLOUT; 645 mask |= POLLHUP;
612 break; 646
613 case SS_READY: 647 switch (sk->sk_state) {
614 case SS_CONNECTED: 648 case TIPC_ESTABLISHED:
615 if (!tsk->link_cong && !tsk_conn_cong(tsk)) 649 if (!tsk->link_cong && !tsk_conn_cong(tsk))
616 mask |= POLLOUT; 650 mask |= POLLOUT;
617 /* fall thru' */ 651 /* fall thru' */
618 case SS_CONNECTING: 652 case TIPC_LISTEN:
619 case SS_LISTENING: 653 case TIPC_CONNECTING:
620 if (!skb_queue_empty(&sk->sk_receive_queue)) 654 if (!skb_queue_empty(&sk->sk_receive_queue))
621 mask |= (POLLIN | POLLRDNORM); 655 mask |= (POLLIN | POLLRDNORM);
622 break; 656 break;
623 case SS_DISCONNECTING: 657 case TIPC_OPEN:
658 if (!tsk->link_cong)
659 mask |= POLLOUT;
660 if (tipc_sk_type_connectionless(sk) &&
661 (!skb_queue_empty(&sk->sk_receive_queue)))
662 mask |= (POLLIN | POLLRDNORM);
663 break;
664 case TIPC_DISCONNECTING:
624 mask = (POLLIN | POLLRDNORM | POLLHUP); 665 mask = (POLLIN | POLLRDNORM | POLLHUP);
625 break; 666 break;
626 } 667 }
@@ -651,6 +692,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
651 uint mtu; 692 uint mtu;
652 int rc; 693 int rc;
653 694
695 if (!timeo && tsk->link_cong)
696 return -ELINKCONG;
697
654 msg_set_type(mhdr, TIPC_MCAST_MSG); 698 msg_set_type(mhdr, TIPC_MCAST_MSG);
655 msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE); 699 msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE);
656 msg_set_destport(mhdr, 0); 700 msg_set_destport(mhdr, 0);
@@ -763,7 +807,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
763 if (!tsk_peer_msg(tsk, hdr)) 807 if (!tsk_peer_msg(tsk, hdr))
764 goto exit; 808 goto exit;
765 809
766 tsk->probing_state = TIPC_CONN_OK; 810 tsk->probe_unacked = false;
767 811
768 if (mtyp == CONN_PROBE) { 812 if (mtyp == CONN_PROBE) {
769 msg_set_type(hdr, CONN_PROBE_REPLY); 813 msg_set_type(hdr, CONN_PROBE_REPLY);
@@ -786,25 +830,25 @@ exit:
786 830
787static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) 831static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
788{ 832{
833 DEFINE_WAIT_FUNC(wait, woken_wake_function);
789 struct sock *sk = sock->sk; 834 struct sock *sk = sock->sk;
790 struct tipc_sock *tsk = tipc_sk(sk); 835 struct tipc_sock *tsk = tipc_sk(sk);
791 DEFINE_WAIT(wait);
792 int done; 836 int done;
793 837
794 do { 838 do {
795 int err = sock_error(sk); 839 int err = sock_error(sk);
796 if (err) 840 if (err)
797 return err; 841 return err;
798 if (sock->state == SS_DISCONNECTING) 842 if (sk->sk_shutdown & SEND_SHUTDOWN)
799 return -EPIPE; 843 return -EPIPE;
800 if (!*timeo_p) 844 if (!*timeo_p)
801 return -EAGAIN; 845 return -EAGAIN;
802 if (signal_pending(current)) 846 if (signal_pending(current))
803 return sock_intr_errno(*timeo_p); 847 return sock_intr_errno(*timeo_p);
804 848
805 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 849 add_wait_queue(sk_sleep(sk), &wait);
806 done = sk_wait_event(sk, timeo_p, !tsk->link_cong); 850 done = sk_wait_event(sk, timeo_p, !tsk->link_cong, &wait);
807 finish_wait(sk_sleep(sk), &wait); 851 remove_wait_queue(sk_sleep(sk), &wait);
808 } while (!done); 852 } while (!done);
809 return 0; 853 return 0;
810} 854}
@@ -844,6 +888,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
844 struct tipc_msg *mhdr = &tsk->phdr; 888 struct tipc_msg *mhdr = &tsk->phdr;
845 u32 dnode, dport; 889 u32 dnode, dport;
846 struct sk_buff_head pktchain; 890 struct sk_buff_head pktchain;
891 bool is_connectionless = tipc_sk_type_connectionless(sk);
847 struct sk_buff *skb; 892 struct sk_buff *skb;
848 struct tipc_name_seq *seq; 893 struct tipc_name_seq *seq;
849 struct iov_iter save; 894 struct iov_iter save;
@@ -854,18 +899,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
854 if (dsz > TIPC_MAX_USER_MSG_SIZE) 899 if (dsz > TIPC_MAX_USER_MSG_SIZE)
855 return -EMSGSIZE; 900 return -EMSGSIZE;
856 if (unlikely(!dest)) { 901 if (unlikely(!dest)) {
857 if (tsk->connected && sock->state == SS_READY) 902 if (is_connectionless && tsk->peer.family == AF_TIPC)
858 dest = &tsk->remote; 903 dest = &tsk->peer;
859 else 904 else
860 return -EDESTADDRREQ; 905 return -EDESTADDRREQ;
861 } else if (unlikely(m->msg_namelen < sizeof(*dest)) || 906 } else if (unlikely(m->msg_namelen < sizeof(*dest)) ||
862 dest->family != AF_TIPC) { 907 dest->family != AF_TIPC) {
863 return -EINVAL; 908 return -EINVAL;
864 } 909 }
865 if (unlikely(sock->state != SS_READY)) { 910 if (!is_connectionless) {
866 if (sock->state == SS_LISTENING) 911 if (sk->sk_state == TIPC_LISTEN)
867 return -EPIPE; 912 return -EPIPE;
868 if (sock->state != SS_UNCONNECTED) 913 if (sk->sk_state != TIPC_OPEN)
869 return -EISCONN; 914 return -EISCONN;
870 if (tsk->published) 915 if (tsk->published)
871 return -EOPNOTSUPP; 916 return -EOPNOTSUPP;
@@ -917,8 +962,8 @@ new_mtu:
917 TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; 962 TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
918 rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid); 963 rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
919 if (likely(!rc)) { 964 if (likely(!rc)) {
920 if (sock->state != SS_READY) 965 if (!is_connectionless)
921 sock->state = SS_CONNECTING; 966 tipc_set_sk_state(sk, TIPC_CONNECTING);
922 return dsz; 967 return dsz;
923 } 968 }
924 if (rc == -ELINKCONG) { 969 if (rc == -ELINKCONG) {
@@ -940,30 +985,30 @@ new_mtu:
940 985
941static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) 986static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
942{ 987{
988 DEFINE_WAIT_FUNC(wait, woken_wake_function);
943 struct sock *sk = sock->sk; 989 struct sock *sk = sock->sk;
944 struct tipc_sock *tsk = tipc_sk(sk); 990 struct tipc_sock *tsk = tipc_sk(sk);
945 DEFINE_WAIT(wait);
946 int done; 991 int done;
947 992
948 do { 993 do {
949 int err = sock_error(sk); 994 int err = sock_error(sk);
950 if (err) 995 if (err)
951 return err; 996 return err;
952 if (sock->state == SS_DISCONNECTING) 997 if (sk->sk_state == TIPC_DISCONNECTING)
953 return -EPIPE; 998 return -EPIPE;
954 else if (sock->state != SS_CONNECTED) 999 else if (!tipc_sk_connected(sk))
955 return -ENOTCONN; 1000 return -ENOTCONN;
956 if (!*timeo_p) 1001 if (!*timeo_p)
957 return -EAGAIN; 1002 return -EAGAIN;
958 if (signal_pending(current)) 1003 if (signal_pending(current))
959 return sock_intr_errno(*timeo_p); 1004 return sock_intr_errno(*timeo_p);
960 1005
961 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1006 add_wait_queue(sk_sleep(sk), &wait);
962 done = sk_wait_event(sk, timeo_p, 1007 done = sk_wait_event(sk, timeo_p,
963 (!tsk->link_cong && 1008 (!tsk->link_cong &&
964 !tsk_conn_cong(tsk)) || 1009 !tsk_conn_cong(tsk)) ||
965 !tsk->connected); 1010 !tipc_sk_connected(sk), &wait);
966 finish_wait(sk_sleep(sk), &wait); 1011 remove_wait_queue(sk_sleep(sk), &wait);
967 } while (!done); 1012 } while (!done);
968 return 0; 1013 return 0;
969} 1014}
@@ -1018,14 +1063,17 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
1018 if (dsz > (uint)INT_MAX) 1063 if (dsz > (uint)INT_MAX)
1019 return -EMSGSIZE; 1064 return -EMSGSIZE;
1020 1065
1021 if (unlikely(sock->state != SS_CONNECTED)) { 1066 if (unlikely(!tipc_sk_connected(sk))) {
1022 if (sock->state == SS_DISCONNECTING) 1067 if (sk->sk_state == TIPC_DISCONNECTING)
1023 return -EPIPE; 1068 return -EPIPE;
1024 else 1069 else
1025 return -ENOTCONN; 1070 return -ENOTCONN;
1026 } 1071 }
1027 1072
1028 timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); 1073 timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
1074 if (!timeo && tsk->link_cong)
1075 return -ELINKCONG;
1076
1029 dnode = tsk_peer_node(tsk); 1077 dnode = tsk_peer_node(tsk);
1030 skb_queue_head_init(&pktchain); 1078 skb_queue_head_init(&pktchain);
1031 1079
@@ -1099,10 +1147,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1099 msg_set_lookup_scope(msg, 0); 1147 msg_set_lookup_scope(msg, 0);
1100 msg_set_hdr_sz(msg, SHORT_H_SIZE); 1148 msg_set_hdr_sz(msg, SHORT_H_SIZE);
1101 1149
1102 tsk->probing_intv = CONN_PROBING_INTERVAL; 1150 sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
1103 tsk->probing_state = TIPC_CONN_OK; 1151 tipc_set_sk_state(sk, TIPC_ESTABLISHED);
1104 tsk->connected = 1;
1105 sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv);
1106 tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); 1152 tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
1107 tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); 1153 tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
1108 tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); 1154 tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
@@ -1210,13 +1256,14 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
1210 1256
1211static void tipc_sk_send_ack(struct tipc_sock *tsk) 1257static void tipc_sk_send_ack(struct tipc_sock *tsk)
1212{ 1258{
1213 struct net *net = sock_net(&tsk->sk); 1259 struct sock *sk = &tsk->sk;
1260 struct net *net = sock_net(sk);
1214 struct sk_buff *skb = NULL; 1261 struct sk_buff *skb = NULL;
1215 struct tipc_msg *msg; 1262 struct tipc_msg *msg;
1216 u32 peer_port = tsk_peer_port(tsk); 1263 u32 peer_port = tsk_peer_port(tsk);
1217 u32 dnode = tsk_peer_node(tsk); 1264 u32 dnode = tsk_peer_node(tsk);
1218 1265
1219 if (!tsk->connected) 1266 if (!tipc_sk_connected(sk))
1220 return; 1267 return;
1221 skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, 1268 skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
1222 dnode, tsk_own_node(tsk), peer_port, 1269 dnode, tsk_own_node(tsk), peer_port,
@@ -1245,7 +1292,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
1245 for (;;) { 1292 for (;;) {
1246 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1293 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1247 if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { 1294 if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
1248 if (sock->state == SS_DISCONNECTING) { 1295 if (sk->sk_shutdown & RCV_SHUTDOWN) {
1249 err = -ENOTCONN; 1296 err = -ENOTCONN;
1250 break; 1297 break;
1251 } 1298 }
@@ -1286,6 +1333,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
1286 struct tipc_sock *tsk = tipc_sk(sk); 1333 struct tipc_sock *tsk = tipc_sk(sk);
1287 struct sk_buff *buf; 1334 struct sk_buff *buf;
1288 struct tipc_msg *msg; 1335 struct tipc_msg *msg;
1336 bool is_connectionless = tipc_sk_type_connectionless(sk);
1289 long timeo; 1337 long timeo;
1290 unsigned int sz; 1338 unsigned int sz;
1291 u32 err; 1339 u32 err;
@@ -1297,7 +1345,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
1297 1345
1298 lock_sock(sk); 1346 lock_sock(sk);
1299 1347
1300 if (unlikely(sock->state == SS_UNCONNECTED)) { 1348 if (!is_connectionless && unlikely(sk->sk_state == TIPC_OPEN)) {
1301 res = -ENOTCONN; 1349 res = -ENOTCONN;
1302 goto exit; 1350 goto exit;
1303 } 1351 }
@@ -1342,8 +1390,8 @@ restart:
1342 goto exit; 1390 goto exit;
1343 res = sz; 1391 res = sz;
1344 } else { 1392 } else {
1345 if ((sock->state == SS_READY) || 1393 if (is_connectionless || err == TIPC_CONN_SHUTDOWN ||
1346 ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)) 1394 m->msg_control)
1347 res = 0; 1395 res = 0;
1348 else 1396 else
1349 res = -ECONNRESET; 1397 res = -ECONNRESET;
@@ -1352,7 +1400,7 @@ restart:
1352 if (unlikely(flags & MSG_PEEK)) 1400 if (unlikely(flags & MSG_PEEK))
1353 goto exit; 1401 goto exit;
1354 1402
1355 if (likely(sock->state != SS_READY)) { 1403 if (likely(!is_connectionless)) {
1356 tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); 1404 tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
1357 if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) 1405 if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
1358 tipc_sk_send_ack(tsk); 1406 tipc_sk_send_ack(tsk);
@@ -1383,7 +1431,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
1383 struct tipc_msg *msg; 1431 struct tipc_msg *msg;
1384 long timeo; 1432 long timeo;
1385 unsigned int sz; 1433 unsigned int sz;
1386 int sz_to_copy, target, needed; 1434 int target;
1387 int sz_copied = 0; 1435 int sz_copied = 0;
1388 u32 err; 1436 u32 err;
1389 int res = 0, hlen; 1437 int res = 0, hlen;
@@ -1394,7 +1442,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
1394 1442
1395 lock_sock(sk); 1443 lock_sock(sk);
1396 1444
1397 if (unlikely(sock->state == SS_UNCONNECTED)) { 1445 if (unlikely(sk->sk_state == TIPC_OPEN)) {
1398 res = -ENOTCONN; 1446 res = -ENOTCONN;
1399 goto exit; 1447 goto exit;
1400 } 1448 }
@@ -1431,11 +1479,13 @@ restart:
1431 1479
1432 /* Capture message data (if valid) & compute return value (always) */ 1480 /* Capture message data (if valid) & compute return value (always) */
1433 if (!err) { 1481 if (!err) {
1434 u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle); 1482 u32 offset = TIPC_SKB_CB(buf)->bytes_read;
1483 u32 needed;
1484 int sz_to_copy;
1435 1485
1436 sz -= offset; 1486 sz -= offset;
1437 needed = (buf_len - sz_copied); 1487 needed = (buf_len - sz_copied);
1438 sz_to_copy = (sz <= needed) ? sz : needed; 1488 sz_to_copy = min(sz, needed);
1439 1489
1440 res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy); 1490 res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
1441 if (res) 1491 if (res)
@@ -1445,8 +1495,8 @@ restart:
1445 1495
1446 if (sz_to_copy < sz) { 1496 if (sz_to_copy < sz) {
1447 if (!(flags & MSG_PEEK)) 1497 if (!(flags & MSG_PEEK))
1448 TIPC_SKB_CB(buf)->handle = 1498 TIPC_SKB_CB(buf)->bytes_read =
1449 (void *)(unsigned long)(offset + sz_to_copy); 1499 offset + sz_to_copy;
1450 goto exit; 1500 goto exit;
1451 } 1501 }
1452 } else { 1502 } else {
@@ -1528,49 +1578,31 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
1528{ 1578{
1529 struct sock *sk = &tsk->sk; 1579 struct sock *sk = &tsk->sk;
1530 struct net *net = sock_net(sk); 1580 struct net *net = sock_net(sk);
1531 struct socket *sock = sk->sk_socket;
1532 struct tipc_msg *hdr = buf_msg(skb); 1581 struct tipc_msg *hdr = buf_msg(skb);
1533 1582
1534 if (unlikely(msg_mcast(hdr))) 1583 if (unlikely(msg_mcast(hdr)))
1535 return false; 1584 return false;
1536 1585
1537 switch ((int)sock->state) { 1586 switch (sk->sk_state) {
1538 case SS_CONNECTED: 1587 case TIPC_CONNECTING:
1539
1540 /* Accept only connection-based messages sent by peer */
1541 if (unlikely(!tsk_peer_msg(tsk, hdr)))
1542 return false;
1543
1544 if (unlikely(msg_errcode(hdr))) {
1545 sock->state = SS_DISCONNECTING;
1546 tsk->connected = 0;
1547 /* Let timer expire on it's own */
1548 tipc_node_remove_conn(net, tsk_peer_node(tsk),
1549 tsk->portid);
1550 }
1551 return true;
1552
1553 case SS_CONNECTING:
1554
1555 /* Accept only ACK or NACK message */ 1588 /* Accept only ACK or NACK message */
1556 if (unlikely(!msg_connected(hdr))) 1589 if (unlikely(!msg_connected(hdr)))
1557 return false; 1590 return false;
1558 1591
1559 if (unlikely(msg_errcode(hdr))) { 1592 if (unlikely(msg_errcode(hdr))) {
1560 sock->state = SS_DISCONNECTING; 1593 tipc_set_sk_state(sk, TIPC_DISCONNECTING);
1561 sk->sk_err = ECONNREFUSED; 1594 sk->sk_err = ECONNREFUSED;
1562 return true; 1595 return true;
1563 } 1596 }
1564 1597
1565 if (unlikely(!msg_isdata(hdr))) { 1598 if (unlikely(!msg_isdata(hdr))) {
1566 sock->state = SS_DISCONNECTING; 1599 tipc_set_sk_state(sk, TIPC_DISCONNECTING);
1567 sk->sk_err = EINVAL; 1600 sk->sk_err = EINVAL;
1568 return true; 1601 return true;
1569 } 1602 }
1570 1603
1571 tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr)); 1604 tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
1572 msg_set_importance(&tsk->phdr, msg_importance(hdr)); 1605 msg_set_importance(&tsk->phdr, msg_importance(hdr));
1573 sock->state = SS_CONNECTED;
1574 1606
1575 /* If 'ACK+' message, add to socket receive queue */ 1607 /* If 'ACK+' message, add to socket receive queue */
1576 if (msg_data_sz(hdr)) 1608 if (msg_data_sz(hdr))
@@ -1584,18 +1616,31 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
1584 msg_set_dest_droppable(hdr, 1); 1616 msg_set_dest_droppable(hdr, 1);
1585 return false; 1617 return false;
1586 1618
1587 case SS_LISTENING: 1619 case TIPC_OPEN:
1588 case SS_UNCONNECTED: 1620 case TIPC_DISCONNECTING:
1589 1621 break;
1622 case TIPC_LISTEN:
1590 /* Accept only SYN message */ 1623 /* Accept only SYN message */
1591 if (!msg_connected(hdr) && !(msg_errcode(hdr))) 1624 if (!msg_connected(hdr) && !(msg_errcode(hdr)))
1592 return true; 1625 return true;
1593 break; 1626 break;
1594 case SS_DISCONNECTING: 1627 case TIPC_ESTABLISHED:
1595 break; 1628 /* Accept only connection-based messages sent by peer */
1629 if (unlikely(!tsk_peer_msg(tsk, hdr)))
1630 return false;
1631
1632 if (unlikely(msg_errcode(hdr))) {
1633 tipc_set_sk_state(sk, TIPC_DISCONNECTING);
1634 /* Let timer expire on it's own */
1635 tipc_node_remove_conn(net, tsk_peer_node(tsk),
1636 tsk->portid);
1637 sk->sk_state_change(sk);
1638 }
1639 return true;
1596 default: 1640 default:
1597 pr_err("Unknown socket state %u\n", sock->state); 1641 pr_err("Unknown sk_state %u\n", sk->sk_state);
1598 } 1642 }
1643
1599 return false; 1644 return false;
1600} 1645}
1601 1646
@@ -1646,7 +1691,6 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1646static bool filter_rcv(struct sock *sk, struct sk_buff *skb, 1691static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
1647 struct sk_buff_head *xmitq) 1692 struct sk_buff_head *xmitq)
1648{ 1693{
1649 struct socket *sock = sk->sk_socket;
1650 struct tipc_sock *tsk = tipc_sk(sk); 1694 struct tipc_sock *tsk = tipc_sk(sk);
1651 struct tipc_msg *hdr = buf_msg(skb); 1695 struct tipc_msg *hdr = buf_msg(skb);
1652 unsigned int limit = rcvbuf_limit(sk, skb); 1696 unsigned int limit = rcvbuf_limit(sk, skb);
@@ -1672,7 +1716,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
1672 } 1716 }
1673 1717
1674 /* Reject if wrong message type for current socket state */ 1718 /* Reject if wrong message type for current socket state */
1675 if (unlikely(sock->state == SS_READY)) { 1719 if (tipc_sk_type_connectionless(sk)) {
1676 if (msg_connected(hdr)) { 1720 if (msg_connected(hdr)) {
1677 err = TIPC_ERR_NO_PORT; 1721 err = TIPC_ERR_NO_PORT;
1678 goto reject; 1722 goto reject;
@@ -1689,7 +1733,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
1689 } 1733 }
1690 1734
1691 /* Enqueue message */ 1735 /* Enqueue message */
1692 TIPC_SKB_CB(skb)->handle = NULL; 1736 TIPC_SKB_CB(skb)->bytes_read = 0;
1693 __skb_queue_tail(&sk->sk_receive_queue, skb); 1737 __skb_queue_tail(&sk->sk_receive_queue, skb);
1694 skb_set_owner_r(skb, sk); 1738 skb_set_owner_r(skb, sk);
1695 1739
@@ -1839,8 +1883,8 @@ xmit:
1839 1883
1840static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) 1884static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
1841{ 1885{
1886 DEFINE_WAIT_FUNC(wait, woken_wake_function);
1842 struct sock *sk = sock->sk; 1887 struct sock *sk = sock->sk;
1843 DEFINE_WAIT(wait);
1844 int done; 1888 int done;
1845 1889
1846 do { 1890 do {
@@ -1852,9 +1896,10 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
1852 if (signal_pending(current)) 1896 if (signal_pending(current))
1853 return sock_intr_errno(*timeo_p); 1897 return sock_intr_errno(*timeo_p);
1854 1898
1855 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1899 add_wait_queue(sk_sleep(sk), &wait);
1856 done = sk_wait_event(sk, timeo_p, sock->state != SS_CONNECTING); 1900 done = sk_wait_event(sk, timeo_p,
1857 finish_wait(sk_sleep(sk), &wait); 1901 sk->sk_state != TIPC_CONNECTING, &wait);
1902 remove_wait_queue(sk_sleep(sk), &wait);
1858 } while (!done); 1903 } while (!done);
1859 return 0; 1904 return 0;
1860} 1905}
@@ -1876,21 +1921,19 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
1876 struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; 1921 struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
1877 struct msghdr m = {NULL,}; 1922 struct msghdr m = {NULL,};
1878 long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout; 1923 long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
1879 socket_state previous; 1924 int previous;
1880 int res = 0; 1925 int res = 0;
1881 1926
1882 lock_sock(sk); 1927 lock_sock(sk);
1883 1928
1884 /* DGRAM/RDM connect(), just save the destaddr */ 1929 /* DGRAM/RDM connect(), just save the destaddr */
1885 if (sock->state == SS_READY) { 1930 if (tipc_sk_type_connectionless(sk)) {
1886 if (dst->family == AF_UNSPEC) { 1931 if (dst->family == AF_UNSPEC) {
1887 memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc)); 1932 memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
1888 tsk->connected = 0;
1889 } else if (destlen != sizeof(struct sockaddr_tipc)) { 1933 } else if (destlen != sizeof(struct sockaddr_tipc)) {
1890 res = -EINVAL; 1934 res = -EINVAL;
1891 } else { 1935 } else {
1892 memcpy(&tsk->remote, dest, destlen); 1936 memcpy(&tsk->peer, dest, destlen);
1893 tsk->connected = 1;
1894 } 1937 }
1895 goto exit; 1938 goto exit;
1896 } 1939 }
@@ -1906,9 +1949,10 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
1906 goto exit; 1949 goto exit;
1907 } 1950 }
1908 1951
1909 previous = sock->state; 1952 previous = sk->sk_state;
1910 switch (sock->state) { 1953
1911 case SS_UNCONNECTED: 1954 switch (sk->sk_state) {
1955 case TIPC_OPEN:
1912 /* Send a 'SYN-' to destination */ 1956 /* Send a 'SYN-' to destination */
1913 m.msg_name = dest; 1957 m.msg_name = dest;
1914 m.msg_namelen = destlen; 1958 m.msg_namelen = destlen;
@@ -1923,27 +1967,29 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
1923 if ((res < 0) && (res != -EWOULDBLOCK)) 1967 if ((res < 0) && (res != -EWOULDBLOCK))
1924 goto exit; 1968 goto exit;
1925 1969
1926 /* Just entered SS_CONNECTING state; the only 1970 /* Just entered TIPC_CONNECTING state; the only
1927 * difference is that return value in non-blocking 1971 * difference is that return value in non-blocking
1928 * case is EINPROGRESS, rather than EALREADY. 1972 * case is EINPROGRESS, rather than EALREADY.
1929 */ 1973 */
1930 res = -EINPROGRESS; 1974 res = -EINPROGRESS;
1931 case SS_CONNECTING: 1975 /* fall thru' */
1932 if (previous == SS_CONNECTING) 1976 case TIPC_CONNECTING:
1933 res = -EALREADY; 1977 if (!timeout) {
1934 if (!timeout) 1978 if (previous == TIPC_CONNECTING)
1979 res = -EALREADY;
1935 goto exit; 1980 goto exit;
1981 }
1936 timeout = msecs_to_jiffies(timeout); 1982 timeout = msecs_to_jiffies(timeout);
1937 /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ 1983 /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
1938 res = tipc_wait_for_connect(sock, &timeout); 1984 res = tipc_wait_for_connect(sock, &timeout);
1939 break; 1985 break;
1940 case SS_CONNECTED: 1986 case TIPC_ESTABLISHED:
1941 res = -EISCONN; 1987 res = -EISCONN;
1942 break; 1988 break;
1943 default: 1989 default:
1944 res = -EINVAL; 1990 res = -EINVAL;
1945 break;
1946 } 1991 }
1992
1947exit: 1993exit:
1948 release_sock(sk); 1994 release_sock(sk);
1949 return res; 1995 return res;
@@ -1962,15 +2008,9 @@ static int tipc_listen(struct socket *sock, int len)
1962 int res; 2008 int res;
1963 2009
1964 lock_sock(sk); 2010 lock_sock(sk);
1965 2011 res = tipc_set_sk_state(sk, TIPC_LISTEN);
1966 if (sock->state != SS_UNCONNECTED)
1967 res = -EINVAL;
1968 else {
1969 sock->state = SS_LISTENING;
1970 res = 0;
1971 }
1972
1973 release_sock(sk); 2012 release_sock(sk);
2013
1974 return res; 2014 return res;
1975} 2015}
1976 2016
@@ -1996,9 +2036,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
1996 err = 0; 2036 err = 0;
1997 if (!skb_queue_empty(&sk->sk_receive_queue)) 2037 if (!skb_queue_empty(&sk->sk_receive_queue))
1998 break; 2038 break;
1999 err = -EINVAL;
2000 if (sock->state != SS_LISTENING)
2001 break;
2002 err = -EAGAIN; 2039 err = -EAGAIN;
2003 if (!timeo) 2040 if (!timeo)
2004 break; 2041 break;
@@ -2029,7 +2066,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
2029 2066
2030 lock_sock(sk); 2067 lock_sock(sk);
2031 2068
2032 if (sock->state != SS_LISTENING) { 2069 if (sk->sk_state != TIPC_LISTEN) {
2033 res = -EINVAL; 2070 res = -EINVAL;
2034 goto exit; 2071 goto exit;
2035 } 2072 }
@@ -2040,7 +2077,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
2040 2077
2041 buf = skb_peek(&sk->sk_receive_queue); 2078 buf = skb_peek(&sk->sk_receive_queue);
2042 2079
2043 res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); 2080 res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
2044 if (res) 2081 if (res)
2045 goto exit; 2082 goto exit;
2046 security_sk_clone(sock->sk, new_sock->sk); 2083 security_sk_clone(sock->sk, new_sock->sk);
@@ -2060,7 +2097,6 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
2060 2097
2061 /* Connect new socket to it's peer */ 2098 /* Connect new socket to it's peer */
2062 tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg)); 2099 tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
2063 new_sock->state = SS_CONNECTED;
2064 2100
2065 tsk_set_importance(new_tsock, msg_importance(msg)); 2101 tsk_set_importance(new_tsock, msg_importance(msg));
2066 if (msg_named(msg)) { 2102 if (msg_named(msg)) {
@@ -2100,13 +2136,6 @@ exit:
2100static int tipc_shutdown(struct socket *sock, int how) 2136static int tipc_shutdown(struct socket *sock, int how)
2101{ 2137{
2102 struct sock *sk = sock->sk; 2138 struct sock *sk = sock->sk;
2103 struct net *net = sock_net(sk);
2104 struct tipc_sock *tsk = tipc_sk(sk);
2105 struct sk_buff *skb;
2106 u32 dnode = tsk_peer_node(tsk);
2107 u32 dport = tsk_peer_port(tsk);
2108 u32 onode = tipc_own_addr(net);
2109 u32 oport = tsk->portid;
2110 int res; 2139 int res;
2111 2140
2112 if (how != SHUT_RDWR) 2141 if (how != SHUT_RDWR)
@@ -2114,45 +2143,17 @@ static int tipc_shutdown(struct socket *sock, int how)
2114 2143
2115 lock_sock(sk); 2144 lock_sock(sk);
2116 2145
2117 switch (sock->state) { 2146 __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
2118 case SS_CONNECTING: 2147 sk->sk_shutdown = SEND_SHUTDOWN;
2119 case SS_CONNECTED:
2120
2121restart:
2122 dnode = tsk_peer_node(tsk);
2123
2124 /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
2125 skb = __skb_dequeue(&sk->sk_receive_queue);
2126 if (skb) {
2127 if (TIPC_SKB_CB(skb)->handle != NULL) {
2128 kfree_skb(skb);
2129 goto restart;
2130 }
2131 tipc_sk_respond(sk, skb, TIPC_CONN_SHUTDOWN);
2132 } else {
2133 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
2134 TIPC_CONN_MSG, SHORT_H_SIZE,
2135 0, dnode, onode, dport, oport,
2136 TIPC_CONN_SHUTDOWN);
2137 if (skb)
2138 tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
2139 }
2140 tsk->connected = 0;
2141 sock->state = SS_DISCONNECTING;
2142 tipc_node_remove_conn(net, dnode, tsk->portid);
2143 /* fall through */
2144
2145 case SS_DISCONNECTING:
2146 2148
2149 if (sk->sk_state == TIPC_DISCONNECTING) {
2147 /* Discard any unreceived messages */ 2150 /* Discard any unreceived messages */
2148 __skb_queue_purge(&sk->sk_receive_queue); 2151 __skb_queue_purge(&sk->sk_receive_queue);
2149 2152
2150 /* Wake up anyone sleeping in poll */ 2153 /* Wake up anyone sleeping in poll */
2151 sk->sk_state_change(sk); 2154 sk->sk_state_change(sk);
2152 res = 0; 2155 res = 0;
2153 break; 2156 } else {
2154
2155 default:
2156 res = -ENOTCONN; 2157 res = -ENOTCONN;
2157 } 2158 }
2158 2159
@@ -2169,17 +2170,16 @@ static void tipc_sk_timeout(unsigned long data)
2169 u32 own_node = tsk_own_node(tsk); 2170 u32 own_node = tsk_own_node(tsk);
2170 2171
2171 bh_lock_sock(sk); 2172 bh_lock_sock(sk);
2172 if (!tsk->connected) { 2173 if (!tipc_sk_connected(sk)) {
2173 bh_unlock_sock(sk); 2174 bh_unlock_sock(sk);
2174 goto exit; 2175 goto exit;
2175 } 2176 }
2176 peer_port = tsk_peer_port(tsk); 2177 peer_port = tsk_peer_port(tsk);
2177 peer_node = tsk_peer_node(tsk); 2178 peer_node = tsk_peer_node(tsk);
2178 2179
2179 if (tsk->probing_state == TIPC_CONN_PROBING) { 2180 if (tsk->probe_unacked) {
2180 if (!sock_owned_by_user(sk)) { 2181 if (!sock_owned_by_user(sk)) {
2181 sk->sk_socket->state = SS_DISCONNECTING; 2182 tipc_set_sk_state(sk, TIPC_DISCONNECTING);
2182 tsk->connected = 0;
2183 tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), 2183 tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
2184 tsk_peer_port(tsk)); 2184 tsk_peer_port(tsk));
2185 sk->sk_state_change(sk); 2185 sk->sk_state_change(sk);
@@ -2188,13 +2188,15 @@ static void tipc_sk_timeout(unsigned long data)
2188 sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); 2188 sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
2189 } 2189 }
2190 2190
2191 } else { 2191 bh_unlock_sock(sk);
2192 skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, 2192 goto exit;
2193 INT_H_SIZE, 0, peer_node, own_node,
2194 peer_port, tsk->portid, TIPC_OK);
2195 tsk->probing_state = TIPC_CONN_PROBING;
2196 sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv);
2197 } 2193 }
2194
2195 skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
2196 INT_H_SIZE, 0, peer_node, own_node,
2197 peer_port, tsk->portid, TIPC_OK);
2198 tsk->probe_unacked = true;
2199 sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
2198 bh_unlock_sock(sk); 2200 bh_unlock_sock(sk);
2199 if (skb) 2201 if (skb)
2200 tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); 2202 tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
@@ -2205,11 +2207,12 @@ exit:
2205static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, 2207static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
2206 struct tipc_name_seq const *seq) 2208 struct tipc_name_seq const *seq)
2207{ 2209{
2208 struct net *net = sock_net(&tsk->sk); 2210 struct sock *sk = &tsk->sk;
2211 struct net *net = sock_net(sk);
2209 struct publication *publ; 2212 struct publication *publ;
2210 u32 key; 2213 u32 key;
2211 2214
2212 if (tsk->connected) 2215 if (tipc_sk_connected(sk))
2213 return -EINVAL; 2216 return -EINVAL;
2214 key = tsk->portid + tsk->pub_count + 1; 2217 key = tsk->portid + tsk->pub_count + 1;
2215 if (key == tsk->portid) 2218 if (key == tsk->portid)
@@ -2667,6 +2670,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
2667 struct nlattr *attrs; 2670 struct nlattr *attrs;
2668 struct net *net = sock_net(skb->sk); 2671 struct net *net = sock_net(skb->sk);
2669 struct tipc_net *tn = net_generic(net, tipc_net_id); 2672 struct tipc_net *tn = net_generic(net, tipc_net_id);
2673 struct sock *sk = &tsk->sk;
2670 2674
2671 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 2675 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
2672 &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); 2676 &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
@@ -2681,7 +2685,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
2681 if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) 2685 if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
2682 goto attr_msg_cancel; 2686 goto attr_msg_cancel;
2683 2687
2684 if (tsk->connected) { 2688 if (tipc_sk_connected(sk)) {
2685 err = __tipc_nl_add_sk_con(skb, tsk); 2689 err = __tipc_nl_add_sk_con(skb, tsk);
2686 if (err) 2690 if (err)
2687 goto attr_msg_cancel; 2691 goto attr_msg_cancel;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2358f2690ec5..127656ebe7be 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -100,7 +100,7 @@
100#include <linux/in.h> 100#include <linux/in.h>
101#include <linux/fs.h> 101#include <linux/fs.h>
102#include <linux/slab.h> 102#include <linux/slab.h>
103#include <asm/uaccess.h> 103#include <linux/uaccess.h>
104#include <linux/skbuff.h> 104#include <linux/skbuff.h>
105#include <linux/netdevice.h> 105#include <linux/netdevice.h>
106#include <net/net_namespace.h> 106#include <net/net_namespace.h>
@@ -315,7 +315,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
315 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { 315 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
316 struct dentry *dentry = unix_sk(s)->path.dentry; 316 struct dentry *dentry = unix_sk(s)->path.dentry;
317 317
318 if (dentry && d_real_inode(dentry) == i) { 318 if (dentry && d_backing_inode(dentry) == i) {
319 sock_hold(s); 319 sock_hold(s);
320 goto found; 320 goto found;
321 } 321 }
@@ -913,7 +913,7 @@ static struct sock *unix_find_other(struct net *net,
913 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); 913 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
914 if (err) 914 if (err)
915 goto fail; 915 goto fail;
916 inode = d_real_inode(path.dentry); 916 inode = d_backing_inode(path.dentry);
917 err = inode_permission(inode, MAY_WRITE); 917 err = inode_permission(inode, MAY_WRITE);
918 if (err) 918 if (err)
919 goto put_fail; 919 goto put_fail;
@@ -1040,7 +1040,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1040 goto out_up; 1040 goto out_up;
1041 } 1041 }
1042 addr->hash = UNIX_HASH_SIZE; 1042 addr->hash = UNIX_HASH_SIZE;
1043 hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); 1043 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1044 spin_lock(&unix_table_lock); 1044 spin_lock(&unix_table_lock);
1045 u->path = path; 1045 u->path = path;
1046 list = &unix_socket_table[hash]; 1046 list = &unix_socket_table[hash];
@@ -2113,8 +2113,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2113 mutex_lock(&u->iolock); 2113 mutex_lock(&u->iolock);
2114 2114
2115 skip = sk_peek_offset(sk, flags); 2115 skip = sk_peek_offset(sk, flags);
2116 skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err, 2116 skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2117 &last); 2117 &err, &last);
2118 if (skb) 2118 if (skb)
2119 break; 2119 break;
2120 2120
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 936d7eee62d0..6788264acc63 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -44,6 +44,10 @@ struct virtio_vsock {
44 spinlock_t send_pkt_list_lock; 44 spinlock_t send_pkt_list_lock;
45 struct list_head send_pkt_list; 45 struct list_head send_pkt_list;
46 46
47 struct work_struct loopback_work;
48 spinlock_t loopback_list_lock; /* protects loopback_list */
49 struct list_head loopback_list;
50
47 atomic_t queued_replies; 51 atomic_t queued_replies;
48 52
49 /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX] 53 /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX]
@@ -74,6 +78,42 @@ static u32 virtio_transport_get_local_cid(void)
74 return vsock->guest_cid; 78 return vsock->guest_cid;
75} 79}
76 80
81static void virtio_transport_loopback_work(struct work_struct *work)
82{
83 struct virtio_vsock *vsock =
84 container_of(work, struct virtio_vsock, loopback_work);
85 LIST_HEAD(pkts);
86
87 spin_lock_bh(&vsock->loopback_list_lock);
88 list_splice_init(&vsock->loopback_list, &pkts);
89 spin_unlock_bh(&vsock->loopback_list_lock);
90
91 mutex_lock(&vsock->rx_lock);
92 while (!list_empty(&pkts)) {
93 struct virtio_vsock_pkt *pkt;
94
95 pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list);
96 list_del_init(&pkt->list);
97
98 virtio_transport_recv_pkt(pkt);
99 }
100 mutex_unlock(&vsock->rx_lock);
101}
102
103static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock,
104 struct virtio_vsock_pkt *pkt)
105{
106 int len = pkt->len;
107
108 spin_lock_bh(&vsock->loopback_list_lock);
109 list_add_tail(&pkt->list, &vsock->loopback_list);
110 spin_unlock_bh(&vsock->loopback_list_lock);
111
112 queue_work(virtio_vsock_workqueue, &vsock->loopback_work);
113
114 return len;
115}
116
77static void 117static void
78virtio_transport_send_pkt_work(struct work_struct *work) 118virtio_transport_send_pkt_work(struct work_struct *work)
79{ 119{
@@ -159,6 +199,9 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
159 return -ENODEV; 199 return -ENODEV;
160 } 200 }
161 201
202 if (le32_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid)
203 return virtio_transport_send_pkt_loopback(vsock, pkt);
204
162 if (pkt->reply) 205 if (pkt->reply)
163 atomic_inc(&vsock->queued_replies); 206 atomic_inc(&vsock->queued_replies);
164 207
@@ -336,7 +379,7 @@ static void virtio_vsock_reset_sock(struct sock *sk)
336static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock) 379static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)
337{ 380{
338 struct virtio_device *vdev = vsock->vdev; 381 struct virtio_device *vdev = vsock->vdev;
339 u64 guest_cid; 382 __le64 guest_cid;
340 383
341 vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), 384 vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid),
342 &guest_cid, sizeof(guest_cid)); 385 &guest_cid, sizeof(guest_cid));
@@ -510,10 +553,13 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
510 mutex_init(&vsock->event_lock); 553 mutex_init(&vsock->event_lock);
511 spin_lock_init(&vsock->send_pkt_list_lock); 554 spin_lock_init(&vsock->send_pkt_list_lock);
512 INIT_LIST_HEAD(&vsock->send_pkt_list); 555 INIT_LIST_HEAD(&vsock->send_pkt_list);
556 spin_lock_init(&vsock->loopback_list_lock);
557 INIT_LIST_HEAD(&vsock->loopback_list);
513 INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); 558 INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
514 INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); 559 INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
515 INIT_WORK(&vsock->event_work, virtio_transport_event_work); 560 INIT_WORK(&vsock->event_work, virtio_transport_event_work);
516 INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); 561 INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
562 INIT_WORK(&vsock->loopback_work, virtio_transport_loopback_work);
517 563
518 mutex_lock(&vsock->rx_lock); 564 mutex_lock(&vsock->rx_lock);
519 virtio_vsock_rx_fill(vsock); 565 virtio_vsock_rx_fill(vsock);
@@ -539,6 +585,7 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
539 struct virtio_vsock *vsock = vdev->priv; 585 struct virtio_vsock *vsock = vdev->priv;
540 struct virtio_vsock_pkt *pkt; 586 struct virtio_vsock_pkt *pkt;
541 587
588 flush_work(&vsock->loopback_work);
542 flush_work(&vsock->rx_work); 589 flush_work(&vsock->rx_work);
543 flush_work(&vsock->tx_work); 590 flush_work(&vsock->tx_work);
544 flush_work(&vsock->event_work); 591 flush_work(&vsock->event_work);
@@ -565,6 +612,15 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
565 } 612 }
566 spin_unlock_bh(&vsock->send_pkt_list_lock); 613 spin_unlock_bh(&vsock->send_pkt_list_lock);
567 614
615 spin_lock_bh(&vsock->loopback_list_lock);
616 while (!list_empty(&vsock->loopback_list)) {
617 pkt = list_first_entry(&vsock->loopback_list,
618 struct virtio_vsock_pkt, list);
619 list_del(&pkt->list);
620 virtio_transport_free_pkt(pkt);
621 }
622 spin_unlock_bh(&vsock->loopback_list_lock);
623
568 mutex_lock(&the_virtio_vsock_mutex); 624 mutex_lock(&the_virtio_vsock_mutex);
569 the_virtio_vsock = NULL; 625 the_virtio_vsock = NULL;
570 vsock_core_exit(); 626 vsock_core_exit();
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index a53b3a16b4f1..849c4ad0411e 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -32,7 +32,7 @@ static const struct virtio_transport *virtio_transport_get_ops(void)
32 return container_of(t, struct virtio_transport, transport); 32 return container_of(t, struct virtio_transport, transport);
33} 33}
34 34
35struct virtio_vsock_pkt * 35static struct virtio_vsock_pkt *
36virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, 36virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
37 size_t len, 37 size_t len,
38 u32 src_cid, 38 u32 src_cid,
@@ -82,7 +82,6 @@ out_pkt:
82 kfree(pkt); 82 kfree(pkt);
83 return NULL; 83 return NULL;
84} 84}
85EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt);
86 85
87static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, 86static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
88 struct virtio_vsock_pkt_info *info) 87 struct virtio_vsock_pkt_info *info)
@@ -606,9 +605,9 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
606 return 0; 605 return 0;
607 606
608 pkt = virtio_transport_alloc_pkt(&info, 0, 607 pkt = virtio_transport_alloc_pkt(&info, 0,
609 le32_to_cpu(pkt->hdr.dst_cid), 608 le64_to_cpu(pkt->hdr.dst_cid),
610 le32_to_cpu(pkt->hdr.dst_port), 609 le32_to_cpu(pkt->hdr.dst_port),
611 le32_to_cpu(pkt->hdr.src_cid), 610 le64_to_cpu(pkt->hdr.src_cid),
612 le32_to_cpu(pkt->hdr.src_port)); 611 le32_to_cpu(pkt->hdr.src_port));
613 if (!pkt) 612 if (!pkt)
614 return -ENOMEM; 613 return -ENOMEM;
@@ -619,17 +618,17 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
619static void virtio_transport_wait_close(struct sock *sk, long timeout) 618static void virtio_transport_wait_close(struct sock *sk, long timeout)
620{ 619{
621 if (timeout) { 620 if (timeout) {
622 DEFINE_WAIT(wait); 621 DEFINE_WAIT_FUNC(wait, woken_wake_function);
622
623 add_wait_queue(sk_sleep(sk), &wait);
623 624
624 do { 625 do {
625 prepare_to_wait(sk_sleep(sk), &wait,
626 TASK_INTERRUPTIBLE);
627 if (sk_wait_event(sk, &timeout, 626 if (sk_wait_event(sk, &timeout,
628 sock_flag(sk, SOCK_DONE))) 627 sock_flag(sk, SOCK_DONE), &wait))
629 break; 628 break;
630 } while (!signal_pending(current) && timeout); 629 } while (!signal_pending(current) && timeout);
631 630
632 finish_wait(sk_sleep(sk), &wait); 631 remove_wait_queue(sk_sleep(sk), &wait);
633 } 632 }
634} 633}
635 634
@@ -823,7 +822,7 @@ virtio_transport_send_response(struct vsock_sock *vsk,
823 struct virtio_vsock_pkt_info info = { 822 struct virtio_vsock_pkt_info info = {
824 .op = VIRTIO_VSOCK_OP_RESPONSE, 823 .op = VIRTIO_VSOCK_OP_RESPONSE,
825 .type = VIRTIO_VSOCK_TYPE_STREAM, 824 .type = VIRTIO_VSOCK_TYPE_STREAM,
826 .remote_cid = le32_to_cpu(pkt->hdr.src_cid), 825 .remote_cid = le64_to_cpu(pkt->hdr.src_cid),
827 .remote_port = le32_to_cpu(pkt->hdr.src_port), 826 .remote_port = le32_to_cpu(pkt->hdr.src_port),
828 .reply = true, 827 .reply = true,
829 }; 828 };
@@ -863,9 +862,9 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
863 child->sk_state = SS_CONNECTED; 862 child->sk_state = SS_CONNECTED;
864 863
865 vchild = vsock_sk(child); 864 vchild = vsock_sk(child);
866 vsock_addr_init(&vchild->local_addr, le32_to_cpu(pkt->hdr.dst_cid), 865 vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
867 le32_to_cpu(pkt->hdr.dst_port)); 866 le32_to_cpu(pkt->hdr.dst_port));
868 vsock_addr_init(&vchild->remote_addr, le32_to_cpu(pkt->hdr.src_cid), 867 vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
869 le32_to_cpu(pkt->hdr.src_port)); 868 le32_to_cpu(pkt->hdr.src_port));
870 869
871 vsock_insert_connected(vchild); 870 vsock_insert_connected(vchild);
@@ -904,9 +903,9 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
904 struct sock *sk; 903 struct sock *sk;
905 bool space_available; 904 bool space_available;
906 905
907 vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), 906 vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
908 le32_to_cpu(pkt->hdr.src_port)); 907 le32_to_cpu(pkt->hdr.src_port));
909 vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), 908 vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
910 le32_to_cpu(pkt->hdr.dst_port)); 909 le32_to_cpu(pkt->hdr.dst_port));
911 910
912 trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, 911 trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index fd8cf0214d51..1406db4d97d1 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -662,19 +662,19 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
662 662
663/* Socket control packet based operations. */ 663/* Socket control packet based operations. */
664const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { 664const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
665 vmci_transport_notify_pkt_socket_init, 665 .socket_init = vmci_transport_notify_pkt_socket_init,
666 vmci_transport_notify_pkt_socket_destruct, 666 .socket_destruct = vmci_transport_notify_pkt_socket_destruct,
667 vmci_transport_notify_pkt_poll_in, 667 .poll_in = vmci_transport_notify_pkt_poll_in,
668 vmci_transport_notify_pkt_poll_out, 668 .poll_out = vmci_transport_notify_pkt_poll_out,
669 vmci_transport_notify_pkt_handle_pkt, 669 .handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
670 vmci_transport_notify_pkt_recv_init, 670 .recv_init = vmci_transport_notify_pkt_recv_init,
671 vmci_transport_notify_pkt_recv_pre_block, 671 .recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
672 vmci_transport_notify_pkt_recv_pre_dequeue, 672 .recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
673 vmci_transport_notify_pkt_recv_post_dequeue, 673 .recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
674 vmci_transport_notify_pkt_send_init, 674 .send_init = vmci_transport_notify_pkt_send_init,
675 vmci_transport_notify_pkt_send_pre_block, 675 .send_pre_block = vmci_transport_notify_pkt_send_pre_block,
676 vmci_transport_notify_pkt_send_pre_enqueue, 676 .send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
677 vmci_transport_notify_pkt_send_post_enqueue, 677 .send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
678 vmci_transport_notify_pkt_process_request, 678 .process_request = vmci_transport_notify_pkt_process_request,
679 vmci_transport_notify_pkt_process_negotiate, 679 .process_negotiate = vmci_transport_notify_pkt_process_negotiate,
680}; 680};
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index 21e591dafb03..f3a0afc46208 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -420,19 +420,19 @@ vmci_transport_notify_pkt_send_pre_enqueue(
420 420
421/* Socket always on control packet based operations. */ 421/* Socket always on control packet based operations. */
422const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { 422const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
423 vmci_transport_notify_pkt_socket_init, 423 .socket_init = vmci_transport_notify_pkt_socket_init,
424 vmci_transport_notify_pkt_socket_destruct, 424 .socket_destruct = vmci_transport_notify_pkt_socket_destruct,
425 vmci_transport_notify_pkt_poll_in, 425 .poll_in = vmci_transport_notify_pkt_poll_in,
426 vmci_transport_notify_pkt_poll_out, 426 .poll_out = vmci_transport_notify_pkt_poll_out,
427 vmci_transport_notify_pkt_handle_pkt, 427 .handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
428 vmci_transport_notify_pkt_recv_init, 428 .recv_init = vmci_transport_notify_pkt_recv_init,
429 vmci_transport_notify_pkt_recv_pre_block, 429 .recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
430 vmci_transport_notify_pkt_recv_pre_dequeue, 430 .recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
431 vmci_transport_notify_pkt_recv_post_dequeue, 431 .recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
432 vmci_transport_notify_pkt_send_init, 432 .send_init = vmci_transport_notify_pkt_send_init,
433 vmci_transport_notify_pkt_send_pre_block, 433 .send_pre_block = vmci_transport_notify_pkt_send_pre_block,
434 vmci_transport_notify_pkt_send_pre_enqueue, 434 .send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
435 vmci_transport_notify_pkt_send_post_enqueue, 435 .send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
436 vmci_transport_notify_pkt_process_request, 436 .process_request = vmci_transport_notify_pkt_process_request,
437 vmci_transport_notify_pkt_process_negotiate, 437 .process_negotiate = vmci_transport_notify_pkt_process_negotiate,
438}; 438};
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 3f816e2971ee..5db731512014 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -572,16 +572,20 @@ struct d_level D_LEVEL[] = {
572size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); 572size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
573 573
574 574
575struct genl_family wimax_gnl_family = { 575static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
576 .id = GENL_ID_GENERATE, 576 { .name = "msg", },
577};
578
579struct genl_family wimax_gnl_family __ro_after_init = {
577 .name = "WiMAX", 580 .name = "WiMAX",
578 .version = WIMAX_GNL_VERSION, 581 .version = WIMAX_GNL_VERSION,
579 .hdrsize = 0, 582 .hdrsize = 0,
580 .maxattr = WIMAX_GNL_ATTR_MAX, 583 .maxattr = WIMAX_GNL_ATTR_MAX,
581}; 584 .module = THIS_MODULE,
582 585 .ops = wimax_gnl_ops,
583static const struct genl_multicast_group wimax_gnl_mcgrps[] = { 586 .n_ops = ARRAY_SIZE(wimax_gnl_ops),
584 { .name = "msg", }, 587 .mcgrps = wimax_gnl_mcgrps,
588 .n_mcgrps = ARRAY_SIZE(wimax_gnl_mcgrps),
585}; 589};
586 590
587 591
@@ -596,11 +600,7 @@ int __init wimax_subsys_init(void)
596 d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params, 600 d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params,
597 "wimax.debug"); 601 "wimax.debug");
598 602
599 snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name), 603 result = genl_register_family(&wimax_gnl_family);
600 "WiMAX");
601 result = genl_register_family_with_ops_groups(&wimax_gnl_family,
602 wimax_gnl_ops,
603 wimax_gnl_mcgrps);
604 if (unlikely(result < 0)) { 604 if (unlikely(result < 0)) {
605 pr_err("cannot register generic netlink family: %d\n", result); 605 pr_err("cannot register generic netlink family: %d\n", result);
606 goto error_register_family; 606 goto error_register_family;
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 4c9e39f04ef8..816c9331c8d2 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -17,8 +17,6 @@ cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o
17 17
18CFLAGS_trace.o := -I$(src) 18CFLAGS_trace.o := -I$(src)
19 19
20ccflags-y += -D__CHECK_ENDIAN__
21
22$(obj)/regdb.c: $(src)/db.txt $(src)/genregdb.awk 20$(obj)/regdb.c: $(src)/db.txt $(src)/genregdb.awk
23 @$(AWK) -f $(srctree)/$(src)/genregdb.awk < $< > $@ 21 @$(AWK) -f $(srctree)/$(src)/genregdb.awk < $< > $@
24 22
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 8201e6d7449e..158c59ecf90a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -210,11 +210,11 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
210 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) 210 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE))
211 return; 211 return;
212 212
213 if (!wdev->p2p_started) 213 if (!wdev_running(wdev))
214 return; 214 return;
215 215
216 rdev_stop_p2p_device(rdev, wdev); 216 rdev_stop_p2p_device(rdev, wdev);
217 wdev->p2p_started = false; 217 wdev->is_running = false;
218 218
219 rdev->opencount--; 219 rdev->opencount--;
220 220
@@ -233,11 +233,11 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
233 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN)) 233 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN))
234 return; 234 return;
235 235
236 if (!wdev->nan_started) 236 if (!wdev_running(wdev))
237 return; 237 return;
238 238
239 rdev_stop_nan(rdev, wdev); 239 rdev_stop_nan(rdev, wdev);
240 wdev->nan_started = false; 240 wdev->is_running = false;
241 241
242 rdev->opencount--; 242 rdev->opencount--;
243} 243}
@@ -562,6 +562,21 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
562 c->limits[j].max > 1)) 562 c->limits[j].max > 1))
563 return -EINVAL; 563 return -EINVAL;
564 564
565 /*
566 * This isn't well-defined right now. If you have an
567 * IBSS interface, then its beacon interval may change
568 * by joining other networks, and nothing prevents it
569 * from doing that.
570 * So technically we probably shouldn't even allow AP
571 * and IBSS in the same interface, but it seems that
572 * some drivers support that, possibly only with fixed
573 * beacon intervals for IBSS.
574 */
575 if (WARN_ON(types & BIT(NL80211_IFTYPE_ADHOC) &&
576 c->beacon_int_min_gcd)) {
577 return -EINVAL;
578 }
579
565 cnt += c->limits[j].max; 580 cnt += c->limits[j].max;
566 /* 581 /*
567 * Don't advertise an unsupported type 582 * Don't advertise an unsupported type
@@ -571,6 +586,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
571 return -EINVAL; 586 return -EINVAL;
572 } 587 }
573 588
589#ifndef CONFIG_WIRELESS_WDS
590 if (WARN_ON(all_iftypes & BIT(NL80211_IFTYPE_WDS)))
591 return -EINVAL;
592#endif
593
574 /* You can't even choose that many! */ 594 /* You can't even choose that many! */
575 if (WARN_ON(cnt < c->max_interfaces)) 595 if (WARN_ON(cnt < c->max_interfaces))
576 return -EINVAL; 596 return -EINVAL;
@@ -609,6 +629,11 @@ int wiphy_register(struct wiphy *wiphy)
609 !rdev->ops->add_nan_func || !rdev->ops->del_nan_func))) 629 !rdev->ops->add_nan_func || !rdev->ops->del_nan_func)))
610 return -EINVAL; 630 return -EINVAL;
611 631
632#ifndef CONFIG_WIRELESS_WDS
633 if (WARN_ON(wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS)))
634 return -EINVAL;
635#endif
636
612 /* 637 /*
613 * if a wiphy has unsupported modes for regulatory channel enforcement, 638 * if a wiphy has unsupported modes for regulatory channel enforcement,
614 * opt-out of enforcement checking 639 * opt-out of enforcement checking
diff --git a/net/wireless/core.h b/net/wireless/core.h
index f0c0c8a48c92..af6e023020b1 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -346,7 +346,7 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
346 const u8 *ssid, int ssid_len, 346 const u8 *ssid, int ssid_len,
347 const u8 *ie, int ie_len, 347 const u8 *ie, int ie_len,
348 const u8 *key, int key_len, int key_idx, 348 const u8 *key, int key_len, int key_idx,
349 const u8 *sae_data, int sae_data_len); 349 const u8 *auth_data, int auth_data_len);
350int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, 350int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
351 struct net_device *dev, 351 struct net_device *dev,
352 struct ieee80211_channel *chan, 352 struct ieee80211_channel *chan,
@@ -410,6 +410,7 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev);
410void cfg80211_sme_deauth(struct wireless_dev *wdev); 410void cfg80211_sme_deauth(struct wireless_dev *wdev);
411void cfg80211_sme_auth_timeout(struct wireless_dev *wdev); 411void cfg80211_sme_auth_timeout(struct wireless_dev *wdev);
412void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev); 412void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev);
413void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev);
413 414
414/* internal helpers */ 415/* internal helpers */
415bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); 416bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher);
@@ -476,7 +477,7 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband,
476 u32 *mask); 477 u32 *mask);
477 478
478int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, 479int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
479 u32 beacon_int); 480 enum nl80211_iftype iftype, u32 beacon_int);
480 481
481void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, 482void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
482 enum nl80211_iftype iftype, int num); 483 enum nl80211_iftype iftype, int num);
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 71447cf86306..ba0a1f398ce5 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -556,7 +556,7 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
556 memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ 556 memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */
557 memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */ 557 memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */
558 break; 558 break;
559 case 0: 559 default:
560 memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ 560 memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */
561 memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ 561 memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */
562 break; 562 break;
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index fa2066b56f36..2d8518a37eab 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -183,6 +183,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
183 memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); 183 memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len);
184 wdev->mesh_id_len = setup->mesh_id_len; 184 wdev->mesh_id_len = setup->mesh_id_len;
185 wdev->chandef = setup->chandef; 185 wdev->chandef = setup->chandef;
186 wdev->beacon_interval = setup->beacon_interval;
186 } 187 }
187 188
188 return err; 189 return err;
@@ -258,6 +259,7 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
258 err = rdev_leave_mesh(rdev, dev); 259 err = rdev_leave_mesh(rdev, dev);
259 if (!err) { 260 if (!err) {
260 wdev->mesh_id_len = 0; 261 wdev->mesh_id_len = 0;
262 wdev->beacon_interval = 0;
261 memset(&wdev->chandef, 0, sizeof(wdev->chandef)); 263 memset(&wdev->chandef, 0, sizeof(wdev->chandef));
262 rdev_set_qos_map(rdev, dev, NULL); 264 rdev_set_qos_map(rdev, dev, NULL);
263 } 265 }
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index cbb48e26a871..4646cf5695b9 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -149,6 +149,18 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss)
149} 149}
150EXPORT_SYMBOL(cfg80211_assoc_timeout); 150EXPORT_SYMBOL(cfg80211_assoc_timeout);
151 151
152void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss)
153{
154 struct wireless_dev *wdev = dev->ieee80211_ptr;
155 struct wiphy *wiphy = wdev->wiphy;
156
157 cfg80211_sme_abandon_assoc(wdev);
158
159 cfg80211_unhold_bss(bss_from_pub(bss));
160 cfg80211_put_bss(wiphy, bss);
161}
162EXPORT_SYMBOL(cfg80211_abandon_assoc);
163
152void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) 164void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len)
153{ 165{
154 struct wireless_dev *wdev = dev->ieee80211_ptr; 166 struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -204,14 +216,14 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
204 const u8 *ssid, int ssid_len, 216 const u8 *ssid, int ssid_len,
205 const u8 *ie, int ie_len, 217 const u8 *ie, int ie_len,
206 const u8 *key, int key_len, int key_idx, 218 const u8 *key, int key_len, int key_idx,
207 const u8 *sae_data, int sae_data_len) 219 const u8 *auth_data, int auth_data_len)
208{ 220{
209 struct wireless_dev *wdev = dev->ieee80211_ptr; 221 struct wireless_dev *wdev = dev->ieee80211_ptr;
210 struct cfg80211_auth_request req = { 222 struct cfg80211_auth_request req = {
211 .ie = ie, 223 .ie = ie,
212 .ie_len = ie_len, 224 .ie_len = ie_len,
213 .sae_data = sae_data, 225 .auth_data = auth_data,
214 .sae_data_len = sae_data_len, 226 .auth_data_len = auth_data_len,
215 .auth_type = auth_type, 227 .auth_type = auth_type,
216 .key = key, 228 .key = key,
217 .key_len = key_len, 229 .key_len = key_len,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c510810f0b7c..3df85a751a85 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -32,22 +32,8 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
32 struct cfg80211_crypto_settings *settings, 32 struct cfg80211_crypto_settings *settings,
33 int cipher_limit); 33 int cipher_limit);
34 34
35static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
36 struct genl_info *info);
37static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
38 struct genl_info *info);
39
40/* the netlink family */ 35/* the netlink family */
41static struct genl_family nl80211_fam = { 36static struct genl_family nl80211_fam;
42 .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */
43 .name = NL80211_GENL_NAME, /* have users key off the name instead */
44 .hdrsize = 0, /* no private header */
45 .version = 1, /* no particular meaning now */
46 .maxattr = NL80211_ATTR_MAX,
47 .netnsok = true,
48 .pre_doit = nl80211_pre_doit,
49 .post_doit = nl80211_post_doit,
50};
51 37
52/* multicast groups */ 38/* multicast groups */
53enum nl80211_multicast_groups { 39enum nl80211_multicast_groups {
@@ -357,7 +343,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
357 [NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 }, 343 [NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 },
358 [NL80211_ATTR_WDEV] = { .type = NLA_U64 }, 344 [NL80211_ATTR_WDEV] = { .type = NLA_U64 },
359 [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 }, 345 [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 },
360 [NL80211_ATTR_SAE_DATA] = { .type = NLA_BINARY, }, 346 [NL80211_ATTR_AUTH_DATA] = { .type = NLA_BINARY, },
361 [NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN }, 347 [NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN },
362 [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, 348 [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 },
363 [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, 349 [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 },
@@ -414,6 +400,11 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
414 [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, 400 [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 },
415 [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, 401 [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 },
416 [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, 402 [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED },
403 [NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY,
404 .len = FILS_MAX_KEK_LEN },
405 [NL80211_ATTR_FILS_NONCES] = { .len = 2 * FILS_NONCE_LEN },
406 [NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED] = { .type = NLA_FLAG, },
407 [NL80211_ATTR_BSSID] = { .len = ETH_ALEN },
417}; 408};
418 409
419/* policy for the key attributes */ 410/* policy for the key attributes */
@@ -435,6 +426,7 @@ nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = {
435 [NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG }, 426 [NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG },
436}; 427};
437 428
429#ifdef CONFIG_PM
438/* policy for WoWLAN attributes */ 430/* policy for WoWLAN attributes */
439static const struct nla_policy 431static const struct nla_policy
440nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = { 432nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = {
@@ -468,6 +460,7 @@ nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
468 [NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 }, 460 [NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 },
469 [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 }, 461 [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 },
470}; 462};
463#endif /* CONFIG_PM */
471 464
472/* policy for coalesce rule attributes */ 465/* policy for coalesce rule attributes */
473static const struct nla_policy 466static const struct nla_policy
@@ -551,13 +544,14 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
551 544
552 if (!cb->args[0]) { 545 if (!cb->args[0]) {
553 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, 546 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
554 nl80211_fam.attrbuf, nl80211_fam.maxattr, 547 genl_family_attrbuf(&nl80211_fam),
555 nl80211_policy); 548 nl80211_fam.maxattr, nl80211_policy);
556 if (err) 549 if (err)
557 goto out_unlock; 550 goto out_unlock;
558 551
559 *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), 552 *wdev = __cfg80211_wdev_from_attrs(
560 nl80211_fam.attrbuf); 553 sock_net(skb->sk),
554 genl_family_attrbuf(&nl80211_fam));
561 if (IS_ERR(*wdev)) { 555 if (IS_ERR(*wdev)) {
562 err = PTR_ERR(*wdev); 556 err = PTR_ERR(*wdev);
563 goto out_unlock; 557 goto out_unlock;
@@ -1075,6 +1069,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy,
1075 nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, 1069 nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
1076 c->radar_detect_regions))) 1070 c->radar_detect_regions)))
1077 goto nla_put_failure; 1071 goto nla_put_failure;
1072 if (c->beacon_int_min_gcd &&
1073 nla_put_u32(msg, NL80211_IFACE_COMB_BI_MIN_GCD,
1074 c->beacon_int_min_gcd))
1075 goto nla_put_failure;
1078 1076
1079 nla_nest_end(msg, nl_combi); 1077 nla_nest_end(msg, nl_combi);
1080 } 1078 }
@@ -1322,6 +1320,95 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg,
1322 return 0; 1320 return 0;
1323} 1321}
1324 1322
1323#define CMD(op, n) \
1324 do { \
1325 if (rdev->ops->op) { \
1326 i++; \
1327 if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
1328 goto nla_put_failure; \
1329 } \
1330 } while (0)
1331
1332static int nl80211_add_commands_unsplit(struct cfg80211_registered_device *rdev,
1333 struct sk_buff *msg)
1334{
1335 int i = 0;
1336
1337 /*
1338 * do *NOT* add anything into this function, new things need to be
1339 * advertised only to new versions of userspace that can deal with
1340 * the split (and they can't possibly care about new features...
1341 */
1342 CMD(add_virtual_intf, NEW_INTERFACE);
1343 CMD(change_virtual_intf, SET_INTERFACE);
1344 CMD(add_key, NEW_KEY);
1345 CMD(start_ap, START_AP);
1346 CMD(add_station, NEW_STATION);
1347 CMD(add_mpath, NEW_MPATH);
1348 CMD(update_mesh_config, SET_MESH_CONFIG);
1349 CMD(change_bss, SET_BSS);
1350 CMD(auth, AUTHENTICATE);
1351 CMD(assoc, ASSOCIATE);
1352 CMD(deauth, DEAUTHENTICATE);
1353 CMD(disassoc, DISASSOCIATE);
1354 CMD(join_ibss, JOIN_IBSS);
1355 CMD(join_mesh, JOIN_MESH);
1356 CMD(set_pmksa, SET_PMKSA);
1357 CMD(del_pmksa, DEL_PMKSA);
1358 CMD(flush_pmksa, FLUSH_PMKSA);
1359 if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
1360 CMD(remain_on_channel, REMAIN_ON_CHANNEL);
1361 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
1362 CMD(mgmt_tx, FRAME);
1363 CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
1364 if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
1365 i++;
1366 if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
1367 goto nla_put_failure;
1368 }
1369 if (rdev->ops->set_monitor_channel || rdev->ops->start_ap ||
1370 rdev->ops->join_mesh) {
1371 i++;
1372 if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
1373 goto nla_put_failure;
1374 }
1375 CMD(set_wds_peer, SET_WDS_PEER);
1376 if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
1377 CMD(tdls_mgmt, TDLS_MGMT);
1378 CMD(tdls_oper, TDLS_OPER);
1379 }
1380 if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
1381 CMD(sched_scan_start, START_SCHED_SCAN);
1382 CMD(probe_client, PROBE_CLIENT);
1383 CMD(set_noack_map, SET_NOACK_MAP);
1384 if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
1385 i++;
1386 if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
1387 goto nla_put_failure;
1388 }
1389 CMD(start_p2p_device, START_P2P_DEVICE);
1390 CMD(set_mcast_rate, SET_MCAST_RATE);
1391#ifdef CONFIG_NL80211_TESTMODE
1392 CMD(testmode_cmd, TESTMODE);
1393#endif
1394
1395 if (rdev->ops->connect || rdev->ops->auth) {
1396 i++;
1397 if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
1398 goto nla_put_failure;
1399 }
1400
1401 if (rdev->ops->disconnect || rdev->ops->deauth) {
1402 i++;
1403 if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
1404 goto nla_put_failure;
1405 }
1406
1407 return i;
1408 nla_put_failure:
1409 return -ENOBUFS;
1410}
1411
1325struct nl80211_dump_wiphy_state { 1412struct nl80211_dump_wiphy_state {
1326 s64 filter_wiphy; 1413 s64 filter_wiphy;
1327 long start; 1414 long start;
@@ -1549,68 +1636,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
1549 if (!nl_cmds) 1636 if (!nl_cmds)
1550 goto nla_put_failure; 1637 goto nla_put_failure;
1551 1638
1552 i = 0; 1639 i = nl80211_add_commands_unsplit(rdev, msg);
1553#define CMD(op, n) \ 1640 if (i < 0)
1554 do { \ 1641 goto nla_put_failure;
1555 if (rdev->ops->op) { \
1556 i++; \
1557 if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
1558 goto nla_put_failure; \
1559 } \
1560 } while (0)
1561
1562 CMD(add_virtual_intf, NEW_INTERFACE);
1563 CMD(change_virtual_intf, SET_INTERFACE);
1564 CMD(add_key, NEW_KEY);
1565 CMD(start_ap, START_AP);
1566 CMD(add_station, NEW_STATION);
1567 CMD(add_mpath, NEW_MPATH);
1568 CMD(update_mesh_config, SET_MESH_CONFIG);
1569 CMD(change_bss, SET_BSS);
1570 CMD(auth, AUTHENTICATE);
1571 CMD(assoc, ASSOCIATE);
1572 CMD(deauth, DEAUTHENTICATE);
1573 CMD(disassoc, DISASSOCIATE);
1574 CMD(join_ibss, JOIN_IBSS);
1575 CMD(join_mesh, JOIN_MESH);
1576 CMD(set_pmksa, SET_PMKSA);
1577 CMD(del_pmksa, DEL_PMKSA);
1578 CMD(flush_pmksa, FLUSH_PMKSA);
1579 if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
1580 CMD(remain_on_channel, REMAIN_ON_CHANNEL);
1581 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
1582 CMD(mgmt_tx, FRAME);
1583 CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
1584 if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
1585 i++;
1586 if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
1587 goto nla_put_failure;
1588 }
1589 if (rdev->ops->set_monitor_channel || rdev->ops->start_ap ||
1590 rdev->ops->join_mesh) {
1591 i++;
1592 if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
1593 goto nla_put_failure;
1594 }
1595 CMD(set_wds_peer, SET_WDS_PEER);
1596 if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
1597 CMD(tdls_mgmt, TDLS_MGMT);
1598 CMD(tdls_oper, TDLS_OPER);
1599 }
1600 if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
1601 CMD(sched_scan_start, START_SCHED_SCAN);
1602 CMD(probe_client, PROBE_CLIENT);
1603 CMD(set_noack_map, SET_NOACK_MAP);
1604 if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
1605 i++;
1606 if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
1607 goto nla_put_failure;
1608 }
1609 CMD(start_p2p_device, START_P2P_DEVICE);
1610 CMD(set_mcast_rate, SET_MCAST_RATE);
1611#ifdef CONFIG_NL80211_TESTMODE
1612 CMD(testmode_cmd, TESTMODE);
1613#endif
1614 if (state->split) { 1642 if (state->split) {
1615 CMD(crit_proto_start, CRIT_PROTOCOL_START); 1643 CMD(crit_proto_start, CRIT_PROTOCOL_START);
1616 CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); 1644 CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
@@ -1620,22 +1648,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
1620 if (rdev->wiphy.features & 1648 if (rdev->wiphy.features &
1621 NL80211_FEATURE_SUPPORTS_WMM_ADMISSION) 1649 NL80211_FEATURE_SUPPORTS_WMM_ADMISSION)
1622 CMD(add_tx_ts, ADD_TX_TS); 1650 CMD(add_tx_ts, ADD_TX_TS);
1651 CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST);
1652 CMD(update_connect_params, UPDATE_CONNECT_PARAMS);
1623 } 1653 }
1624 /* add into the if now */
1625#undef CMD 1654#undef CMD
1626 1655
1627 if (rdev->ops->connect || rdev->ops->auth) {
1628 i++;
1629 if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
1630 goto nla_put_failure;
1631 }
1632
1633 if (rdev->ops->disconnect || rdev->ops->deauth) {
1634 i++;
1635 if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
1636 goto nla_put_failure;
1637 }
1638
1639 nla_nest_end(msg, nl_cmds); 1656 nla_nest_end(msg, nl_cmds);
1640 state->split_start++; 1657 state->split_start++;
1641 if (state->split) 1658 if (state->split)
@@ -1881,7 +1898,7 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb,
1881 struct netlink_callback *cb, 1898 struct netlink_callback *cb,
1882 struct nl80211_dump_wiphy_state *state) 1899 struct nl80211_dump_wiphy_state *state)
1883{ 1900{
1884 struct nlattr **tb = nl80211_fam.attrbuf; 1901 struct nlattr **tb = genl_family_attrbuf(&nl80211_fam);
1885 int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, 1902 int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
1886 tb, nl80211_fam.maxattr, nl80211_policy); 1903 tb, nl80211_fam.maxattr, nl80211_policy);
1887 /* ignore parse errors for backward compatibility */ 1904 /* ignore parse errors for backward compatibility */
@@ -2296,10 +2313,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
2296 nla_for_each_nested(nl_txq_params, 2313 nla_for_each_nested(nl_txq_params,
2297 info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], 2314 info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS],
2298 rem_txq_params) { 2315 rem_txq_params) {
2299 result = nla_parse(tb, NL80211_TXQ_ATTR_MAX, 2316 result = nla_parse_nested(tb, NL80211_TXQ_ATTR_MAX,
2300 nla_data(nl_txq_params), 2317 nl_txq_params,
2301 nla_len(nl_txq_params), 2318 txq_params_policy);
2302 txq_params_policy);
2303 if (result) 2319 if (result)
2304 return result; 2320 return result;
2305 result = parse_txq_params(tb, &txq_params); 2321 result = parse_txq_params(tb, &txq_params);
@@ -3549,8 +3565,8 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
3549 sband = rdev->wiphy.bands[band]; 3565 sband = rdev->wiphy.bands[band];
3550 if (sband == NULL) 3566 if (sband == NULL)
3551 return -EINVAL; 3567 return -EINVAL;
3552 err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), 3568 err = nla_parse_nested(tb, NL80211_TXRATE_MAX, tx_rates,
3553 nla_len(tx_rates), nl80211_txattr_policy); 3569 nl80211_txattr_policy);
3554 if (err) 3570 if (err)
3555 return err; 3571 return err;
3556 if (tb[NL80211_TXRATE_LEGACY]) { 3572 if (tb[NL80211_TXRATE_LEGACY]) {
@@ -3756,12 +3772,23 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
3756 if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) && 3772 if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
3757 auth_type == NL80211_AUTHTYPE_SAE) 3773 auth_type == NL80211_AUTHTYPE_SAE)
3758 return false; 3774 return false;
3775 if (!wiphy_ext_feature_isset(&rdev->wiphy,
3776 NL80211_EXT_FEATURE_FILS_STA) &&
3777 (auth_type == NL80211_AUTHTYPE_FILS_SK ||
3778 auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
3779 auth_type == NL80211_AUTHTYPE_FILS_PK))
3780 return false;
3759 return true; 3781 return true;
3760 case NL80211_CMD_CONNECT: 3782 case NL80211_CMD_CONNECT:
3761 case NL80211_CMD_START_AP: 3783 case NL80211_CMD_START_AP:
3762 /* SAE not supported yet */ 3784 /* SAE not supported yet */
3763 if (auth_type == NL80211_AUTHTYPE_SAE) 3785 if (auth_type == NL80211_AUTHTYPE_SAE)
3764 return false; 3786 return false;
3787 /* FILS not supported yet */
3788 if (auth_type == NL80211_AUTHTYPE_FILS_SK ||
3789 auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
3790 auth_type == NL80211_AUTHTYPE_FILS_PK)
3791 return false;
3765 return true; 3792 return true;
3766 default: 3793 default:
3767 return false; 3794 return false;
@@ -3803,7 +3830,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
3803 params.dtim_period = 3830 params.dtim_period =
3804 nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); 3831 nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]);
3805 3832
3806 err = cfg80211_validate_beacon_int(rdev, params.beacon_interval); 3833 err = cfg80211_validate_beacon_int(rdev, dev->ieee80211_ptr->iftype,
3834 params.beacon_interval);
3807 if (err) 3835 if (err)
3808 return err; 3836 return err;
3809 3837
@@ -6305,9 +6333,8 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
6305 6333
6306 nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], 6334 nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
6307 rem_reg_rules) { 6335 rem_reg_rules) {
6308 r = nla_parse(tb, NL80211_REG_RULE_ATTR_MAX, 6336 r = nla_parse_nested(tb, NL80211_REG_RULE_ATTR_MAX,
6309 nla_data(nl_reg_rule), nla_len(nl_reg_rule), 6337 nl_reg_rule, reg_rule_policy);
6310 reg_rule_policy);
6311 if (r) 6338 if (r)
6312 goto bad_reg; 6339 goto bad_reg;
6313 r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); 6340 r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]);
@@ -6374,8 +6401,8 @@ static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy,
6374 if (!nla_ok(nest, nla_len(nest))) 6401 if (!nla_ok(nest, nla_len(nest)))
6375 return -EINVAL; 6402 return -EINVAL;
6376 6403
6377 err = nla_parse(attr, NL80211_BSS_SELECT_ATTR_MAX, nla_data(nest), 6404 err = nla_parse_nested(attr, NL80211_BSS_SELECT_ATTR_MAX, nest,
6378 nla_len(nest), nl80211_bss_select_policy); 6405 nl80211_bss_select_policy);
6379 if (err) 6406 if (err)
6380 return err; 6407 return err;
6381 6408
@@ -6677,7 +6704,20 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
6677 request->no_cck = 6704 request->no_cck =
6678 nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); 6705 nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
6679 6706
6680 if (info->attrs[NL80211_ATTR_MAC]) 6707 /* Initial implementation used NL80211_ATTR_MAC to set the specific
6708 * BSSID to scan for. This was problematic because that same attribute
6709 * was already used for another purpose (local random MAC address). The
6710 * NL80211_ATTR_BSSID attribute was added to fix this. For backwards
6711 * compatibility with older userspace components, also use the
6712 * NL80211_ATTR_MAC value here if it can be determined to be used for
6713 * the specific BSSID use case instead of the random MAC address
6714 * (NL80211_ATTR_SCAN_FLAGS is used to enable random MAC address use).
6715 */
6716 if (info->attrs[NL80211_ATTR_BSSID])
6717 memcpy(request->bssid,
6718 nla_data(info->attrs[NL80211_ATTR_BSSID]), ETH_ALEN);
6719 else if (!(request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) &&
6720 info->attrs[NL80211_ATTR_MAC])
6681 memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_MAC]), 6721 memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_MAC]),
6682 ETH_ALEN); 6722 ETH_ALEN);
6683 else 6723 else
@@ -6765,9 +6805,8 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans,
6765 if (WARN_ON(i >= n_plans)) 6805 if (WARN_ON(i >= n_plans))
6766 return -EINVAL; 6806 return -EINVAL;
6767 6807
6768 err = nla_parse(plan, NL80211_SCHED_SCAN_PLAN_MAX, 6808 err = nla_parse_nested(plan, NL80211_SCHED_SCAN_PLAN_MAX,
6769 nla_data(attr), nla_len(attr), 6809 attr, nl80211_plan_policy);
6770 nl80211_plan_policy);
6771 if (err) 6810 if (err)
6772 return err; 6811 return err;
6773 6812
@@ -6856,9 +6895,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
6856 tmp) { 6895 tmp) {
6857 struct nlattr *rssi; 6896 struct nlattr *rssi;
6858 6897
6859 err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, 6898 err = nla_parse_nested(tb,
6860 nla_data(attr), nla_len(attr), 6899 NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
6861 nl80211_match_policy); 6900 attr, nl80211_match_policy);
6862 if (err) 6901 if (err)
6863 return ERR_PTR(err); 6902 return ERR_PTR(err);
6864 /* add other standalone attributes here */ 6903 /* add other standalone attributes here */
@@ -7029,9 +7068,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
7029 tmp) { 7068 tmp) {
7030 struct nlattr *ssid, *rssi; 7069 struct nlattr *ssid, *rssi;
7031 7070
7032 err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, 7071 err = nla_parse_nested(tb,
7033 nla_data(attr), nla_len(attr), 7072 NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
7034 nl80211_match_policy); 7073 attr, nl80211_match_policy);
7035 if (err) 7074 if (err)
7036 goto out_free; 7075 goto out_free;
7037 ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]; 7076 ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID];
@@ -7643,6 +7682,7 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
7643 7682
7644static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) 7683static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
7645{ 7684{
7685 struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
7646 struct survey_info survey; 7686 struct survey_info survey;
7647 struct cfg80211_registered_device *rdev; 7687 struct cfg80211_registered_device *rdev;
7648 struct wireless_dev *wdev; 7688 struct wireless_dev *wdev;
@@ -7655,7 +7695,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
7655 return res; 7695 return res;
7656 7696
7657 /* prepare_wdev_dump parsed the attributes */ 7697 /* prepare_wdev_dump parsed the attributes */
7658 radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; 7698 radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
7659 7699
7660 if (!wdev->netdev) { 7700 if (!wdev->netdev) {
7661 res = -EINVAL; 7701 res = -EINVAL;
@@ -7708,8 +7748,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
7708 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 7748 struct cfg80211_registered_device *rdev = info->user_ptr[0];
7709 struct net_device *dev = info->user_ptr[1]; 7749 struct net_device *dev = info->user_ptr[1];
7710 struct ieee80211_channel *chan; 7750 struct ieee80211_channel *chan;
7711 const u8 *bssid, *ssid, *ie = NULL, *sae_data = NULL; 7751 const u8 *bssid, *ssid, *ie = NULL, *auth_data = NULL;
7712 int err, ssid_len, ie_len = 0, sae_data_len = 0; 7752 int err, ssid_len, ie_len = 0, auth_data_len = 0;
7713 enum nl80211_auth_type auth_type; 7753 enum nl80211_auth_type auth_type;
7714 struct key_parse key; 7754 struct key_parse key;
7715 bool local_state_change; 7755 bool local_state_change;
@@ -7789,17 +7829,23 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
7789 if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_AUTHENTICATE)) 7829 if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_AUTHENTICATE))
7790 return -EINVAL; 7830 return -EINVAL;
7791 7831
7792 if (auth_type == NL80211_AUTHTYPE_SAE && 7832 if ((auth_type == NL80211_AUTHTYPE_SAE ||
7793 !info->attrs[NL80211_ATTR_SAE_DATA]) 7833 auth_type == NL80211_AUTHTYPE_FILS_SK ||
7834 auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
7835 auth_type == NL80211_AUTHTYPE_FILS_PK) &&
7836 !info->attrs[NL80211_ATTR_AUTH_DATA])
7794 return -EINVAL; 7837 return -EINVAL;
7795 7838
7796 if (info->attrs[NL80211_ATTR_SAE_DATA]) { 7839 if (info->attrs[NL80211_ATTR_AUTH_DATA]) {
7797 if (auth_type != NL80211_AUTHTYPE_SAE) 7840 if (auth_type != NL80211_AUTHTYPE_SAE &&
7841 auth_type != NL80211_AUTHTYPE_FILS_SK &&
7842 auth_type != NL80211_AUTHTYPE_FILS_SK_PFS &&
7843 auth_type != NL80211_AUTHTYPE_FILS_PK)
7798 return -EINVAL; 7844 return -EINVAL;
7799 sae_data = nla_data(info->attrs[NL80211_ATTR_SAE_DATA]); 7845 auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]);
7800 sae_data_len = nla_len(info->attrs[NL80211_ATTR_SAE_DATA]); 7846 auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]);
7801 /* need to include at least Auth Transaction and Status Code */ 7847 /* need to include at least Auth Transaction and Status Code */
7802 if (sae_data_len < 4) 7848 if (auth_data_len < 4)
7803 return -EINVAL; 7849 return -EINVAL;
7804 } 7850 }
7805 7851
@@ -7816,7 +7862,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
7816 err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, 7862 err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
7817 ssid, ssid_len, ie, ie_len, 7863 ssid, ssid_len, ie, ie_len,
7818 key.p.key, key.p.key_len, key.idx, 7864 key.p.key, key.p.key_len, key.idx,
7819 sae_data, sae_data_len); 7865 auth_data, auth_data_len);
7820 wdev_unlock(dev->ieee80211_ptr); 7866 wdev_unlock(dev->ieee80211_ptr);
7821 return err; 7867 return err;
7822} 7868}
@@ -7995,6 +8041,15 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
7995 req.flags |= ASSOC_REQ_USE_RRM; 8041 req.flags |= ASSOC_REQ_USE_RRM;
7996 } 8042 }
7997 8043
8044 if (info->attrs[NL80211_ATTR_FILS_KEK]) {
8045 req.fils_kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]);
8046 req.fils_kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]);
8047 if (!info->attrs[NL80211_ATTR_FILS_NONCES])
8048 return -EINVAL;
8049 req.fils_nonces =
8050 nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]);
8051 }
8052
7998 err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); 8053 err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
7999 if (!err) { 8054 if (!err) {
8000 wdev_lock(dev->ieee80211_ptr); 8055 wdev_lock(dev->ieee80211_ptr);
@@ -8152,7 +8207,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
8152 ibss.beacon_interval = 8207 ibss.beacon_interval =
8153 nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); 8208 nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
8154 8209
8155 err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval); 8210 err = cfg80211_validate_beacon_int(rdev, NL80211_IFTYPE_ADHOC,
8211 ibss.beacon_interval);
8156 if (err) 8212 if (err)
8157 return err; 8213 return err;
8158 8214
@@ -8478,14 +8534,14 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
8478 */ 8534 */
8479 phy_idx = cb->args[0] - 1; 8535 phy_idx = cb->args[0] - 1;
8480 } else { 8536 } else {
8537 struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
8538
8481 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, 8539 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
8482 nl80211_fam.attrbuf, nl80211_fam.maxattr, 8540 attrbuf, nl80211_fam.maxattr, nl80211_policy);
8483 nl80211_policy);
8484 if (err) 8541 if (err)
8485 goto out_err; 8542 goto out_err;
8486 8543
8487 rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), 8544 rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
8488 nl80211_fam.attrbuf);
8489 if (IS_ERR(rdev)) { 8545 if (IS_ERR(rdev)) {
8490 err = PTR_ERR(rdev); 8546 err = PTR_ERR(rdev);
8491 goto out_err; 8547 goto out_err;
@@ -8493,9 +8549,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
8493 phy_idx = rdev->wiphy_idx; 8549 phy_idx = rdev->wiphy_idx;
8494 rdev = NULL; 8550 rdev = NULL;
8495 8551
8496 if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]) 8552 if (attrbuf[NL80211_ATTR_TESTDATA])
8497 cb->args[1] = 8553 cb->args[1] = (long)attrbuf[NL80211_ATTR_TESTDATA];
8498 (long)nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA];
8499 } 8554 }
8500 8555
8501 if (cb->args[1]) { 8556 if (cb->args[1]) {
@@ -8726,6 +8781,37 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
8726 return err; 8781 return err;
8727} 8782}
8728 8783
8784static int nl80211_update_connect_params(struct sk_buff *skb,
8785 struct genl_info *info)
8786{
8787 struct cfg80211_connect_params connect = {};
8788 struct cfg80211_registered_device *rdev = info->user_ptr[0];
8789 struct net_device *dev = info->user_ptr[1];
8790 struct wireless_dev *wdev = dev->ieee80211_ptr;
8791 u32 changed = 0;
8792 int ret;
8793
8794 if (!rdev->ops->update_connect_params)
8795 return -EOPNOTSUPP;
8796
8797 if (info->attrs[NL80211_ATTR_IE]) {
8798 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
8799 return -EINVAL;
8800 connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
8801 connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
8802 changed |= UPDATE_ASSOC_IES;
8803 }
8804
8805 wdev_lock(dev->ieee80211_ptr);
8806 if (!wdev->current_bss)
8807 ret = -ENOLINK;
8808 else
8809 ret = rdev_update_connect_params(rdev, dev, &connect, changed);
8810 wdev_unlock(dev->ieee80211_ptr);
8811
8812 return ret;
8813}
8814
8729static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) 8815static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
8730{ 8816{
8731 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 8817 struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -9417,7 +9503,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
9417 setup.beacon_interval = 9503 setup.beacon_interval =
9418 nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); 9504 nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
9419 9505
9420 err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval); 9506 err = cfg80211_validate_beacon_int(rdev,
9507 NL80211_IFTYPE_MESH_POINT,
9508 setup.beacon_interval);
9421 if (err) 9509 if (err)
9422 return err; 9510 return err;
9423 } 9511 }
@@ -9728,9 +9816,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
9728 if (!rdev->wiphy.wowlan->tcp) 9816 if (!rdev->wiphy.wowlan->tcp)
9729 return -EINVAL; 9817 return -EINVAL;
9730 9818
9731 err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP, 9819 err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TCP, attr,
9732 nla_data(attr), nla_len(attr), 9820 nl80211_wowlan_tcp_policy);
9733 nl80211_wowlan_tcp_policy);
9734 if (err) 9821 if (err)
9735 return err; 9822 return err;
9736 9823
@@ -9875,9 +9962,7 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev,
9875 goto out; 9962 goto out;
9876 } 9963 }
9877 9964
9878 err = nla_parse(tb, NL80211_ATTR_MAX, 9965 err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy);
9879 nla_data(attr), nla_len(attr),
9880 nl80211_policy);
9881 if (err) 9966 if (err)
9882 goto out; 9967 goto out;
9883 9968
@@ -9911,10 +9996,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
9911 goto set_wakeup; 9996 goto set_wakeup;
9912 } 9997 }
9913 9998
9914 err = nla_parse(tb, MAX_NL80211_WOWLAN_TRIG, 9999 err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TRIG,
9915 nla_data(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]), 10000 info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS],
9916 nla_len(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]), 10001 nl80211_wowlan_policy);
9917 nl80211_wowlan_policy);
9918 if (err) 10002 if (err)
9919 return err; 10003 return err;
9920 10004
@@ -9996,8 +10080,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
9996 rem) { 10080 rem) {
9997 u8 *mask_pat; 10081 u8 *mask_pat;
9998 10082
9999 nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), 10083 nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
10000 nla_len(pat), NULL); 10084 NULL);
10001 err = -EINVAL; 10085 err = -EINVAL;
10002 if (!pat_tb[NL80211_PKTPAT_MASK] || 10086 if (!pat_tb[NL80211_PKTPAT_MASK] ||
10003 !pat_tb[NL80211_PKTPAT_PATTERN]) 10087 !pat_tb[NL80211_PKTPAT_PATTERN])
@@ -10207,8 +10291,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
10207 int rem, pat_len, mask_len, pkt_offset, n_patterns = 0; 10291 int rem, pat_len, mask_len, pkt_offset, n_patterns = 0;
10208 struct nlattr *pat_tb[NUM_NL80211_PKTPAT]; 10292 struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
10209 10293
10210 err = nla_parse(tb, NL80211_ATTR_COALESCE_RULE_MAX, nla_data(rule), 10294 err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule,
10211 nla_len(rule), nl80211_coalesce_policy); 10295 nl80211_coalesce_policy);
10212 if (err) 10296 if (err)
10213 return err; 10297 return err;
10214 10298
@@ -10246,8 +10330,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
10246 rem) { 10330 rem) {
10247 u8 *mask_pat; 10331 u8 *mask_pat;
10248 10332
10249 nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), 10333 nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL);
10250 nla_len(pat), NULL);
10251 if (!pat_tb[NL80211_PKTPAT_MASK] || 10334 if (!pat_tb[NL80211_PKTPAT_MASK] ||
10252 !pat_tb[NL80211_PKTPAT_PATTERN]) 10335 !pat_tb[NL80211_PKTPAT_PATTERN])
10253 return -EINVAL; 10336 return -EINVAL;
@@ -10366,10 +10449,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
10366 if (!info->attrs[NL80211_ATTR_REKEY_DATA]) 10449 if (!info->attrs[NL80211_ATTR_REKEY_DATA])
10367 return -EINVAL; 10450 return -EINVAL;
10368 10451
10369 err = nla_parse(tb, MAX_NL80211_REKEY_DATA, 10452 err = nla_parse_nested(tb, MAX_NL80211_REKEY_DATA,
10370 nla_data(info->attrs[NL80211_ATTR_REKEY_DATA]), 10453 info->attrs[NL80211_ATTR_REKEY_DATA],
10371 nla_len(info->attrs[NL80211_ATTR_REKEY_DATA]), 10454 nl80211_rekey_policy);
10372 nl80211_rekey_policy);
10373 if (err) 10455 if (err)
10374 return err; 10456 return err;
10375 10457
@@ -10518,7 +10600,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
10518 if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE) 10600 if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)
10519 return -EOPNOTSUPP; 10601 return -EOPNOTSUPP;
10520 10602
10521 if (wdev->p2p_started) 10603 if (wdev_running(wdev))
10522 return 0; 10604 return 0;
10523 10605
10524 if (rfkill_blocked(rdev->rfkill)) 10606 if (rfkill_blocked(rdev->rfkill))
@@ -10528,7 +10610,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
10528 if (err) 10610 if (err)
10529 return err; 10611 return err;
10530 10612
10531 wdev->p2p_started = true; 10613 wdev->is_running = true;
10532 rdev->opencount++; 10614 rdev->opencount++;
10533 10615
10534 return 0; 10616 return 0;
@@ -10560,7 +10642,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
10560 if (wdev->iftype != NL80211_IFTYPE_NAN) 10642 if (wdev->iftype != NL80211_IFTYPE_NAN)
10561 return -EOPNOTSUPP; 10643 return -EOPNOTSUPP;
10562 10644
10563 if (wdev->nan_started) 10645 if (wdev_running(wdev))
10564 return -EEXIST; 10646 return -EEXIST;
10565 10647
10566 if (rfkill_blocked(rdev->rfkill)) 10648 if (rfkill_blocked(rdev->rfkill))
@@ -10583,7 +10665,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
10583 if (err) 10665 if (err)
10584 return err; 10666 return err;
10585 10667
10586 wdev->nan_started = true; 10668 wdev->is_running = true;
10587 rdev->opencount++; 10669 rdev->opencount++;
10588 10670
10589 return 0; 10671 return 0;
@@ -10638,8 +10720,7 @@ static int handle_nan_filter(struct nlattr *attr_filter,
10638 10720
10639 i = 0; 10721 i = 0;
10640 nla_for_each_nested(attr, attr_filter, rem) { 10722 nla_for_each_nested(attr, attr_filter, rem) {
10641 filter[i].filter = kmemdup(nla_data(attr), nla_len(attr), 10723 filter[i].filter = nla_memdup(attr, GFP_KERNEL);
10642 GFP_KERNEL);
10643 filter[i].len = nla_len(attr); 10724 filter[i].len = nla_len(attr);
10644 i++; 10725 i++;
10645 } 10726 }
@@ -10668,7 +10749,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
10668 if (wdev->iftype != NL80211_IFTYPE_NAN) 10749 if (wdev->iftype != NL80211_IFTYPE_NAN)
10669 return -EOPNOTSUPP; 10750 return -EOPNOTSUPP;
10670 10751
10671 if (!wdev->nan_started) 10752 if (!wdev_running(wdev))
10672 return -ENOTCONN; 10753 return -ENOTCONN;
10673 10754
10674 if (!info->attrs[NL80211_ATTR_NAN_FUNC]) 10755 if (!info->attrs[NL80211_ATTR_NAN_FUNC])
@@ -10678,10 +10759,9 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
10678 wdev->owner_nlportid != info->snd_portid) 10759 wdev->owner_nlportid != info->snd_portid)
10679 return -ENOTCONN; 10760 return -ENOTCONN;
10680 10761
10681 err = nla_parse(tb, NL80211_NAN_FUNC_ATTR_MAX, 10762 err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX,
10682 nla_data(info->attrs[NL80211_ATTR_NAN_FUNC]), 10763 info->attrs[NL80211_ATTR_NAN_FUNC],
10683 nla_len(info->attrs[NL80211_ATTR_NAN_FUNC]), 10764 nl80211_nan_func_policy);
10684 nl80211_nan_func_policy);
10685 if (err) 10765 if (err)
10686 return err; 10766 return err;
10687 10767
@@ -10776,9 +10856,9 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
10776 if (tb[NL80211_NAN_FUNC_SRF]) { 10856 if (tb[NL80211_NAN_FUNC_SRF]) {
10777 struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR]; 10857 struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR];
10778 10858
10779 err = nla_parse(srf_tb, NL80211_NAN_SRF_ATTR_MAX, 10859 err = nla_parse_nested(srf_tb, NL80211_NAN_SRF_ATTR_MAX,
10780 nla_data(tb[NL80211_NAN_FUNC_SRF]), 10860 tb[NL80211_NAN_FUNC_SRF],
10781 nla_len(tb[NL80211_NAN_FUNC_SRF]), NULL); 10861 nl80211_nan_srf_policy);
10782 if (err) 10862 if (err)
10783 goto out; 10863 goto out;
10784 10864
@@ -10904,7 +10984,7 @@ static int nl80211_nan_del_func(struct sk_buff *skb,
10904 if (wdev->iftype != NL80211_IFTYPE_NAN) 10984 if (wdev->iftype != NL80211_IFTYPE_NAN)
10905 return -EOPNOTSUPP; 10985 return -EOPNOTSUPP;
10906 10986
10907 if (!wdev->nan_started) 10987 if (!wdev_running(wdev))
10908 return -ENOTCONN; 10988 return -ENOTCONN;
10909 10989
10910 if (!info->attrs[NL80211_ATTR_COOKIE]) 10990 if (!info->attrs[NL80211_ATTR_COOKIE])
@@ -10932,7 +11012,7 @@ static int nl80211_nan_change_config(struct sk_buff *skb,
10932 if (wdev->iftype != NL80211_IFTYPE_NAN) 11012 if (wdev->iftype != NL80211_IFTYPE_NAN)
10933 return -EOPNOTSUPP; 11013 return -EOPNOTSUPP;
10934 11014
10935 if (!wdev->nan_started) 11015 if (!wdev_running(wdev))
10936 return -ENOTCONN; 11016 return -ENOTCONN;
10937 11017
10938 if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) { 11018 if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) {
@@ -11244,10 +11324,7 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
11244 return -EINVAL; 11324 return -EINVAL;
11245 11325
11246 if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { 11326 if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
11247 if (wdev->netdev && 11327 if (!wdev_running(wdev))
11248 !netif_running(wdev->netdev))
11249 return -ENETDOWN;
11250 if (!wdev->netdev && !wdev->p2p_started)
11251 return -ENETDOWN; 11328 return -ENETDOWN;
11252 } 11329 }
11253 11330
@@ -11277,6 +11354,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
11277 struct cfg80211_registered_device **rdev, 11354 struct cfg80211_registered_device **rdev,
11278 struct wireless_dev **wdev) 11355 struct wireless_dev **wdev)
11279{ 11356{
11357 struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
11280 u32 vid, subcmd; 11358 u32 vid, subcmd;
11281 unsigned int i; 11359 unsigned int i;
11282 int vcmd_idx = -1; 11360 int vcmd_idx = -1;
@@ -11312,31 +11390,28 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
11312 } 11390 }
11313 11391
11314 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, 11392 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
11315 nl80211_fam.attrbuf, nl80211_fam.maxattr, 11393 attrbuf, nl80211_fam.maxattr, nl80211_policy);
11316 nl80211_policy);
11317 if (err) 11394 if (err)
11318 goto out_unlock; 11395 goto out_unlock;
11319 11396
11320 if (!nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID] || 11397 if (!attrbuf[NL80211_ATTR_VENDOR_ID] ||
11321 !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { 11398 !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
11322 err = -EINVAL; 11399 err = -EINVAL;
11323 goto out_unlock; 11400 goto out_unlock;
11324 } 11401 }
11325 11402
11326 *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), 11403 *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf);
11327 nl80211_fam.attrbuf);
11328 if (IS_ERR(*wdev)) 11404 if (IS_ERR(*wdev))
11329 *wdev = NULL; 11405 *wdev = NULL;
11330 11406
11331 *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), 11407 *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
11332 nl80211_fam.attrbuf);
11333 if (IS_ERR(*rdev)) { 11408 if (IS_ERR(*rdev)) {
11334 err = PTR_ERR(*rdev); 11409 err = PTR_ERR(*rdev);
11335 goto out_unlock; 11410 goto out_unlock;
11336 } 11411 }
11337 11412
11338 vid = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID]); 11413 vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]);
11339 subcmd = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); 11414 subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
11340 11415
11341 for (i = 0; i < (*rdev)->wiphy.n_vendor_commands; i++) { 11416 for (i = 0; i < (*rdev)->wiphy.n_vendor_commands; i++) {
11342 const struct wiphy_vendor_command *vcmd; 11417 const struct wiphy_vendor_command *vcmd;
@@ -11360,9 +11435,9 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
11360 goto out_unlock; 11435 goto out_unlock;
11361 } 11436 }
11362 11437
11363 if (nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]) { 11438 if (attrbuf[NL80211_ATTR_VENDOR_DATA]) {
11364 data = nla_data(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]); 11439 data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]);
11365 data_len = nla_len(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]); 11440 data_len = nla_len(attrbuf[NL80211_ATTR_VENDOR_DATA]);
11366 } 11441 }
11367 11442
11368 /* 0 is the first index - add 1 to parse only once */ 11443 /* 0 is the first index - add 1 to parse only once */
@@ -11410,10 +11485,7 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
11410 return -EINVAL; 11485 return -EINVAL;
11411 11486
11412 if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { 11487 if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
11413 if (wdev->netdev && 11488 if (!wdev_running(wdev))
11414 !netif_running(wdev->netdev))
11415 return -ENETDOWN;
11416 if (!wdev->netdev && !wdev->p2p_started)
11417 return -ENETDOWN; 11489 return -ENETDOWN;
11418 } 11490 }
11419 } 11491 }
@@ -11726,6 +11798,31 @@ static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb,
11726 return 0; 11798 return 0;
11727} 11799}
11728 11800
11801static int nl80211_set_multicast_to_unicast(struct sk_buff *skb,
11802 struct genl_info *info)
11803{
11804 struct cfg80211_registered_device *rdev = info->user_ptr[0];
11805 struct net_device *dev = info->user_ptr[1];
11806 struct wireless_dev *wdev = dev->ieee80211_ptr;
11807 const struct nlattr *nla;
11808 bool enabled;
11809
11810 if (netif_running(dev))
11811 return -EBUSY;
11812
11813 if (!rdev->ops->set_multicast_to_unicast)
11814 return -EOPNOTSUPP;
11815
11816 if (wdev->iftype != NL80211_IFTYPE_AP &&
11817 wdev->iftype != NL80211_IFTYPE_P2P_GO)
11818 return -EOPNOTSUPP;
11819
11820 nla = info->attrs[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED];
11821 enabled = nla_get_flag(nla);
11822
11823 return rdev_set_multicast_to_unicast(rdev, dev, enabled);
11824}
11825
11729#define NL80211_FLAG_NEED_WIPHY 0x01 11826#define NL80211_FLAG_NEED_WIPHY 0x01
11730#define NL80211_FLAG_NEED_NETDEV 0x02 11827#define NL80211_FLAG_NEED_NETDEV 0x02
11731#define NL80211_FLAG_NEED_RTNL 0x04 11828#define NL80211_FLAG_NEED_RTNL 0x04
@@ -11784,29 +11881,15 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
11784 info->user_ptr[1] = wdev; 11881 info->user_ptr[1] = wdev;
11785 } 11882 }
11786 11883
11787 if (dev) { 11884 if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
11788 if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && 11885 !wdev_running(wdev)) {
11789 !netif_running(dev)) { 11886 if (rtnl)
11790 if (rtnl) 11887 rtnl_unlock();
11791 rtnl_unlock(); 11888 return -ENETDOWN;
11792 return -ENETDOWN; 11889 }
11793 }
11794 11890
11891 if (dev)
11795 dev_hold(dev); 11892 dev_hold(dev);
11796 } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) {
11797 if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE &&
11798 !wdev->p2p_started) {
11799 if (rtnl)
11800 rtnl_unlock();
11801 return -ENETDOWN;
11802 }
11803 if (wdev->iftype == NL80211_IFTYPE_NAN &&
11804 !wdev->nan_started) {
11805 if (rtnl)
11806 rtnl_unlock();
11807 return -ENETDOWN;
11808 }
11809 }
11810 11893
11811 info->user_ptr[0] = rdev; 11894 info->user_ptr[0] = rdev;
11812 } 11895 }
@@ -12179,6 +12262,14 @@ static const struct genl_ops nl80211_ops[] = {
12179 NL80211_FLAG_NEED_RTNL, 12262 NL80211_FLAG_NEED_RTNL,
12180 }, 12263 },
12181 { 12264 {
12265 .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS,
12266 .doit = nl80211_update_connect_params,
12267 .policy = nl80211_policy,
12268 .flags = GENL_ADMIN_PERM,
12269 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
12270 NL80211_FLAG_NEED_RTNL,
12271 },
12272 {
12182 .cmd = NL80211_CMD_DISCONNECT, 12273 .cmd = NL80211_CMD_DISCONNECT,
12183 .doit = nl80211_disconnect, 12274 .doit = nl80211_disconnect,
12184 .policy = nl80211_policy, 12275 .policy = nl80211_policy,
@@ -12599,6 +12690,29 @@ static const struct genl_ops nl80211_ops[] = {
12599 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | 12690 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
12600 NL80211_FLAG_NEED_RTNL, 12691 NL80211_FLAG_NEED_RTNL,
12601 }, 12692 },
12693 {
12694 .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST,
12695 .doit = nl80211_set_multicast_to_unicast,
12696 .policy = nl80211_policy,
12697 .flags = GENL_UNS_ADMIN_PERM,
12698 .internal_flags = NL80211_FLAG_NEED_NETDEV |
12699 NL80211_FLAG_NEED_RTNL,
12700 },
12701};
12702
12703static struct genl_family nl80211_fam __ro_after_init = {
12704 .name = NL80211_GENL_NAME, /* have users key off the name instead */
12705 .hdrsize = 0, /* no private header */
12706 .version = 1, /* no particular meaning now */
12707 .maxattr = NL80211_ATTR_MAX,
12708 .netnsok = true,
12709 .pre_doit = nl80211_pre_doit,
12710 .post_doit = nl80211_post_doit,
12711 .module = THIS_MODULE,
12712 .ops = nl80211_ops,
12713 .n_ops = ARRAY_SIZE(nl80211_ops),
12714 .mcgrps = nl80211_mcgrps,
12715 .n_mcgrps = ARRAY_SIZE(nl80211_mcgrps),
12602}; 12716};
12603 12717
12604/* notification functions */ 12718/* notification functions */
@@ -14563,12 +14677,11 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev)
14563 14677
14564/* initialisation/exit functions */ 14678/* initialisation/exit functions */
14565 14679
14566int nl80211_init(void) 14680int __init nl80211_init(void)
14567{ 14681{
14568 int err; 14682 int err;
14569 14683
14570 err = genl_register_family_with_ops_groups(&nl80211_fam, nl80211_ops, 14684 err = genl_register_family(&nl80211_fam);
14571 nl80211_mcgrps);
14572 if (err) 14685 if (err)
14573 return err; 14686 return err;
14574 14687
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 11cf83c8ad4f..2f425075ada8 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -490,6 +490,18 @@ static inline int rdev_connect(struct cfg80211_registered_device *rdev,
490 return ret; 490 return ret;
491} 491}
492 492
493static inline int
494rdev_update_connect_params(struct cfg80211_registered_device *rdev,
495 struct net_device *dev,
496 struct cfg80211_connect_params *sme, u32 changed)
497{
498 int ret;
499 trace_rdev_update_connect_params(&rdev->wiphy, dev, sme, changed);
500 ret = rdev->ops->update_connect_params(&rdev->wiphy, dev, sme, changed);
501 trace_rdev_return_int(&rdev->wiphy, ret);
502 return ret;
503}
504
493static inline int rdev_disconnect(struct cfg80211_registered_device *rdev, 505static inline int rdev_disconnect(struct cfg80211_registered_device *rdev,
494 struct net_device *dev, u16 reason_code) 506 struct net_device *dev, u16 reason_code)
495{ 507{
@@ -562,6 +574,18 @@ static inline int rdev_set_wds_peer(struct cfg80211_registered_device *rdev,
562 return ret; 574 return ret;
563} 575}
564 576
577static inline int
578rdev_set_multicast_to_unicast(struct cfg80211_registered_device *rdev,
579 struct net_device *dev,
580 const bool enabled)
581{
582 int ret;
583 trace_rdev_set_multicast_to_unicast(&rdev->wiphy, dev, enabled);
584 ret = rdev->ops->set_multicast_to_unicast(&rdev->wiphy, dev, enabled);
585 trace_rdev_return_int(&rdev->wiphy, ret);
586 return ret;
587}
588
565static inline void rdev_rfkill_poll(struct cfg80211_registered_device *rdev) 589static inline void rdev_rfkill_poll(struct cfg80211_registered_device *rdev)
566{ 590{
567 trace_rdev_rfkill_poll(&rdev->wiphy); 591 trace_rdev_rfkill_poll(&rdev->wiphy);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index a77db333927e..5e0d19380302 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -39,6 +39,7 @@ struct cfg80211_conn {
39 CFG80211_CONN_ASSOCIATING, 39 CFG80211_CONN_ASSOCIATING,
40 CFG80211_CONN_ASSOC_FAILED, 40 CFG80211_CONN_ASSOC_FAILED,
41 CFG80211_CONN_DEAUTH, 41 CFG80211_CONN_DEAUTH,
42 CFG80211_CONN_ABANDON,
42 CFG80211_CONN_CONNECTED, 43 CFG80211_CONN_CONNECTED,
43 } state; 44 } state;
44 u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; 45 u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
@@ -206,6 +207,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
206 cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, 207 cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
207 NULL, 0, 208 NULL, 0,
208 WLAN_REASON_DEAUTH_LEAVING, false); 209 WLAN_REASON_DEAUTH_LEAVING, false);
210 /* fall through */
211 case CFG80211_CONN_ABANDON:
209 /* free directly, disconnected event already sent */ 212 /* free directly, disconnected event already sent */
210 cfg80211_sme_free(wdev); 213 cfg80211_sme_free(wdev);
211 return 0; 214 return 0;
@@ -423,6 +426,17 @@ void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev)
423 schedule_work(&rdev->conn_work); 426 schedule_work(&rdev->conn_work);
424} 427}
425 428
429void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev)
430{
431 struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
432
433 if (!wdev->conn)
434 return;
435
436 wdev->conn->state = CFG80211_CONN_ABANDON;
437 schedule_work(&rdev->conn_work);
438}
439
426static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev, 440static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev,
427 const u8 *ies, size_t ies_len, 441 const u8 *ies, size_t ies_len,
428 const u8 **out_ies, size_t *out_ies_len) 442 const u8 **out_ies, size_t *out_ies_len)
@@ -1088,7 +1102,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
1088 err = cfg80211_sme_disconnect(wdev, reason); 1102 err = cfg80211_sme_disconnect(wdev, reason);
1089 else if (!rdev->ops->disconnect) 1103 else if (!rdev->ops->disconnect)
1090 cfg80211_mlme_down(rdev, dev); 1104 cfg80211_mlme_down(rdev, dev);
1091 else if (wdev->current_bss) 1105 else if (wdev->ssid_len)
1092 err = rdev_disconnect(rdev, dev, reason); 1106 err = rdev_disconnect(rdev, dev, reason);
1093 1107
1094 return err; 1108 return err;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index a3d0a91b1e09..ea1b47e04fa4 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1281,6 +1281,24 @@ TRACE_EVENT(rdev_connect,
1281 __entry->wpa_versions, __entry->flags, MAC_PR_ARG(prev_bssid)) 1281 __entry->wpa_versions, __entry->flags, MAC_PR_ARG(prev_bssid))
1282); 1282);
1283 1283
1284TRACE_EVENT(rdev_update_connect_params,
1285 TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
1286 struct cfg80211_connect_params *sme, u32 changed),
1287 TP_ARGS(wiphy, netdev, sme, changed),
1288 TP_STRUCT__entry(
1289 WIPHY_ENTRY
1290 NETDEV_ENTRY
1291 __field(u32, changed)
1292 ),
1293 TP_fast_assign(
1294 WIPHY_ASSIGN;
1295 NETDEV_ASSIGN;
1296 __entry->changed = changed;
1297 ),
1298 TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", parameters changed: %u",
1299 WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->changed)
1300);
1301
1284TRACE_EVENT(rdev_set_cqm_rssi_config, 1302TRACE_EVENT(rdev_set_cqm_rssi_config,
1285 TP_PROTO(struct wiphy *wiphy, 1303 TP_PROTO(struct wiphy *wiphy,
1286 struct net_device *netdev, s32 rssi_thold, 1304 struct net_device *netdev, s32 rssi_thold,
@@ -3030,6 +3048,25 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan,
3030 TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), 3048 TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
3031 TP_ARGS(wiphy, wdev) 3049 TP_ARGS(wiphy, wdev)
3032); 3050);
3051
3052TRACE_EVENT(rdev_set_multicast_to_unicast,
3053 TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
3054 const bool enabled),
3055 TP_ARGS(wiphy, netdev, enabled),
3056 TP_STRUCT__entry(
3057 WIPHY_ENTRY
3058 NETDEV_ENTRY
3059 __field(bool, enabled)
3060 ),
3061 TP_fast_assign(
3062 WIPHY_ASSIGN;
3063 NETDEV_ASSIGN;
3064 __entry->enabled = enabled;
3065 ),
3066 TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", unicast: %s",
3067 WIPHY_PR_ARG, NETDEV_PR_ARG,
3068 BOOL_TO_STR(__entry->enabled))
3069);
3033#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ 3070#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
3034 3071
3035#undef TRACE_INCLUDE_PATH 3072#undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 659b507b347d..e9d040d29846 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -13,6 +13,7 @@
13#include <net/dsfield.h> 13#include <net/dsfield.h>
14#include <linux/if_vlan.h> 14#include <linux/if_vlan.h>
15#include <linux/mpls.h> 15#include <linux/mpls.h>
16#include <linux/gcd.h>
16#include "core.h" 17#include "core.h"
17#include "rdev-ops.h" 18#include "rdev-ops.h"
18 19
@@ -1378,6 +1379,25 @@ static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id)
1378 return false; 1379 return false;
1379} 1380}
1380 1381
1382static size_t skip_ie(const u8 *ies, size_t ielen, size_t pos)
1383{
1384 /* we assume a validly formed IEs buffer */
1385 u8 len = ies[pos + 1];
1386
1387 pos += 2 + len;
1388
1389 /* the IE itself must have 255 bytes for fragments to follow */
1390 if (len < 255)
1391 return pos;
1392
1393 while (pos < ielen && ies[pos] == WLAN_EID_FRAGMENT) {
1394 len = ies[pos + 1];
1395 pos += 2 + len;
1396 }
1397
1398 return pos;
1399}
1400
1381size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, 1401size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
1382 const u8 *ids, int n_ids, 1402 const u8 *ids, int n_ids,
1383 const u8 *after_ric, int n_after_ric, 1403 const u8 *after_ric, int n_after_ric,
@@ -1387,14 +1407,14 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
1387 1407
1388 while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) { 1408 while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) {
1389 if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) { 1409 if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) {
1390 pos += 2 + ies[pos + 1]; 1410 pos = skip_ie(ies, ielen, pos);
1391 1411
1392 while (pos < ielen && 1412 while (pos < ielen &&
1393 !ieee80211_id_in_list(after_ric, n_after_ric, 1413 !ieee80211_id_in_list(after_ric, n_after_ric,
1394 ies[pos])) 1414 ies[pos]))
1395 pos += 2 + ies[pos + 1]; 1415 pos = skip_ie(ies, ielen, pos);
1396 } else { 1416 } else {
1397 pos += 2 + ies[pos + 1]; 1417 pos = skip_ie(ies, ielen, pos);
1398 } 1418 }
1399 } 1419 }
1400 1420
@@ -1555,31 +1575,57 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
1555} 1575}
1556EXPORT_SYMBOL(ieee80211_chandef_to_operating_class); 1576EXPORT_SYMBOL(ieee80211_chandef_to_operating_class);
1557 1577
1558int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, 1578static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int,
1559 u32 beacon_int) 1579 u32 *beacon_int_gcd,
1580 bool *beacon_int_different)
1560{ 1581{
1561 struct wireless_dev *wdev; 1582 struct wireless_dev *wdev;
1562 int res = 0;
1563 1583
1564 if (beacon_int < 10 || beacon_int > 10000) 1584 *beacon_int_gcd = 0;
1565 return -EINVAL; 1585 *beacon_int_different = false;
1566 1586
1567 list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { 1587 list_for_each_entry(wdev, &wiphy->wdev_list, list) {
1568 if (!wdev->beacon_interval) 1588 if (!wdev->beacon_interval)
1569 continue; 1589 continue;
1570 if (wdev->beacon_interval != beacon_int) { 1590
1571 res = -EINVAL; 1591 if (!*beacon_int_gcd) {
1572 break; 1592 *beacon_int_gcd = wdev->beacon_interval;
1593 continue;
1573 } 1594 }
1595
1596 if (wdev->beacon_interval == *beacon_int_gcd)
1597 continue;
1598
1599 *beacon_int_different = true;
1600 *beacon_int_gcd = gcd(*beacon_int_gcd, wdev->beacon_interval);
1574 } 1601 }
1575 1602
1576 return res; 1603 if (new_beacon_int && *beacon_int_gcd != new_beacon_int) {
1604 if (*beacon_int_gcd)
1605 *beacon_int_different = true;
1606 *beacon_int_gcd = gcd(*beacon_int_gcd, new_beacon_int);
1607 }
1608}
1609
1610int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
1611 enum nl80211_iftype iftype, u32 beacon_int)
1612{
1613 /*
1614 * This is just a basic pre-condition check; if interface combinations
1615 * are possible the driver must already be checking those with a call
1616 * to cfg80211_check_combinations(), in which case we'll validate more
1617 * through the cfg80211_calculate_bi_data() call and code in
1618 * cfg80211_iter_combinations().
1619 */
1620
1621 if (beacon_int < 10 || beacon_int > 10000)
1622 return -EINVAL;
1623
1624 return 0;
1577} 1625}
1578 1626
1579int cfg80211_iter_combinations(struct wiphy *wiphy, 1627int cfg80211_iter_combinations(struct wiphy *wiphy,
1580 const int num_different_channels, 1628 struct iface_combination_params *params,
1581 const u8 radar_detect,
1582 const int iftype_num[NUM_NL80211_IFTYPES],
1583 void (*iter)(const struct ieee80211_iface_combination *c, 1629 void (*iter)(const struct ieee80211_iface_combination *c,
1584 void *data), 1630 void *data),
1585 void *data) 1631 void *data)
@@ -1589,8 +1635,23 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
1589 int i, j, iftype; 1635 int i, j, iftype;
1590 int num_interfaces = 0; 1636 int num_interfaces = 0;
1591 u32 used_iftypes = 0; 1637 u32 used_iftypes = 0;
1638 u32 beacon_int_gcd;
1639 bool beacon_int_different;
1640
1641 /*
1642 * This is a bit strange, since the iteration used to rely only on
1643 * the data given by the driver, but here it now relies on context,
1644 * in form of the currently operating interfaces.
1645 * This is OK for all current users, and saves us from having to
1646 * push the GCD calculations into all the drivers.
1647 * In the future, this should probably rely more on data that's in
1648 * cfg80211 already - the only thing not would appear to be any new
1649 * interfaces (while being brought up) and channel/radar data.
1650 */
1651 cfg80211_calculate_bi_data(wiphy, params->new_beacon_int,
1652 &beacon_int_gcd, &beacon_int_different);
1592 1653
1593 if (radar_detect) { 1654 if (params->radar_detect) {
1594 rcu_read_lock(); 1655 rcu_read_lock();
1595 regdom = rcu_dereference(cfg80211_regdomain); 1656 regdom = rcu_dereference(cfg80211_regdomain);
1596 if (regdom) 1657 if (regdom)
@@ -1599,8 +1660,8 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
1599 } 1660 }
1600 1661
1601 for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { 1662 for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
1602 num_interfaces += iftype_num[iftype]; 1663 num_interfaces += params->iftype_num[iftype];
1603 if (iftype_num[iftype] > 0 && 1664 if (params->iftype_num[iftype] > 0 &&
1604 !(wiphy->software_iftypes & BIT(iftype))) 1665 !(wiphy->software_iftypes & BIT(iftype)))
1605 used_iftypes |= BIT(iftype); 1666 used_iftypes |= BIT(iftype);
1606 } 1667 }
@@ -1614,7 +1675,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
1614 1675
1615 if (num_interfaces > c->max_interfaces) 1676 if (num_interfaces > c->max_interfaces)
1616 continue; 1677 continue;
1617 if (num_different_channels > c->num_different_channels) 1678 if (params->num_different_channels > c->num_different_channels)
1618 continue; 1679 continue;
1619 1680
1620 limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, 1681 limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
@@ -1629,16 +1690,17 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
1629 all_iftypes |= limits[j].types; 1690 all_iftypes |= limits[j].types;
1630 if (!(limits[j].types & BIT(iftype))) 1691 if (!(limits[j].types & BIT(iftype)))
1631 continue; 1692 continue;
1632 if (limits[j].max < iftype_num[iftype]) 1693 if (limits[j].max < params->iftype_num[iftype])
1633 goto cont; 1694 goto cont;
1634 limits[j].max -= iftype_num[iftype]; 1695 limits[j].max -= params->iftype_num[iftype];
1635 } 1696 }
1636 } 1697 }
1637 1698
1638 if (radar_detect != (c->radar_detect_widths & radar_detect)) 1699 if (params->radar_detect !=
1700 (c->radar_detect_widths & params->radar_detect))
1639 goto cont; 1701 goto cont;
1640 1702
1641 if (radar_detect && c->radar_detect_regions && 1703 if (params->radar_detect && c->radar_detect_regions &&
1642 !(c->radar_detect_regions & BIT(region))) 1704 !(c->radar_detect_regions & BIT(region)))
1643 goto cont; 1705 goto cont;
1644 1706
@@ -1650,6 +1712,14 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
1650 if ((all_iftypes & used_iftypes) != used_iftypes) 1712 if ((all_iftypes & used_iftypes) != used_iftypes)
1651 goto cont; 1713 goto cont;
1652 1714
1715 if (beacon_int_gcd) {
1716 if (c->beacon_int_min_gcd &&
1717 beacon_int_gcd < c->beacon_int_min_gcd)
1718 goto cont;
1719 if (!c->beacon_int_min_gcd && beacon_int_different)
1720 goto cont;
1721 }
1722
1653 /* This combination covered all interface types and 1723 /* This combination covered all interface types and
1654 * supported the requested numbers, so we're good. 1724 * supported the requested numbers, so we're good.
1655 */ 1725 */
@@ -1672,14 +1742,11 @@ cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c,
1672} 1742}
1673 1743
1674int cfg80211_check_combinations(struct wiphy *wiphy, 1744int cfg80211_check_combinations(struct wiphy *wiphy,
1675 const int num_different_channels, 1745 struct iface_combination_params *params)
1676 const u8 radar_detect,
1677 const int iftype_num[NUM_NL80211_IFTYPES])
1678{ 1746{
1679 int err, num = 0; 1747 int err, num = 0;
1680 1748
1681 err = cfg80211_iter_combinations(wiphy, num_different_channels, 1749 err = cfg80211_iter_combinations(wiphy, params,
1682 radar_detect, iftype_num,
1683 cfg80211_iter_sum_ifcombs, &num); 1750 cfg80211_iter_sum_ifcombs, &num);
1684 if (err) 1751 if (err)
1685 return err; 1752 return err;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f83b74d3e2ac..079c883aa96e 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -51,7 +51,7 @@
51#include <linux/slab.h> 51#include <linux/slab.h>
52#include <net/sock.h> 52#include <net/sock.h>
53#include <net/tcp_states.h> 53#include <net/tcp_states.h>
54#include <asm/uaccess.h> 54#include <linux/uaccess.h>
55#include <linux/fcntl.h> 55#include <linux/fcntl.h>
56#include <linux/termios.h> /* For TIOCINQ/OUTQ */ 56#include <linux/termios.h> /* For TIOCINQ/OUTQ */
57#include <linux/notifier.h> 57#include <linux/notifier.h>
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index 43239527a205..a06dfe143c67 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -70,7 +70,7 @@ static struct ctl_table x25_table[] = {
70 .mode = 0644, 70 .mode = 0644,
71 .proc_handler = proc_dointvec, 71 .proc_handler = proc_dointvec,
72 }, 72 },
73 { 0, }, 73 { },
74}; 74};
75 75
76void __init x25_register_sysctl(void) 76void __init x25_register_sysctl(void)
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index fd5ffb25873f..bcaa180d6a3f 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -29,7 +29,7 @@
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/netdevice.h> 30#include <linux/netdevice.h>
31#include <linux/skbuff.h> 31#include <linux/skbuff.h>
32#include <asm/uaccess.h> 32#include <linux/uaccess.h>
33#include <linux/init.h> 33#include <linux/init.h>
34#include <net/x25.h> 34#include <net/x25.h>
35 35
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5bf7e1bfeac7..177e208e8ff5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3113,6 +3113,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
3113 3113
3114void __init xfrm_init(void) 3114void __init xfrm_init(void)
3115{ 3115{
3116 flow_cache_hp_init();
3116 register_pernet_subsys(&xfrm_net_ops); 3117 register_pernet_subsys(&xfrm_net_ops);
3117 seqcount_init(&xfrm_policy_hash_generation); 3118 seqcount_init(&xfrm_policy_hash_generation);
3118 xfrm_input_init(); 3119 xfrm_input_init();
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 419bf5d463bd..64e3c82eedf6 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -20,7 +20,7 @@
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/cache.h> 21#include <linux/cache.h>
22#include <linux/audit.h> 22#include <linux/audit.h>
23#include <asm/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/ktime.h> 24#include <linux/ktime.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/interrupt.h> 26#include <linux/interrupt.h>
@@ -388,14 +388,6 @@ static void xfrm_state_gc_task(struct work_struct *work)
388 xfrm_state_gc_destroy(x); 388 xfrm_state_gc_destroy(x);
389} 389}
390 390
391static inline unsigned long make_jiffies(long secs)
392{
393 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
394 return MAX_SCHEDULE_TIMEOUT-1;
395 else
396 return secs*HZ;
397}
398
399static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) 391static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
400{ 392{
401 struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); 393 struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
@@ -1412,7 +1404,7 @@ int xfrm_state_check_expire(struct xfrm_state *x)
1412 if (x->curlft.bytes >= x->lft.hard_byte_limit || 1404 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1413 x->curlft.packets >= x->lft.hard_packet_limit) { 1405 x->curlft.packets >= x->lft.hard_packet_limit) {
1414 x->km.state = XFRM_STATE_EXPIRED; 1406 x->km.state = XFRM_STATE_EXPIRED;
1415 tasklet_hrtimer_start(&x->mtimer, ktime_set(0, 0), HRTIMER_MODE_REL); 1407 tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL);
1416 return -EINVAL; 1408 return -EINVAL;
1417 } 1409 }
1418 1410
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 671a1d0333f0..9705c279494b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -27,7 +27,7 @@
27#include <net/xfrm.h> 27#include <net/xfrm.h>
28#include <net/netlink.h> 28#include <net/netlink.h>
29#include <net/ah.h> 29#include <net/ah.h>
30#include <asm/uaccess.h> 30#include <linux/uaccess.h>
31#if IS_ENABLED(CONFIG_IPV6) 31#if IS_ENABLED(CONFIG_IPV6)
32#include <linux/in6.h> 32#include <linux/in6.h>
33#endif 33#endif