Diffstat (limited to 'net')
-rw-r--r-- net/6lowpan/core.c | 5
-rw-r--r-- net/6lowpan/iphc.c | 690
-rw-r--r-- net/6lowpan/nhc.c | 16
-rw-r--r-- net/6lowpan/nhc.h | 14
-rw-r--r-- net/6lowpan/nhc_udp.c | 35
-rw-r--r-- net/8021q/vlan_core.c | 14
-rw-r--r-- net/9p/trans_rdma.c | 4
-rw-r--r-- net/Kconfig | 1
-rw-r--r-- net/Makefile | 3
-rw-r--r-- net/bluetooth/6lowpan.c | 162
-rw-r--r-- net/bluetooth/af_bluetooth.c | 4
-rw-r--r-- net/bluetooth/hci_conn.c | 246
-rw-r--r-- net/bluetooth/hci_core.c | 252
-rw-r--r-- net/bluetooth/hci_event.c | 39
-rw-r--r-- net/bluetooth/hci_request.c | 103
-rw-r--r-- net/bluetooth/hci_request.h | 4
-rw-r--r-- net/bluetooth/hci_sock.c | 109
-rw-r--r-- net/bluetooth/hidp/core.c | 14
-rw-r--r-- net/bluetooth/l2cap_core.c | 20
-rw-r--r-- net/bluetooth/l2cap_sock.c | 71
-rw-r--r-- net/bluetooth/lib.c | 32
-rw-r--r-- net/bluetooth/mgmt.c | 202
-rw-r--r-- net/bluetooth/sco.c | 44
-rw-r--r-- net/bluetooth/smp.c | 62
-rw-r--r-- net/bluetooth/smp.h | 1
-rw-r--r-- net/bridge/br_device.c | 4
-rw-r--r-- net/bridge/br_fdb.c | 216
-rw-r--r-- net/bridge/br_forward.c | 33
-rw-r--r-- net/bridge/br_if.c | 3
-rw-r--r-- net/bridge/br_input.c | 35
-rw-r--r-- net/bridge/br_ioctl.c | 3
-rw-r--r-- net/bridge/br_mdb.c | 24
-rw-r--r-- net/bridge/br_multicast.c | 4
-rw-r--r-- net/bridge/br_netfilter_hooks.c | 107
-rw-r--r-- net/bridge/br_netfilter_ipv6.c | 21
-rw-r--r-- net/bridge/br_netlink.c | 522
-rw-r--r-- net/bridge/br_private.h | 208
-rw-r--r-- net/bridge/br_stp.c | 26
-rw-r--r-- net/bridge/br_stp_bpdu.c | 12
-rw-r--r-- net/bridge/br_stp_if.c | 12
-rw-r--r-- net/bridge/br_sysfs_br.c | 11
-rw-r--r-- net/bridge/br_vlan.c | 774
-rw-r--r-- net/bridge/netfilter/ebt_log.c | 2
-rw-r--r-- net/bridge/netfilter/ebt_nflog.c | 2
-rw-r--r-- net/bridge/netfilter/ebtable_broute.c | 8
-rw-r--r-- net/bridge/netfilter/ebtable_filter.c | 13
-rw-r--r-- net/bridge/netfilter/ebtable_nat.c | 13
-rw-r--r-- net/bridge/netfilter/ebtables.c | 14
-rw-r--r-- net/bridge/netfilter/nf_tables_bridge.c | 20
-rw-r--r-- net/bridge/netfilter/nft_reject_bridge.c | 19
-rw-r--r-- net/can/bcm.c | 15
-rw-r--r-- net/ceph/auth_x.c | 36
-rw-r--r-- net/ceph/ceph_common.c | 20
-rw-r--r-- net/ceph/crypto.c | 6
-rw-r--r-- net/ceph/crypto.h | 4
-rw-r--r-- net/ceph/messenger.c | 88
-rw-r--r-- net/ceph/osd_client.c | 47
-rw-r--r-- net/core/dev.c | 151
-rw-r--r-- net/core/dst.c | 16
-rw-r--r-- net/core/ethtool.c | 2
-rw-r--r-- net/core/filter.c | 142
-rw-r--r-- net/core/lwtunnel.c | 4
-rw-r--r-- net/core/neighbour.c | 47
-rw-r--r-- net/core/net-sysfs.c | 11
-rw-r--r-- net/core/netpoll.c | 23
-rw-r--r-- net/core/ptp_classifier.c | 16
-rw-r--r-- net/core/request_sock.c | 88
-rw-r--r-- net/core/rtnetlink.c | 296
-rw-r--r-- net/core/skbuff.c | 11
-rw-r--r-- net/core/sock.c | 81
-rw-r--r-- net/core/sock_diag.c | 14
-rw-r--r-- net/core/tso.c | 18
-rw-r--r-- net/core/utils.c | 49
-rw-r--r-- net/dcb/dcbnl.c | 30
-rw-r--r-- net/dccp/dccp.h | 16
-rw-r--r-- net/dccp/ipv4.c | 94
-rw-r--r-- net/dccp/ipv6.c | 138
-rw-r--r-- net/dccp/minisocks.c | 18
-rw-r--r-- net/dccp/output.c | 17
-rw-r--r-- net/dccp/probe.c | 11
-rw-r--r-- net/decnet/dn_neigh.c | 23
-rw-r--r-- net/decnet/dn_nsp_in.c | 7
-rw-r--r-- net/decnet/dn_nsp_out.c | 4
-rw-r--r-- net/decnet/dn_route.c | 38
-rw-r--r-- net/decnet/netfilter/dn_rtmsg.c | 2
-rw-r--r-- net/dns_resolver/dns_key.c | 20
-rw-r--r-- net/dns_resolver/dns_query.c | 7
-rw-r--r-- net/dns_resolver/internal.h | 8
-rw-r--r-- net/dsa/dsa.c | 74
-rw-r--r-- net/dsa/slave.c | 184
-rw-r--r-- net/ethernet/eth.c | 2
-rw-r--r-- net/ieee802154/6lowpan/6lowpan_i.h | 14
-rw-r--r-- net/ieee802154/6lowpan/core.c | 126
-rw-r--r-- net/ieee802154/6lowpan/reassembly.c | 168
-rw-r--r-- net/ieee802154/6lowpan/rx.c | 357
-rw-r--r-- net/ieee802154/6lowpan/tx.c | 95
-rw-r--r-- net/ieee802154/Kconfig | 5
-rw-r--r-- net/ieee802154/core.c | 12
-rw-r--r-- net/ieee802154/core.h | 1
-rw-r--r-- net/ieee802154/header_ops.c | 20
-rw-r--r-- net/ieee802154/nl802154.c | 1133
-rw-r--r-- net/ieee802154/rdev-ops.h | 109
-rw-r--r-- net/ieee802154/socket.c | 8
-rw-r--r-- net/ipv4/Makefile | 1
-rw-r--r-- net/ipv4/af_inet.c | 30
-rw-r--r-- net/ipv4/arp.c | 28
-rw-r--r-- net/ipv4/devinet.c | 7
-rw-r--r-- net/ipv4/fib_frontend.c | 39
-rw-r--r-- net/ipv4/fib_semantics.c | 193
-rw-r--r-- net/ipv4/fib_trie.c | 2
-rw-r--r-- net/ipv4/gre_offload.c | 3
-rw-r--r-- net/ipv4/icmp.c | 27
-rw-r--r-- net/ipv4/igmp.c | 18
-rw-r--r-- net/ipv4/inet_connection_sock.c | 270
-rw-r--r-- net/ipv4/inet_diag.c | 96
-rw-r--r-- net/ipv4/inet_fragment.c | 6
-rw-r--r-- net/ipv4/inet_hashtables.c | 53
-rw-r--r-- net/ipv4/ip_forward.c | 19
-rw-r--r-- net/ipv4/ip_fragment.c | 25
-rw-r--r-- net/ipv4/ip_gre.c | 46
-rw-r--r-- net/ipv4/ip_input.c | 47
-rw-r--r-- net/ipv4/ip_output.c | 148
-rw-r--r-- net/ipv4/ip_sockglue.c | 45
-rw-r--r-- net/ipv4/ip_tunnel_core.c | 6
-rw-r--r-- net/ipv4/ip_vti.c | 2
-rw-r--r-- net/ipv4/ipconfig.c | 32
-rw-r--r-- net/ipv4/ipmr.c | 15
-rw-r--r-- net/ipv4/netfilter.c | 7
-rw-r--r-- net/ipv4/netfilter/Kconfig | 1
-rw-r--r-- net/ipv4/netfilter/arp_tables.c | 15
-rw-r--r-- net/ipv4/netfilter/arptable_filter.c | 7
-rw-r--r-- net/ipv4/netfilter/ip_tables.c | 31
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 12
-rw-r--r-- net/ipv4/netfilter/ipt_REJECT.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_SYNPROXY.c | 32
-rw-r--r-- net/ipv4/netfilter/ipt_ah.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_rpfilter.c | 9
-rw-r--r-- net/ipv4/netfilter/iptable_filter.c | 9
-rw-r--r-- net/ipv4/netfilter/iptable_mangle.c | 19
-rw-r--r-- net/ipv4/netfilter/iptable_nat.c | 26
-rw-r--r-- net/ipv4/netfilter/iptable_raw.c | 9
-rw-r--r-- net/ipv4/netfilter/iptable_security.c | 12
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 18
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 4
-rw-r--r-- net/ipv4/netfilter/nf_defrag_ipv4.c | 18
-rw-r--r-- net/ipv4/netfilter/nf_dup_ipv4.c | 25
-rw-r--r-- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 44
-rw-r--r-- net/ipv4/netfilter/nf_nat_pptp.c | 2
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c | 2
-rw-r--r-- net/ipv4/netfilter/nf_reject_ipv4.c | 6
-rw-r--r-- net/ipv4/netfilter/nf_tables_arp.c | 6
-rw-r--r-- net/ipv4/netfilter/nf_tables_ipv4.c | 10
-rw-r--r-- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 22
-rw-r--r-- net/ipv4/netfilter/nft_chain_route_ipv4.c | 8
-rw-r--r-- net/ipv4/netfilter/nft_dup_ipv4.c | 2
-rw-r--r-- net/ipv4/netfilter/nft_masq_ipv4.c | 2
-rw-r--r-- net/ipv4/netfilter/nft_redir_ipv4.c | 2
-rw-r--r-- net/ipv4/netfilter/nft_reject_ipv4.c | 5
-rw-r--r-- net/ipv4/raw.c | 21
-rw-r--r-- net/ipv4/route.c | 216
-rw-r--r-- net/ipv4/syncookies.c | 23
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c | 18
-rw-r--r-- net/ipv4/tcp.c | 60
-rw-r--r-- net/ipv4/tcp_cong.c | 12
-rw-r--r-- net/ipv4/tcp_dctcp.c | 2
-rw-r--r-- net/ipv4/tcp_diag.c | 2
-rw-r--r-- net/ipv4/tcp_fastopen.c | 75
-rw-r--r-- net/ipv4/tcp_input.c | 280
-rw-r--r-- net/ipv4/tcp_ipv4.c | 219
-rw-r--r-- net/ipv4/tcp_minisocks.c | 71
-rw-r--r-- net/ipv4/tcp_output.c | 86
-rw-r--r-- net/ipv4/tcp_recovery.c | 109
-rw-r--r-- net/ipv4/tcp_timer.c | 6
-rw-r--r-- net/ipv4/udp.c | 28
-rw-r--r-- net/ipv4/xfrm4_input.c | 7
-rw-r--r-- net/ipv4/xfrm4_output.c | 13
-rw-r--r-- net/ipv4/xfrm4_policy.c | 59
-rw-r--r-- net/ipv6/addrconf.c | 79
-rw-r--r-- net/ipv6/datagram.c | 2
-rw-r--r-- net/ipv6/fib6_rules.c | 19
-rw-r--r-- net/ipv6/icmp.c | 18
-rw-r--r-- net/ipv6/ila.c | 4
-rw-r--r-- net/ipv6/inet6_connection_sock.c | 77
-rw-r--r-- net/ipv6/inet6_hashtables.c | 2
-rw-r--r-- net/ipv6/ip6_fib.c | 13
-rw-r--r-- net/ipv6/ip6_input.c | 15
-rw-r--r-- net/ipv6/ip6_offload.c | 12
-rw-r--r-- net/ipv6/ip6_output.c | 172
-rw-r--r-- net/ipv6/ip6_vti.c | 2
-rw-r--r-- net/ipv6/ip6mr.c | 12
-rw-r--r-- net/ipv6/mcast.c | 11
-rw-r--r-- net/ipv6/mip6.c | 16
-rw-r--r-- net/ipv6/ndisc.c | 50
-rw-r--r-- net/ipv6/netfilter.c | 7
-rw-r--r-- net/ipv6/netfilter/Kconfig | 1
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c | 33
-rw-r--r-- net/ipv6/netfilter/ip6t_REJECT.c | 2
-rw-r--r-- net/ipv6/netfilter/ip6t_SYNPROXY.c | 12
-rw-r--r-- net/ipv6/netfilter/ip6t_rpfilter.c | 6
-rw-r--r-- net/ipv6/netfilter/ip6table_filter.c | 6
-rw-r--r-- net/ipv6/netfilter/ip6table_mangle.c | 18
-rw-r--r-- net/ipv6/netfilter/ip6table_nat.c | 26
-rw-r--r-- net/ipv6/netfilter/ip6table_raw.c | 6
-rw-r--r-- net/ipv6/netfilter/ip6table_security.c | 7
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 18
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 7
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c | 21
-rw-r--r-- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 9
-rw-r--r-- net/ipv6/netfilter/nf_dup_ipv6.c | 25
-rw-r--r-- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 44
-rw-r--r-- net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | 2
-rw-r--r-- net/ipv6/netfilter/nf_reject_ipv6.c | 6
-rw-r--r-- net/ipv6/netfilter/nf_tables_ipv6.c | 10
-rw-r--r-- net/ipv6/netfilter/nft_chain_nat_ipv6.c | 22
-rw-r--r-- net/ipv6/netfilter/nft_chain_route_ipv6.c | 14
-rw-r--r-- net/ipv6/netfilter/nft_dup_ipv6.c | 2
-rw-r--r-- net/ipv6/netfilter/nft_redir_ipv6.c | 3
-rw-r--r-- net/ipv6/netfilter/nft_reject_ipv6.c | 7
-rw-r--r-- net/ipv6/output_core.c | 24
-rw-r--r-- net/ipv6/raw.c | 9
-rw-r--r-- net/ipv6/reassembly.c | 12
-rw-r--r-- net/ipv6/route.c | 187
-rw-r--r-- net/ipv6/sit.c | 26
-rw-r--r-- net/ipv6/syncookies.c | 13
-rw-r--r-- net/ipv6/tcp_ipv6.c | 228
-rw-r--r-- net/ipv6/tunnel6.c | 12
-rw-r--r-- net/ipv6/udp.c | 11
-rw-r--r-- net/ipv6/xfrm6_input.c | 4
-rw-r--r-- net/ipv6/xfrm6_output.c | 40
-rw-r--r-- net/ipv6/xfrm6_policy.c | 17
-rw-r--r-- net/irda/af_irda.c | 3
-rw-r--r-- net/irda/ircomm/ircomm_tty.c | 31
-rw-r--r-- net/irda/irlmp.c | 2
-rw-r--r-- net/iucv/af_iucv.c | 9
-rw-r--r-- net/iucv/iucv.c | 12
-rw-r--r-- net/key/af_key.c | 2
-rw-r--r-- net/l2tp/l2tp_core.h | 3
-rw-r--r-- net/l2tp/l2tp_eth.c | 1
-rw-r--r-- net/l2tp/l2tp_ip.c | 1
-rw-r--r-- net/l2tp/l2tp_ip6.c | 1
-rw-r--r-- net/l2tp/l2tp_netlink.c | 7
-rw-r--r-- net/l2tp/l2tp_ppp.c | 1
-rw-r--r-- net/l3mdev/Kconfig | 10
-rw-r--r-- net/l3mdev/Makefile | 5
-rw-r--r-- net/l3mdev/l3mdev.c | 92
-rw-r--r-- net/mac80211/Makefile | 1
-rw-r--r-- net/mac80211/agg-rx.c | 8
-rw-r--r-- net/mac80211/agg-tx.c | 15
-rw-r--r-- net/mac80211/cfg.c | 107
-rw-r--r-- net/mac80211/cfg.h | 9
-rw-r--r-- net/mac80211/debugfs.c | 4
-rw-r--r-- net/mac80211/debugfs_key.c | 51
-rw-r--r-- net/mac80211/debugfs_netdev.c | 41
-rw-r--r-- net/mac80211/debugfs_sta.c | 8
-rw-r--r-- net/mac80211/driver-ops.c | 268
-rw-r--r-- net/mac80211/driver-ops.h | 301
-rw-r--r-- net/mac80211/ethtool.c | 29
-rw-r--r-- net/mac80211/event.c | 27
-rw-r--r-- net/mac80211/ibss.c | 28
-rw-r--r-- net/mac80211/ieee80211_i.h | 38
-rw-r--r-- net/mac80211/iface.c | 14
-rw-r--r-- net/mac80211/main.c | 19
-rw-r--r-- net/mac80211/mesh.c | 85
-rw-r--r-- net/mac80211/mesh.h | 10
-rw-r--r-- net/mac80211/mesh_hwmp.c | 2
-rw-r--r-- net/mac80211/mesh_plink.c | 18
-rw-r--r-- net/mac80211/mlme.c | 413
-rw-r--r-- net/mac80211/ocb.c | 2
-rw-r--r-- net/mac80211/offchannel.c | 6
-rw-r--r-- net/mac80211/pm.c | 14
-rw-r--r-- net/mac80211/rate.c | 5
-rw-r--r-- net/mac80211/rc80211_minstrel_debugfs.c | 12
-rw-r--r-- net/mac80211/rc80211_minstrel_ht_debugfs.c | 12
-rw-r--r-- net/mac80211/rx.c | 81
-rw-r--r-- net/mac80211/scan.c | 32
-rw-r--r-- net/mac80211/sta_info.c | 109
-rw-r--r-- net/mac80211/sta_info.h | 105
-rw-r--r-- net/mac80211/status.c | 162
-rw-r--r-- net/mac80211/tdls.c | 21
-rw-r--r-- net/mac80211/trace.h | 52
-rw-r--r-- net/mac80211/tx.c | 49
-rw-r--r-- net/mac80211/util.c | 105
-rw-r--r-- net/mac80211/wpa.c | 9
-rw-r--r-- net/mac802154/cfg.c | 205
-rw-r--r-- net/mac802154/iface.c | 118
-rw-r--r-- net/mac802154/llsec.c | 21
-rw-r--r-- net/mac802154/rx.c | 4
-rw-r--r-- net/mac802154/tx.c | 7
-rw-r--r-- net/mpls/af_mpls.c | 636
-rw-r--r-- net/mpls/internal.h | 74
-rw-r--r-- net/mpls/mpls_iptunnel.c | 2
-rw-r--r-- net/netfilter/Kconfig | 21
-rw-r--r-- net/netfilter/Makefile | 2
-rw-r--r-- net/netfilter/core.c | 15
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_gen.h | 17
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ip.c | 14
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 64
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_port.c | 18
-rw-r--r-- net/netfilter/ipset/ip_set_core.c | 23
-rw-r--r-- net/netfilter/ipset/ip_set_hash_gen.h | 26
-rw-r--r-- net/netfilter/ipset/ip_set_list_set.c | 7
-rw-r--r-- net/netfilter/ipvs/ip_vs_app.c | 36
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c | 91
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c | 550
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c | 291
-rw-r--r-- net/netfilter/ipvs/ip_vs_est.c | 20
-rw-r--r-- net/netfilter/ipvs/ip_vs_ftp.c | 27
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblc.c | 3
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c | 3
-rw-r--r-- net/netfilter/ipvs/ip_vs_nfct.c | 5
-rw-r--r-- net/netfilter/ipvs/ip_vs_pe_sip.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto.c | 33
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 32
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 58
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 61
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_udp.c | 49
-rw-r--r-- net/netfilter/ipvs/ip_vs_sh.c | 45
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c | 87
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c | 85
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 22
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 98
-rw-r--r-- net/netfilter/nf_conntrack_proto_dccp.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_proto_generic.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_proto_gre.c | 3
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_proto_udp.c | 1
-rw-r--r-- net/netfilter/nf_conntrack_proto_udplite.c | 1
-rw-r--r-- net/netfilter/nf_nat_core.c | 4
-rw-r--r-- net/netfilter/nf_nat_redirect.c | 2
-rw-r--r-- net/netfilter/nf_queue.c | 42
-rw-r--r-- net/netfilter/nf_tables_api.c | 1
-rw-r--r-- net/netfilter/nf_tables_core.c | 10
-rw-r--r-- net/netfilter/nf_tables_netdev.c | 20
-rw-r--r-- net/netfilter/nfnetlink.c | 4
-rw-r--r-- net/netfilter/nfnetlink_cttimeout.c | 34
-rw-r--r-- net/netfilter/nfnetlink_log.c | 89
-rw-r--r-- net/netfilter/nfnetlink_queue.c (renamed from net/netfilter/nfnetlink_queue_core.c) | 73
-rw-r--r-- net/netfilter/nfnetlink_queue_ct.c | 113
-rw-r--r-- net/netfilter/nft_counter.c | 49
-rw-r--r-- net/netfilter/nft_dynset.c | 5
-rw-r--r-- net/netfilter/nft_log.c | 3
-rw-r--r-- net/netfilter/nft_meta.c | 40
-rw-r--r-- net/netfilter/nft_queue.c | 2
-rw-r--r-- net/netfilter/nft_reject_inet.c | 19
-rw-r--r-- net/netfilter/x_tables.c | 1
-rw-r--r-- net/netfilter/xt_CT.c | 7
-rw-r--r-- net/netfilter/xt_LOG.c | 2
-rw-r--r-- net/netfilter/xt_NFLOG.c | 2
-rw-r--r-- net/netfilter/xt_TCPMSS.c | 2
-rw-r--r-- net/netfilter/xt_TEE.c | 6
-rw-r--r-- net/netfilter/xt_TPROXY.c | 24
-rw-r--r-- net/netfilter/xt_addrtype.c | 4
-rw-r--r-- net/netfilter/xt_connlimit.c | 4
-rw-r--r-- net/netfilter/xt_ipvs.c | 5
-rw-r--r-- net/netfilter/xt_osf.c | 2
-rw-r--r-- net/netfilter/xt_owner.c | 6
-rw-r--r-- net/netfilter/xt_recent.c | 2
-rw-r--r-- net/netfilter/xt_socket.c | 14
-rw-r--r-- net/netlink/af_netlink.c | 40
-rw-r--r-- net/netlink/genetlink.c | 14
-rw-r--r-- net/nfc/core.c | 4
-rw-r--r-- net/nfc/digital_core.c | 3
-rw-r--r-- net/nfc/hci/core.c | 3
-rw-r--r-- net/nfc/hci/llc.c | 2
-rw-r--r-- net/nfc/nci/Kconfig | 2
-rw-r--r-- net/nfc/nci/Makefile | 3
-rw-r--r-- net/nfc/nci/core.c | 150
-rw-r--r-- net/nfc/nci/data.c | 13
-rw-r--r-- net/nfc/nci/hci.c | 167
-rw-r--r-- net/nfc/nci/ntf.c | 3
-rw-r--r-- net/nfc/nci/rsp.c | 1
-rw-r--r-- net/nfc/nci/spi.c | 11
-rw-r--r-- net/nfc/netlink.c | 8
-rw-r--r-- net/nfc/nfc.h | 5
-rw-r--r-- net/nfc/rawsock.c | 3
-rw-r--r-- net/openvswitch/actions.c | 45
-rw-r--r-- net/openvswitch/conntrack.c | 143
-rw-r--r-- net/openvswitch/conntrack.h | 9
-rw-r--r-- net/openvswitch/datapath.c | 10
-rw-r--r-- net/openvswitch/datapath.h | 1
-rw-r--r-- net/openvswitch/flow.c | 4
-rw-r--r-- net/openvswitch/flow.h | 3
-rw-r--r-- net/openvswitch/flow_netlink.c | 171
-rw-r--r-- net/openvswitch/flow_netlink.h | 6
-rw-r--r-- net/openvswitch/flow_table.c | 5
-rw-r--r-- net/openvswitch/vport-geneve.c | 15
-rw-r--r-- net/openvswitch/vport-gre.c | 10
-rw-r--r-- net/openvswitch/vport-internal_dev.c | 54
-rw-r--r-- net/openvswitch/vport-netdev.c | 33
-rw-r--r-- net/openvswitch/vport-netdev.h | 1
-rw-r--r-- net/openvswitch/vport-vxlan.c | 21
-rw-r--r-- net/openvswitch/vport.c | 121
-rw-r--r-- net/openvswitch/vport.h | 23
-rw-r--r-- net/packet/af_packet.c | 206
-rw-r--r-- net/rds/af_rds.c | 16
-rw-r--r-- net/rds/bind.c | 124
-rw-r--r-- net/rds/connection.c | 22
-rw-r--r-- net/rds/ib.c | 49
-rw-r--r-- net/rds/ib.h | 84
-rw-r--r-- net/rds/ib_cm.c | 116
-rw-r--r-- net/rds/ib_rdma.c | 116
-rw-r--r-- net/rds/ib_recv.c | 140
-rw-r--r-- net/rds/ib_send.c | 181
-rw-r--r-- net/rds/ib_stats.c | 22
-rw-r--r-- net/rds/iw.c | 2
-rw-r--r-- net/rds/iw.h | 9
-rw-r--r-- net/rds/iw_cm.c | 2
-rw-r--r-- net/rds/iw_rdma.c | 135
-rw-r--r-- net/rds/iw_send.c | 154
-rw-r--r-- net/rds/rdma_transport.c | 4
-rw-r--r-- net/rds/rds.h | 10
-rw-r--r-- net/rds/send.c | 19
-rw-r--r-- net/rds/tcp.c | 16
-rw-r--r-- net/rds/tcp_listen.c | 25
-rw-r--r-- net/rds/tcp_recv.c | 11
-rw-r--r-- net/rds/tcp_send.c | 8
-rw-r--r-- net/rds/threads.c | 2
-rw-r--r-- net/rxrpc/af_rxrpc.c | 2
-rw-r--r-- net/rxrpc/ar-connection.c | 6
-rw-r--r-- net/rxrpc/ar-internal.h | 4
-rw-r--r-- net/rxrpc/ar-key.c | 32
-rw-r--r-- net/rxrpc/ar-output.c | 2
-rw-r--r-- net/rxrpc/ar-security.c | 4
-rw-r--r-- net/rxrpc/ar-transport.c | 4
-rw-r--r-- net/rxrpc/rxkad.c | 16
-rw-r--r-- net/sched/act_bpf.c | 1
-rw-r--r-- net/sched/act_connmark.c | 5
-rw-r--r-- net/sched/act_ipt.c | 1
-rw-r--r-- net/sched/act_mirred.c | 18
-rw-r--r-- net/sched/cls_bpf.c | 82
-rw-r--r-- net/sched/cls_flow.c | 15
-rw-r--r-- net/sched/em_ipset.c | 1
-rw-r--r-- net/sched/em_meta.c | 138
-rw-r--r-- net/sched/sch_blackhole.c | 15
-rw-r--r-- net/sched/sch_choke.c | 59
-rw-r--r-- net/sched/sch_dsmark.c | 63
-rw-r--r-- net/sched/sch_fq.c | 13
-rw-r--r-- net/sched/sch_hhf.c | 11
-rw-r--r-- net/sctp/associola.c | 2
-rw-r--r-- net/sctp/auth.c | 4
-rw-r--r-- net/sctp/sm_make_chunk.c | 2
-rw-r--r-- net/sctp/sm_statefuns.c | 2
-rw-r--r-- net/sctp/socket.c | 2
-rw-r--r-- net/sctp/transport.c | 2
-rw-r--r-- net/socket.c | 6
-rw-r--r-- net/sunrpc/auth_gss/auth_gss.c | 13
-rw-r--r-- net/sunrpc/backchannel_rqst.c | 24
-rw-r--r-- net/sunrpc/cache.c | 53
-rw-r--r-- net/sunrpc/svc.c | 5
-rw-r--r-- net/sunrpc/svcsock.c | 40
-rw-r--r-- net/sunrpc/sysctl.c | 23
-rw-r--r-- net/sunrpc/xprtrdma/Makefile | 1
-rw-r--r-- net/sunrpc/xprtrdma/backchannel.c | 394
-rw-r--r-- net/sunrpc/xprtrdma/frwr_ops.c | 126
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c | 148
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma.c | 6
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 125
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 18
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_transport.c | 96
-rw-r--r-- net/sunrpc/xprtrdma/transport.c | 18
-rw-r--r-- net/sunrpc/xprtrdma/verbs.c | 490
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h | 57
-rw-r--r-- net/sunrpc/xprtsock.c | 260
-rw-r--r-- net/switchdev/switchdev.c | 641
-rw-r--r-- net/sysctl_net.c | 6
-rw-r--r-- net/tipc/bcast.c | 996
-rw-r--r-- net/tipc/bcast.h | 122
-rw-r--r-- net/tipc/bearer.c | 102
-rw-r--r-- net/tipc/bearer.h | 9
-rw-r--r-- net/tipc/core.c | 9
-rw-r--r-- net/tipc/core.h | 12
-rw-r--r-- net/tipc/discover.c | 28
-rw-r--r-- net/tipc/link.c | 933
-rw-r--r-- net/tipc/link.h | 81
-rw-r--r-- net/tipc/msg.c | 63
-rw-r--r-- net/tipc/msg.h | 46
-rw-r--r-- net/tipc/name_distr.c | 4
-rw-r--r-- net/tipc/net.c | 6
-rw-r--r-- net/tipc/node.c | 233
-rw-r--r-- net/tipc/node.h | 41
-rw-r--r-- net/tipc/socket.c | 4
-rw-r--r-- net/tipc/udp_media.c | 23
-rw-r--r-- net/unix/af_unix.c | 41
-rw-r--r-- net/vmw_vsock/af_vsock.c | 44
-rw-r--r-- net/vmw_vsock/vmci_transport.c | 179
-rw-r--r-- net/vmw_vsock/vmci_transport.h | 4
-rw-r--r-- net/wireless/Kconfig | 10
-rw-r--r-- net/wireless/core.c | 6
-rw-r--r-- net/wireless/core.h | 1
-rw-r--r-- net/wireless/nl80211.c | 536
-rw-r--r-- net/wireless/reg.c | 295
-rw-r--r-- net/wireless/scan.c | 61
-rw-r--r-- net/wireless/trace.h | 22
-rw-r--r-- net/xfrm/xfrm_input.c | 4
-rw-r--r-- net/xfrm/xfrm_output.c | 20
-rw-r--r-- net/xfrm/xfrm_policy.c | 19
-rw-r--r-- net/xfrm/xfrm_user.c | 9
498 files changed, 16992 insertions(+), 10949 deletions(-)
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index ae1896fa45e2..83b19e072224 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -17,6 +17,11 @@
 
 void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype)
 {
+	dev->addr_len = EUI64_ADDR_LEN;
+	dev->type = ARPHRD_6LOWPAN;
+	dev->mtu = IPV6_MIN_MTU;
+	dev->priv_flags |= IFF_NO_QUEUE;
+
 	lowpan_priv(dev)->lltype = lltype;
 }
 EXPORT_SYMBOL(lowpan_netdev_setup);
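With this hunk the generic 6LoWPAN netdevice defaults (EUI-64 address length, ARPHRD_6LOWPAN, the IPv6 minimum MTU and IFF_NO_QUEUE) move into the shared lowpan_netdev_setup(). A minimal sketch of what that leaves for a link-layer module's setup callback — the my_6lowpan_* names here are hypothetical, not part of this patch:

/* Hedged sketch: a 6LoWPAN link-layer module no longer duplicates the
 * generic netdev defaults; it fills in its own bits and delegates.
 */
static void my_6lowpan_setup(struct net_device *dev)
{
	/* addr_len, type, mtu and IFF_NO_QUEUE are set generically now */
	dev->netdev_ops = &my_6lowpan_netdev_ops;	/* assumed ops struct */
	lowpan_netdev_setup(dev, LOWPAN_LLTYPE_IEEE802154);
}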
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 1e0071fdcf72..346b5c1a9185 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -49,36 +49,178 @@
 #include <linux/bitops.h>
 #include <linux/if_arp.h>
 #include <linux/netdevice.h>
+
 #include <net/6lowpan.h>
 #include <net/ipv6.h>
-#include <net/af_ieee802154.h>
+
+/* special link-layer handling */
+#include <net/mac802154.h>
 
 #include "nhc.h"
 
+/* Values of fields within the IPHC encoding first byte */
+#define LOWPAN_IPHC_TF_MASK	0x18
+#define LOWPAN_IPHC_TF_00	0x00
+#define LOWPAN_IPHC_TF_01	0x08
+#define LOWPAN_IPHC_TF_10	0x10
+#define LOWPAN_IPHC_TF_11	0x18
+
+#define LOWPAN_IPHC_NH		0x04
+
+#define LOWPAN_IPHC_HLIM_MASK	0x03
+#define LOWPAN_IPHC_HLIM_00	0x00
+#define LOWPAN_IPHC_HLIM_01	0x01
+#define LOWPAN_IPHC_HLIM_10	0x02
+#define LOWPAN_IPHC_HLIM_11	0x03
+
+/* Values of fields within the IPHC encoding second byte */
+#define LOWPAN_IPHC_CID		0x80
+
+#define LOWPAN_IPHC_SAC		0x40
+
+#define LOWPAN_IPHC_SAM_MASK	0x30
+#define LOWPAN_IPHC_SAM_00	0x00
+#define LOWPAN_IPHC_SAM_01	0x10
+#define LOWPAN_IPHC_SAM_10	0x20
+#define LOWPAN_IPHC_SAM_11	0x30
+
+#define LOWPAN_IPHC_M		0x08
+
+#define LOWPAN_IPHC_DAC		0x04
+
+#define LOWPAN_IPHC_DAM_MASK	0x03
+#define LOWPAN_IPHC_DAM_00	0x00
+#define LOWPAN_IPHC_DAM_01	0x01
+#define LOWPAN_IPHC_DAM_10	0x02
+#define LOWPAN_IPHC_DAM_11	0x03
+
+/* ipv6 address based on mac
+ * second bit-flip (Universe/Local) is done according RFC2464
+ */
+#define is_addr_mac_addr_based(a, m) \
+	((((a)->s6_addr[8]) == (((m)[0]) ^ 0x02)) &&	\
+	 (((a)->s6_addr[9]) == (m)[1]) &&		\
+	 (((a)->s6_addr[10]) == (m)[2]) &&		\
+	 (((a)->s6_addr[11]) == (m)[3]) &&		\
+	 (((a)->s6_addr[12]) == (m)[4]) &&		\
+	 (((a)->s6_addr[13]) == (m)[5]) &&		\
+	 (((a)->s6_addr[14]) == (m)[6]) &&		\
+	 (((a)->s6_addr[15]) == (m)[7]))
+
+/* check whether we can compress the IID to 16 bits,
+ * it's possible for unicast addresses with first 49 bits are zero only.
+ */
+#define lowpan_is_iid_16_bit_compressable(a)	\
+	((((a)->s6_addr16[4]) == 0) &&		\
+	 (((a)->s6_addr[10]) == 0) &&		\
+	 (((a)->s6_addr[11]) == 0xff) &&	\
+	 (((a)->s6_addr[12]) == 0xfe) &&	\
+	 (((a)->s6_addr[13]) == 0))
+
+/* check whether the 112-bit gid of the multicast address is mappable to: */
+
+/* 48 bits, FFXX::00XX:XXXX:XXXX */
+#define lowpan_is_mcast_addr_compressable48(a)	\
+	((((a)->s6_addr16[1]) == 0) &&		\
+	 (((a)->s6_addr16[2]) == 0) &&		\
+	 (((a)->s6_addr16[3]) == 0) &&		\
+	 (((a)->s6_addr16[4]) == 0) &&		\
+	 (((a)->s6_addr[10]) == 0))
+
+/* 32 bits, FFXX::00XX:XXXX */
+#define lowpan_is_mcast_addr_compressable32(a)	\
+	((((a)->s6_addr16[1]) == 0) &&		\
+	 (((a)->s6_addr16[2]) == 0) &&		\
+	 (((a)->s6_addr16[3]) == 0) &&		\
+	 (((a)->s6_addr16[4]) == 0) &&		\
+	 (((a)->s6_addr16[5]) == 0) &&		\
+	 (((a)->s6_addr[12]) == 0))
+
+/* 8 bits, FF02::00XX */
+#define lowpan_is_mcast_addr_compressable8(a)	\
+	((((a)->s6_addr[1]) == 2) &&		\
+	 (((a)->s6_addr16[1]) == 0) &&		\
+	 (((a)->s6_addr16[2]) == 0) &&		\
+	 (((a)->s6_addr16[3]) == 0) &&		\
+	 (((a)->s6_addr16[4]) == 0) &&		\
+	 (((a)->s6_addr16[5]) == 0) &&		\
+	 (((a)->s6_addr16[6]) == 0) &&		\
+	 (((a)->s6_addr[14]) == 0))
+
+static inline void iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr,
+						const void *lladdr)
+{
+	/* fe:80::XXXX:XXXX:XXXX:XXXX
+	 *        \_________________/
+	 *              hwaddr
+	 */
+	ipaddr->s6_addr[0] = 0xFE;
+	ipaddr->s6_addr[1] = 0x80;
+	memcpy(&ipaddr->s6_addr[8], lladdr, EUI64_ADDR_LEN);
+	/* second bit-flip (Universe/Local)
+	 * is done according RFC2464
+	 */
+	ipaddr->s6_addr[8] ^= 0x02;
+}
+
+static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr,
+						 const void *lladdr)
+{
+	const struct ieee802154_addr *addr = lladdr;
+	u8 eui64[EUI64_ADDR_LEN] = { };
+
+	switch (addr->mode) {
+	case IEEE802154_ADDR_LONG:
+		ieee802154_le64_to_be64(eui64, &addr->extended_addr);
+		iphc_uncompress_eui64_lladdr(ipaddr, eui64);
+		break;
+	case IEEE802154_ADDR_SHORT:
+		/* fe:80::ff:fe00:XXXX
+		 *                \__/
+		 *             short_addr
+		 *
+		 * Universe/Local bit is zero.
+		 */
+		ipaddr->s6_addr[0] = 0xFE;
+		ipaddr->s6_addr[1] = 0x80;
+		ipaddr->s6_addr[11] = 0xFF;
+		ipaddr->s6_addr[12] = 0xFE;
+		ieee802154_le16_to_be16(&ipaddr->s6_addr16[7],
+					&addr->short_addr);
+		break;
+	default:
+		/* should never handled and filtered by 802154 6lowpan */
+		WARN_ON_ONCE(1);
+		break;
+	}
+}
+
 /* Uncompress address function for source and
  * destination address(non-multicast).
  *
- * address_mode is sam value or dam value.
+ * address_mode is the masked value for sam or dam value
  */
-static int uncompress_addr(struct sk_buff *skb,
-			   struct in6_addr *ipaddr, const u8 address_mode,
-			   const u8 *lladdr, const u8 addr_type,
-			   const u8 addr_len)
+static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev,
+			   struct in6_addr *ipaddr, u8 address_mode,
+			   const void *lladdr)
 {
 	bool fail;
 
 	switch (address_mode) {
-	case LOWPAN_IPHC_ADDR_00:
+	/* SAM and DAM are the same here */
+	case LOWPAN_IPHC_DAM_00:
 		/* for global link addresses */
 		fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16);
 		break;
-	case LOWPAN_IPHC_ADDR_01:
+	case LOWPAN_IPHC_SAM_01:
+	case LOWPAN_IPHC_DAM_01:
 		/* fe:80::XXXX:XXXX:XXXX:XXXX */
 		ipaddr->s6_addr[0] = 0xFE;
 		ipaddr->s6_addr[1] = 0x80;
 		fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8);
 		break;
-	case LOWPAN_IPHC_ADDR_02:
+	case LOWPAN_IPHC_SAM_10:
+	case LOWPAN_IPHC_DAM_10:
 		/* fe:80::ff:fe00:XXXX */
 		ipaddr->s6_addr[0] = 0xFE;
 		ipaddr->s6_addr[1] = 0x80;
@@ -86,38 +228,16 @@ static int uncompress_addr(struct sk_buff *skb,
 		ipaddr->s6_addr[12] = 0xFE;
 		fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2);
 		break;
-	case LOWPAN_IPHC_ADDR_03:
+	case LOWPAN_IPHC_SAM_11:
+	case LOWPAN_IPHC_DAM_11:
 		fail = false;
-		switch (addr_type) {
-		case IEEE802154_ADDR_LONG:
-			/* fe:80::XXXX:XXXX:XXXX:XXXX
-			 *        \_________________/
-			 *              hwaddr
-			 */
-			ipaddr->s6_addr[0] = 0xFE;
-			ipaddr->s6_addr[1] = 0x80;
-			memcpy(&ipaddr->s6_addr[8], lladdr, addr_len);
-			/* second bit-flip (Universe/Local)
-			 * is done according RFC2464
-			 */
-			ipaddr->s6_addr[8] ^= 0x02;
-			break;
-		case IEEE802154_ADDR_SHORT:
-			/* fe:80::ff:fe00:XXXX
-			 *                \__/
-			 *             short_addr
-			 *
-			 * Universe/Local bit is zero.
-			 */
-			ipaddr->s6_addr[0] = 0xFE;
-			ipaddr->s6_addr[1] = 0x80;
-			ipaddr->s6_addr[11] = 0xFF;
-			ipaddr->s6_addr[12] = 0xFE;
-			ipaddr->s6_addr16[7] = htons(*((u16 *)lladdr));
+		switch (lowpan_priv(dev)->lltype) {
+		case LOWPAN_LLTYPE_IEEE802154:
+			iphc_uncompress_802154_lladdr(ipaddr, lladdr);
 			break;
 		default:
-			pr_debug("Invalid addr_type set\n");
-			return -EINVAL;
+			iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
+			break;
 		}
 		break;
 	default:
@@ -141,24 +261,25 @@ static int uncompress_addr(struct sk_buff *skb,
  */
 static int uncompress_context_based_src_addr(struct sk_buff *skb,
 					     struct in6_addr *ipaddr,
-					     const u8 sam)
+					     u8 address_mode)
 {
-	switch (sam) {
-	case LOWPAN_IPHC_ADDR_00:
+	switch (address_mode) {
+	case LOWPAN_IPHC_SAM_00:
 		/* unspec address ::
 		 * Do nothing, address is already ::
 		 */
 		break;
-	case LOWPAN_IPHC_ADDR_01:
+	case LOWPAN_IPHC_SAM_01:
 		/* TODO */
-	case LOWPAN_IPHC_ADDR_02:
+	case LOWPAN_IPHC_SAM_10:
 		/* TODO */
-	case LOWPAN_IPHC_ADDR_03:
+	case LOWPAN_IPHC_SAM_11:
 		/* TODO */
-		netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam);
+		netdev_warn(skb->dev, "SAM value 0x%x not supported\n",
+			    address_mode);
 		return -EINVAL;
 	default:
-		pr_debug("Invalid sam value: 0x%x\n", sam);
+		pr_debug("Invalid sam value: 0x%x\n", address_mode);
 		return -EINVAL;
 	}
 
@@ -174,11 +295,11 @@ static int uncompress_context_based_src_addr(struct sk_buff *skb,
  */
 static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
 					     struct in6_addr *ipaddr,
-					     const u8 dam)
+					     u8 address_mode)
 {
 	bool fail;
 
-	switch (dam) {
+	switch (address_mode) {
 	case LOWPAN_IPHC_DAM_00:
 		/* 00:  128 bits.  The full address
 		 * is carried in-line.
@@ -210,7 +331,7 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
 		fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1);
 		break;
 	default:
-		pr_debug("DAM value has a wrong value: 0x%x\n", dam);
+		pr_debug("DAM value has a wrong value: 0x%x\n", address_mode);
 		return -EINVAL;
 	}
 
@@ -225,77 +346,142 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
 	return 0;
 }
 
-/* TTL uncompression values */
-static const u8 lowpan_ttl_values[] = { 0, 1, 64, 255 };
-
-int
-lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
-			 const u8 *saddr, const u8 saddr_type,
-			 const u8 saddr_len, const u8 *daddr,
-			 const u8 daddr_type, const u8 daddr_len,
-			 u8 iphc0, u8 iphc1)
+/* get the ecn values from iphc tf format and set it to ipv6hdr */
+static inline void lowpan_iphc_tf_set_ecn(struct ipv6hdr *hdr, const u8 *tf)
 {
-	struct ipv6hdr hdr = {};
-	u8 tmp, num_context = 0;
-	int err;
+	/* get the two higher bits which is ecn */
+	u8 ecn = tf[0] & 0xc0;
 
-	raw_dump_table(__func__, "raw skb data dump uncompressed",
-		       skb->data, skb->len);
+	/* ECN takes 0x30 in hdr->flow_lbl[0] */
+	hdr->flow_lbl[0] |= (ecn >> 2);
+}
 
-	/* another if the CID flag is set */
-	if (iphc1 & LOWPAN_IPHC_CID) {
-		pr_debug("CID flag is set, increase header with one\n");
-		if (lowpan_fetch_skb(skb, &num_context, sizeof(num_context)))
-			return -EINVAL;
-	}
+/* get the dscp values from iphc tf format and set it to ipv6hdr */
+static inline void lowpan_iphc_tf_set_dscp(struct ipv6hdr *hdr, const u8 *tf)
+{
+	/* DSCP is at place after ECN */
+	u8 dscp = tf[0] & 0x3f;
 
-	hdr.version = 6;
+	/* The four highest bits need to be set at hdr->priority */
+	hdr->priority |= ((dscp & 0x3c) >> 2);
+	/* The two lower bits is part of hdr->flow_lbl[0] */
+	hdr->flow_lbl[0] |= ((dscp & 0x03) << 6);
+}
 
-	/* Traffic Class and Flow Label */
-	switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) {
-	/* Traffic Class and FLow Label carried in-line
-	 * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes)
+/* get the flow label values from iphc tf format and set it to ipv6hdr */
+static inline void lowpan_iphc_tf_set_lbl(struct ipv6hdr *hdr, const u8 *lbl)
+{
+	/* flow label is always some array started with lower nibble of
+	 * flow_lbl[0] and followed with two bytes afterwards. Inside inline
+	 * data the flow_lbl position can be different, which will be handled
+	 * by lbl pointer. E.g. case "01" vs "00" the traffic class is 8 bit
+	 * shifted, the different lbl pointer will handle that.
+	 *
+	 * The flow label will started at lower nibble of flow_lbl[0], the
+	 * higher nibbles are part of DSCP + ECN.
 	 */
-	case 0: /* 00b */
-		if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp)))
+	hdr->flow_lbl[0] |= lbl[0] & 0x0f;
+	memcpy(&hdr->flow_lbl[1], &lbl[1], 2);
+}
+
+/* lowpan_iphc_tf_decompress - decompress the traffic class.
+ * This function will return zero on success, a value lower than zero if
+ * failed.
+ */
+static int lowpan_iphc_tf_decompress(struct sk_buff *skb, struct ipv6hdr *hdr,
+				     u8 val)
+{
+	u8 tf[4];
+
+	/* Traffic Class and Flow Label */
+	switch (val) {
+	case LOWPAN_IPHC_TF_00:
+		/* ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) */
+		if (lowpan_fetch_skb(skb, tf, 4))
 			return -EINVAL;
 
-		memcpy(&hdr.flow_lbl, &skb->data[0], 3);
-		skb_pull(skb, 3);
-		hdr.priority = ((tmp >> 2) & 0x0f);
-		hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) |
-			(hdr.flow_lbl[0] & 0x0f);
+		/*			     1                   2                   3
+		 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+		 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+		 * |ECN|   DSCP   |  rsv  |             Flow Label                |
+		 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+		 */
+		lowpan_iphc_tf_set_ecn(hdr, tf);
+		lowpan_iphc_tf_set_dscp(hdr, tf);
+		lowpan_iphc_tf_set_lbl(hdr, &tf[1]);
 		break;
-	/* Traffic class carried in-line
-	 * ECN + DSCP (1 byte), Flow Label is elided
-	 */
-	case 2: /* 10b */
-		if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp)))
+	case LOWPAN_IPHC_TF_01:
+		/* ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided. */
+		if (lowpan_fetch_skb(skb, tf, 3))
 			return -EINVAL;
 
-		hdr.priority = ((tmp >> 2) & 0x0f);
-		hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30);
+		/*			    1                   2
+		 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
+		 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+		 * |ECN|rsv|             Flow Label                |
+		 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+		 */
+		lowpan_iphc_tf_set_ecn(hdr, tf);
+		lowpan_iphc_tf_set_lbl(hdr, &tf[0]);
 		break;
-	/* Flow Label carried in-line
-	 * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided
-	 */
-	case 1: /* 01b */
-		if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp)))
+	case LOWPAN_IPHC_TF_10:
+		/* ECN + DSCP (1 byte), Flow Label is elided. */
+		if (lowpan_fetch_skb(skb, tf, 1))
 			return -EINVAL;
 
-		hdr.flow_lbl[0] = (tmp & 0x0F) | ((tmp >> 2) & 0x30);
-		memcpy(&hdr.flow_lbl[1], &skb->data[0], 2);
-		skb_pull(skb, 2);
+		/* 0 1 2 3 4 5 6 7
+		 * +-+-+-+-+-+-+-+-+
+		 * |ECN|   DSCP   |
+		 * +-+-+-+-+-+-+-+-+
+		 */
+		lowpan_iphc_tf_set_ecn(hdr, tf);
+		lowpan_iphc_tf_set_dscp(hdr, tf);
 		break;
-	/* Traffic Class and Flow Label are elided */
-	case 3: /* 11b */
+	case LOWPAN_IPHC_TF_11:
+		/* Traffic Class and Flow Label are elided */
 		break;
 	default:
-		break;
+		WARN_ON_ONCE(1);
+		return -EINVAL;
 	}
 
+	return 0;
+}
+
+/* TTL uncompression values */
+static const u8 lowpan_ttl_values[] = {
+	[LOWPAN_IPHC_HLIM_01] = 1,
+	[LOWPAN_IPHC_HLIM_10] = 64,
+	[LOWPAN_IPHC_HLIM_11] = 255,
+};
+
+int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
+			     const void *daddr, const void *saddr)
+{
+	struct ipv6hdr hdr = {};
+	u8 iphc0, iphc1;
+	int err;
+
+	raw_dump_table(__func__, "raw skb data dump uncompressed",
+		       skb->data, skb->len);
+
+	if (lowpan_fetch_skb(skb, &iphc0, sizeof(iphc0)) ||
+	    lowpan_fetch_skb(skb, &iphc1, sizeof(iphc1)))
+		return -EINVAL;
+
+	/* another if the CID flag is set */
+	if (iphc1 & LOWPAN_IPHC_CID)
+		return -ENOTSUPP;
+
+	hdr.version = 6;
+
+	err = lowpan_iphc_tf_decompress(skb, &hdr,
+					iphc0 & LOWPAN_IPHC_TF_MASK);
+	if (err < 0)
+		return err;
+
 	/* Next Header */
-	if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) {
+	if (!(iphc0 & LOWPAN_IPHC_NH)) {
 		/* Next header is carried inline */
 		if (lowpan_fetch_skb(skb, &hdr.nexthdr, sizeof(hdr.nexthdr)))
 			return -EINVAL;
@@ -305,35 +491,30 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	/* Hop Limit */
-	if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) {
-		hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03];
+	if ((iphc0 & LOWPAN_IPHC_HLIM_MASK) != LOWPAN_IPHC_HLIM_00) {
+		hdr.hop_limit = lowpan_ttl_values[iphc0 & LOWPAN_IPHC_HLIM_MASK];
 	} else {
 		if (lowpan_fetch_skb(skb, &hdr.hop_limit,
 				     sizeof(hdr.hop_limit)))
 			return -EINVAL;
 	}
 
-	/* Extract SAM to the tmp variable */
-	tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03;
-
 	if (iphc1 & LOWPAN_IPHC_SAC) {
 		/* Source address context based uncompression */
 		pr_debug("SAC bit is set. Handle context based source address.\n");
-		err = uncompress_context_based_src_addr(skb, &hdr.saddr, tmp);
+		err = uncompress_context_based_src_addr(skb, &hdr.saddr,
+							iphc1 & LOWPAN_IPHC_SAM_MASK);
 	} else {
 		/* Source address uncompression */
 		pr_debug("source address stateless compression\n");
-		err = uncompress_addr(skb, &hdr.saddr, tmp, saddr,
-				      saddr_type, saddr_len);
+		err = uncompress_addr(skb, dev, &hdr.saddr,
				      iphc1 & LOWPAN_IPHC_SAM_MASK, saddr);
 	}
 
 	/* Check on error of previous branch */
 	if (err)
 		return -EINVAL;
 
-	/* Extract DAM to the tmp variable */
-	tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03;
-
 	/* check for Multicast Compression */
 	if (iphc1 & LOWPAN_IPHC_M) {
 		if (iphc1 & LOWPAN_IPHC_DAC) {
@@ -341,22 +522,22 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
 			/* TODO: implement this */
 		} else {
 			err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr,
-								tmp);
+								iphc1 & LOWPAN_IPHC_DAM_MASK);
 
 			if (err)
 				return -EINVAL;
 		}
 	} else {
-		err = uncompress_addr(skb, &hdr.daddr, tmp, daddr,
-				      daddr_type, daddr_len);
+		err = uncompress_addr(skb, dev, &hdr.daddr,
+				      iphc1 & LOWPAN_IPHC_DAM_MASK, daddr);
 		pr_debug("dest: stateless compression mode %d dest %pI6c\n",
-			 tmp, &hdr.daddr);
+			 iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr);
 		if (err)
 			return -EINVAL;
 	}
 
 	/* Next header data uncompression */
-	if (iphc0 & LOWPAN_IPHC_NH_C) {
+	if (iphc0 & LOWPAN_IPHC_NH) {
 		err = lowpan_nhc_do_uncompression(skb, dev, &hdr);
 		if (err < 0)
 			return err;
@@ -366,7 +547,18 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
 			return err;
 	}
 
-	hdr.payload_len = htons(skb->len);
+	switch (lowpan_priv(dev)->lltype) {
+	case LOWPAN_LLTYPE_IEEE802154:
+		if (lowpan_802154_cb(skb)->d_size)
+			hdr.payload_len = htons(lowpan_802154_cb(skb)->d_size -
+						sizeof(struct ipv6hdr));
+		else
+			hdr.payload_len = htons(skb->len);
+		break;
+	default:
+		hdr.payload_len = htons(skb->len);
+		break;
+	}
 
 	pr_debug("skb headroom size = %d, data length = %d\n",
 		 skb_headroom(skb), skb->len);
@@ -386,42 +578,176 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(lowpan_header_decompress);
 
-static u8 lowpan_compress_addr_64(u8 **hc_ptr, u8 shift,
-				  const struct in6_addr *ipaddr,
-				  const unsigned char *lladdr)
+static const u8 lowpan_iphc_dam_to_sam_value[] = {
+	[LOWPAN_IPHC_DAM_00] = LOWPAN_IPHC_SAM_00,
+	[LOWPAN_IPHC_DAM_01] = LOWPAN_IPHC_SAM_01,
+	[LOWPAN_IPHC_DAM_10] = LOWPAN_IPHC_SAM_10,
+	[LOWPAN_IPHC_DAM_11] = LOWPAN_IPHC_SAM_11,
+};
+
+static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct in6_addr *ipaddr,
+				  const unsigned char *lladdr, bool sam)
 {
-	u8 val = 0;
+	u8 dam = LOWPAN_IPHC_DAM_00;
 
 	if (is_addr_mac_addr_based(ipaddr, lladdr)) {
-		val = 3; /* 0-bits */
+		dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
 		pr_debug("address compression 0 bits\n");
 	} else if (lowpan_is_iid_16_bit_compressable(ipaddr)) {
 		/* compress IID to 16 bits xxxx::XXXX */
 		lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[7], 2);
-		val = 2; /* 16-bits */
+		dam = LOWPAN_IPHC_DAM_10; /* 16-bits */
 		raw_dump_inline(NULL, "Compressed ipv6 addr is (16 bits)",
 				*hc_ptr - 2, 2);
 	} else {
 		/* do not compress IID => xxxx::IID */
 		lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[4], 8);
-		val = 1; /* 64-bits */
+		dam = LOWPAN_IPHC_DAM_01; /* 64-bits */
 		raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)",
 				*hc_ptr - 8, 8);
 	}
 
-	return rol8(val, shift);
+	if (sam)
+		return lowpan_iphc_dam_to_sam_value[dam];
+	else
+		return dam;
+}
+
+/* lowpan_iphc_get_tc - get the ECN + DCSP fields in hc format */
+static inline u8 lowpan_iphc_get_tc(const struct ipv6hdr *hdr)
+{
+	u8 dscp, ecn;
+
+	/* hdr->priority contains the higher bits of dscp, lower are part of
+	 * flow_lbl[0]. Note ECN, DCSP is swapped in ipv6 hdr.
+	 */
+	dscp = (hdr->priority << 2) | ((hdr->flow_lbl[0] & 0xc0) >> 6);
+	/* ECN is at the two lower bits from first nibble of flow_lbl[0] */
+	ecn = (hdr->flow_lbl[0] & 0x30);
+	/* for pretty debug output, also shift ecn to get the ecn value */
+	pr_debug("ecn 0x%02x dscp 0x%02x\n", ecn >> 4, dscp);
+	/* ECN is at 0x30 now, shift it to have ECN + DCSP */
+	return (ecn << 2) | dscp;
 }
 
-int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
-			   unsigned short type, const void *_daddr,
-			   const void *_saddr, unsigned int len)
+/* lowpan_iphc_is_flow_lbl_zero - check if flow label is zero */
+static inline bool lowpan_iphc_is_flow_lbl_zero(const struct ipv6hdr *hdr)
 {
-	u8 tmp, iphc0, iphc1, *hc_ptr;
+	return ((!(hdr->flow_lbl[0] & 0x0f)) &&
+		!hdr->flow_lbl[1] && !hdr->flow_lbl[2]);
+}
+
+/* lowpan_iphc_tf_compress - compress the traffic class which is set by
+ * ipv6hdr. Return the corresponding format identifier which is used.
+ */
+static u8 lowpan_iphc_tf_compress(u8 **hc_ptr, const struct ipv6hdr *hdr)
+{
+	/* get ecn dscp data in a byteformat as: ECN(hi) + DSCP(lo) */
+	u8 tc = lowpan_iphc_get_tc(hdr), tf[4], val;
+
+	/* printout the traffic class in hc format */
+	pr_debug("tc 0x%02x\n", tc);
+
+	if (lowpan_iphc_is_flow_lbl_zero(hdr)) {
+		if (!tc) {
+			/* 11:  Traffic Class and Flow Label are elided. */
+			val = LOWPAN_IPHC_TF_11;
+		} else {
+			/* 10:  ECN + DSCP (1 byte), Flow Label is elided.
+			 *
+			 *  0 1 2 3 4 5 6 7
+			 * +-+-+-+-+-+-+-+-+
+			 * |ECN|   DSCP   |
+			 * +-+-+-+-+-+-+-+-+
+			 */
+			lowpan_push_hc_data(hc_ptr, &tc, sizeof(tc));
+			val = LOWPAN_IPHC_TF_10;
+		}
+	} else {
+		/* check if dscp is zero, it's after the first two bit */
+		if (!(tc & 0x3f)) {
+			/* 01:  ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided
+			 *
+			 *                     1                   2
+			 *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
+			 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+			 * |ECN|rsv|             Flow Label                |
+			 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+			 */
+			memcpy(&tf[0], &hdr->flow_lbl[0], 3);
+			/* zero the highest 4-bits, contains DCSP + ECN */
+			tf[0] &= ~0xf0;
+			/* set ECN */
+			tf[0] |= (tc & 0xc0);
+
+			lowpan_push_hc_data(hc_ptr, tf, 3);
+			val = LOWPAN_IPHC_TF_01;
+		} else {
+			/* 00:  ECN + DSCP + 4-bit Pad + Flow Label (4 bytes)
+			 *
+			 *                      1                   2                   3
+			 *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+			 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+			 * |ECN|   DSCP   |  rsv  |             Flow Label                |
+			 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+			 */
+			memcpy(&tf[0], &tc, sizeof(tc));
+			/* highest nibble of flow_lbl[0] is part of DSCP + ECN
+			 * which will be the 4-bit pad and will be filled with
+			 * zeros afterwards.
+			 */
+			memcpy(&tf[1], &hdr->flow_lbl[0], 3);
+			/* zero the 4-bit pad, which is reserved */
+			tf[1] &= ~0xf0;
+
+			lowpan_push_hc_data(hc_ptr, tf, 4);
+			val = LOWPAN_IPHC_TF_00;
+		}
+	}
+
+	return val;
+}
+
+static u8 lowpan_iphc_mcast_addr_compress(u8 **hc_ptr,
+					  const struct in6_addr *ipaddr)
+{
+	u8 val;
+
+	if (lowpan_is_mcast_addr_compressable8(ipaddr)) {
+		pr_debug("compressed to 1 octet\n");
+		/* use last byte */
+		lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[15], 1);
+		val = LOWPAN_IPHC_DAM_11;
+	} else if (lowpan_is_mcast_addr_compressable32(ipaddr)) {
+		pr_debug("compressed to 4 octets\n");
+		/* second byte + the last three */
+		lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[1], 1);
+		lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[13], 3);
+		val = LOWPAN_IPHC_DAM_10;
+	} else if (lowpan_is_mcast_addr_compressable48(ipaddr)) {
+		pr_debug("compressed to 6 octets\n");
+		/* second byte + the last five */
+		lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[1], 1);
+		lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[11], 5);
+		val = LOWPAN_IPHC_DAM_01;
+	} else {
+		pr_debug("using full address\n");
+		lowpan_push_hc_data(hc_ptr, ipaddr->s6_addr, 16);
+		val = LOWPAN_IPHC_DAM_00;
+	}
+
+	return val;
+}
+
+int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
+			   const void *daddr, const void *saddr)
+{
+	u8 iphc0, iphc1, *hc_ptr;
 	struct ipv6hdr *hdr;
-	u8 head[100] = {};
+	u8 head[LOWPAN_IPHC_MAX_HC_BUF_LEN] = {};
 	int ret, addr_type;
 
-	if (type != ETH_P_IPV6)
+	if (skb->protocol != htons(ETH_P_IPV6))
 		return -EINVAL;
 
 	hdr = ipv6_hdr(skb);
@@ -445,63 +771,26 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
 
 	/* TODO: context lookup */
 
-	raw_dump_inline(__func__, "saddr",
-			(unsigned char *)_saddr, IEEE802154_ADDR_LEN);
-	raw_dump_inline(__func__, "daddr",
-			(unsigned char *)_daddr, IEEE802154_ADDR_LEN);
+	raw_dump_inline(__func__, "saddr", saddr, EUI64_ADDR_LEN);
+	raw_dump_inline(__func__, "daddr", daddr, EUI64_ADDR_LEN);
 
 	raw_dump_table(__func__, "sending raw skb network uncompressed packet",
 		       skb->data, skb->len);
 
-	/* Traffic class, flow label
-	 * If flow label is 0, compress it. If traffic class is 0, compress it
-	 * We have to process both in the same time as the offset of traffic
-	 * class depends on the presence of version and flow label
-	 */
-
-	/* hc format of TC is ECN | DSCP , original one is DSCP | ECN */
-	tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4);
-	tmp = ((tmp & 0x03) << 6) | (tmp >> 2);
-
-	if (((hdr->flow_lbl[0] & 0x0F) == 0) &&
-	    (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) {
-		/* flow label can be compressed */
-		iphc0 |= LOWPAN_IPHC_FL_C;
-		if ((hdr->priority == 0) &&
-		    ((hdr->flow_lbl[0] & 0xF0) == 0)) {
-			/* compress (elide) all */
-			iphc0 |= LOWPAN_IPHC_TC_C;
-		} else {
-			/* compress only the flow label */
-			*hc_ptr = tmp;
-			hc_ptr += 1;
-		}
-	} else {
-		/* Flow label cannot be compressed */
-		if ((hdr->priority == 0) &&
-		    ((hdr->flow_lbl[0] & 0xF0) == 0)) {
-			/* compress only traffic class */
-			iphc0 |= LOWPAN_IPHC_TC_C;
-			*hc_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F);
-			memcpy(hc_ptr + 1, &hdr->flow_lbl[1], 2);
-			hc_ptr += 3;
-		} else {
-			/* compress nothing */
-			memcpy(hc_ptr, hdr, 4);
-			/* replace the top byte with new ECN | DSCP format */
-			*hc_ptr = tmp;
-			hc_ptr += 4;
-		}
-	}
+	/* Traffic Class, Flow Label compression */
+	iphc0 |= lowpan_iphc_tf_compress(&hc_ptr, hdr);
 
 	/* NOTE: payload length is always compressed */
 
 	/* Check if we provide the nhc format for nexthdr and compression
 	 * functionality. If not nexthdr is handled inline and not compressed.
 	 */
-	ret = lowpan_nhc_check_compression(skb, hdr, &hc_ptr, &iphc0);
-	if (ret < 0)
-		return ret;
+	ret = lowpan_nhc_check_compression(skb, hdr, &hc_ptr);
+	if (ret == -ENOENT)
+		lowpan_push_hc_data(&hc_ptr, &hdr->nexthdr,
+				    sizeof(hdr->nexthdr));
+	else
+		iphc0 |= LOWPAN_IPHC_NH;
 
 	/* Hop limit
 	 * if 1:   compress, encoding is 01
@@ -511,13 +800,13 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
 	 */
 	switch (hdr->hop_limit) {
 	case 1:
-		iphc0 |= LOWPAN_IPHC_TTL_1;
+		iphc0 |= LOWPAN_IPHC_HLIM_01;
 		break;
 	case 64:
-		iphc0 |= LOWPAN_IPHC_TTL_64;
+		iphc0 |= LOWPAN_IPHC_HLIM_10;
 		break;
 	case 255:
-		iphc0 |= LOWPAN_IPHC_TTL_255;
+		iphc0 |= LOWPAN_IPHC_HLIM_11;
 		break;
 	default:
 		lowpan_push_hc_data(&hc_ptr, &hdr->hop_limit,
@@ -531,9 +820,8 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
 		iphc1 |= LOWPAN_IPHC_SAC;
 	} else {
 		if (addr_type & IPV6_ADDR_LINKLOCAL) {
-			iphc1 |= lowpan_compress_addr_64(&hc_ptr,
-					LOWPAN_IPHC_SAM_BIT,
-					&hdr->saddr, _saddr);
+			iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->saddr,
+							 saddr, true);
 			pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n",
 				 &hdr->saddr, iphc1);
 		} else {
@@ -547,38 +835,12 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
 	if (addr_type & IPV6_ADDR_MULTICAST) {
 		pr_debug("destination address is multicast: ");
 		iphc1 |= LOWPAN_IPHC_M;
-		if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) {
-			pr_debug("compressed to 1 octet\n");
-			iphc1 |= LOWPAN_IPHC_DAM_11;
-			/* use last byte */
-			lowpan_push_hc_data(&hc_ptr,
-					    &hdr->daddr.s6_addr[15], 1);
-		} else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) {
-			pr_debug("compressed to 4 octets\n");
-			iphc1 |= LOWPAN_IPHC_DAM_10;
-			/* second byte + the last three */
-			lowpan_push_hc_data(&hc_ptr,
-					    &hdr->daddr.s6_addr[1], 1);
-			lowpan_push_hc_data(&hc_ptr,
-					    &hdr->daddr.s6_addr[13], 3);
-		} else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) {
-			pr_debug("compressed to 6 octets\n");
-			iphc1 |= LOWPAN_IPHC_DAM_01;
-			/* second byte + the last five */
-			lowpan_push_hc_data(&hc_ptr,
-					    &hdr->daddr.s6_addr[1], 1);
-			lowpan_push_hc_data(&hc_ptr,
-					    &hdr->daddr.s6_addr[11], 5);
-		} else {
-			pr_debug("using full address\n");
-			iphc1 |= LOWPAN_IPHC_DAM_00;
-			lowpan_push_hc_data(&hc_ptr, hdr->daddr.s6_addr, 16);
-		}
+		iphc1 |= lowpan_iphc_mcast_addr_compress(&hc_ptr, &hdr->daddr);
 	} else {
 		if (addr_type & IPV6_ADDR_LINKLOCAL) {
 			/* TODO: context lookup */
-			iphc1 |= lowpan_compress_addr_64(&hc_ptr,
-				LOWPAN_IPHC_DAM_BIT, &hdr->daddr, _daddr);
+			iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->daddr,
+							 daddr, false);
 			pr_debug("dest address unicast link-local %pI6c "
 				 "iphc1 0x%02x\n", &hdr->daddr, iphc1);
 		} else {
@@ -588,7 +850,7 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	/* next header compression */
-	if (iphc0 & LOWPAN_IPHC_NH_C) {
+	if (iphc0 & LOWPAN_IPHC_NH) {
 		ret = lowpan_nhc_do_compression(skb, hdr, &hc_ptr);
 		if (ret < 0)
 			return ret;
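A standalone illustration of the ECN/DSCP swap that the new lowpan_iphc_get_tc() and lowpan_iphc_tf_set_*() helpers perform may help: the IPv6 header carries the traffic class as DSCP(6 bits)|ECN(2 bits), split across hdr->priority and the top nibble of flow_lbl[0], while the RFC 6282 IPHC inline TC byte is ECN(2)|DSCP(6). The userspace sketch below is not kernel code, only a mirror of the same bit arithmetic:

#include <stdint.h>
#include <stdio.h>

/* Mirror of lowpan_iphc_get_tc(): build the RFC 6282 TC byte
 * (ECN in bits 7..6, DSCP in bits 5..0) from the two IPv6 header
 * fields that carry the traffic class.
 */
static uint8_t iphc_tc(uint8_t priority, uint8_t flow_lbl0)
{
	/* DSCP bits 5..2 live in priority, bits 1..0 in flow_lbl[0] 7..6 */
	uint8_t dscp = (priority << 2) | ((flow_lbl0 & 0xc0) >> 6);
	/* ECN sits in flow_lbl[0] bits 5..4 */
	uint8_t ecn = flow_lbl0 & 0x30;

	return (uint8_t)((ecn << 2) | dscp);
}

int main(void)
{
	/* traffic class 0xb9 (DSCP 0x2e "EF", ECN 01) splits into
	 * priority = 0xb and flow_lbl[0] high nibble = 0x9;
	 * expect the IPHC tc byte 0x6e */
	printf("iphc tc = 0x%02x\n", iphc_tc(0x0b, 0x90));
	return 0;
}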
diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c
index fd20fc51a7c4..7008d53e455c 100644
--- a/net/6lowpan/nhc.c
+++ b/net/6lowpan/nhc.c
@@ -95,23 +95,20 @@ static struct lowpan_nhc *lowpan_nhc_by_nhcid(const struct sk_buff *skb)
 }
 
 int lowpan_nhc_check_compression(struct sk_buff *skb,
-				 const struct ipv6hdr *hdr, u8 **hc_ptr,
-				 u8 *iphc0)
+				 const struct ipv6hdr *hdr, u8 **hc_ptr)
 {
 	struct lowpan_nhc *nhc;
+	int ret = 0;
 
 	spin_lock_bh(&lowpan_nhc_lock);
 
 	nhc = lowpan_nexthdr_nhcs[hdr->nexthdr];
-	if (nhc && nhc->compress)
-		*iphc0 |= LOWPAN_IPHC_NH_C;
-	else
-		lowpan_push_hc_data(hc_ptr, &hdr->nexthdr,
-				    sizeof(hdr->nexthdr));
+	if (!(nhc && nhc->compress))
+		ret = -ENOENT;
 
 	spin_unlock_bh(&lowpan_nhc_lock);
 
-	return 0;
+	return ret;
 }
 
 int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr,
@@ -157,7 +154,8 @@ out:
 	return ret;
 }
 
-int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev,
+int lowpan_nhc_do_uncompression(struct sk_buff *skb,
+				const struct net_device *dev,
 				struct ipv6hdr *hdr)
 {
 	struct lowpan_nhc *nhc;
diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h
index ed44938eb5de..803041400136 100644
--- a/net/6lowpan/nhc.h
+++ b/net/6lowpan/nhc.h
@@ -8,8 +8,6 @@
 #include <net/6lowpan.h>
 #include <net/ipv6.h>
 
-#define LOWPAN_NHC_MAX_ID_LEN	1
-
 /**
  * LOWPAN_NHC - helper macro to generate nh id fields and lowpan_nhc struct
  *
@@ -88,19 +86,16 @@ struct lowpan_nhc *lowpan_nhc_by_nexthdr(u8 nexthdr);
 
 /**
  * lowpan_nhc_check_compression - checks if we support compression format. If
- *	we support the nhc by nexthdr field, the 6LoWPAN iphc NHC bit will be
- *	set. If we don't support nexthdr will be added as inline data to the
- *	6LoWPAN header.
+ *	we support the nhc by nexthdr field, the function will return 0. If we
+ *	don't support the nhc by nexthdr this function will return -ENOENT.
  *
  * @skb: skb of 6LoWPAN header to read nhc and replace header.
  * @hdr: ipv6hdr to check the nexthdr value
  * @hc_ptr: pointer for 6LoWPAN header which should increment at the end of
  *	replaced header.
- * @iphc0: iphc0 pointer to set the 6LoWPAN NHC bit
  */
 int lowpan_nhc_check_compression(struct sk_buff *skb,
-				 const struct ipv6hdr *hdr, u8 **hc_ptr,
-				 u8 *iphc0);
+				 const struct ipv6hdr *hdr, u8 **hc_ptr);
 
 /**
  * lowpan_nhc_do_compression - calling compress callback for nhc
@@ -121,7 +116,8 @@ int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr,
  * @dev: netdevice for print logging information.
  * @hdr: ipv6hdr for setting nexthdr value.
  */
-int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev,
+int lowpan_nhc_do_uncompression(struct sk_buff *skb,
+				const struct net_device *dev,
 				struct ipv6hdr *hdr);
 
 /**
diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c
index c6bcaeb428ae..69537a2eaab1 100644
--- a/net/6lowpan/nhc_udp.c
+++ b/net/6lowpan/nhc_udp.c
@@ -17,7 +17,27 @@
17 17
18#include "nhc.h" 18#include "nhc.h"
19 19
20#define LOWPAN_NHC_UDP_IDLEN 1 20#define LOWPAN_NHC_UDP_MASK 0xF8
21#define LOWPAN_NHC_UDP_ID 0xF0
22#define LOWPAN_NHC_UDP_IDLEN 1
23
24#define LOWPAN_NHC_UDP_4BIT_PORT 0xF0B0
25#define LOWPAN_NHC_UDP_4BIT_MASK 0xFFF0
26#define LOWPAN_NHC_UDP_8BIT_PORT 0xF000
27#define LOWPAN_NHC_UDP_8BIT_MASK 0xFF00
28
29/* values for port compression, _with checksum_, i.e. bit 5 set to 0 */
30
31/* all inline */
32#define LOWPAN_NHC_UDP_CS_P_00 0xF0
33/* source 16bit inline, dest = 0xF0 + 8 bit inline */
34#define LOWPAN_NHC_UDP_CS_P_01 0xF1
35/* source = 0xF0 + 8bit inline, dest = 16 bit inline */
36#define LOWPAN_NHC_UDP_CS_P_10 0xF2
37/* source & dest = 0xF0B + 4bit inline */
38#define LOWPAN_NHC_UDP_CS_P_11 0xF3
39/* checksum elided */
40#define LOWPAN_NHC_UDP_CS_C 0x04
21 41
22static int udp_uncompress(struct sk_buff *skb, size_t needed) 42static int udp_uncompress(struct sk_buff *skb, size_t needed)
23{ 43{
@@ -71,7 +91,18 @@ static int udp_uncompress(struct sk_buff *skb, size_t needed)
71 * here, we obtain the hint from the remaining size of the 91 * here, we obtain the hint from the remaining size of the
72 * frame 92 * frame
73 */ 93 */
74 uh.len = htons(skb->len + sizeof(struct udphdr)); 94 switch (lowpan_priv(skb->dev)->lltype) {
95 case LOWPAN_LLTYPE_IEEE802154:
96 if (lowpan_802154_cb(skb)->d_size)
97 uh.len = htons(lowpan_802154_cb(skb)->d_size -
98 sizeof(struct ipv6hdr));
99 else
100 uh.len = htons(skb->len + sizeof(struct udphdr));
101 break;
102 default:
103 uh.len = htons(skb->len + sizeof(struct udphdr));
104 break;
105 }
75 pr_debug("uncompressed UDP length: src = %d", ntohs(uh.len)); 106 pr_debug("uncompressed UDP length: src = %d", ntohs(uh.len));
76 107
77 /* replace the compressed UDP head by the uncompressed UDP 108 /* replace the compressed UDP head by the uncompressed UDP
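
The LOWPAN_NHC_UDP_* constants added above spell out RFC 6282 UDP port compression: both ports in 0xF0B0..0xF0BF compress to one nibble each (CS_P_11), a port in 0xF000..0xF0FF compresses to one byte (CS_P_01/CS_P_10), and anything else stays fully inline (CS_P_00). A worked selection fragment (logic assumed to mirror the compress side, which is not part of this hunk):

    /* Illustrative values: 0xF0B3 and 0xF0B7 both match the 4-bit
     * mask, so the CS_P_11 form applies, one nibble per port. */
    u16 sport = 0xF0B3, dport = 0xF0B7;
    u8 id;

    if ((sport & LOWPAN_NHC_UDP_4BIT_MASK) == LOWPAN_NHC_UDP_4BIT_PORT &&
        (dport & LOWPAN_NHC_UDP_4BIT_MASK) == LOWPAN_NHC_UDP_4BIT_PORT)
            id = LOWPAN_NHC_UDP_CS_P_11;    /* 4 bits per port */
    else if ((dport & LOWPAN_NHC_UDP_8BIT_MASK) == LOWPAN_NHC_UDP_8BIT_PORT)
            id = LOWPAN_NHC_UDP_CS_P_01;    /* sport 16 bit, dport 8 bit */
    else if ((sport & LOWPAN_NHC_UDP_8BIT_MASK) == LOWPAN_NHC_UDP_8BIT_PORT)
            id = LOWPAN_NHC_UDP_CS_P_10;    /* sport 8 bit, dport 16 bit */
    else
            id = LOWPAN_NHC_UDP_CS_P_00;    /* both ports inline */
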
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 61bf2a06e85d..e2ed69850489 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -30,7 +30,9 @@ bool vlan_do_receive(struct sk_buff **skbp)
30 skb->pkt_type = PACKET_HOST; 30 skb->pkt_type = PACKET_HOST;
31 } 31 }
32 32
33 if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { 33 if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) &&
34 !netif_is_macvlan_port(vlan_dev) &&
35 !netif_is_bridge_port(vlan_dev)) {
34 unsigned int offset = skb->data - skb_mac_header(skb); 36 unsigned int offset = skb->data - skb_mac_header(skb);
35 37
36 /* 38 /*
@@ -206,7 +208,10 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
206 return -ENOMEM; 208 return -ENOMEM;
207 209
208 if (vlan_hw_filter_capable(dev, vid_info)) { 210 if (vlan_hw_filter_capable(dev, vid_info)) {
209 err = ops->ndo_vlan_rx_add_vid(dev, proto, vid); 211 if (netif_device_present(dev))
212 err = ops->ndo_vlan_rx_add_vid(dev, proto, vid);
213 else
214 err = -ENODEV;
210 if (err) { 215 if (err) {
211 kfree(vid_info); 216 kfree(vid_info);
212 return err; 217 return err;
@@ -264,7 +269,10 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
264 int err; 269 int err;
265 270
266 if (vlan_hw_filter_capable(dev, vid_info)) { 271 if (vlan_hw_filter_capable(dev, vid_info)) {
267 err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid); 272 if (netif_device_present(dev))
273 err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
274 else
275 err = -ENODEV;
268 if (err) { 276 if (err) {
269 pr_warn("failed to kill vid %04x/%d for device %s\n", 277 pr_warn("failed to kill vid %04x/%d for device %s\n",
270 proto, vid, dev->name); 278 proto, vid, dev->name);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index ba1210253f5e..52b4a2f993f2 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -655,8 +655,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
655 return -ENOMEM; 655 return -ENOMEM;
656 656
657 /* Create the RDMA CM ID */ 657 /* Create the RDMA CM ID */
658 rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, 658 rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client,
659 IB_QPT_RC); 659 RDMA_PS_TCP, IB_QPT_RC);
660 if (IS_ERR(rdma->cm_id)) 660 if (IS_ERR(rdma->cm_id))
661 goto error; 661 goto error;
662 662
diff --git a/net/Kconfig b/net/Kconfig
index 7021c1bf44d6..127da94ae25e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,6 +232,7 @@ source "net/netlink/Kconfig"
232source "net/mpls/Kconfig" 232source "net/mpls/Kconfig"
233source "net/hsr/Kconfig" 233source "net/hsr/Kconfig"
234source "net/switchdev/Kconfig" 234source "net/switchdev/Kconfig"
235source "net/l3mdev/Kconfig"
235 236
236config RPS 237config RPS
237 bool 238 bool
diff --git a/net/Makefile b/net/Makefile
index 3995613e5510..a5d04098dfce 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -74,3 +74,6 @@ obj-$(CONFIG_HSR) += hsr/
74ifneq ($(CONFIG_NET_SWITCHDEV),) 74ifneq ($(CONFIG_NET_SWITCHDEV),)
75obj-y += switchdev/ 75obj-y += switchdev/
76endif 76endif
77ifneq ($(CONFIG_NET_L3_MASTER_DEV),)
78obj-y += l3mdev/
79endif
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 131e79cde350..9e9cca3689a0 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -21,8 +21,6 @@
21#include <net/ip6_route.h> 21#include <net/ip6_route.h>
22#include <net/addrconf.h> 22#include <net/addrconf.h>
23 23
24#include <net/af_ieee802154.h> /* to get the address type */
25
26#include <net/bluetooth/bluetooth.h> 24#include <net/bluetooth/bluetooth.h>
27#include <net/bluetooth/hci_core.h> 25#include <net/bluetooth/hci_core.h>
28#include <net/bluetooth/l2cap.h> 26#include <net/bluetooth/l2cap.h>
@@ -35,7 +33,6 @@ static struct dentry *lowpan_enable_debugfs;
35static struct dentry *lowpan_control_debugfs; 33static struct dentry *lowpan_control_debugfs;
36 34
37#define IFACE_NAME_TEMPLATE "bt%d" 35#define IFACE_NAME_TEMPLATE "bt%d"
38#define EUI64_ADDR_LEN 8
39 36
40struct skb_cb { 37struct skb_cb {
41 struct in6_addr addr; 38 struct in6_addr addr;
@@ -266,14 +263,13 @@ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev)
266 if (!skb_cp) 263 if (!skb_cp)
267 return NET_RX_DROP; 264 return NET_RX_DROP;
268 265
269 return netif_rx(skb_cp); 266 return netif_rx_ni(skb_cp);
270} 267}
271 268
272static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev, 269static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
273 struct l2cap_chan *chan) 270 struct l2cap_chan *chan)
274{ 271{
275 const u8 *saddr, *daddr; 272 const u8 *saddr, *daddr;
276 u8 iphc0, iphc1;
277 struct lowpan_dev *dev; 273 struct lowpan_dev *dev;
278 struct lowpan_peer *peer; 274 struct lowpan_peer *peer;
279 275
@@ -288,22 +284,7 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
288 saddr = peer->eui64_addr; 284 saddr = peer->eui64_addr;
289 daddr = dev->netdev->dev_addr; 285 daddr = dev->netdev->dev_addr;
290 286
291 /* at least two bytes will be used for the encoding */ 287 return lowpan_header_decompress(skb, netdev, daddr, saddr);
292 if (skb->len < 2)
293 return -EINVAL;
294
295 if (lowpan_fetch_skb_u8(skb, &iphc0))
296 return -EINVAL;
297
298 if (lowpan_fetch_skb_u8(skb, &iphc1))
299 return -EINVAL;
300
301 return lowpan_header_decompress(skb, netdev,
302 saddr, IEEE802154_ADDR_LONG,
303 EUI64_ADDR_LEN, daddr,
304 IEEE802154_ADDR_LONG, EUI64_ADDR_LEN,
305 iphc0, iphc1);
306
307} 288}
308 289
309static int recv_pkt(struct sk_buff *skb, struct net_device *dev, 290static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
@@ -315,15 +296,17 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
315 if (!netif_running(dev)) 296 if (!netif_running(dev))
316 goto drop; 297 goto drop;
317 298
318 if (dev->type != ARPHRD_6LOWPAN) 299 if (dev->type != ARPHRD_6LOWPAN || !skb->len)
319 goto drop; 300 goto drop;
320 301
302 skb_reset_network_header(skb);
303
321 skb = skb_share_check(skb, GFP_ATOMIC); 304 skb = skb_share_check(skb, GFP_ATOMIC);
322 if (!skb) 305 if (!skb)
323 goto drop; 306 goto drop;
324 307
325 /* check that it's our buffer */ 308 /* check that it's our buffer */
326 if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { 309 if (lowpan_is_ipv6(*skb_network_header(skb))) {
327 /* Copy the packet so that the IPv6 header is 310 /* Copy the packet so that the IPv6 header is
328 * properly aligned. 311 * properly aligned.
329 */ 312 */
@@ -335,7 +318,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
335 local_skb->protocol = htons(ETH_P_IPV6); 318 local_skb->protocol = htons(ETH_P_IPV6);
336 local_skb->pkt_type = PACKET_HOST; 319 local_skb->pkt_type = PACKET_HOST;
337 320
338 skb_reset_network_header(local_skb);
339 skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); 321 skb_set_transport_header(local_skb, sizeof(struct ipv6hdr));
340 322
341 if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { 323 if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) {
@@ -348,38 +330,34 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
348 330
349 consume_skb(local_skb); 331 consume_skb(local_skb);
350 consume_skb(skb); 332 consume_skb(skb);
351 } else { 333 } else if (lowpan_is_iphc(*skb_network_header(skb))) {
352 switch (skb->data[0] & 0xe0) { 334 local_skb = skb_clone(skb, GFP_ATOMIC);
353 case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ 335 if (!local_skb)
354 local_skb = skb_clone(skb, GFP_ATOMIC); 336 goto drop;
355 if (!local_skb)
356 goto drop;
357 337
358 ret = iphc_decompress(local_skb, dev, chan); 338 ret = iphc_decompress(local_skb, dev, chan);
359 if (ret < 0) { 339 if (ret < 0) {
360 kfree_skb(local_skb); 340 kfree_skb(local_skb);
361 goto drop; 341 goto drop;
362 } 342 }
363 343
364 local_skb->protocol = htons(ETH_P_IPV6); 344 local_skb->protocol = htons(ETH_P_IPV6);
365 local_skb->pkt_type = PACKET_HOST; 345 local_skb->pkt_type = PACKET_HOST;
366 local_skb->dev = dev; 346 local_skb->dev = dev;
367 347
368 if (give_skb_to_upper(local_skb, dev) 348 if (give_skb_to_upper(local_skb, dev)
369 != NET_RX_SUCCESS) { 349 != NET_RX_SUCCESS) {
370 kfree_skb(local_skb); 350 kfree_skb(local_skb);
371 goto drop; 351 goto drop;
372 } 352 }
373 353
374 dev->stats.rx_bytes += skb->len; 354 dev->stats.rx_bytes += skb->len;
375 dev->stats.rx_packets++; 355 dev->stats.rx_packets++;
376 356
377 consume_skb(local_skb); 357 consume_skb(local_skb);
378 consume_skb(skb); 358 consume_skb(skb);
379 break; 359 } else {
380 default: 360 goto drop;
381 break;
382 }
383 } 361 }
384 362
385 return NET_RX_SUCCESS; 363 return NET_RX_SUCCESS;
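
recv_pkt() above now classifies the first byte with lowpan_is_ipv6()/lowpan_is_iphc() instead of open-coding the dispatch test that the removed switch performed with the 0xe0 mask. The assumed semantics behind those helpers, with dispatch values per RFC 4944/6282:

    /* Sketch of the dispatch checks; function names hypothetical,
     * values as in the RFCs and consistent with the removed
     * `skb->data[0] & 0xe0` test. */
    static bool sketch_is_ipv6(u8 dispatch)
    {
            return dispatch == 0x41;                /* LOWPAN_DISPATCH_IPV6 */
    }

    static bool sketch_is_iphc(u8 dispatch)
    {
            return (dispatch & 0xe0) == 0x60;       /* 011xxxxx, IPHC */
    }
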
@@ -493,8 +471,7 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev,
493 status = 1; 471 status = 1;
494 } 472 }
495 473
496 lowpan_header_compress(skb, netdev, ETH_P_IPV6, daddr, 474 lowpan_header_compress(skb, netdev, daddr, dev->netdev->dev_addr);
497 dev->netdev->dev_addr, skb->len);
498 475
499 err = dev_hard_header(skb, netdev, ETH_P_IPV6, NULL, NULL, 0); 476 err = dev_hard_header(skb, netdev, ETH_P_IPV6, NULL, NULL, 0);
500 if (err < 0) 477 if (err < 0)
@@ -674,13 +651,8 @@ static struct header_ops header_ops = {
674 651
675static void netdev_setup(struct net_device *dev) 652static void netdev_setup(struct net_device *dev)
676{ 653{
677 dev->addr_len = EUI64_ADDR_LEN;
678 dev->type = ARPHRD_6LOWPAN;
679
680 dev->hard_header_len = 0; 654 dev->hard_header_len = 0;
681 dev->needed_tailroom = 0; 655 dev->needed_tailroom = 0;
682 dev->mtu = IPV6_MIN_MTU;
683 dev->tx_queue_len = 0;
684 dev->flags = IFF_RUNNING | IFF_POINTOPOINT | 656 dev->flags = IFF_RUNNING | IFF_POINTOPOINT |
685 IFF_MULTICAST; 657 IFF_MULTICAST;
686 dev->watchdog_timeo = 0; 658 dev->watchdog_timeo = 0;
@@ -775,24 +747,7 @@ static struct l2cap_chan *chan_create(void)
775 747
776 chan->chan_type = L2CAP_CHAN_CONN_ORIENTED; 748 chan->chan_type = L2CAP_CHAN_CONN_ORIENTED;
777 chan->mode = L2CAP_MODE_LE_FLOWCTL; 749 chan->mode = L2CAP_MODE_LE_FLOWCTL;
778 chan->omtu = 65535; 750 chan->imtu = 1280;
779 chan->imtu = chan->omtu;
780
781 return chan;
782}
783
784static struct l2cap_chan *chan_open(struct l2cap_chan *pchan)
785{
786 struct l2cap_chan *chan;
787
788 chan = chan_create();
789 if (!chan)
790 return NULL;
791
792 chan->remote_mps = chan->omtu;
793 chan->mps = chan->omtu;
794
795 chan->state = BT_CONNECTED;
796 751
797 return chan; 752 return chan;
798} 753}
@@ -919,7 +874,10 @@ static inline struct l2cap_chan *chan_new_conn_cb(struct l2cap_chan *pchan)
919{ 874{
920 struct l2cap_chan *chan; 875 struct l2cap_chan *chan;
921 876
922 chan = chan_open(pchan); 877 chan = chan_create();
878 if (!chan)
879 return NULL;
880
923 chan->ops = pchan->ops; 881 chan->ops = pchan->ops;
924 882
925 BT_DBG("chan %p pchan %p", chan, pchan); 883 BT_DBG("chan %p pchan %p", chan, pchan);
@@ -1065,34 +1023,23 @@ static inline __u8 bdaddr_type(__u8 type)
1065 return BDADDR_LE_RANDOM; 1023 return BDADDR_LE_RANDOM;
1066} 1024}
1067 1025
1068static struct l2cap_chan *chan_get(void)
1069{
1070 struct l2cap_chan *pchan;
1071
1072 pchan = chan_create();
1073 if (!pchan)
1074 return NULL;
1075
1076 pchan->ops = &bt_6lowpan_chan_ops;
1077
1078 return pchan;
1079}
1080
1081static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type) 1026static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type)
1082{ 1027{
1083 struct l2cap_chan *pchan; 1028 struct l2cap_chan *chan;
1084 int err; 1029 int err;
1085 1030
1086 pchan = chan_get(); 1031 chan = chan_create();
1087 if (!pchan) 1032 if (!chan)
1088 return -EINVAL; 1033 return -EINVAL;
1089 1034
1090 err = l2cap_chan_connect(pchan, cpu_to_le16(L2CAP_PSM_IPSP), 0, 1035 chan->ops = &bt_6lowpan_chan_ops;
1036
1037 err = l2cap_chan_connect(chan, cpu_to_le16(L2CAP_PSM_IPSP), 0,
1091 addr, dst_type); 1038 addr, dst_type);
1092 1039
1093 BT_DBG("chan %p err %d", pchan, err); 1040 BT_DBG("chan %p err %d", chan, err);
1094 if (err < 0) 1041 if (err < 0)
1095 l2cap_chan_put(pchan); 1042 l2cap_chan_put(chan);
1096 1043
1097 return err; 1044 return err;
1098} 1045}
@@ -1117,31 +1064,32 @@ static int bt_6lowpan_disconnect(struct l2cap_conn *conn, u8 dst_type)
1117static struct l2cap_chan *bt_6lowpan_listen(void) 1064static struct l2cap_chan *bt_6lowpan_listen(void)
1118{ 1065{
1119 bdaddr_t *addr = BDADDR_ANY; 1066 bdaddr_t *addr = BDADDR_ANY;
1120 struct l2cap_chan *pchan; 1067 struct l2cap_chan *chan;
1121 int err; 1068 int err;
1122 1069
1123 if (!enable_6lowpan) 1070 if (!enable_6lowpan)
1124 return NULL; 1071 return NULL;
1125 1072
1126 pchan = chan_get(); 1073 chan = chan_create();
1127 if (!pchan) 1074 if (!chan)
1128 return NULL; 1075 return NULL;
1129 1076
1130 pchan->state = BT_LISTEN; 1077 chan->ops = &bt_6lowpan_chan_ops;
1131 pchan->src_type = BDADDR_LE_PUBLIC; 1078 chan->state = BT_LISTEN;
1079 chan->src_type = BDADDR_LE_PUBLIC;
1132 1080
1133 atomic_set(&pchan->nesting, L2CAP_NESTING_PARENT); 1081 atomic_set(&chan->nesting, L2CAP_NESTING_PARENT);
1134 1082
1135 BT_DBG("chan %p src type %d", pchan, pchan->src_type); 1083 BT_DBG("chan %p src type %d", chan, chan->src_type);
1136 1084
1137 err = l2cap_add_psm(pchan, addr, cpu_to_le16(L2CAP_PSM_IPSP)); 1085 err = l2cap_add_psm(chan, addr, cpu_to_le16(L2CAP_PSM_IPSP));
1138 if (err) { 1086 if (err) {
1139 l2cap_chan_put(pchan); 1087 l2cap_chan_put(chan);
1140 BT_ERR("psm cannot be added err %d", err); 1088 BT_ERR("psm cannot be added err %d", err);
1141 return NULL; 1089 return NULL;
1142 } 1090 }
1143 1091
1144 return pchan; 1092 return chan;
1145} 1093}
1146 1094
1147static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, 1095static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
@@ -1165,7 +1113,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
1165 return -ENOENT; 1113 return -ENOENT;
1166 1114
1167 hci_dev_lock(hdev); 1115 hci_dev_lock(hdev);
1168 hcon = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); 1116 hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type);
1169 hci_dev_unlock(hdev); 1117 hci_dev_unlock(hdev);
1170 1118
1171 if (!hcon) 1119 if (!hcon)
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 70f9d945faf7..a3bffd1ec2b4 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -33,7 +33,7 @@
33 33
34#include "selftest.h" 34#include "selftest.h"
35 35
36#define VERSION "2.20" 36#define VERSION "2.21"
37 37
38/* Bluetooth sockets */ 38/* Bluetooth sockets */
39#define BT_MAX_PROTO 8 39#define BT_MAX_PROTO 8
@@ -221,7 +221,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
221 221
222 BT_DBG("sock %p sk %p len %zu", sock, sk, len); 222 BT_DBG("sock %p sk %p len %zu", sock, sk, len);
223 223
224 if (flags & (MSG_OOB)) 224 if (flags & MSG_OOB)
225 return -EOPNOTSUPP; 225 return -EOPNOTSUPP;
226 226
227 skb = skb_recv_datagram(sk, flags, noblock, &err); 227 skb = skb_recv_datagram(sk, flags, noblock, &err);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index b4548c739a64..85b82f7adbd2 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -59,15 +59,11 @@ static const struct sco_param esco_param_msbc[] = {
59 { EDR_ESCO_MASK | ESCO_EV3, 0x0008, 0x02 }, /* T1 */ 59 { EDR_ESCO_MASK | ESCO_EV3, 0x0008, 0x02 }, /* T1 */
60}; 60};
61 61
62static void hci_le_create_connection_cancel(struct hci_conn *conn)
63{
64 hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
65}
66
67/* This function requires the caller holds hdev->lock */ 62/* This function requires the caller holds hdev->lock */
68static void hci_connect_le_scan_cleanup(struct hci_conn *conn) 63static void hci_connect_le_scan_cleanup(struct hci_conn *conn)
69{ 64{
70 struct hci_conn_params *params; 65 struct hci_conn_params *params;
66 struct hci_dev *hdev = conn->hdev;
71 struct smp_irk *irk; 67 struct smp_irk *irk;
72 bdaddr_t *bdaddr; 68 bdaddr_t *bdaddr;
73 u8 bdaddr_type; 69 u8 bdaddr_type;
@@ -76,14 +72,15 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn)
76 bdaddr_type = conn->dst_type; 72 bdaddr_type = conn->dst_type;
77 73
78 /* Check if we need to convert to identity address */ 74 /* Check if we need to convert to identity address */
79 irk = hci_get_irk(conn->hdev, bdaddr, bdaddr_type); 75 irk = hci_get_irk(hdev, bdaddr, bdaddr_type);
80 if (irk) { 76 if (irk) {
81 bdaddr = &irk->bdaddr; 77 bdaddr = &irk->bdaddr;
82 bdaddr_type = irk->addr_type; 78 bdaddr_type = irk->addr_type;
83 } 79 }
84 80
85 params = hci_explicit_connect_lookup(conn->hdev, bdaddr, bdaddr_type); 81 params = hci_pend_le_action_lookup(&hdev->pend_le_conns, bdaddr,
86 if (!params) 82 bdaddr_type);
83 if (!params || !params->explicit_connect)
87 return; 84 return;
88 85
89 /* The connection attempt was doing scan for new RPA, and is 86 /* The connection attempt was doing scan for new RPA, and is
@@ -91,19 +88,97 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn)
91 * autoconnect action, remove them completely. If they are, just unmark 88 * autoconnect action, remove them completely. If they are, just unmark
92 * them as waiting for connection, by clearing explicit_connect field. 89 * them as waiting for connection, by clearing explicit_connect field.
93 */ 90 */
94 if (params->auto_connect == HCI_AUTO_CONN_EXPLICIT) 91 params->explicit_connect = false;
95 hci_conn_params_del(conn->hdev, bdaddr, bdaddr_type); 92
96 else 93 list_del_init(&params->action);
97 params->explicit_connect = false; 94
95 switch (params->auto_connect) {
96 case HCI_AUTO_CONN_EXPLICIT:
97 hci_conn_params_del(hdev, bdaddr, bdaddr_type);
98 /* return instead of break to avoid duplicate scan update */
99 return;
100 case HCI_AUTO_CONN_DIRECT:
101 case HCI_AUTO_CONN_ALWAYS:
102 list_add(&params->action, &hdev->pend_le_conns);
103 break;
104 case HCI_AUTO_CONN_REPORT:
105 list_add(&params->action, &hdev->pend_le_reports);
106 break;
107 default:
108 break;
109 }
110
111 hci_update_background_scan(hdev);
112}
113
114static void hci_conn_cleanup(struct hci_conn *conn)
115{
116 struct hci_dev *hdev = conn->hdev;
117
118 if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags))
119 hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type);
120
121 hci_chan_list_flush(conn);
122
123 hci_conn_hash_del(hdev, conn);
124
125 if (hdev->notify)
126 hdev->notify(hdev, HCI_NOTIFY_CONN_DEL);
127
128 hci_conn_del_sysfs(conn);
129
130 debugfs_remove_recursive(conn->debugfs);
131
132 hci_dev_put(hdev);
133
134 hci_conn_put(conn);
135}
136
137static void le_scan_cleanup(struct work_struct *work)
138{
139 struct hci_conn *conn = container_of(work, struct hci_conn,
140 le_scan_cleanup);
141 struct hci_dev *hdev = conn->hdev;
142 struct hci_conn *c = NULL;
143
144 BT_DBG("%s hcon %p", hdev->name, conn);
145
146 hci_dev_lock(hdev);
147
148 /* Check that the hci_conn is still around */
149 rcu_read_lock();
150 list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) {
151 if (c == conn)
152 break;
153 }
154 rcu_read_unlock();
155
156 if (c == conn) {
157 hci_connect_le_scan_cleanup(conn);
158 hci_conn_cleanup(conn);
159 }
160
161 hci_dev_unlock(hdev);
162 hci_dev_put(hdev);
163 hci_conn_put(conn);
98} 164}
99 165
100/* This function requires the caller holds hdev->lock */
101static void hci_connect_le_scan_remove(struct hci_conn *conn) 166static void hci_connect_le_scan_remove(struct hci_conn *conn)
102{ 167{
103 hci_connect_le_scan_cleanup(conn); 168 BT_DBG("%s hcon %p", conn->hdev->name, conn);
169
170 /* We can't call hci_conn_del/hci_conn_cleanup here since that
171 * could deadlock with another hci_conn_del() call that's holding
172 * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work).
173 * Instead, grab temporary extra references to the hci_dev and
174 * hci_conn and perform the necessary cleanup in a separate work
175 * callback.
176 */
177
178 hci_dev_hold(conn->hdev);
179 hci_conn_get(conn);
104 180
105 hci_conn_hash_del(conn->hdev, conn); 181 schedule_work(&conn->le_scan_cleanup);
106 hci_update_background_scan(conn->hdev);
107} 182}
108 183
109static void hci_acl_create_connection(struct hci_conn *conn) 184static void hci_acl_create_connection(struct hci_conn *conn)
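
The deferral above boils down to a pin/defer/re-validate/unpin pattern: the remover cannot tear the connection down directly because hci_conn_del() may run under hci_dev_lock while sync-cancelling disc_work. Distilled to its essentials, using the calls from the hunk itself:

    /* remover side: pin both objects, then punt to a work item */
    hci_dev_hold(conn->hdev);
    hci_conn_get(conn);
    schedule_work(&conn->le_scan_cleanup);

    /* worker side: under hci_dev_lock(), re-check that conn is still
     * on hdev->conn_hash (the list walk above), clean up, and then
     * balance the references taken by the remover: */
    hci_dev_put(conn->hdev);
    hci_conn_put(conn);
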
@@ -149,33 +224,8 @@ static void hci_acl_create_connection(struct hci_conn *conn)
149 hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp); 224 hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp);
150} 225}
151 226
152static void hci_acl_create_connection_cancel(struct hci_conn *conn)
153{
154 struct hci_cp_create_conn_cancel cp;
155
156 BT_DBG("hcon %p", conn);
157
158 if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2)
159 return;
160
161 bacpy(&cp.bdaddr, &conn->dst);
162 hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp);
163}
164
165static void hci_reject_sco(struct hci_conn *conn)
166{
167 struct hci_cp_reject_sync_conn_req cp;
168
169 cp.reason = HCI_ERROR_REJ_LIMITED_RESOURCES;
170 bacpy(&cp.bdaddr, &conn->dst);
171
172 hci_send_cmd(conn->hdev, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(cp), &cp);
173}
174
175int hci_disconnect(struct hci_conn *conn, __u8 reason) 227int hci_disconnect(struct hci_conn *conn, __u8 reason)
176{ 228{
177 struct hci_cp_disconnect cp;
178
179 BT_DBG("hcon %p", conn); 229 BT_DBG("hcon %p", conn);
180 230
181 /* When we are master of an established connection and it enters 231 /* When we are master of an established connection and it enters
@@ -183,7 +233,8 @@ int hci_disconnect(struct hci_conn *conn, __u8 reason)
183 * current clock offset. Processing of the result is done 233 * current clock offset. Processing of the result is done
184 * within the event handling and hci_clock_offset_evt function. 234 * within the event handling and hci_clock_offset_evt function.
185 */ 235 */
186 if (conn->type == ACL_LINK && conn->role == HCI_ROLE_MASTER) { 236 if (conn->type == ACL_LINK && conn->role == HCI_ROLE_MASTER &&
237 (conn->state == BT_CONNECTED || conn->state == BT_CONFIG)) {
187 struct hci_dev *hdev = conn->hdev; 238 struct hci_dev *hdev = conn->hdev;
188 struct hci_cp_read_clock_offset clkoff_cp; 239 struct hci_cp_read_clock_offset clkoff_cp;
189 240
@@ -192,25 +243,7 @@ int hci_disconnect(struct hci_conn *conn, __u8 reason)
192 &clkoff_cp); 243 &clkoff_cp);
193 } 244 }
194 245
195 conn->state = BT_DISCONN; 246 return hci_abort_conn(conn, reason);
196
197 cp.handle = cpu_to_le16(conn->handle);
198 cp.reason = reason;
199 return hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp);
200}
201
202static void hci_amp_disconn(struct hci_conn *conn)
203{
204 struct hci_cp_disconn_phy_link cp;
205
206 BT_DBG("hcon %p", conn);
207
208 conn->state = BT_DISCONN;
209
210 cp.phy_handle = HCI_PHY_HANDLE(conn->handle);
211 cp.reason = hci_proto_disconn_ind(conn);
212 hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK,
213 sizeof(cp), &cp);
214} 247}
215 248
216static void hci_add_sco(struct hci_conn *conn, __u16 handle) 249static void hci_add_sco(struct hci_conn *conn, __u16 handle)
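
The extra state check added to hci_disconnect() above narrows the clock-offset read: only an established ACL link (BT_CONNECTED or BT_CONFIG) has a handle the controller can answer HCI_OP_READ_CLOCK_OFFSET for, so the read is presumably skipped for connections still being set up or already torn down. The new condition, restated on its own:

    /* Illustrative restatement of the guard above */
    bool read_clkoff = conn->type == ACL_LINK &&
                       conn->role == HCI_ROLE_MASTER &&
                       (conn->state == BT_CONNECTED ||
                        conn->state == BT_CONFIG);
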
@@ -376,35 +409,14 @@ static void hci_conn_timeout(struct work_struct *work)
376 if (refcnt > 0) 409 if (refcnt > 0)
377 return; 410 return;
378 411
379 switch (conn->state) { 412 /* LE connections in scanning state need special handling */
380 case BT_CONNECT: 413 if (conn->state == BT_CONNECT && conn->type == LE_LINK &&
381 case BT_CONNECT2: 414 test_bit(HCI_CONN_SCANNING, &conn->flags)) {
382 if (conn->out) { 415 hci_connect_le_scan_remove(conn);
383 if (conn->type == ACL_LINK) 416 return;
384 hci_acl_create_connection_cancel(conn);
385 else if (conn->type == LE_LINK) {
386 if (test_bit(HCI_CONN_SCANNING, &conn->flags))
387 hci_connect_le_scan_remove(conn);
388 else
389 hci_le_create_connection_cancel(conn);
390 }
391 } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
392 hci_reject_sco(conn);
393 }
394 break;
395 case BT_CONFIG:
396 case BT_CONNECTED:
397 if (conn->type == AMP_LINK) {
398 hci_amp_disconn(conn);
399 } else {
400 __u8 reason = hci_proto_disconn_ind(conn);
401 hci_disconnect(conn, reason);
402 }
403 break;
404 default:
405 conn->state = BT_CLOSED;
406 break;
407 } 417 }
418
419 hci_abort_conn(conn, hci_proto_disconn_ind(conn));
408} 420}
409 421
410/* Enter sniff mode */ 422/* Enter sniff mode */
@@ -472,7 +484,7 @@ static void le_conn_timeout(struct work_struct *work)
472 return; 484 return;
473 } 485 }
474 486
475 hci_le_create_connection_cancel(conn); 487 hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
476} 488}
477 489
478struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, 490struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
@@ -535,6 +547,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
535 INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept); 547 INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept);
536 INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle); 548 INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle);
537 INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout); 549 INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout);
550 INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup);
538 551
539 atomic_set(&conn->refcnt, 0); 552 atomic_set(&conn->refcnt, 0);
540 553
@@ -581,27 +594,17 @@ int hci_conn_del(struct hci_conn *conn)
581 } 594 }
582 } 595 }
583 596
584 hci_chan_list_flush(conn);
585
586 if (conn->amp_mgr) 597 if (conn->amp_mgr)
587 amp_mgr_put(conn->amp_mgr); 598 amp_mgr_put(conn->amp_mgr);
588 599
589 hci_conn_hash_del(hdev, conn);
590 if (hdev->notify)
591 hdev->notify(hdev, HCI_NOTIFY_CONN_DEL);
592
593 skb_queue_purge(&conn->data_q); 600 skb_queue_purge(&conn->data_q);
594 601
595 hci_conn_del_sysfs(conn); 602 /* Remove the connection from the list and cleanup its remaining
596 603 * state. This is a separate function since for some cases like
597 debugfs_remove_recursive(conn->debugfs); 604 * BT_CONNECT_SCAN we *only* want the cleanup part without the
598 605 * rest of hci_conn_del.
599 if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags)) 606 */
600 hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type); 607 hci_conn_cleanup(conn);
601
602 hci_dev_put(hdev);
603
604 hci_conn_put(conn);
605 608
606 return 0; 609 return 0;
607} 610}
@@ -800,7 +803,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
800 * attempt, we simply update pending_sec_level and auth_type fields 803 * attempt, we simply update pending_sec_level and auth_type fields
801 * and return the object found. 804 * and return the object found.
802 */ 805 */
803 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); 806 conn = hci_conn_hash_lookup_le(hdev, dst, dst_type);
804 conn_unfinished = NULL; 807 conn_unfinished = NULL;
805 if (conn) { 808 if (conn) {
806 if (conn->state == BT_CONNECT && 809 if (conn->state == BT_CONNECT &&
@@ -950,13 +953,10 @@ static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type)
950{ 953{
951 struct hci_conn *conn; 954 struct hci_conn *conn;
952 955
953 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); 956 conn = hci_conn_hash_lookup_le(hdev, addr, type);
954 if (!conn) 957 if (!conn)
955 return false; 958 return false;
956 959
957 if (conn->dst_type != type)
958 return false;
959
960 if (conn->state != BT_CONNECTED) 960 if (conn->state != BT_CONNECTED)
961 return false; 961 return false;
962 962
@@ -973,15 +973,23 @@ static int hci_explicit_conn_params_set(struct hci_request *req,
973 if (is_connected(hdev, addr, addr_type)) 973 if (is_connected(hdev, addr, addr_type))
974 return -EISCONN; 974 return -EISCONN;
975 975
976 params = hci_conn_params_add(hdev, addr, addr_type); 976 params = hci_conn_params_lookup(hdev, addr, addr_type);
977 if (!params) 977 if (!params) {
978 return -EIO; 978 params = hci_conn_params_add(hdev, addr, addr_type);
979 if (!params)
980 return -ENOMEM;
979 981
980 /* If we created new params, or existing params were marked as disabled, 982 /* If we created new params, mark them to be deleted in
981 * mark them to be used just once to connect. 983 * hci_connect_le_scan_cleanup. It's different case than
982 */ 984 * existing disabled params, those will stay after cleanup.
983 if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { 985 */
984 params->auto_connect = HCI_AUTO_CONN_EXPLICIT; 986 params->auto_connect = HCI_AUTO_CONN_EXPLICIT;
987 }
988
989 /* We're trying to connect, so make sure params are at pend_le_conns */
990 if (params->auto_connect == HCI_AUTO_CONN_DISABLED ||
991 params->auto_connect == HCI_AUTO_CONN_REPORT ||
992 params->auto_connect == HCI_AUTO_CONN_EXPLICIT) {
985 list_del_init(&params->action); 993 list_del_init(&params->action);
986 list_add(&params->action, &hdev->pend_le_conns); 994 list_add(&params->action, &hdev->pend_le_conns);
987 } 995 }
@@ -1021,7 +1029,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
1021 * attempt, we simply update pending_sec_level and auth_type fields 1029 * attempt, we simply update pending_sec_level and auth_type fields
1022 * and return the object found. 1030 * and return the object found.
1023 */ 1031 */
1024 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); 1032 conn = hci_conn_hash_lookup_le(hdev, dst, dst_type);
1025 if (conn) { 1033 if (conn) {
1026 if (conn->pending_sec_level < sec_level) 1034 if (conn->pending_sec_level < sec_level)
1027 conn->pending_sec_level = sec_level; 1035 conn->pending_sec_level = sec_level;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index adcbc74c2432..62edbf1b114e 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -65,13 +65,6 @@ static DEFINE_IDA(hci_index_ida);
65#define hci_req_lock(d) mutex_lock(&d->req_lock) 65#define hci_req_lock(d) mutex_lock(&d->req_lock)
66#define hci_req_unlock(d) mutex_unlock(&d->req_lock) 66#define hci_req_unlock(d) mutex_unlock(&d->req_lock)
67 67
68/* ---- HCI notifications ---- */
69
70static void hci_notify(struct hci_dev *hdev, int event)
71{
72 hci_sock_dev_event(hdev, event);
73}
74
75/* ---- HCI debugfs entries ---- */ 68/* ---- HCI debugfs entries ---- */
76 69
77static ssize_t dut_mode_read(struct file *file, char __user *user_buf, 70static ssize_t dut_mode_read(struct file *file, char __user *user_buf,
@@ -134,6 +127,77 @@ static const struct file_operations dut_mode_fops = {
134 .llseek = default_llseek, 127 .llseek = default_llseek,
135}; 128};
136 129
130static ssize_t vendor_diag_read(struct file *file, char __user *user_buf,
131 size_t count, loff_t *ppos)
132{
133 struct hci_dev *hdev = file->private_data;
134 char buf[3];
135
136 buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y': 'N';
137 buf[1] = '\n';
138 buf[2] = '\0';
139 return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
140}
141
142static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf,
143 size_t count, loff_t *ppos)
144{
145 struct hci_dev *hdev = file->private_data;
146 char buf[32];
147 size_t buf_size = min(count, (sizeof(buf)-1));
148 bool enable;
149 int err;
150
151 if (copy_from_user(buf, user_buf, buf_size))
152 return -EFAULT;
153
154 buf[buf_size] = '\0';
155 if (strtobool(buf, &enable))
156 return -EINVAL;
157
158 /* When the diagnostic flags are not persistent and the transport
159 * is not active, then there is no need for the vendor callback.
160 *
161 * Instead just store the desired value. If needed the setting
162 * will be programmed when the controller gets powered on.
163 */
164 if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
165 !test_bit(HCI_RUNNING, &hdev->flags))
166 goto done;
167
168 hci_req_lock(hdev);
169 err = hdev->set_diag(hdev, enable);
170 hci_req_unlock(hdev);
171
172 if (err < 0)
173 return err;
174
175done:
176 if (enable)
177 hci_dev_set_flag(hdev, HCI_VENDOR_DIAG);
178 else
179 hci_dev_clear_flag(hdev, HCI_VENDOR_DIAG);
180
181 return count;
182}
183
184static const struct file_operations vendor_diag_fops = {
185 .open = simple_open,
186 .read = vendor_diag_read,
187 .write = vendor_diag_write,
188 .llseek = default_llseek,
189};
190
191static void hci_debugfs_create_basic(struct hci_dev *hdev)
192{
193 debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev,
194 &dut_mode_fops);
195
196 if (hdev->set_diag)
197 debugfs_create_file("vendor_diag", 0644, hdev->debugfs, hdev,
198 &vendor_diag_fops);
199}
200
137/* ---- HCI requests ---- */ 201/* ---- HCI requests ---- */
138 202
139static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, 203static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
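
vendor_diag_write() above follows the stock debugfs boolean-attribute idiom: bounded copy_from_user(), explicit NUL termination, then strtobool(). The reusable skeleton, with the attribute-specific action left blank (a sketch, not tied to this driver):

    static ssize_t bool_attr_write(struct file *file,
                                   const char __user *user_buf,
                                   size_t count, loff_t *ppos)
    {
            char buf[32];
            size_t buf_size = min(count, sizeof(buf) - 1);
            bool enable;

            if (copy_from_user(buf, user_buf, buf_size))
                    return -EFAULT;

            buf[buf_size] = '\0';
            if (strtobool(buf, &enable))
                    return -EINVAL;

            /* ... apply 'enable' to the attribute ... */
            return count;
    }
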
@@ -444,12 +508,6 @@ static void le_setup(struct hci_request *req)
444 /* Read LE Supported States */ 508 /* Read LE Supported States */
445 hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); 509 hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
446 510
447 /* Read LE White List Size */
448 hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
449
450 /* Clear LE White List */
451 hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL);
452
453 /* LE-only controllers have LE implicitly enabled */ 511 /* LE-only controllers have LE implicitly enabled */
454 if (!lmp_bredr_capable(hdev)) 512 if (!lmp_bredr_capable(hdev))
455 hci_dev_set_flag(hdev, HCI_LE_ENABLED); 513 hci_dev_set_flag(hdev, HCI_LE_ENABLED);
@@ -693,7 +751,8 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
693 751
694 hci_setup_event_mask(req); 752 hci_setup_event_mask(req);
695 753
696 if (hdev->commands[6] & 0x20) { 754 if (hdev->commands[6] & 0x20 &&
755 !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) {
697 struct hci_cp_read_stored_link_key cp; 756 struct hci_cp_read_stored_link_key cp;
698 757
699 bacpy(&cp.bdaddr, BDADDR_ANY); 758 bacpy(&cp.bdaddr, BDADDR_ANY);
@@ -767,6 +826,17 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
767 hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); 826 hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
768 } 827 }
769 828
829 if (hdev->commands[26] & 0x40) {
830 /* Read LE White List Size */
831 hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE,
832 0, NULL);
833 }
834
835 if (hdev->commands[26] & 0x80) {
836 /* Clear LE White List */
837 hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL);
838 }
839
770 if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { 840 if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) {
771 /* Read LE Maximum Data Length */ 841 /* Read LE Maximum Data Length */
772 hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL); 842 hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL);
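
The commands[] array mirrors the controller's Supported Commands bitmap, so `hdev->commands[26] & 0x40` and `& 0x80` test octet 26, bits 6 and 7, which the core specification assigns to LE Read White List Size and LE Clear White List (matching the comments above). The same test written out explicitly, helper name hypothetical:

    static bool hci_cmd_supported(struct hci_dev *hdev,
                                  unsigned int octet, unsigned int bit)
    {
            return hdev->commands[octet] & BIT(bit);
    }

    /* LE Read White List Size lives at octet 26, bit 6 */
    if (hci_cmd_supported(hdev, 26, 6))
            hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
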
@@ -849,13 +919,8 @@ static int __hci_init(struct hci_dev *hdev)
849 if (err < 0) 919 if (err < 0)
850 return err; 920 return err;
851 921
852 /* The Device Under Test (DUT) mode is special and available for 922 if (hci_dev_test_flag(hdev, HCI_SETUP))
853 * all controller types. So just create it early on. 923 hci_debugfs_create_basic(hdev);
854 */
855 if (hci_dev_test_flag(hdev, HCI_SETUP)) {
856 debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev,
857 &dut_mode_fops);
858 }
859 924
860 err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT); 925 err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT);
861 if (err < 0) 926 if (err < 0)
@@ -932,6 +997,9 @@ static int __hci_unconf_init(struct hci_dev *hdev)
932 if (err < 0) 997 if (err < 0)
933 return err; 998 return err;
934 999
1000 if (hci_dev_test_flag(hdev, HCI_SETUP))
1001 hci_debugfs_create_basic(hdev);
1002
935 return 0; 1003 return 0;
936} 1004}
937 1005
@@ -1384,10 +1452,15 @@ static int hci_dev_do_open(struct hci_dev *hdev)
1384 goto done; 1452 goto done;
1385 } 1453 }
1386 1454
1455 set_bit(HCI_RUNNING, &hdev->flags);
1456 hci_sock_dev_event(hdev, HCI_DEV_OPEN);
1457
1387 atomic_set(&hdev->cmd_cnt, 1); 1458 atomic_set(&hdev->cmd_cnt, 1);
1388 set_bit(HCI_INIT, &hdev->flags); 1459 set_bit(HCI_INIT, &hdev->flags);
1389 1460
1390 if (hci_dev_test_flag(hdev, HCI_SETUP)) { 1461 if (hci_dev_test_flag(hdev, HCI_SETUP)) {
1462 hci_sock_dev_event(hdev, HCI_DEV_SETUP);
1463
1391 if (hdev->setup) 1464 if (hdev->setup)
1392 ret = hdev->setup(hdev); 1465 ret = hdev->setup(hdev);
1393 1466
@@ -1428,17 +1501,28 @@ static int hci_dev_do_open(struct hci_dev *hdev)
1428 1501
1429 if (!ret) { 1502 if (!ret) {
1430 if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && 1503 if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
1431 !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) 1504 !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
1432 ret = __hci_init(hdev); 1505 ret = __hci_init(hdev);
1506 if (!ret && hdev->post_init)
1507 ret = hdev->post_init(hdev);
1508 }
1433 } 1509 }
1434 1510
1511 /* If the HCI Reset command is clearing all diagnostic settings,
1512 * then they need to be reprogrammed after the init procedure
1513 * completed.
1514 */
1515 if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
1516 hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag)
1517 ret = hdev->set_diag(hdev, true);
1518
1435 clear_bit(HCI_INIT, &hdev->flags); 1519 clear_bit(HCI_INIT, &hdev->flags);
1436 1520
1437 if (!ret) { 1521 if (!ret) {
1438 hci_dev_hold(hdev); 1522 hci_dev_hold(hdev);
1439 hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); 1523 hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
1440 set_bit(HCI_UP, &hdev->flags); 1524 set_bit(HCI_UP, &hdev->flags);
1441 hci_notify(hdev, HCI_DEV_UP); 1525 hci_sock_dev_event(hdev, HCI_DEV_UP);
1442 if (!hci_dev_test_flag(hdev, HCI_SETUP) && 1526 if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
1443 !hci_dev_test_flag(hdev, HCI_CONFIG) && 1527 !hci_dev_test_flag(hdev, HCI_CONFIG) &&
1444 !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && 1528 !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
@@ -1465,6 +1549,9 @@ static int hci_dev_do_open(struct hci_dev *hdev)
1465 hdev->sent_cmd = NULL; 1549 hdev->sent_cmd = NULL;
1466 } 1550 }
1467 1551
1552 clear_bit(HCI_RUNNING, &hdev->flags);
1553 hci_sock_dev_event(hdev, HCI_DEV_CLOSE);
1554
1468 hdev->close(hdev); 1555 hdev->close(hdev);
1469 hdev->flags &= BIT(HCI_RAW); 1556 hdev->flags &= BIT(HCI_RAW);
1470 } 1557 }
@@ -1548,8 +1635,10 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
1548 BT_DBG("All LE pending actions cleared"); 1635 BT_DBG("All LE pending actions cleared");
1549} 1636}
1550 1637
1551static int hci_dev_do_close(struct hci_dev *hdev) 1638int hci_dev_do_close(struct hci_dev *hdev)
1552{ 1639{
1640 bool auto_off;
1641
1553 BT_DBG("%s %p", hdev->name, hdev); 1642 BT_DBG("%s %p", hdev->name, hdev);
1554 1643
1555 if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && 1644 if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
@@ -1605,10 +1694,10 @@ static int hci_dev_do_close(struct hci_dev *hdev)
1605 1694
1606 hci_discovery_set_state(hdev, DISCOVERY_STOPPED); 1695 hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
1607 1696
1608 if (!hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { 1697 auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF);
1609 if (hdev->dev_type == HCI_BREDR) 1698
1610 mgmt_powered(hdev, 0); 1699 if (!auto_off && hdev->dev_type == HCI_BREDR)
1611 } 1700 mgmt_powered(hdev, 0);
1612 1701
1613 hci_inquiry_cache_flush(hdev); 1702 hci_inquiry_cache_flush(hdev);
1614 hci_pend_le_actions_clear(hdev); 1703 hci_pend_le_actions_clear(hdev);
@@ -1617,7 +1706,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
1617 1706
1618 smp_unregister(hdev); 1707 smp_unregister(hdev);
1619 1708
1620 hci_notify(hdev, HCI_DEV_DOWN); 1709 hci_sock_dev_event(hdev, HCI_DEV_DOWN);
1621 1710
1622 if (hdev->flush) 1711 if (hdev->flush)
1623 hdev->flush(hdev); 1712 hdev->flush(hdev);
@@ -1625,9 +1714,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
1625 /* Reset device */ 1714 /* Reset device */
1626 skb_queue_purge(&hdev->cmd_q); 1715 skb_queue_purge(&hdev->cmd_q);
1627 atomic_set(&hdev->cmd_cnt, 1); 1716 atomic_set(&hdev->cmd_cnt, 1);
1628 if (!hci_dev_test_flag(hdev, HCI_AUTO_OFF) && 1717 if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) &&
1629 !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && 1718 !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
1630 test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {
1631 set_bit(HCI_INIT, &hdev->flags); 1719 set_bit(HCI_INIT, &hdev->flags);
1632 __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); 1720 __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT);
1633 clear_bit(HCI_INIT, &hdev->flags); 1721 clear_bit(HCI_INIT, &hdev->flags);
@@ -1648,6 +1736,9 @@ static int hci_dev_do_close(struct hci_dev *hdev)
1648 hdev->sent_cmd = NULL; 1736 hdev->sent_cmd = NULL;
1649 } 1737 }
1650 1738
1739 clear_bit(HCI_RUNNING, &hdev->flags);
1740 hci_sock_dev_event(hdev, HCI_DEV_CLOSE);
1741
1651 /* After this point our queues are empty 1742 /* After this point our queues are empty
1652 * and no tasks are scheduled. */ 1743 * and no tasks are scheduled. */
1653 hdev->close(hdev); 1744 hdev->close(hdev);
@@ -2848,30 +2939,6 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
2848} 2939}
2849 2940
2850/* This function requires the caller holds hdev->lock */ 2941/* This function requires the caller holds hdev->lock */
2851struct hci_conn_params *hci_explicit_connect_lookup(struct hci_dev *hdev,
2852 bdaddr_t *addr,
2853 u8 addr_type)
2854{
2855 struct hci_conn_params *param;
2856
2857 list_for_each_entry(param, &hdev->pend_le_conns, action) {
2858 if (bacmp(&param->addr, addr) == 0 &&
2859 param->addr_type == addr_type &&
2860 param->explicit_connect)
2861 return param;
2862 }
2863
2864 list_for_each_entry(param, &hdev->pend_le_reports, action) {
2865 if (bacmp(&param->addr, addr) == 0 &&
2866 param->addr_type == addr_type &&
2867 param->explicit_connect)
2868 return param;
2869 }
2870
2871 return NULL;
2872}
2873
2874/* This function requires the caller holds hdev->lock */
2875struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, 2942struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
2876 bdaddr_t *addr, u8 addr_type) 2943 bdaddr_t *addr, u8 addr_type)
2877{ 2944{
@@ -3345,7 +3412,7 @@ int hci_register_dev(struct hci_dev *hdev)
3345 if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) 3412 if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
3346 hci_dev_set_flag(hdev, HCI_UNCONFIGURED); 3413 hci_dev_set_flag(hdev, HCI_UNCONFIGURED);
3347 3414
3348 hci_notify(hdev, HCI_DEV_REG); 3415 hci_sock_dev_event(hdev, HCI_DEV_REG);
3349 hci_dev_hold(hdev); 3416 hci_dev_hold(hdev);
3350 3417
3351 queue_work(hdev->req_workqueue, &hdev->power_on); 3418 queue_work(hdev->req_workqueue, &hdev->power_on);
@@ -3393,7 +3460,7 @@ void hci_unregister_dev(struct hci_dev *hdev)
3393 * pending list */ 3460 * pending list */
3394 BUG_ON(!list_empty(&hdev->mgmt_pending)); 3461 BUG_ON(!list_empty(&hdev->mgmt_pending));
3395 3462
3396 hci_notify(hdev, HCI_DEV_UNREG); 3463 hci_sock_dev_event(hdev, HCI_DEV_UNREG);
3397 3464
3398 if (hdev->rfkill) { 3465 if (hdev->rfkill) {
3399 rfkill_unregister(hdev->rfkill); 3466 rfkill_unregister(hdev->rfkill);
@@ -3430,7 +3497,7 @@ EXPORT_SYMBOL(hci_unregister_dev);
3430/* Suspend HCI device */ 3497/* Suspend HCI device */
3431int hci_suspend_dev(struct hci_dev *hdev) 3498int hci_suspend_dev(struct hci_dev *hdev)
3432{ 3499{
3433 hci_notify(hdev, HCI_DEV_SUSPEND); 3500 hci_sock_dev_event(hdev, HCI_DEV_SUSPEND);
3434 return 0; 3501 return 0;
3435} 3502}
3436EXPORT_SYMBOL(hci_suspend_dev); 3503EXPORT_SYMBOL(hci_suspend_dev);
@@ -3438,7 +3505,7 @@ EXPORT_SYMBOL(hci_suspend_dev);
3438/* Resume HCI device */ 3505/* Resume HCI device */
3439int hci_resume_dev(struct hci_dev *hdev) 3506int hci_resume_dev(struct hci_dev *hdev)
3440{ 3507{
3441 hci_notify(hdev, HCI_DEV_RESUME); 3508 hci_sock_dev_event(hdev, HCI_DEV_RESUME);
3442 return 0; 3509 return 0;
3443} 3510}
3444EXPORT_SYMBOL(hci_resume_dev); 3511EXPORT_SYMBOL(hci_resume_dev);
@@ -3470,6 +3537,13 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb)
3470 return -ENXIO; 3537 return -ENXIO;
3471 } 3538 }
3472 3539
3540 if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT &&
3541 bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT &&
3542 bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) {
3543 kfree_skb(skb);
3544 return -EINVAL;
3545 }
3546
3473 /* Incoming skb */ 3547 /* Incoming skb */
3474 bt_cb(skb)->incoming = 1; 3548 bt_cb(skb)->incoming = 1;
3475 3549
@@ -3483,6 +3557,22 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb)
3483} 3557}
3484EXPORT_SYMBOL(hci_recv_frame); 3558EXPORT_SYMBOL(hci_recv_frame);
3485 3559
3560/* Receive diagnostic message from HCI drivers */
3561int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb)
3562{
3563 /* Mark as diagnostic packet */
3564 bt_cb(skb)->pkt_type = HCI_DIAG_PKT;
3565
3566 /* Time stamp */
3567 __net_timestamp(skb);
3568
3569 skb_queue_tail(&hdev->rx_q, skb);
3570 queue_work(hdev->workqueue, &hdev->rx_work);
3571
3572 return 0;
3573}
3574EXPORT_SYMBOL(hci_recv_diag);
3575
3486/* ---- Interface to upper protocols ---- */ 3576/* ---- Interface to upper protocols ---- */
3487 3577
3488int hci_register_cb(struct hci_cb *cb) 3578int hci_register_cb(struct hci_cb *cb)
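
hci_recv_frame() now rejects anything that is not an event, ACL, or SCO packet, and hci_recv_diag() becomes the dedicated entry point for vendor diagnostic traces. A hypothetical driver RX path using the two entry points (driver name and framing are assumptions):

    static int my_drv_deliver(struct hci_dev *hdev, const void *buf,
                              size_t len, u8 pkt_type)
    {
            struct sk_buff *skb = bt_skb_alloc(len, GFP_ATOMIC);

            if (!skb)
                    return -ENOMEM;
            memcpy(skb_put(skb, len), buf, len);

            if (pkt_type == HCI_DIAG_PKT)
                    return hci_recv_diag(hdev, skb); /* marks skb itself */

            bt_cb(skb)->pkt_type = pkt_type; /* EVENT/ACL/SCO; anything
                                              * else gets -EINVAL above */
            return hci_recv_frame(hdev, skb);
    }
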
@@ -3529,6 +3619,11 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
3529 /* Get rid of skb owner, prior to sending to the driver. */ 3619 /* Get rid of skb owner, prior to sending to the driver. */
3530 skb_orphan(skb); 3620 skb_orphan(skb);
3531 3621
3622 if (!test_bit(HCI_RUNNING, &hdev->flags)) {
3623 kfree_skb(skb);
3624 return;
3625 }
3626
3532 err = hdev->send(hdev, skb); 3627 err = hdev->send(hdev, skb);
3533 if (err < 0) { 3628 if (err < 0) {
3534 BT_ERR("%s sending frame failed (%d)", hdev->name, err); 3629 BT_ERR("%s sending frame failed (%d)", hdev->name, err);
@@ -3553,7 +3648,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
3553 /* Stand-alone HCI commands must be flagged as 3648 /* Stand-alone HCI commands must be flagged as
3554 * single-command requests. 3649 * single-command requests.
3555 */ 3650 */
3556 bt_cb(skb)->req.start = true; 3651 bt_cb(skb)->hci.req_start = true;
3557 3652
3558 skb_queue_tail(&hdev->cmd_q, skb); 3653 skb_queue_tail(&hdev->cmd_q, skb);
3559 queue_work(hdev->workqueue, &hdev->cmd_work); 3654 queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -3579,6 +3674,25 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
3579 return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE; 3674 return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE;
3580} 3675}
3581 3676
3677/* Send HCI command and wait for the command complete event */
3678struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
3679 const void *param, u32 timeout)
3680{
3681 struct sk_buff *skb;
3682
3683 if (!test_bit(HCI_UP, &hdev->flags))
3684 return ERR_PTR(-ENETDOWN);
3685
3686 bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen);
3687
3688 hci_req_lock(hdev);
3689 skb = __hci_cmd_sync(hdev, opcode, plen, param, timeout);
3690 hci_req_unlock(hdev);
3691
3692 return skb;
3693}
3694EXPORT_SYMBOL(hci_cmd_sync);
3695
3582/* Send ACL data */ 3696/* Send ACL data */
3583static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags) 3697static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
3584{ 3698{
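
hci_cmd_sync() above gives drivers a blocking request/response primitive: it fails fast with ERR_PTR(-ENETDOWN) when the device is not up, and otherwise returns the skb carrying the command complete parameters. Usage fragment, opcode chosen purely for illustration:

    struct sk_buff *skb;

    skb = hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL,
                       HCI_CMD_TIMEOUT);
    if (IS_ERR(skb))
            return PTR_ERR(skb);

    /* skb->data holds the return parameters for the opcode */
    kfree_skb(skb);
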
@@ -4231,7 +4345,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev)
4231 if (!skb) 4345 if (!skb)
4232 return true; 4346 return true;
4233 4347
4234 return bt_cb(skb)->req.start; 4348 return bt_cb(skb)->hci.req_start;
4235} 4349}
4236 4350
4237static void hci_resend_last(struct hci_dev *hdev) 4351static void hci_resend_last(struct hci_dev *hdev)
@@ -4291,26 +4405,26 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
4291 * callback would be found in hdev->sent_cmd instead of the 4405 * callback would be found in hdev->sent_cmd instead of the
4292 * command queue (hdev->cmd_q). 4406 * command queue (hdev->cmd_q).
4293 */ 4407 */
4294 if (bt_cb(hdev->sent_cmd)->req.complete) { 4408 if (bt_cb(hdev->sent_cmd)->hci.req_complete) {
4295 *req_complete = bt_cb(hdev->sent_cmd)->req.complete; 4409 *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete;
4296 return; 4410 return;
4297 } 4411 }
4298 4412
4299 if (bt_cb(hdev->sent_cmd)->req.complete_skb) { 4413 if (bt_cb(hdev->sent_cmd)->hci.req_complete_skb) {
4300 *req_complete_skb = bt_cb(hdev->sent_cmd)->req.complete_skb; 4414 *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb;
4301 return; 4415 return;
4302 } 4416 }
4303 4417
4304 /* Remove all pending commands belonging to this request */ 4418 /* Remove all pending commands belonging to this request */
4305 spin_lock_irqsave(&hdev->cmd_q.lock, flags); 4419 spin_lock_irqsave(&hdev->cmd_q.lock, flags);
4306 while ((skb = __skb_dequeue(&hdev->cmd_q))) { 4420 while ((skb = __skb_dequeue(&hdev->cmd_q))) {
4307 if (bt_cb(skb)->req.start) { 4421 if (bt_cb(skb)->hci.req_start) {
4308 __skb_queue_head(&hdev->cmd_q, skb); 4422 __skb_queue_head(&hdev->cmd_q, skb);
4309 break; 4423 break;
4310 } 4424 }
4311 4425
4312 *req_complete = bt_cb(skb)->req.complete; 4426 *req_complete = bt_cb(skb)->hci.req_complete;
4313 *req_complete_skb = bt_cb(skb)->req.complete_skb; 4427 *req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
4314 kfree_skb(skb); 4428 kfree_skb(skb);
4315 } 4429 }
4316 spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); 4430 spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 186041866315..d57c11c1c6b5 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -55,7 +55,12 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
55 wake_up_bit(&hdev->flags, HCI_INQUIRY); 55 wake_up_bit(&hdev->flags, HCI_INQUIRY);
56 56
57 hci_dev_lock(hdev); 57 hci_dev_lock(hdev);
58 hci_discovery_set_state(hdev, DISCOVERY_STOPPED); 58 /* Set discovery state to stopped if we're not doing LE active
59 * scanning.
60 */
61 if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) ||
62 hdev->le_scan_type != LE_SCAN_ACTIVE)
63 hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
59 hci_dev_unlock(hdev); 64 hci_dev_unlock(hdev);
60 65
61 hci_conn_check_pending(hdev); 66 hci_conn_check_pending(hdev);
@@ -1910,7 +1915,8 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
1910 1915
1911 hci_dev_lock(hdev); 1916 hci_dev_lock(hdev);
1912 1917
1913 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr); 1918 conn = hci_conn_hash_lookup_le(hdev, &cp->peer_addr,
1919 cp->peer_addr_type);
1914 if (!conn) 1920 if (!conn)
1915 goto unlock; 1921 goto unlock;
1916 1922
@@ -3132,7 +3138,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
3132 * complete event). 3138 * complete event).
3133 */ 3139 */
3134 if (ev->status || 3140 if (ev->status ||
3135 (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event)) 3141 (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->hci.req_event))
3136 hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, 3142 hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete,
3137 req_complete_skb); 3143 req_complete_skb);
3138 3144
@@ -4648,8 +4654,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
4648 /* If we're not connectable only connect devices that we have in 4654 /* If we're not connectable only connect devices that we have in
4649 * our pend_le_conns list. 4655 * our pend_le_conns list.
4650 */ 4656 */
4651 params = hci_explicit_connect_lookup(hdev, addr, addr_type); 4657 params = hci_pend_le_action_lookup(&hdev->pend_le_conns, addr,
4652 4658 addr_type);
4653 if (!params) 4659 if (!params)
4654 return NULL; 4660 return NULL;
4655 4661
@@ -4719,6 +4725,27 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
4719 struct hci_conn *conn; 4725 struct hci_conn *conn;
4720 bool match; 4726 bool match;
4721 u32 flags; 4727 u32 flags;
4728 u8 *ptr, real_len;
4729
4730 /* Find the end of the data in case the report contains padded zero
4731 * bytes at the end causing an invalid length value.
4732 *
4733 * When data is NULL, len is 0 so there is no need for extra ptr
 4734 * check as 'ptr < data + 0' is already false in such a case.
4735 */
4736 for (ptr = data; ptr < data + len && *ptr; ptr += *ptr + 1) {
4737 if (ptr + 1 + *ptr > data + len)
4738 break;
4739 }
4740
4741 real_len = ptr - data;
4742
4743 /* Adjust for actual length */
4744 if (len != real_len) {
4745 BT_ERR_RATELIMITED("%s advertising data length corrected",
4746 hdev->name);
4747 len = real_len;
4748 }
4722 4749
4723 /* If the direct address is present, then this report is from 4750 /* If the direct address is present, then this report is from
4724 * a LE Direct Advertising Report event. In that case it is 4751 * a LE Direct Advertising Report event. In that case it is
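
The trimming loop above walks the advertising data as length-prefixed structures (<len><type><payload>, where a zero length byte terminates the data) and corrects controllers that pad the report with trailing zeros. Worked example, bytes illustrative:

    /* Reported len = 5, but only one AD structure is present:
     *   02 01 06   Flags (len 2, type 0x01, value 0x06)
     *   00 00      zero padding appended by the controller
     * The walk stops at data[3] (*ptr == 0), so real_len = 3 and len
     * is corrected before the report is processed further. */
    u8 data[] = { 0x02, 0x01, 0x06, 0x00, 0x00 };
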
@@ -5182,7 +5209,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
5182 u8 status = 0, event = hdr->evt, req_evt = 0; 5209 u8 status = 0, event = hdr->evt, req_evt = 0;
5183 u16 opcode = HCI_OP_NOP; 5210 u16 opcode = HCI_OP_NOP;
5184 5211
5185 if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) { 5212 if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->hci.req_event == event) {
5186 struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data; 5213 struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data;
5187 opcode = __le16_to_cpu(cmd_hdr->opcode); 5214 opcode = __le16_to_cpu(cmd_hdr->opcode);
5188 hci_req_cmd_complete(hdev, opcode, status, &req_complete, 5215 hci_req_cmd_complete(hdev, opcode, status, &req_complete,
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index b7369220c9ef..981f8a202c27 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -56,8 +56,8 @@ static int req_run(struct hci_request *req, hci_req_complete_t complete,
 		return -ENODATA;
 
 	skb = skb_peek_tail(&req->cmd_q);
-	bt_cb(skb)->req.complete = complete;
-	bt_cb(skb)->req.complete_skb = complete_skb;
+	bt_cb(skb)->hci.req_complete = complete;
+	bt_cb(skb)->hci.req_complete_skb = complete_skb;
 
 	spin_lock_irqsave(&hdev->cmd_q.lock, flags);
 	skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
@@ -99,7 +99,7 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
 	BT_DBG("skb len %d", skb->len);
 
 	bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
-	bt_cb(skb)->opcode = opcode;
+	bt_cb(skb)->hci.opcode = opcode;
 
 	return skb;
 }
@@ -128,9 +128,9 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
 	}
 
 	if (skb_queue_empty(&req->cmd_q))
-		bt_cb(skb)->req.start = true;
+		bt_cb(skb)->hci.req_start = true;
 
-	bt_cb(skb)->req.event = event;
+	bt_cb(skb)->hci.req_event = event;
 
 	skb_queue_tail(&req->cmd_q, skb);
 }
@@ -564,3 +564,96 @@ void hci_update_background_scan(struct hci_dev *hdev)
564 if (err && err != -ENODATA) 564 if (err && err != -ENODATA)
565 BT_ERR("Failed to run HCI request: err %d", err); 565 BT_ERR("Failed to run HCI request: err %d", err);
566} 566}
567
568void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn,
569 u8 reason)
570{
571 switch (conn->state) {
572 case BT_CONNECTED:
573 case BT_CONFIG:
574 if (conn->type == AMP_LINK) {
575 struct hci_cp_disconn_phy_link cp;
576
577 cp.phy_handle = HCI_PHY_HANDLE(conn->handle);
578 cp.reason = reason;
579 hci_req_add(req, HCI_OP_DISCONN_PHY_LINK, sizeof(cp),
580 &cp);
581 } else {
582 struct hci_cp_disconnect dc;
583
584 dc.handle = cpu_to_le16(conn->handle);
585 dc.reason = reason;
586 hci_req_add(req, HCI_OP_DISCONNECT, sizeof(dc), &dc);
587 }
588
589 conn->state = BT_DISCONN;
590
591 break;
592 case BT_CONNECT:
593 if (conn->type == LE_LINK) {
594 if (test_bit(HCI_CONN_SCANNING, &conn->flags))
595 break;
596 hci_req_add(req, HCI_OP_LE_CREATE_CONN_CANCEL,
597 0, NULL);
598 } else if (conn->type == ACL_LINK) {
599 if (req->hdev->hci_ver < BLUETOOTH_VER_1_2)
600 break;
601 hci_req_add(req, HCI_OP_CREATE_CONN_CANCEL,
602 6, &conn->dst);
603 }
604 break;
605 case BT_CONNECT2:
606 if (conn->type == ACL_LINK) {
607 struct hci_cp_reject_conn_req rej;
608
609 bacpy(&rej.bdaddr, &conn->dst);
610 rej.reason = reason;
611
612 hci_req_add(req, HCI_OP_REJECT_CONN_REQ,
613 sizeof(rej), &rej);
614 } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
615 struct hci_cp_reject_sync_conn_req rej;
616
617 bacpy(&rej.bdaddr, &conn->dst);
618
619 /* SCO rejection has its own limited set of
620 * allowed error values (0x0D-0x0F) which isn't
621 * compatible with most values passed to this
622 * function. To be safe hard-code one of the
623 * values that's suitable for SCO.
624 */
625			rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES;
626
627 hci_req_add(req, HCI_OP_REJECT_SYNC_CONN_REQ,
628 sizeof(rej), &rej);
629 }
630 break;
631 default:
632 conn->state = BT_CLOSED;
633 break;
634 }
635}
636
637static void abort_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode)
638{
639 if (status)
640 BT_DBG("Failed to abort connection: status 0x%2.2x", status);
641}
642
643int hci_abort_conn(struct hci_conn *conn, u8 reason)
644{
645 struct hci_request req;
646 int err;
647
648 hci_req_init(&req, conn->hdev);
649
650 __hci_abort_conn(&req, conn, reason);
651
652 err = hci_req_run(&req, abort_conn_complete);
653 if (err && err != -ENODATA) {
654 BT_ERR("Failed to run HCI request: err %d", err);
655 return err;
656 }
657
658 return 0;
659}
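For reference, the state-to-command mapping that __hci_abort_conn() implements reduces to a small table (the kernel additionally skips LE links still in the scanning phase and pre-1.2 ACL controllers, and forces unknown states straight to BT_CLOSED). The sketch below restates it outside the kernel; the enum values are invented for the example and only the string names mirror the real opcodes:

#include <stdio.h>

enum state { ST_CONNECTED, ST_CONFIG, ST_CONNECT, ST_CONNECT2, ST_OTHER };
enum link  { LK_ACL, LK_LE, LK_SCO, LK_ESCO, LK_AMP };

static const char *abort_op(enum state s, enum link t)
{
	switch (s) {
	case ST_CONNECTED:
	case ST_CONFIG:		/* established: disconnect the (phy) link */
		return t == LK_AMP ? "HCI_OP_DISCONN_PHY_LINK"
				   : "HCI_OP_DISCONNECT";
	case ST_CONNECT:	/* outgoing attempt: cancel it */
		if (t == LK_LE)
			return "HCI_OP_LE_CREATE_CONN_CANCEL";
		if (t == LK_ACL)
			return "HCI_OP_CREATE_CONN_CANCEL";
		return NULL;
	case ST_CONNECT2:	/* incoming attempt: reject it */
		if (t == LK_ACL)
			return "HCI_OP_REJECT_CONN_REQ";
		if (t == LK_SCO || t == LK_ESCO)
			return "HCI_OP_REJECT_SYNC_CONN_REQ";
		return NULL;
	default:		/* nothing on the air; just mark closed */
		return NULL;
	}
}

int main(void)
{
	printf("%s\n", abort_op(ST_CONNECT, LK_LE));
	return 0;
}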
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index bf6df92f42db..25c7f1305dcb 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -55,3 +55,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
55 55
56void hci_update_background_scan(struct hci_dev *hdev); 56void hci_update_background_scan(struct hci_dev *hdev);
57void __hci_update_background_scan(struct hci_request *req); 57void __hci_update_background_scan(struct hci_request *req);
58
59int hci_abort_conn(struct hci_conn *conn, u8 reason);
60void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn,
61 u8 reason);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f2d30d1156c9..b1eb8c09a660 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -120,10 +120,7 @@ static bool is_filtered_packet(struct sock *sk, struct sk_buff *skb)
 	/* Apply filter */
 	flt = &hci_pi(sk)->filter;
 
-	if (bt_cb(skb)->pkt_type == HCI_VENDOR_PKT)
-		flt_type = 0;
-	else
-		flt_type = bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS;
+	flt_type = bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS;
 
 	if (!test_bit(flt_type, &flt->type_mask))
 		return true;
@@ -173,6 +170,11 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 			continue;
 
 		if (hci_pi(sk)->channel == HCI_CHANNEL_RAW) {
+			if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT &&
+			    bt_cb(skb)->pkt_type != HCI_EVENT_PKT &&
+			    bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT &&
+			    bt_cb(skb)->pkt_type != HCI_SCODATA_PKT)
+				continue;
 			if (is_filtered_packet(sk, skb))
 				continue;
 		} else if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
@@ -279,6 +281,9 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
 		else
 			opcode = cpu_to_le16(HCI_MON_SCO_TX_PKT);
 		break;
+	case HCI_DIAG_PKT:
+		opcode = cpu_to_le16(HCI_MON_VENDOR_DIAG);
+		break;
 	default:
 		return;
 	}
@@ -303,6 +308,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
 {
 	struct hci_mon_hdr *hdr;
 	struct hci_mon_new_index *ni;
+	struct hci_mon_index_info *ii;
 	struct sk_buff *skb;
 	__le16 opcode;
 
@@ -312,7 +318,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
 		if (!skb)
 			return NULL;
 
-		ni = (void *) skb_put(skb, HCI_MON_NEW_INDEX_SIZE);
+		ni = (void *)skb_put(skb, HCI_MON_NEW_INDEX_SIZE);
 		ni->type = hdev->dev_type;
 		ni->bus = hdev->bus;
 		bacpy(&ni->bdaddr, &hdev->bdaddr);
@@ -329,6 +335,40 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
 		opcode = cpu_to_le16(HCI_MON_DEL_INDEX);
 		break;
 
+	case HCI_DEV_SETUP:
+		if (hdev->manufacturer == 0xffff)
+			return NULL;
+
+		/* fall through */
+
+	case HCI_DEV_UP:
+		skb = bt_skb_alloc(HCI_MON_INDEX_INFO_SIZE, GFP_ATOMIC);
+		if (!skb)
+			return NULL;
+
+		ii = (void *)skb_put(skb, HCI_MON_INDEX_INFO_SIZE);
+		bacpy(&ii->bdaddr, &hdev->bdaddr);
+		ii->manufacturer = cpu_to_le16(hdev->manufacturer);
+
+		opcode = cpu_to_le16(HCI_MON_INDEX_INFO);
+		break;
+
+	case HCI_DEV_OPEN:
+		skb = bt_skb_alloc(0, GFP_ATOMIC);
+		if (!skb)
+			return NULL;
+
+		opcode = cpu_to_le16(HCI_MON_OPEN_INDEX);
+		break;
+
+	case HCI_DEV_CLOSE:
+		skb = bt_skb_alloc(0, GFP_ATOMIC);
+		if (!skb)
+			return NULL;
+
+		opcode = cpu_to_le16(HCI_MON_CLOSE_INDEX);
+		break;
+
 	default:
 		return NULL;
 	}
@@ -358,6 +398,28 @@ static void send_monitor_replay(struct sock *sk)
 
 		if (sock_queue_rcv_skb(sk, skb))
 			kfree_skb(skb);
+
+		if (!test_bit(HCI_RUNNING, &hdev->flags))
+			continue;
+
+		skb = create_monitor_event(hdev, HCI_DEV_OPEN);
+		if (!skb)
+			continue;
+
+		if (sock_queue_rcv_skb(sk, skb))
+			kfree_skb(skb);
+
+		if (test_bit(HCI_UP, &hdev->flags))
+			skb = create_monitor_event(hdev, HCI_DEV_UP);
+		else if (hci_dev_test_flag(hdev, HCI_SETUP))
+			skb = create_monitor_event(hdev, HCI_DEV_SETUP);
+		else
+			skb = NULL;
+
+		if (skb) {
+			if (sock_queue_rcv_skb(sk, skb))
+				kfree_skb(skb);
+		}
 	}
 
 	read_unlock(&hci_dev_list_lock);
@@ -392,14 +454,12 @@ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data)
 
 void hci_sock_dev_event(struct hci_dev *hdev, int event)
 {
-	struct hci_ev_si_device ev;
-
 	BT_DBG("hdev %s event %d", hdev->name, event);
 
-	/* Send event to monitor */
 	if (atomic_read(&monitor_promisc)) {
 		struct sk_buff *skb;
 
+		/* Send event to monitor */
 		skb = create_monitor_event(hdev, event);
 		if (skb) {
 			hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
@@ -408,10 +468,14 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
 		}
 	}
 
-	/* Send event to sockets */
-	ev.event = event;
-	ev.dev_id = hdev->id;
-	hci_si_event(NULL, HCI_EV_SI_DEVICE, sizeof(ev), &ev);
+	if (event <= HCI_DEV_DOWN) {
+		struct hci_ev_si_device ev;
+
+		/* Send event to sockets */
+		ev.event = event;
+		ev.dev_id = hdev->id;
+		hci_si_event(NULL, HCI_EV_SI_DEVICE, sizeof(ev), &ev);
+	}
 
 	if (event == HCI_DEV_UNREG) {
 		struct sock *sk;
@@ -503,7 +567,16 @@ static int hci_sock_release(struct socket *sock)
 
 	if (hdev) {
 		if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
-			hci_dev_close(hdev->id);
+			/* When releasing a user channel exclusive access,
+			 * call hci_dev_do_close directly instead of calling
+			 * hci_dev_close to ensure the exclusive access will
+			 * be released and the controller brought back down.
+			 *
+			 * The checking of HCI_AUTO_OFF is not needed in this
+			 * case since it will have been cleared already when
+			 * opening the user channel.
+			 */
+			hci_dev_do_close(hdev);
 			hci_dev_clear_flag(hdev, HCI_USER_CHANNEL);
 			mgmt_index_added(hdev);
509 } 582 }
@@ -928,7 +1001,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
-	if (flags & (MSG_OOB))
+	if (flags & MSG_OOB)
 		return -EOPNOTSUPP;
 
 	if (sk->sk_state == BT_CLOSED)
@@ -1176,7 +1249,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 			/* Stand-alone HCI commands must be flagged as
 			 * single-command requests.
 			 */
-			bt_cb(skb)->req.start = true;
+			bt_cb(skb)->hci.req_start = true;
 
 			skb_queue_tail(&hdev->cmd_q, skb);
 			queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -1187,6 +1260,12 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 			goto drop;
 		}
 
+		if (bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT &&
+		    bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) {
+			err = -EINVAL;
+			goto drop;
+		}
+
 		skb_queue_tail(&hdev->raw_q, skb);
 		queue_work(hdev->workqueue, &hdev->tx_work);
 	}
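The new checks in hci_sock.c all gate on the same four core packet types. A minimal userspace sketch of that whitelist, assuming the standard HCI UART framing values (0x01 command, 0x02 ACL data, 0x03 SCO data, 0x04 event):

#include <stdbool.h>
#include <stdint.h>

#define HCI_COMMAND_PKT	0x01
#define HCI_ACLDATA_PKT	0x02
#define HCI_SCODATA_PKT	0x03
#define HCI_EVENT_PKT	0x04

/* Raw sockets may only see (and, for data, send) the core packet
 * types; anything else, e.g. vendor diagnostic packets, is skipped
 * on receive and rejected with -EINVAL on send. */
static bool raw_pkt_type_ok(uint8_t type)
{
	return type == HCI_COMMAND_PKT || type == HCI_EVENT_PKT ||
	       type == HCI_ACLDATA_PKT || type == HCI_SCODATA_PKT;
}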
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index f1a117f8cad2..0bec4588c3c8 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -401,6 +401,20 @@ static void hidp_idle_timeout(unsigned long arg)
401{ 401{
402 struct hidp_session *session = (struct hidp_session *) arg; 402 struct hidp_session *session = (struct hidp_session *) arg;
403 403
404 /* The HIDP user-space API only contains calls to add and remove
405 * devices. There is no way to forward events of any kind. Therefore,
406 * we have to forcefully disconnect a device on idle-timeouts. This is
407 * unfortunate and weird API design, but it is spec-compliant and
408 * required for backwards-compatibility. Hence, on idle-timeout, we
409 * signal driver-detach events, so poll() will be woken up with an
410 * error-condition on both sockets.
411 */
412
413 session->intr_sock->sk->sk_err = EUNATCH;
414 session->ctrl_sock->sk->sk_err = EUNATCH;
415 wake_up_interruptible(sk_sleep(session->intr_sock->sk));
416 wake_up_interruptible(sk_sleep(session->ctrl_sock->sk));
417
404 hidp_session_terminate(session); 418 hidp_session_terminate(session);
405} 419}
406 420
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 7c65ee200c29..66e8b6ee19a5 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -239,7 +239,7 @@ static u16 l2cap_alloc_cid(struct l2cap_conn *conn)
 	else
 		dyn_end = L2CAP_CID_DYN_END;
 
-	for (cid = L2CAP_CID_DYN_START; cid < dyn_end; cid++) {
+	for (cid = L2CAP_CID_DYN_START; cid <= dyn_end; cid++) {
 		if (!__l2cap_get_chan_by_scid(conn, cid))
 			return cid;
 	}
@@ -5250,7 +5250,9 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn,
 	credits = __le16_to_cpu(rsp->credits);
 	result = __le16_to_cpu(rsp->result);
 
-	if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23))
+	if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23 ||
+					   dcid < L2CAP_CID_DYN_START ||
+					   dcid > L2CAP_CID_LE_DYN_END))
 		return -EPROTO;
 
 	BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x",
@@ -5270,6 +5272,11 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn,
 
 	switch (result) {
 	case L2CAP_CR_SUCCESS:
+		if (__l2cap_get_chan_by_dcid(conn, dcid)) {
+			err = -EBADSLT;
+			break;
+		}
+
 		chan->ident = 0;
 		chan->dcid = dcid;
 		chan->omtu = mtu;
@@ -5437,9 +5444,16 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
 		goto response_unlock;
 	}
 
+	/* Check for valid dynamic CID range */
+	if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) {
+		result = L2CAP_CR_INVALID_SCID;
+		chan = NULL;
+		goto response_unlock;
+	}
+
 	/* Check if we already have channel with that dcid */
 	if (__l2cap_get_chan_by_dcid(conn, scid)) {
-		result = L2CAP_CR_NO_MEM;
+		result = L2CAP_CR_SCID_IN_USE;
 		chan = NULL;
 		goto response_unlock;
 	}
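All three l2cap_core.c hunks hinge on the LE dynamic CID window. A sketch of the validity test, assuming the spec values behind the kernel's constants (LE dynamic CIDs run 0x0040 through 0x007f):

#include <stdbool.h>
#include <stdint.h>

#define L2CAP_CID_DYN_START	0x0040
#define L2CAP_CID_LE_DYN_END	0x007f

/* A peer-supplied source/destination CID on LE must fall inside the
 * dynamic window; anything else is rejected (L2CAP_CR_INVALID_SCID)
 * or, for responses, treated as a protocol error. */
static bool le_dyn_cid_valid(uint16_t cid)
{
	return cid >= L2CAP_CID_DYN_START && cid <= L2CAP_CID_LE_DYN_END;
}

The allocator fix in the first hunk is the same window seen from the other side: with <= the last usable CID (dyn_end itself) can now be handed out.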
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 586b3d580cfc..1bb551527044 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1111,53 +1111,76 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
 	if (!sk)
 		return 0;
 
+	lock_sock(sk);
+
+	if (sk->sk_shutdown)
+		goto shutdown_already;
+
+	BT_DBG("Handling sock shutdown");
+
 	/* prevent sk structure from being freed whilst unlocked */
 	sock_hold(sk);
 
 	chan = l2cap_pi(sk)->chan;
 	/* prevent chan structure from being freed whilst unlocked */
 	l2cap_chan_hold(chan);
-	conn = chan->conn;
 
 	BT_DBG("chan %p state %s", chan, state_to_string(chan->state));
 
+	if (chan->mode == L2CAP_MODE_ERTM &&
+	    chan->unacked_frames > 0 &&
+	    chan->state == BT_CONNECTED) {
+		err = __l2cap_wait_ack(sk, chan);
+
+		/* After waiting for ACKs, check whether shutdown
+		 * has already been actioned to close the L2CAP
+		 * link such as by l2cap_disconnection_req().
+		 */
+		if (sk->sk_shutdown)
+			goto has_shutdown;
+	}
+
+	sk->sk_shutdown = SHUTDOWN_MASK;
+	release_sock(sk);
+
+	l2cap_chan_lock(chan);
+	conn = chan->conn;
+	if (conn)
+		/* prevent conn structure from being freed */
+		l2cap_conn_get(conn);
+	l2cap_chan_unlock(chan);
+
 	if (conn)
+		/* mutex lock must be taken before l2cap_chan_lock() */
 		mutex_lock(&conn->chan_lock);
 
 	l2cap_chan_lock(chan);
-	lock_sock(sk);
+	l2cap_chan_close(chan, 0);
+	l2cap_chan_unlock(chan);
 
-	if (!sk->sk_shutdown) {
-		if (chan->mode == L2CAP_MODE_ERTM &&
-		    chan->unacked_frames > 0 &&
-		    chan->state == BT_CONNECTED)
-			err = __l2cap_wait_ack(sk, chan);
-
-		sk->sk_shutdown = SHUTDOWN_MASK;
-
-		release_sock(sk);
-		l2cap_chan_close(chan, 0);
-		lock_sock(sk);
-
-		if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
-		    !(current->flags & PF_EXITING))
-			err = bt_sock_wait_state(sk, BT_CLOSED,
-						 sk->sk_lingertime);
-	}
+	if (conn) {
+		mutex_unlock(&conn->chan_lock);
+		l2cap_conn_put(conn);
+	}
+
+	lock_sock(sk);
+
+	if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+	    !(current->flags & PF_EXITING))
+		err = bt_sock_wait_state(sk, BT_CLOSED,
+					 sk->sk_lingertime);
+
+has_shutdown:
+	l2cap_chan_put(chan);
+	sock_put(sk);
 
+shutdown_already:
 	if (!err && sk->sk_err)
 		err = -sk->sk_err;
 
 	release_sock(sk);
-	l2cap_chan_unlock(chan);
-
-	if (conn)
-		mutex_unlock(&conn->chan_lock);
-
-	l2cap_chan_put(chan);
-	sock_put(sk);
 
-	BT_DBG("err: %d", err);
+	BT_DBG("Sock shutdown complete err: %d", err);
 
 	return err;
 }
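The reworked shutdown path is mostly about lock ordering: the socket lock is dropped before channel teardown, conn->chan_lock is always taken before the channel lock, and references pin sk, chan and conn across the unlocked windows. A userspace pthread sketch of just the ordering (all names here are ours, and the brief chan-lock window used to grab the conn reference is omitted):

#include <pthread.h>

struct demo {
	pthread_mutex_t sock_lock;	/* stands in for lock_sock() */
	pthread_mutex_t conn_lock;	/* stands in for conn->chan_lock */
	pthread_mutex_t chan_lock;	/* stands in for l2cap_chan_lock() */
};

static void shutdown_order(struct demo *d)
{
	pthread_mutex_lock(&d->sock_lock);	/* mark shutdown, wait for acks */
	pthread_mutex_unlock(&d->sock_lock);	/* drop it before teardown */

	pthread_mutex_lock(&d->conn_lock);	/* always before chan_lock */
	pthread_mutex_lock(&d->chan_lock);
	/* l2cap_chan_close() runs here */
	pthread_mutex_unlock(&d->chan_lock);
	pthread_mutex_unlock(&d->conn_lock);

	pthread_mutex_lock(&d->sock_lock);	/* linger wait, error readback */
	pthread_mutex_unlock(&d->sock_lock);
}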
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index b36bc0415854..aa4cf64e32a6 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -151,6 +151,22 @@ void bt_info(const char *format, ...)
151} 151}
152EXPORT_SYMBOL(bt_info); 152EXPORT_SYMBOL(bt_info);
153 153
154void bt_warn(const char *format, ...)
155{
156 struct va_format vaf;
157 va_list args;
158
159 va_start(args, format);
160
161 vaf.fmt = format;
162 vaf.va = &args;
163
164 pr_warn("%pV", &vaf);
165
166 va_end(args);
167}
168EXPORT_SYMBOL(bt_warn);
169
154void bt_err(const char *format, ...) 170void bt_err(const char *format, ...)
155{ 171{
156 struct va_format vaf; 172 struct va_format vaf;
@@ -166,3 +182,19 @@ void bt_err(const char *format, ...)
166 va_end(args); 182 va_end(args);
167} 183}
168EXPORT_SYMBOL(bt_err); 184EXPORT_SYMBOL(bt_err);
185
186void bt_err_ratelimited(const char *format, ...)
187{
188 struct va_format vaf;
189 va_list args;
190
191 va_start(args, format);
192
193 vaf.fmt = format;
194 vaf.va = &args;
195
196 pr_err_ratelimited("%pV", &vaf);
197
198 va_end(args);
199}
200EXPORT_SYMBOL(bt_err_ratelimited);
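bt_warn() and bt_err_ratelimited() follow the kernel's usual va_format/%pV pattern, so wrapper macros can prepend a subsystem prefix without re-formatting the message twice. A userspace sketch of the same varargs-wrapper shape (vfprintf standing in for pr_warn, and the "Bluetooth:" prefix hard-coded here where the kernel gets it from pr_fmt):

#include <stdarg.h>
#include <stdio.h>

static void bt_warn_demo(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	fputs("Bluetooth: ", stderr);	/* prefix added by the wrapper */
	vfprintf(stderr, format, args);
	fputc('\n', stderr);
	va_end(args);
}

int main(void)
{
	bt_warn_demo("%s advertising data length corrected", "hci0");
	return 0;
}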
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index ccaf5a436d8f..7f22119276f3 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -268,6 +268,14 @@ static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len,
268 HCI_SOCK_TRUSTED, skip_sk); 268 HCI_SOCK_TRUSTED, skip_sk);
269} 269}
270 270
271static u8 le_addr_type(u8 mgmt_addr_type)
272{
273 if (mgmt_addr_type == BDADDR_LE_PUBLIC)
274 return ADDR_LE_DEV_PUBLIC;
275 else
276 return ADDR_LE_DEV_RANDOM;
277}
278
271static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, 279static int read_version(struct sock *sk, struct hci_dev *hdev, void *data,
272 u16 data_len) 280 u16 data_len)
273{ 281{
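le_addr_type() folds the repeatedly open-coded BDADDR_LE_* to ADDR_LE_DEV_* conversion into one helper; the later mgmt.c hunks swap their copies for calls to it. A stand-alone sketch, assuming the kernel's usual constant values (BDADDR_LE_PUBLIC 0x01, BDADDR_LE_RANDOM 0x02; ADDR_LE_DEV_PUBLIC 0x00, ADDR_LE_DEV_RANDOM 0x01):

#include <stdint.h>

#define BDADDR_LE_PUBLIC	0x01
#define BDADDR_LE_RANDOM	0x02
#define ADDR_LE_DEV_PUBLIC	0x00
#define ADDR_LE_DEV_RANDOM	0x01

/* mgmt speaks BDADDR_* address types; the HCI core speaks
 * ADDR_LE_DEV_*. Anything that is not LE public maps to random. */
static uint8_t le_addr_type(uint8_t mgmt_addr_type)
{
	if (mgmt_addr_type == BDADDR_LE_PUBLIC)
		return ADDR_LE_DEV_PUBLIC;

	return ADDR_LE_DEV_RANDOM;
}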
@@ -1631,35 +1639,8 @@ static int clean_up_hci_state(struct hci_dev *hdev)
 	discov_stopped = hci_stop_discovery(&req);
 
 	list_for_each_entry(conn, &hdev->conn_hash.list, list) {
-		struct hci_cp_disconnect dc;
-		struct hci_cp_reject_conn_req rej;
-
-		switch (conn->state) {
-		case BT_CONNECTED:
-		case BT_CONFIG:
-			dc.handle = cpu_to_le16(conn->handle);
-			dc.reason = 0x15; /* Terminated due to Power Off */
-			hci_req_add(&req, HCI_OP_DISCONNECT, sizeof(dc), &dc);
-			break;
-		case BT_CONNECT:
-			if (conn->type == LE_LINK)
-				hci_req_add(&req, HCI_OP_LE_CREATE_CONN_CANCEL,
-					    0, NULL);
-			else if (conn->type == ACL_LINK)
-				hci_req_add(&req, HCI_OP_CREATE_CONN_CANCEL,
-					    6, &conn->dst);
-			break;
-		case BT_CONNECT2:
-			bacpy(&rej.bdaddr, &conn->dst);
-			rej.reason = 0x15; /* Terminated due to Power Off */
-			if (conn->type == ACL_LINK)
-				hci_req_add(&req, HCI_OP_REJECT_CONN_REQ,
-					    sizeof(rej), &rej);
-			else if (conn->type == SCO_LINK)
-				hci_req_add(&req, HCI_OP_REJECT_SYNC_CONN_REQ,
-					    sizeof(rej), &rej);
-			break;
-		}
+		/* 0x15 == Terminated due to Power Off */
+		__hci_abort_conn(&req, conn, 0x15);
 	}
 
 	err = hci_req_run(&req, clean_up_hci_complete);
@@ -3044,9 +3025,10 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
3044{ 3025{
3045 struct mgmt_cp_unpair_device *cp = data; 3026 struct mgmt_cp_unpair_device *cp = data;
3046 struct mgmt_rp_unpair_device rp; 3027 struct mgmt_rp_unpair_device rp;
3047 struct hci_cp_disconnect dc; 3028 struct hci_conn_params *params;
3048 struct mgmt_pending_cmd *cmd; 3029 struct mgmt_pending_cmd *cmd;
3049 struct hci_conn *conn; 3030 struct hci_conn *conn;
3031 u8 addr_type;
3050 int err; 3032 int err;
3051 3033
3052 memset(&rp, 0, sizeof(rp)); 3034 memset(&rp, 0, sizeof(rp));
@@ -3087,36 +3069,23 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
 		conn = NULL;
 
 		err = hci_remove_link_key(hdev, &cp->addr.bdaddr);
-	} else {
-		u8 addr_type;
-
-		conn = hci_conn_hash_lookup_ba(hdev, LE_LINK,
-					       &cp->addr.bdaddr);
-		if (conn) {
-			/* Defer clearing up the connection parameters
-			 * until closing to give a chance of keeping
-			 * them if a repairing happens.
-			 */
-			set_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags);
-
-			/* If disconnection is not requested, then
-			 * clear the connection variable so that the
-			 * link is not terminated.
-			 */
-			if (!cp->disconnect)
-				conn = NULL;
-		}
+		if (err < 0) {
+			err = mgmt_cmd_complete(sk, hdev->id,
+						MGMT_OP_UNPAIR_DEVICE,
+						MGMT_STATUS_NOT_PAIRED, &rp,
+						sizeof(rp));
+			goto unlock;
+		}
 
-		if (cp->addr.type == BDADDR_LE_PUBLIC)
-			addr_type = ADDR_LE_DEV_PUBLIC;
-		else
-			addr_type = ADDR_LE_DEV_RANDOM;
+		goto done;
+	}
 
-		hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type);
+	/* LE address type */
+	addr_type = le_addr_type(cp->addr.type);
 
-		err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type);
-	}
+	hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type);
 
+	err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type);
 	if (err < 0) {
 		err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE,
 					MGMT_STATUS_NOT_PAIRED, &rp,
@@ -3124,6 +3093,36 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
3124 goto unlock; 3093 goto unlock;
3125 } 3094 }
3126 3095
3096 conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, addr_type);
3097 if (!conn) {
3098 hci_conn_params_del(hdev, &cp->addr.bdaddr, addr_type);
3099 goto done;
3100 }
3101
3102 /* Abort any ongoing SMP pairing */
3103 smp_cancel_pairing(conn);
3104
3105 /* Defer clearing up the connection parameters until closing to
3106 * give a chance of keeping them if a repairing happens.
3107 */
3108 set_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags);
3109
3110 /* Disable auto-connection parameters if present */
3111 params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type);
3112 if (params) {
3113 if (params->explicit_connect)
3114 params->auto_connect = HCI_AUTO_CONN_EXPLICIT;
3115 else
3116 params->auto_connect = HCI_AUTO_CONN_DISABLED;
3117 }
3118
3119 /* If disconnection is not requested, then clear the connection
3120 * variable so that the link is not terminated.
3121 */
3122 if (!cp->disconnect)
3123 conn = NULL;
3124
3125done:
3127 /* If the connection variable is set, then termination of the 3126 /* If the connection variable is set, then termination of the
3128 * link is requested. 3127 * link is requested.
3129 */ 3128 */
@@ -3143,9 +3142,7 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
3143 3142
3144 cmd->cmd_complete = addr_cmd_complete; 3143 cmd->cmd_complete = addr_cmd_complete;
3145 3144
3146 dc.handle = cpu_to_le16(conn->handle); 3145 err = hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
3147 dc.reason = 0x13; /* Remote User Terminated Connection */
3148 err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc);
3149 if (err < 0) 3146 if (err < 0)
3150 mgmt_pending_remove(cmd); 3147 mgmt_pending_remove(cmd);
3151 3148
@@ -3193,7 +3190,8 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data,
3193 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, 3190 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
3194 &cp->addr.bdaddr); 3191 &cp->addr.bdaddr);
3195 else 3192 else
3196 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); 3193 conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr,
3194 le_addr_type(cp->addr.type));
3197 3195
3198 if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) { 3196 if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) {
3199 err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, 3197 err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT,
@@ -3544,14 +3542,8 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
3544 conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level, 3542 conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level,
3545 auth_type); 3543 auth_type);
3546 } else { 3544 } else {
3547 u8 addr_type; 3545 u8 addr_type = le_addr_type(cp->addr.type);
3548 3546 struct hci_conn_params *p;
3549 /* Convert from L2CAP channel address type to HCI address type
3550 */
3551 if (cp->addr.type == BDADDR_LE_PUBLIC)
3552 addr_type = ADDR_LE_DEV_PUBLIC;
3553 else
3554 addr_type = ADDR_LE_DEV_RANDOM;
3555 3547
3556 /* When pairing a new device, it is expected to remember 3548 /* When pairing a new device, it is expected to remember
3557 * this device for future connections. Adding the connection 3549 * this device for future connections. Adding the connection
@@ -3562,7 +3554,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
3562 * If connection parameters already exist, then they 3554 * If connection parameters already exist, then they
3563 * will be kept and this function does nothing. 3555 * will be kept and this function does nothing.
3564 */ 3556 */
3565 hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type); 3557 p = hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type);
3558
3559 if (p->auto_connect == HCI_AUTO_CONN_EXPLICIT)
3560 p->auto_connect = HCI_AUTO_CONN_DISABLED;
3566 3561
3567 conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr, 3562 conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr,
3568 addr_type, sec_level, 3563 addr_type, sec_level,
@@ -3693,7 +3688,8 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
3693 if (addr->type == BDADDR_BREDR) 3688 if (addr->type == BDADDR_BREDR)
3694 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr); 3689 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr);
3695 else 3690 else
3696 conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr); 3691 conn = hci_conn_hash_lookup_le(hdev, &addr->bdaddr,
3692 le_addr_type(addr->type));
3697 3693
3698 if (!conn) { 3694 if (!conn) {
3699 err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, 3695 err = mgmt_cmd_complete(sk, hdev->id, mgmt_op,
@@ -5596,14 +5592,9 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
5596 5592
5597 for (i = 0; i < irk_count; i++) { 5593 for (i = 0; i < irk_count; i++) {
5598 struct mgmt_irk_info *irk = &cp->irks[i]; 5594 struct mgmt_irk_info *irk = &cp->irks[i];
5599 u8 addr_type;
5600 5595
5601 if (irk->addr.type == BDADDR_LE_PUBLIC) 5596 hci_add_irk(hdev, &irk->addr.bdaddr,
5602 addr_type = ADDR_LE_DEV_PUBLIC; 5597 le_addr_type(irk->addr.type), irk->val,
5603 else
5604 addr_type = ADDR_LE_DEV_RANDOM;
5605
5606 hci_add_irk(hdev, &irk->addr.bdaddr, addr_type, irk->val,
5607 BDADDR_ANY); 5598 BDADDR_ANY);
5608 } 5599 }
5609 5600
@@ -5683,12 +5674,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
5683 5674
5684 for (i = 0; i < key_count; i++) { 5675 for (i = 0; i < key_count; i++) {
5685 struct mgmt_ltk_info *key = &cp->keys[i]; 5676 struct mgmt_ltk_info *key = &cp->keys[i];
5686 u8 type, addr_type, authenticated; 5677 u8 type, authenticated;
5687
5688 if (key->addr.type == BDADDR_LE_PUBLIC)
5689 addr_type = ADDR_LE_DEV_PUBLIC;
5690 else
5691 addr_type = ADDR_LE_DEV_RANDOM;
5692 5678
5693 switch (key->type) { 5679 switch (key->type) {
5694 case MGMT_LTK_UNAUTHENTICATED: 5680 case MGMT_LTK_UNAUTHENTICATED:
@@ -5714,9 +5700,9 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
5714 continue; 5700 continue;
5715 } 5701 }
5716 5702
5717 hci_add_ltk(hdev, &key->addr.bdaddr, addr_type, type, 5703 hci_add_ltk(hdev, &key->addr.bdaddr,
5718 authenticated, key->val, key->enc_size, key->ediv, 5704 le_addr_type(key->addr.type), type, authenticated,
5719 key->rand); 5705 key->val, key->enc_size, key->ediv, key->rand);
5720 } 5706 }
5721 5707
5722 err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0, 5708 err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0,
@@ -6117,14 +6103,21 @@ static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr,
 		__hci_update_background_scan(req);
 		break;
 	case HCI_AUTO_CONN_REPORT:
-		list_add(&params->action, &hdev->pend_le_reports);
+		if (params->explicit_connect)
+			list_add(&params->action, &hdev->pend_le_conns);
+		else
+			list_add(&params->action, &hdev->pend_le_reports);
 		__hci_update_background_scan(req);
 		break;
 	case HCI_AUTO_CONN_DIRECT:
 	case HCI_AUTO_CONN_ALWAYS:
 		if (!is_connected(hdev, addr, addr_type)) {
 			list_add(&params->action, &hdev->pend_le_conns);
-			__hci_update_background_scan(req);
+			/* If we are in scan phase of connecting, we were
+			 * already added to pend_le_conns and scanning.
+			 */
+			if (params->auto_connect != HCI_AUTO_CONN_EXPLICIT)
+				__hci_update_background_scan(req);
 		}
 		break;
 	}
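The hci_conn_params_set() change routes an entry onto pend_le_conns whenever an explicit connect is in flight, even for HCI_AUTO_CONN_REPORT, and avoids re-arming the background scan for entries that were already scanning under the explicit policy. A compact restatement of the decision (enum and struct names invented for the sketch; old_ac is the policy the entry had before this call):

#include <stdbool.h>

enum auto_conn {
	AUTO_DISABLED, AUTO_REPORT, AUTO_DIRECT, AUTO_ALWAYS, AUTO_EXPLICIT
};

struct decision {
	bool add_to_pend_conns;		/* else pend_le_reports for REPORT */
	bool update_background_scan;
};

static struct decision route(enum auto_conn old_ac, enum auto_conn new_ac,
			     bool explicit_connect, bool connected)
{
	struct decision d = { false, false };

	switch (new_ac) {
	case AUTO_REPORT:
		/* an in-progress explicit connect keeps the entry on the
		 * connect list rather than the report list */
		d.add_to_pend_conns = explicit_connect;
		d.update_background_scan = true;
		break;
	case AUTO_DIRECT:
	case AUTO_ALWAYS:
		if (!connected) {
			d.add_to_pend_conns = true;
			/* if the old policy was "explicit", the entry is
			 * already on pend_le_conns and scanning */
			d.update_background_scan = (old_ac != AUTO_EXPLICIT);
		}
		break;
	default:
		break;
	}

	return d;
}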
@@ -6221,10 +6214,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
6221 goto added; 6214 goto added;
6222 } 6215 }
6223 6216
6224 if (cp->addr.type == BDADDR_LE_PUBLIC) 6217 addr_type = le_addr_type(cp->addr.type);
6225 addr_type = ADDR_LE_DEV_PUBLIC;
6226 else
6227 addr_type = ADDR_LE_DEV_RANDOM;
6228 6218
6229 if (cp->action == 0x02) 6219 if (cp->action == 0x02)
6230 auto_conn = HCI_AUTO_CONN_ALWAYS; 6220 auto_conn = HCI_AUTO_CONN_ALWAYS;
@@ -6353,10 +6343,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
6353 goto complete; 6343 goto complete;
6354 } 6344 }
6355 6345
6356 if (cp->addr.type == BDADDR_LE_PUBLIC) 6346 addr_type = le_addr_type(cp->addr.type);
6357 addr_type = ADDR_LE_DEV_PUBLIC;
6358 else
6359 addr_type = ADDR_LE_DEV_RANDOM;
6360 6347
6361 /* Kernel internally uses conn_params with resolvable private 6348 /* Kernel internally uses conn_params with resolvable private
6362 * address, but Remove Device allows only identity addresses. 6349 * address, but Remove Device allows only identity addresses.
@@ -6379,7 +6366,8 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
6379 goto unlock; 6366 goto unlock;
6380 } 6367 }
6381 6368
6382 if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { 6369 if (params->auto_connect == HCI_AUTO_CONN_DISABLED ||
6370 params->auto_connect == HCI_AUTO_CONN_EXPLICIT) {
6383 err = cmd->cmd_complete(cmd, 6371 err = cmd->cmd_complete(cmd,
6384 MGMT_STATUS_INVALID_PARAMS); 6372 MGMT_STATUS_INVALID_PARAMS);
6385 mgmt_pending_remove(cmd); 6373 mgmt_pending_remove(cmd);
@@ -6415,6 +6403,10 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
6415 if (p->auto_connect == HCI_AUTO_CONN_DISABLED) 6403 if (p->auto_connect == HCI_AUTO_CONN_DISABLED)
6416 continue; 6404 continue;
6417 device_removed(sk, hdev, &p->addr, p->addr_type); 6405 device_removed(sk, hdev, &p->addr, p->addr_type);
6406 if (p->explicit_connect) {
6407 p->auto_connect = HCI_AUTO_CONN_EXPLICIT;
6408 continue;
6409 }
6418 list_del(&p->action); 6410 list_del(&p->action);
6419 list_del(&p->list); 6411 list_del(&p->list);
6420 kfree(p); 6412 kfree(p);
@@ -7857,27 +7849,13 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
7857 mgmt_event(MGMT_EV_NEW_LONG_TERM_KEY, hdev, &ev, sizeof(ev), NULL); 7849 mgmt_event(MGMT_EV_NEW_LONG_TERM_KEY, hdev, &ev, sizeof(ev), NULL);
7858} 7850}
7859 7851
7860void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk) 7852void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent)
7861{ 7853{
7862 struct mgmt_ev_new_irk ev; 7854 struct mgmt_ev_new_irk ev;
7863 7855
7864 memset(&ev, 0, sizeof(ev)); 7856 memset(&ev, 0, sizeof(ev));
7865 7857
7866 /* For identity resolving keys from devices that are already 7858 ev.store_hint = persistent;
7867 * using a public address or static random address, do not
7868 * ask for storing this key. The identity resolving key really
7869 * is only mandatory for devices using resolvable random
7870 * addresses.
7871 *
7872 * Storing all identity resolving keys has the downside that
7873 * they will be also loaded on next boot of they system. More
7874 * identity resolving keys, means more time during scanning is
7875 * needed to actually resolve these addresses.
7876 */
7877 if (bacmp(&irk->rpa, BDADDR_ANY))
7878 ev.store_hint = 0x01;
7879 else
7880 ev.store_hint = 0x00;
7881 7859
7882 bacpy(&ev.rpa, &irk->rpa); 7860 bacpy(&ev.rpa, &irk->rpa);
7883 bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr); 7861 bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index f315c8d0e43b..fe129663bd3f 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -74,7 +74,7 @@ struct sco_pinfo {
74 74
75static void sco_sock_timeout(unsigned long arg) 75static void sco_sock_timeout(unsigned long arg)
76{ 76{
77 struct sock *sk = (struct sock *) arg; 77 struct sock *sk = (struct sock *)arg;
78 78
79 BT_DBG("sock %p state %d", sk, sk->sk_state); 79 BT_DBG("sock %p state %d", sk, sk->sk_state);
80 80
@@ -170,18 +170,21 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
170 sco_conn_unlock(conn); 170 sco_conn_unlock(conn);
171 171
172 if (sk) { 172 if (sk) {
173 sock_hold(sk);
173 bh_lock_sock(sk); 174 bh_lock_sock(sk);
174 sco_sock_clear_timer(sk); 175 sco_sock_clear_timer(sk);
175 sco_chan_del(sk, err); 176 sco_chan_del(sk, err);
176 bh_unlock_sock(sk); 177 bh_unlock_sock(sk);
177 sco_sock_kill(sk); 178 sco_sock_kill(sk);
179 sock_put(sk);
178 } 180 }
179 181
180 hcon->sco_data = NULL; 182 hcon->sco_data = NULL;
181 kfree(conn); 183 kfree(conn);
182} 184}
183 185
184static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent) 186static void __sco_chan_add(struct sco_conn *conn, struct sock *sk,
187 struct sock *parent)
185{ 188{
186 BT_DBG("conn %p", conn); 189 BT_DBG("conn %p", conn);
187 190
@@ -414,8 +417,10 @@ static void __sco_sock_close(struct sock *sk)
414 if (sco_pi(sk)->conn->hcon) { 417 if (sco_pi(sk)->conn->hcon) {
415 sk->sk_state = BT_DISCONN; 418 sk->sk_state = BT_DISCONN;
416 sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); 419 sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
420 sco_conn_lock(sco_pi(sk)->conn);
417 hci_conn_drop(sco_pi(sk)->conn->hcon); 421 hci_conn_drop(sco_pi(sk)->conn->hcon);
418 sco_pi(sk)->conn->hcon = NULL; 422 sco_pi(sk)->conn->hcon = NULL;
423 sco_conn_unlock(sco_pi(sk)->conn);
419 } else 424 } else
420 sco_chan_del(sk, ECONNRESET); 425 sco_chan_del(sk, ECONNRESET);
421 break; 426 break;
@@ -459,7 +464,8 @@ static struct proto sco_proto = {
459 .obj_size = sizeof(struct sco_pinfo) 464 .obj_size = sizeof(struct sco_pinfo)
460}; 465};
461 466
462static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) 467static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
468 int proto, gfp_t prio, int kern)
463{ 469{
464 struct sock *sk; 470 struct sock *sk;
465 471
@@ -508,7 +514,8 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol,
508 return 0; 514 return 0;
509} 515}
510 516
511static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) 517static int sco_sock_bind(struct socket *sock, struct sockaddr *addr,
518 int addr_len)
512{ 519{
513 struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; 520 struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
514 struct sock *sk = sock->sk; 521 struct sock *sk = sock->sk;
@@ -615,7 +622,8 @@ done:
615 return err; 622 return err;
616} 623}
617 624
618static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flags) 625static int sco_sock_accept(struct socket *sock, struct socket *newsock,
626 int flags)
619{ 627{
620 DEFINE_WAIT_FUNC(wait, woken_wake_function); 628 DEFINE_WAIT_FUNC(wait, woken_wake_function);
621 struct sock *sk = sock->sk, *ch; 629 struct sock *sk = sock->sk, *ch;
@@ -669,7 +677,8 @@ done:
669 return err; 677 return err;
670} 678}
671 679
672static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) 680static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
681 int *len, int peer)
673{ 682{
674 struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; 683 struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
675 struct sock *sk = sock->sk; 684 struct sock *sk = sock->sk;
@@ -779,7 +788,8 @@ static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg,
779 return bt_sock_recvmsg(sock, msg, len, flags); 788 return bt_sock_recvmsg(sock, msg, len, flags);
780} 789}
781 790
782static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) 791static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
792 char __user *optval, unsigned int optlen)
783{ 793{
784 struct sock *sk = sock->sk; 794 struct sock *sk = sock->sk;
785 int len, err = 0; 795 int len, err = 0;
@@ -819,7 +829,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char
819 voice.setting = sco_pi(sk)->setting; 829 voice.setting = sco_pi(sk)->setting;
820 830
821 len = min_t(unsigned int, sizeof(voice), optlen); 831 len = min_t(unsigned int, sizeof(voice), optlen);
822 if (copy_from_user((char *) &voice, optval, len)) { 832 if (copy_from_user((char *)&voice, optval, len)) {
823 err = -EFAULT; 833 err = -EFAULT;
824 break; 834 break;
825 } 835 }
@@ -843,7 +853,8 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char
843 return err; 853 return err;
844} 854}
845 855
846static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) 856static int sco_sock_getsockopt_old(struct socket *sock, int optname,
857 char __user *optval, int __user *optlen)
847{ 858{
848 struct sock *sk = sock->sk; 859 struct sock *sk = sock->sk;
849 struct sco_options opts; 860 struct sco_options opts;
@@ -903,7 +914,8 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user
903 return err; 914 return err;
904} 915}
905 916
906static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) 917static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
918 char __user *optval, int __user *optlen)
907{ 919{
908 struct sock *sk = sock->sk; 920 struct sock *sk = sock->sk;
909 int len, err = 0; 921 int len, err = 0;
@@ -928,7 +940,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char
928 } 940 }
929 941
930 if (put_user(test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags), 942 if (put_user(test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags),
931 (u32 __user *) optval)) 943 (u32 __user *)optval))
932 err = -EFAULT; 944 err = -EFAULT;
933 945
934 break; 946 break;
@@ -961,7 +973,9 @@ static int sco_sock_shutdown(struct socket *sock, int how)
961 if (!sk) 973 if (!sk)
962 return 0; 974 return 0;
963 975
976 sock_hold(sk);
964 lock_sock(sk); 977 lock_sock(sk);
978
965 if (!sk->sk_shutdown) { 979 if (!sk->sk_shutdown) {
966 sk->sk_shutdown = SHUTDOWN_MASK; 980 sk->sk_shutdown = SHUTDOWN_MASK;
967 sco_sock_clear_timer(sk); 981 sco_sock_clear_timer(sk);
@@ -972,7 +986,10 @@ static int sco_sock_shutdown(struct socket *sock, int how)
972 err = bt_sock_wait_state(sk, BT_CLOSED, 986 err = bt_sock_wait_state(sk, BT_CLOSED,
973 sk->sk_lingertime); 987 sk->sk_lingertime);
974 } 988 }
989
975 release_sock(sk); 990 release_sock(sk);
991 sock_put(sk);
992
976 return err; 993 return err;
977} 994}
978 995
@@ -1016,6 +1033,11 @@ static void sco_conn_ready(struct sco_conn *conn)
1016 } else { 1033 } else {
1017 sco_conn_lock(conn); 1034 sco_conn_lock(conn);
1018 1035
1036 if (!conn->hcon) {
1037 sco_conn_unlock(conn);
1038 return;
1039 }
1040
1019 parent = sco_get_sock_listen(&conn->hcon->src); 1041 parent = sco_get_sock_listen(&conn->hcon->src);
1020 if (!parent) { 1042 if (!parent) {
1021 sco_conn_unlock(conn); 1043 sco_conn_unlock(conn);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 0510a577a7b5..c91353841e40 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -495,7 +495,7 @@ static int smp_ah(struct crypto_blkcipher *tfm, const u8 irk[16],
495 } 495 }
496 496
497 /* The output of the random address function ah is: 497 /* The output of the random address function ah is:
498 * ah(h, r) = e(k, r') mod 2^24 498 * ah(k, r) = e(k, r') mod 2^24
499 * The output of the security function e is then truncated to 24 bits 499 * The output of the security function e is then truncated to 24 bits
500 * by taking the least significant 24 bits of the output of e as the 500 * by taking the least significant 24 bits of the output of e as the
501 * result of ah. 501 * result of ah.
@@ -811,7 +811,6 @@ static void smp_failure(struct l2cap_conn *conn, u8 reason)
811 smp_send_cmd(conn, SMP_CMD_PAIRING_FAIL, sizeof(reason), 811 smp_send_cmd(conn, SMP_CMD_PAIRING_FAIL, sizeof(reason),
812 &reason); 812 &reason);
813 813
814 clear_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags);
815 mgmt_auth_failed(hcon, HCI_ERROR_AUTH_FAILURE); 814 mgmt_auth_failed(hcon, HCI_ERROR_AUTH_FAILURE);
816 815
817 if (chan->data) 816 if (chan->data)
@@ -1046,8 +1045,24 @@ static void smp_notify_keys(struct l2cap_conn *conn)
 	struct smp_cmd_pairing *rsp = (void *) &smp->prsp[1];
 	bool persistent;
 
+	if (hcon->type == ACL_LINK) {
+		if (hcon->key_type == HCI_LK_DEBUG_COMBINATION)
+			persistent = false;
+		else
+			persistent = !test_bit(HCI_CONN_FLUSH_KEY,
+					       &hcon->flags);
+	} else {
+		/* The LTKs, IRKs and CSRKs should be persistent only if
+		 * both sides had the bonding bit set in their
+		 * authentication requests.
+		 */
+		persistent = !!((req->auth_req & rsp->auth_req) &
+				SMP_AUTH_BONDING);
+	}
+
 	if (smp->remote_irk) {
-		mgmt_new_irk(hdev, smp->remote_irk);
+		mgmt_new_irk(hdev, smp->remote_irk, persistent);
+
 		/* Now that user space can be considered to know the
 		 * identity address track the connection based on it
 		 * from now on (assuming this is an LE link).
@@ -1075,21 +1090,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
1075 } 1090 }
1076 } 1091 }
1077 1092
1078 if (hcon->type == ACL_LINK) {
1079 if (hcon->key_type == HCI_LK_DEBUG_COMBINATION)
1080 persistent = false;
1081 else
1082 persistent = !test_bit(HCI_CONN_FLUSH_KEY,
1083 &hcon->flags);
1084 } else {
1085 /* The LTKs and CSRKs should be persistent only if both sides
1086 * had the bonding bit set in their authentication requests.
1087 */
1088 persistent = !!((req->auth_req & rsp->auth_req) &
1089 SMP_AUTH_BONDING);
1090 }
1091
1092
1093 if (smp->csrk) { 1093 if (smp->csrk) {
1094 smp->csrk->bdaddr_type = hcon->dst_type; 1094 smp->csrk->bdaddr_type = hcon->dst_type;
1095 bacpy(&smp->csrk->bdaddr, &hcon->dst); 1095 bacpy(&smp->csrk->bdaddr, &hcon->dst);
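Moving the persistence computation ahead of the key notifications lets mgmt_new_irk() carry a store hint the same way the LTK and CSRK paths already do, instead of guessing from the RPA. The decision itself reduces to a couple of flag tests; a sketch with the BR/EDR side folded in (SMP_AUTH_BONDING is 0x01 per the SMP spec, the other parameter names are ours):

#include <stdbool.h>
#include <stdint.h>

#define SMP_AUTH_BONDING 0x01

/* LE: LTKs, IRKs and CSRKs persist only if both sides requested
 * bonding. BR/EDR: debug keys never persist, flushable keys do not. */
static bool keys_persistent(bool bredr, bool debug_key, bool flush_key,
			    uint8_t req_auth, uint8_t rsp_auth)
{
	if (bredr)
		return !debug_key && !flush_key;

	return (req_auth & rsp_auth) & SMP_AUTH_BONDING;
}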
@@ -2380,6 +2380,32 @@ unlock:
2380 return ret; 2380 return ret;
2381} 2381}
2382 2382
2383void smp_cancel_pairing(struct hci_conn *hcon)
2384{
2385 struct l2cap_conn *conn = hcon->l2cap_data;
2386 struct l2cap_chan *chan;
2387 struct smp_chan *smp;
2388
2389 if (!conn)
2390 return;
2391
2392 chan = conn->smp;
2393 if (!chan)
2394 return;
2395
2396 l2cap_chan_lock(chan);
2397
2398 smp = chan->data;
2399 if (smp) {
2400 if (test_bit(SMP_FLAG_COMPLETE, &smp->flags))
2401 smp_failure(conn, 0);
2402 else
2403 smp_failure(conn, SMP_UNSPECIFIED);
2404 }
2405
2406 l2cap_chan_unlock(chan);
2407}
2408
2383static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) 2409static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
2384{ 2410{
2385 struct smp_cmd_encrypt_info *rp = (void *) skb->data; 2411 struct smp_cmd_encrypt_info *rp = (void *) skb->data;
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index 6cf872563ea7..ffcc70b6b199 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -180,6 +180,7 @@ enum smp_key_pref {
180}; 180};
181 181
182/* SMP Commands */ 182/* SMP Commands */
183void smp_cancel_pairing(struct hci_conn *hcon);
183bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, 184bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level,
184 enum smp_key_pref key_pref); 185 enum smp_key_pref key_pref);
185int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); 186int smp_conn_security(struct hci_conn *hcon, __u8 sec_level);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 6ed2feb51e3c..5e88d3e17546 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -56,7 +56,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
56 skb_reset_mac_header(skb); 56 skb_reset_mac_header(skb);
57 skb_pull(skb, ETH_HLEN); 57 skb_pull(skb, ETH_HLEN);
58 58
59 if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) 59 if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
60 goto out; 60 goto out;
61 61
62 if (is_broadcast_ether_addr(dest)) 62 if (is_broadcast_ether_addr(dest))
@@ -391,7 +391,7 @@ void br_dev_setup(struct net_device *dev)
391 br->bridge_max_age = br->max_age = 20 * HZ; 391 br->bridge_max_age = br->max_age = 20 * HZ;
392 br->bridge_hello_time = br->hello_time = 2 * HZ; 392 br->bridge_hello_time = br->hello_time = 2 * HZ;
393 br->bridge_forward_delay = br->forward_delay = 15 * HZ; 393 br->bridge_forward_delay = br->forward_delay = 15 * HZ;
394 br->ageing_time = 300 * HZ; 394 br->ageing_time = BR_DEFAULT_AGEING_TIME;
395 395
396 br_netfilter_rtable_init(br); 396 br_netfilter_rtable_init(br);
397 br_stp_timer_init(br); 397 br_stp_timer_init(br);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9e9875da0a4f..a642bb829d09 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -133,15 +133,16 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
133 133
134static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) 134static void fdb_del_external_learn(struct net_bridge_fdb_entry *f)
135{ 135{
136 struct switchdev_obj obj = { 136 struct switchdev_obj_port_fdb fdb = {
137 .id = SWITCHDEV_OBJ_PORT_FDB, 137 .obj = {
138 .u.fdb = { 138 .id = SWITCHDEV_OBJ_ID_PORT_FDB,
139 .addr = f->addr.addr, 139 .flags = SWITCHDEV_F_DEFER,
140 .vid = f->vlan_id,
141 }, 140 },
141 .vid = f->vlan_id,
142 }; 142 };
143 143
144 switchdev_port_obj_del(f->dst->dev, &obj); 144 ether_addr_copy(fdb.addr, f->addr.addr);
145 switchdev_port_obj_del(f->dst->dev, &fdb.obj);
145} 146}
146 147
147static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) 148static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
@@ -163,22 +164,27 @@ static void fdb_delete_local(struct net_bridge *br,
163 struct net_bridge_fdb_entry *f) 164 struct net_bridge_fdb_entry *f)
164{ 165{
165 const unsigned char *addr = f->addr.addr; 166 const unsigned char *addr = f->addr.addr;
166 u16 vid = f->vlan_id; 167 struct net_bridge_vlan_group *vg;
168 const struct net_bridge_vlan *v;
167 struct net_bridge_port *op; 169 struct net_bridge_port *op;
170 u16 vid = f->vlan_id;
168 171
169 /* Maybe another port has same hw addr? */ 172 /* Maybe another port has same hw addr? */
170 list_for_each_entry(op, &br->port_list, list) { 173 list_for_each_entry(op, &br->port_list, list) {
174 vg = nbp_vlan_group(op);
171 if (op != p && ether_addr_equal(op->dev->dev_addr, addr) && 175 if (op != p && ether_addr_equal(op->dev->dev_addr, addr) &&
172 (!vid || nbp_vlan_find(op, vid))) { 176 (!vid || br_vlan_find(vg, vid))) {
173 f->dst = op; 177 f->dst = op;
174 f->added_by_user = 0; 178 f->added_by_user = 0;
175 return; 179 return;
176 } 180 }
177 } 181 }
178 182
183 vg = br_vlan_group(br);
184 v = br_vlan_find(vg, vid);
179 /* Maybe bridge device has same hw addr? */ 185 /* Maybe bridge device has same hw addr? */
180 if (p && ether_addr_equal(br->dev->dev_addr, addr) && 186 if (p && ether_addr_equal(br->dev->dev_addr, addr) &&
181 (!vid || br_vlan_find(br, vid))) { 187 (!vid || (v && br_vlan_should_use(v)))) {
182 f->dst = NULL; 188 f->dst = NULL;
183 f->added_by_user = 0; 189 f->added_by_user = 0;
184 return; 190 return;
@@ -203,14 +209,14 @@ void br_fdb_find_delete_local(struct net_bridge *br,
203 209
204void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) 210void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
205{ 211{
212 struct net_bridge_vlan_group *vg;
206 struct net_bridge *br = p->br; 213 struct net_bridge *br = p->br;
207 struct net_port_vlans *pv = nbp_get_vlan_info(p); 214 struct net_bridge_vlan *v;
208 bool no_vlan = !pv;
209 int i; 215 int i;
210 u16 vid;
211 216
212 spin_lock_bh(&br->hash_lock); 217 spin_lock_bh(&br->hash_lock);
213 218
219 vg = nbp_vlan_group(p);
214 /* Search all chains since old address/hash is unknown */ 220 /* Search all chains since old address/hash is unknown */
215 for (i = 0; i < BR_HASH_SIZE; i++) { 221 for (i = 0; i < BR_HASH_SIZE; i++) {
216 struct hlist_node *h; 222 struct hlist_node *h;
@@ -226,7 +232,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
226 * configured, we can safely be done at 232 * configured, we can safely be done at
227 * this point. 233 * this point.
228 */ 234 */
229 if (no_vlan) 235 if (!vg || !vg->num_vlans)
230 goto insert; 236 goto insert;
231 } 237 }
232 } 238 }
@@ -236,15 +242,15 @@ insert:
236 /* insert new address, may fail if invalid address or dup. */ 242 /* insert new address, may fail if invalid address or dup. */
237 fdb_insert(br, p, newaddr, 0); 243 fdb_insert(br, p, newaddr, 0);
238 244
239 if (no_vlan) 245 if (!vg || !vg->num_vlans)
240 goto done; 246 goto done;
241 247
242 /* Now add entries for every VLAN configured on the port. 248 /* Now add entries for every VLAN configured on the port.
243 * This function runs under RTNL so the bitmap will not change 249 * This function runs under RTNL so the bitmap will not change
244 * from under us. 250 * from under us.
245 */ 251 */
246 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) 252 list_for_each_entry(v, &vg->vlan_list, vlist)
247 fdb_insert(br, p, newaddr, vid); 253 fdb_insert(br, p, newaddr, v->vid);
248 254
249done: 255done:
250 spin_unlock_bh(&br->hash_lock); 256 spin_unlock_bh(&br->hash_lock);
@@ -252,9 +258,9 @@ done:
252 258
253void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) 259void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
254{ 260{
261 struct net_bridge_vlan_group *vg;
255 struct net_bridge_fdb_entry *f; 262 struct net_bridge_fdb_entry *f;
256 struct net_port_vlans *pv; 263 struct net_bridge_vlan *v;
257 u16 vid = 0;
258 264
259 spin_lock_bh(&br->hash_lock); 265 spin_lock_bh(&br->hash_lock);
260 266
@@ -264,20 +270,18 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
264 fdb_delete_local(br, NULL, f); 270 fdb_delete_local(br, NULL, f);
265 271
266 fdb_insert(br, NULL, newaddr, 0); 272 fdb_insert(br, NULL, newaddr, 0);
267 273 vg = br_vlan_group(br);
274 if (!vg || !vg->num_vlans)
275 goto out;
268 /* Now remove and add entries for every VLAN configured on the 276 /* Now remove and add entries for every VLAN configured on the
269 * bridge. This function runs under RTNL so the bitmap will not 277 * bridge. This function runs under RTNL so the bitmap will not
270 * change from under us. 278 * change from under us.
271 */ 279 */
272 pv = br_get_vlan_info(br); 280 list_for_each_entry(v, &vg->vlan_list, vlist) {
273 if (!pv) 281 f = __br_fdb_get(br, br->dev->dev_addr, v->vid);
274 goto out;
275
276 for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) {
277 f = __br_fdb_get(br, br->dev->dev_addr, vid);
278 if (f && f->is_local && !f->dst) 282 if (f && f->is_local && !f->dst)
279 fdb_delete_local(br, NULL, f); 283 fdb_delete_local(br, NULL, f);
280 fdb_insert(br, NULL, newaddr, vid); 284 fdb_insert(br, NULL, newaddr, v->vid);
281 } 285 }
282out: 286out:
283 spin_unlock_bh(&br->hash_lock); 287 spin_unlock_bh(&br->hash_lock);
@@ -299,6 +303,8 @@ void br_fdb_cleanup(unsigned long _data)
299 unsigned long this_timer; 303 unsigned long this_timer;
300 if (f->is_static) 304 if (f->is_static)
301 continue; 305 continue;
306 if (f->added_by_external_learn)
307 continue;
302 this_timer = f->updated + delay; 308 this_timer = f->updated + delay;
303 if (time_before_eq(this_timer, jiffies)) 309 if (time_before_eq(this_timer, jiffies))
304 fdb_delete(br, f); 310 fdb_delete(br, f);
@@ -489,7 +495,9 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
489static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, 495static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
490 struct net_bridge_port *source, 496 struct net_bridge_port *source,
491 const unsigned char *addr, 497 const unsigned char *addr,
492 __u16 vid) 498 __u16 vid,
499 unsigned char is_local,
500 unsigned char is_static)
493{ 501{
494 struct net_bridge_fdb_entry *fdb; 502 struct net_bridge_fdb_entry *fdb;
495 503
@@ -498,8 +506,8 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
498 memcpy(fdb->addr.addr, addr, ETH_ALEN); 506 memcpy(fdb->addr.addr, addr, ETH_ALEN);
499 fdb->dst = source; 507 fdb->dst = source;
500 fdb->vlan_id = vid; 508 fdb->vlan_id = vid;
501 fdb->is_local = 0; 509 fdb->is_local = is_local;
502 fdb->is_static = 0; 510 fdb->is_static = is_static;
503 fdb->added_by_user = 0; 511 fdb->added_by_user = 0;
504 fdb->added_by_external_learn = 0; 512 fdb->added_by_external_learn = 0;
505 fdb->updated = fdb->used = jiffies; 513 fdb->updated = fdb->used = jiffies;
@@ -530,11 +538,10 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
530 fdb_delete(br, fdb); 538 fdb_delete(br, fdb);
531 } 539 }
532 540
533 fdb = fdb_create(head, source, addr, vid); 541 fdb = fdb_create(head, source, addr, vid, 1, 1);
534 if (!fdb) 542 if (!fdb)
535 return -ENOMEM; 543 return -ENOMEM;
536 544
537 fdb->is_local = fdb->is_static = 1;
538 fdb_add_hw_addr(br, addr); 545 fdb_add_hw_addr(br, addr);
539 fdb_notify(br, fdb, RTM_NEWNEIGH); 546 fdb_notify(br, fdb, RTM_NEWNEIGH);
540 return 0; 547 return 0;
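fdb_create() now takes is_local and is_static up front instead of letting fdb_insert() set the flags after the fact. Since these entries sit on hash chains that are also traversed under RCU (see fdb_find_rcu() below), initialising the flags before the entry is linked in is the plausible motivation, though the excerpt does not state it. The two call shapes, both visible in this patch:

	/* a bridge/port own address: local and static from creation */
	fdb = fdb_create(head, source, addr, vid, 1, 1);

	/* a learned or netlink-added entry: both flags clear */
	fdb = fdb_create(head, source, addr, vid, 0, 0);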
@@ -591,7 +598,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
591 } else { 598 } else {
592 spin_lock(&br->hash_lock); 599 spin_lock(&br->hash_lock);
593 if (likely(!fdb_find(head, addr, vid))) { 600 if (likely(!fdb_find(head, addr, vid))) {
594 fdb = fdb_create(head, source, addr, vid); 601 fdb = fdb_create(head, source, addr, vid, 0, 0);
595 if (fdb) { 602 if (fdb) {
596 if (unlikely(added_by_user)) 603 if (unlikely(added_by_user))
597 fdb->added_by_user = 1; 604 fdb->added_by_user = 1;
@@ -605,13 +612,14 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
605 } 612 }
606} 613}
607 614
608static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb) 615static int fdb_to_nud(const struct net_bridge *br,
616 const struct net_bridge_fdb_entry *fdb)
609{ 617{
610 if (fdb->is_local) 618 if (fdb->is_local)
611 return NUD_PERMANENT; 619 return NUD_PERMANENT;
612 else if (fdb->is_static) 620 else if (fdb->is_static)
613 return NUD_NOARP; 621 return NUD_NOARP;
614 else if (has_expired(fdb->dst->br, fdb)) 622 else if (has_expired(br, fdb))
615 return NUD_STALE; 623 return NUD_STALE;
616 else 624 else
617 return NUD_REACHABLE; 625 return NUD_REACHABLE;
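fdb_to_nud() used to reach the bridge through fdb->dst->br, but fdb->dst is NULL for entries that refer to the bridge device itself, so that lookup could dereference a NULL pointer. Every caller already holds the bridge, hence the extra parameter; the call-site change is mechanical:

	/* fdb->dst may be NULL (bridge's own address), so pass br
	 * explicitly instead of chasing fdb->dst->br
	 */
	ndm->ndm_state = fdb_to_nud(br, fdb);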
@@ -637,7 +645,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
637 ndm->ndm_flags = fdb->added_by_external_learn ? NTF_EXT_LEARNED : 0; 645 ndm->ndm_flags = fdb->added_by_external_learn ? NTF_EXT_LEARNED : 0;
638 ndm->ndm_type = 0; 646 ndm->ndm_type = 0;
639 ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex; 647 ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex;
640 ndm->ndm_state = fdb_to_nud(fdb); 648 ndm->ndm_state = fdb_to_nud(br, fdb);
641 649
642 if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr)) 650 if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr))
643 goto nla_put_failure; 651 goto nla_put_failure;
@@ -767,7 +775,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
767 if (!(flags & NLM_F_CREATE)) 775 if (!(flags & NLM_F_CREATE))
768 return -ENOENT; 776 return -ENOENT;
769 777
770 fdb = fdb_create(head, source, addr, vid); 778 fdb = fdb_create(head, source, addr, vid, 0, 0);
771 if (!fdb) 779 if (!fdb)
772 return -ENOMEM; 780 return -ENOMEM;
773 781
@@ -782,7 +790,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
782 } 790 }
783 } 791 }
784 792
785 if (fdb_to_nud(fdb) != state) { 793 if (fdb_to_nud(br, fdb) != state) {
786 if (state & NUD_PERMANENT) { 794 if (state & NUD_PERMANENT) {
787 fdb->is_local = 1; 795 fdb->is_local = 1;
788 if (!fdb->is_static) { 796 if (!fdb->is_static) {
@@ -842,9 +850,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
842 struct net_device *dev, 850 struct net_device *dev,
843 const unsigned char *addr, u16 vid, u16 nlh_flags) 851 const unsigned char *addr, u16 vid, u16 nlh_flags)
844{ 852{
845 struct net_bridge_port *p; 853 struct net_bridge_vlan_group *vg;
854 struct net_bridge_port *p = NULL;
855 struct net_bridge_vlan *v;
856 struct net_bridge *br = NULL;
846 int err = 0; 857 int err = 0;
847 struct net_port_vlans *pv;
848 858
849 if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { 859 if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
850 pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); 860 pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
@@ -856,34 +866,51 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
856 return -EINVAL; 866 return -EINVAL;
857 } 867 }
858 868
859 p = br_port_get_rtnl(dev); 869 if (dev->priv_flags & IFF_EBRIDGE) {
860 if (p == NULL) { 870 br = netdev_priv(dev);
861 pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", 871 vg = br_vlan_group(br);
862 dev->name); 872 } else {
863 return -EINVAL; 873 p = br_port_get_rtnl(dev);
874 if (!p) {
875 pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
876 dev->name);
877 return -EINVAL;
878 }
879 vg = nbp_vlan_group(p);
864 } 880 }
865 881
866 pv = nbp_get_vlan_info(p);
867 if (vid) { 882 if (vid) {
868 if (!pv || !test_bit(vid, pv->vlan_bitmap)) { 883 v = br_vlan_find(vg, vid);
869 pr_info("bridge: RTM_NEWNEIGH with unconfigured " 884 if (!v || !br_vlan_should_use(v)) {
870 "vlan %d on port %s\n", vid, dev->name); 885 pr_info("bridge: RTM_NEWNEIGH with unconfigured vlan %d on %s\n", vid, dev->name);
871 return -EINVAL; 886 return -EINVAL;
872 } 887 }
873 888
874 /* VID was specified, so use it. */ 889 /* VID was specified, so use it. */
875 err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); 890 if (dev->priv_flags & IFF_EBRIDGE)
891 err = br_fdb_insert(br, NULL, addr, vid);
892 else
893 err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
876 } else { 894 } else {
877 err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); 895 if (dev->priv_flags & IFF_EBRIDGE)
878 if (err || !pv) 896 err = br_fdb_insert(br, NULL, addr, 0);
897 else
898 err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
899 if (err || !vg || !vg->num_vlans)
879 goto out; 900 goto out;
880 901
881 /* We have vlans configured on this port and user didn't 902 /* We have vlans configured on this port and user didn't
882 * specify a VLAN. To be nice, add/update entry for every 903 * specify a VLAN. To be nice, add/update entry for every
883 * vlan on this port. 904 * vlan on this port.
884 */ 905 */
885 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { 906 list_for_each_entry(v, &vg->vlan_list, vlist) {
886 err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); 907 if (!br_vlan_should_use(v))
908 continue;
909 if (dev->priv_flags & IFF_EBRIDGE)
910 err = br_fdb_insert(br, NULL, addr, v->vid);
911 else
912 err = __br_fdb_add(ndm, p, addr, nlh_flags,
913 v->vid);
887 if (err) 914 if (err)
888 goto out; 915 goto out;
889 } 916 }
@@ -893,6 +920,32 @@ out:
893 return err; 920 return err;
894} 921}
895 922
923static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr,
924 u16 vid)
925{
926 struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
927 struct net_bridge_fdb_entry *fdb;
928
929 fdb = fdb_find(head, addr, vid);
930 if (!fdb)
931 return -ENOENT;
932
933 fdb_delete(br, fdb);
934 return 0;
935}
936
937static int __br_fdb_delete_by_addr(struct net_bridge *br,
938 const unsigned char *addr, u16 vid)
939{
940 int err;
941
942 spin_lock_bh(&br->hash_lock);
943 err = fdb_delete_by_addr(br, addr, vid);
944 spin_unlock_bh(&br->hash_lock);
945
946 return err;
947}
948
896static int fdb_delete_by_addr_and_port(struct net_bridge_port *p, 949static int fdb_delete_by_addr_and_port(struct net_bridge_port *p,
897 const u8 *addr, u16 vlan) 950 const u8 *addr, u16 vlan)
898{ 951{
@@ -925,38 +978,53 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
925 struct net_device *dev, 978 struct net_device *dev,
926 const unsigned char *addr, u16 vid) 979 const unsigned char *addr, u16 vid)
927{ 980{
928 struct net_bridge_port *p; 981 struct net_bridge_vlan_group *vg;
982 struct net_bridge_port *p = NULL;
983 struct net_bridge_vlan *v;
984 struct net_bridge *br = NULL;
929 int err; 985 int err;
930 struct net_port_vlans *pv;
931 986
932 p = br_port_get_rtnl(dev); 987 if (dev->priv_flags & IFF_EBRIDGE) {
933 if (p == NULL) { 988 br = netdev_priv(dev);
934 pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", 989 vg = br_vlan_group(br);
935 dev->name); 990 } else {
936 return -EINVAL; 991 p = br_port_get_rtnl(dev);
992 if (!p) {
993 pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
994 dev->name);
995 return -EINVAL;
996 }
997 vg = nbp_vlan_group(p);
937 } 998 }
938 999
939 pv = nbp_get_vlan_info(p);
940 if (vid) { 1000 if (vid) {
941 if (!pv || !test_bit(vid, pv->vlan_bitmap)) { 1001 v = br_vlan_find(vg, vid);
942 pr_info("bridge: RTM_DELNEIGH with unconfigured " 1002 if (!v) {
943 "vlan %d on port %s\n", vid, dev->name); 1003 pr_info("bridge: RTM_DELNEIGH with unconfigured vlan %d on %s\n", vid, dev->name);
944 return -EINVAL; 1004 return -EINVAL;
945 } 1005 }
946 1006
947 err = __br_fdb_delete(p, addr, vid); 1007 if (dev->priv_flags & IFF_EBRIDGE)
1008 err = __br_fdb_delete_by_addr(br, addr, vid);
1009 else
1010 err = __br_fdb_delete(p, addr, vid);
948 } else { 1011 } else {
949 err = -ENOENT; 1012 err = -ENOENT;
950 err &= __br_fdb_delete(p, addr, 0); 1013 if (dev->priv_flags & IFF_EBRIDGE)
951 if (!pv) 1014 err = __br_fdb_delete_by_addr(br, addr, 0);
1015 else
1016 err &= __br_fdb_delete(p, addr, 0);
1017
1018 if (!vg || !vg->num_vlans)
952 goto out; 1019 goto out;
953 1020
954 /* We have vlans configured on this port and user didn't 1021 list_for_each_entry(v, &vg->vlan_list, vlist) {
955 * specify a VLAN. To be nice, add/update entry for every 1022 if (!br_vlan_should_use(v))
956 * vlan on this port. 1023 continue;
957 */ 1024 if (dev->priv_flags & IFF_EBRIDGE)
958 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { 1025 err = __br_fdb_delete_by_addr(br, addr, v->vid);
959 err &= __br_fdb_delete(p, addr, vid); 1026 else
1027 err &= __br_fdb_delete(p, addr, v->vid);
960 } 1028 }
961 } 1029 }
962out: 1030out:
@@ -1032,7 +1100,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
1032 head = &br->hash[br_mac_hash(addr, vid)]; 1100 head = &br->hash[br_mac_hash(addr, vid)];
1033 fdb = fdb_find(head, addr, vid); 1101 fdb = fdb_find(head, addr, vid);
1034 if (!fdb) { 1102 if (!fdb) {
1035 fdb = fdb_create(head, p, addr, vid); 1103 fdb = fdb_create(head, p, addr, vid, 0, 0);
1036 if (!fdb) { 1104 if (!fdb) {
1037 err = -ENOMEM; 1105 err = -ENOMEM;
1038 goto err_unlock; 1106 goto err_unlock;
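br_fdb_add() (and br_fdb_delete() in the same hunk series) now accept the bridge device itself, not only its ports: when dev carries IFF_EBRIDGE, entries are applied with a NULL port via br_fdb_insert() or __br_fdb_delete_by_addr(). The dispatch is repeated at three call sites above; a hypothetical helper, not part of the patch, makes the branch structure explicit:

	/* hypothetical condensation of br_fdb_add()'s repeated dispatch;
	 * shown for illustration only
	 */
	static int fdb_add_one(struct net_device *dev, struct net_bridge *br,
			       struct net_bridge_port *p, struct ndmsg *ndm,
			       const unsigned char *addr, u16 nlh_flags, u16 vid)
	{
		if (dev->priv_flags & IFF_EBRIDGE)
			return br_fdb_insert(br, NULL, addr, vid);
		return __br_fdb_add(ndm, p, addr, nlh_flags, vid);
	}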
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index fa7bfced888e..fcdb86dd5a23 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -30,12 +30,14 @@ static int deliver_clone(const struct net_bridge_port *prev,
30static inline int should_deliver(const struct net_bridge_port *p, 30static inline int should_deliver(const struct net_bridge_port *p,
31 const struct sk_buff *skb) 31 const struct sk_buff *skb)
32{ 32{
33 struct net_bridge_vlan_group *vg;
34
35 vg = nbp_vlan_group_rcu(p);
33 return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && 36 return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
34 br_allowed_egress(p->br, nbp_get_vlan_info(p), skb) && 37 br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING;
35 p->state == BR_STATE_FORWARDING;
36} 38}
37 39
38int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) 40int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
39{ 41{
40 if (!is_skb_forwardable(skb->dev, skb)) 42 if (!is_skb_forwardable(skb->dev, skb))
41 goto drop; 43 goto drop;
@@ -65,10 +67,10 @@ drop:
65} 67}
66EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); 68EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
67 69
68int br_forward_finish(struct sock *sk, struct sk_buff *skb) 70int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
69{ 71{
70 return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, sk, skb, 72 return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING,
71 NULL, skb->dev, 73 net, sk, skb, NULL, skb->dev,
72 br_dev_queue_push_xmit); 74 br_dev_queue_push_xmit);
73 75
74} 76}
@@ -76,7 +78,10 @@ EXPORT_SYMBOL_GPL(br_forward_finish);
76 78
77static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) 79static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
78{ 80{
79 skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); 81 struct net_bridge_vlan_group *vg;
82
83 vg = nbp_vlan_group_rcu(to);
84 skb = br_handle_vlan(to->br, vg, skb);
80 if (!skb) 85 if (!skb)
81 return; 86 return;
82 87
@@ -92,13 +97,14 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
92 return; 97 return;
93 } 98 }
94 99
95 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, 100 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
96 NULL, skb->dev, 101 dev_net(skb->dev), NULL, skb, NULL, skb->dev,
97 br_forward_finish); 102 br_forward_finish);
98} 103}
99 104
100static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) 105static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
101{ 106{
107 struct net_bridge_vlan_group *vg;
102 struct net_device *indev; 108 struct net_device *indev;
103 109
104 if (skb_warn_if_lro(skb)) { 110 if (skb_warn_if_lro(skb)) {
@@ -106,7 +112,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
106 return; 112 return;
107 } 113 }
108 114
109 skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); 115 vg = nbp_vlan_group_rcu(to);
116 skb = br_handle_vlan(to->br, vg, skb);
110 if (!skb) 117 if (!skb)
111 return; 118 return;
112 119
@@ -114,8 +121,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
114 skb->dev = to->dev; 121 skb->dev = to->dev;
115 skb_forward_csum(skb); 122 skb_forward_csum(skb);
116 123
117 NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, NULL, skb, 124 NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD,
118 indev, skb->dev, 125 dev_net(indev), NULL, skb, indev, skb->dev,
119 br_forward_finish); 126 br_forward_finish);
120} 127}
121 128
@@ -134,7 +141,7 @@ EXPORT_SYMBOL_GPL(br_deliver);
134/* called with rcu_read_lock */ 141/* called with rcu_read_lock */
135void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) 142void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0)
136{ 143{
137 if (should_deliver(to, skb)) { 144 if (to && should_deliver(to, skb)) {
138 if (skb0) 145 if (skb0)
139 deliver_clone(to, skb, __br_forward); 146 deliver_clone(to, skb, __br_forward);
140 else 147 else
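From here on, every NF_HOOK()/NF_HOOK_THRESH() call gains a struct net argument, and every continuation function (okfn) widens from int (*)(struct sock *, struct sk_buff *) to int (*)(struct net *, struct sock *, struct sk_buff *): the caller resolves the namespace once, typically from the input device, instead of each okfn re-deriving it from skb->dev or the socket. Before and after, taken from __br_forward() above:

	/* before: no namespace at the call site */
	NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, NULL, skb,
		indev, skb->dev, br_forward_finish);

	/* after: netns resolved once and threaded through */
	NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD,
		dev_net(indev), NULL, skb, indev, skb->dev,
		br_forward_finish);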
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 45e4757c6fd2..ec02f5869a78 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -24,6 +24,7 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <net/sock.h> 25#include <net/sock.h>
26#include <linux/if_vlan.h> 26#include <linux/if_vlan.h>
27#include <net/switchdev.h>
27 28
28#include "br_private.h" 29#include "br_private.h"
29 30
@@ -250,6 +251,8 @@ static void del_nbp(struct net_bridge_port *p)
250 251
251 nbp_vlan_flush(p); 252 nbp_vlan_flush(p);
252 br_fdb_delete_by_port(br, p, 0, 1); 253 br_fdb_delete_by_port(br, p, 0, 1);
254 switchdev_deferred_process();
255
253 nbp_update_port_count(br); 256 nbp_update_port_count(br);
254 257
255 netdev_upper_dev_unlink(dev, br->dev); 258 netdev_upper_dev_unlink(dev, br->dev);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index f921a5dce22d..f7fba74108a9 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -26,38 +26,44 @@
26br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; 26br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
27EXPORT_SYMBOL(br_should_route_hook); 27EXPORT_SYMBOL(br_should_route_hook);
28 28
29static int
30br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
31{
32 return netif_receive_skb(skb);
33}
34
29static int br_pass_frame_up(struct sk_buff *skb) 35static int br_pass_frame_up(struct sk_buff *skb)
30{ 36{
31 struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; 37 struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
32 struct net_bridge *br = netdev_priv(brdev); 38 struct net_bridge *br = netdev_priv(brdev);
39 struct net_bridge_vlan_group *vg;
33 struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); 40 struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
34 struct net_port_vlans *pv;
35 41
36 u64_stats_update_begin(&brstats->syncp); 42 u64_stats_update_begin(&brstats->syncp);
37 brstats->rx_packets++; 43 brstats->rx_packets++;
38 brstats->rx_bytes += skb->len; 44 brstats->rx_bytes += skb->len;
39 u64_stats_update_end(&brstats->syncp); 45 u64_stats_update_end(&brstats->syncp);
40 46
47 vg = br_vlan_group_rcu(br);
41 /* Bridge is just like any other port. Make sure the 48 /* Bridge is just like any other port. Make sure the
42 * packet is allowed except in promisc mode when someone 49 * packet is allowed except in promisc mode when someone
43 * may be running packet capture. 50 * may be running packet capture.
44 */ 51 */
45 pv = br_get_vlan_info(br);
46 if (!(brdev->flags & IFF_PROMISC) && 52 if (!(brdev->flags & IFF_PROMISC) &&
47 !br_allowed_egress(br, pv, skb)) { 53 !br_allowed_egress(vg, skb)) {
48 kfree_skb(skb); 54 kfree_skb(skb);
49 return NET_RX_DROP; 55 return NET_RX_DROP;
50 } 56 }
51 57
52 indev = skb->dev; 58 indev = skb->dev;
53 skb->dev = brdev; 59 skb->dev = brdev;
54 skb = br_handle_vlan(br, pv, skb); 60 skb = br_handle_vlan(br, vg, skb);
55 if (!skb) 61 if (!skb)
56 return NET_RX_DROP; 62 return NET_RX_DROP;
57 63
58 return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, 64 return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
59 indev, NULL, 65 dev_net(indev), NULL, skb, indev, NULL,
60 netif_receive_skb_sk); 66 br_netif_receive_skb);
61} 67}
62 68
63static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, 69static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
@@ -120,7 +126,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
120} 126}
121 127
122/* note: already called with rcu_read_lock */ 128/* note: already called with rcu_read_lock */
123int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb) 129int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
124{ 130{
125 const unsigned char *dest = eth_hdr(skb)->h_dest; 131 const unsigned char *dest = eth_hdr(skb)->h_dest;
126 struct net_bridge_port *p = br_port_get_rcu(skb->dev); 132 struct net_bridge_port *p = br_port_get_rcu(skb->dev);
@@ -134,7 +140,7 @@ int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb)
134 if (!p || p->state == BR_STATE_DISABLED) 140 if (!p || p->state == BR_STATE_DISABLED)
135 goto drop; 141 goto drop;
136 142
137 if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid)) 143 if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
138 goto out; 144 goto out;
139 145
140 /* insert into forwarding database after filtering to avoid spoofing */ 146 /* insert into forwarding database after filtering to avoid spoofing */
@@ -208,7 +214,7 @@ drop:
208EXPORT_SYMBOL_GPL(br_handle_frame_finish); 214EXPORT_SYMBOL_GPL(br_handle_frame_finish);
209 215
210/* note: already called with rcu_read_lock */ 216/* note: already called with rcu_read_lock */
211static int br_handle_local_finish(struct sock *sk, struct sk_buff *skb) 217static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
212{ 218{
213 struct net_bridge_port *p = br_port_get_rcu(skb->dev); 219 struct net_bridge_port *p = br_port_get_rcu(skb->dev);
214 u16 vid = 0; 220 u16 vid = 0;
@@ -278,8 +284,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
278 } 284 }
279 285
280 /* Deliver packet to local host only */ 286 /* Deliver packet to local host only */
281 if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, 287 if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
282 skb->dev, NULL, br_handle_local_finish)) { 288 dev_net(skb->dev), NULL, skb, skb->dev, NULL,
289 br_handle_local_finish)) {
283 return RX_HANDLER_CONSUMED; /* consumed by filter */ 290 return RX_HANDLER_CONSUMED; /* consumed by filter */
284 } else { 291 } else {
285 *pskb = skb; 292 *pskb = skb;
@@ -303,8 +310,8 @@ forward:
303 if (ether_addr_equal(p->br->dev->dev_addr, dest)) 310 if (ether_addr_equal(p->br->dev->dev_addr, dest))
304 skb->pkt_type = PACKET_HOST; 311 skb->pkt_type = PACKET_HOST;
305 312
306 NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, NULL, skb, 313 NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
307 skb->dev, NULL, 314 dev_net(skb->dev), NULL, skb, skb->dev, NULL,
308 br_handle_frame_finish); 315 br_handle_frame_finish);
309 break; 316 break;
310 default: 317 default:
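netif_receive_skb() does not fit the widened okfn prototype, so the old netif_receive_skb_sk() callback is replaced by the br_netif_receive_skb() adapter added at the top of this file. The same two-line shape works for any okfn that has no use for net or sk:

	static int
	br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
	{
		/* net and sk exist only to satisfy the okfn prototype */
		return netif_receive_skb(skb);
	}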
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 8d423bc649b9..263b4de4de57 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -200,8 +200,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
200 if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) 200 if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
201 return -EPERM; 201 return -EPERM;
202 202
203 br->ageing_time = clock_t_to_jiffies(args[1]); 203 return br_set_ageing_time(br, args[1]);
204 return 0;
205 204
206 case BRCTL_GET_PORT_INFO: 205 case BRCTL_GET_PORT_INFO:
207 { 206 {
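BRCTL_SET_AGEING_TIME stops writing br->ageing_time directly and goes through br_set_ageing_time(), whose body lies outside this excerpt. A plausible sketch, assuming it keeps the clock_t-to-jiffies conversion the inline code used and adds validation; the bound names below are placeholders, not quoted from the kernel, and any switchdev offload the real function performs is omitted:

	int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
	{
		unsigned long t = clock_t_to_jiffies(ageing_time);

		/* placeholder bounds, assumed rather than quoted */
		if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME)
			return -ERANGE;

		br->ageing_time = t;
		return 0;
	}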
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index d747275fad18..cd8deea2d074 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -464,11 +464,11 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
464static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) 464static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
465{ 465{
466 struct net *net = sock_net(skb->sk); 466 struct net *net = sock_net(skb->sk);
467 unsigned short vid = VLAN_N_VID; 467 struct net_bridge_vlan_group *vg;
468 struct net_device *dev, *pdev; 468 struct net_device *dev, *pdev;
469 struct br_mdb_entry *entry; 469 struct br_mdb_entry *entry;
470 struct net_bridge_port *p; 470 struct net_bridge_port *p;
471 struct net_port_vlans *pv; 471 struct net_bridge_vlan *v;
472 struct net_bridge *br; 472 struct net_bridge *br;
473 int err; 473 int err;
474 474
@@ -489,10 +489,10 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
489 if (!p || p->br != br || p->state == BR_STATE_DISABLED) 489 if (!p || p->br != br || p->state == BR_STATE_DISABLED)
490 return -EINVAL; 490 return -EINVAL;
491 491
492 pv = nbp_get_vlan_info(p); 492 vg = nbp_vlan_group(p);
493 if (br_vlan_enabled(br) && pv && entry->vid == 0) { 493 if (br_vlan_enabled(br) && vg && entry->vid == 0) {
494 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { 494 list_for_each_entry(v, &vg->vlan_list, vlist) {
495 entry->vid = vid; 495 entry->vid = v->vid;
496 err = __br_mdb_add(net, br, entry); 496 err = __br_mdb_add(net, br, entry);
497 if (err) 497 if (err)
498 break; 498 break;
@@ -566,11 +566,11 @@ unlock:
566static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) 566static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
567{ 567{
568 struct net *net = sock_net(skb->sk); 568 struct net *net = sock_net(skb->sk);
569 unsigned short vid = VLAN_N_VID; 569 struct net_bridge_vlan_group *vg;
570 struct net_device *dev, *pdev; 570 struct net_device *dev, *pdev;
571 struct br_mdb_entry *entry; 571 struct br_mdb_entry *entry;
572 struct net_bridge_port *p; 572 struct net_bridge_port *p;
573 struct net_port_vlans *pv; 573 struct net_bridge_vlan *v;
574 struct net_bridge *br; 574 struct net_bridge *br;
575 int err; 575 int err;
576 576
@@ -591,10 +591,10 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
591 if (!p || p->br != br || p->state == BR_STATE_DISABLED) 591 if (!p || p->br != br || p->state == BR_STATE_DISABLED)
592 return -EINVAL; 592 return -EINVAL;
593 593
594 pv = nbp_get_vlan_info(p); 594 vg = nbp_vlan_group(p);
595 if (br_vlan_enabled(br) && pv && entry->vid == 0) { 595 if (br_vlan_enabled(br) && vg && entry->vid == 0) {
596 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { 596 list_for_each_entry(v, &vg->vlan_list, vlist) {
597 entry->vid = vid; 597 entry->vid = v->vid;
598 err = __br_mdb_del(br, entry); 598 err = __br_mdb_del(br, entry);
599 if (!err) 599 if (!err)
600 __br_mdb_notify(dev, entry, RTM_DELMDB); 600 __br_mdb_notify(dev, entry, RTM_DELMDB);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 480b3de1a0e3..03661d97463c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -829,8 +829,8 @@ static void __br_multicast_send_query(struct net_bridge *br,
829 829
830 if (port) { 830 if (port) {
831 skb->dev = port->dev; 831 skb->dev = port->dev;
832 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, 832 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
833 NULL, skb->dev, 833 dev_net(port->dev), NULL, skb, NULL, skb->dev,
834 br_dev_queue_push_xmit); 834 br_dev_queue_push_xmit);
835 } else { 835 } else {
836 br_multicast_select_own_querier(br, ip, skb); 836 br_multicast_select_own_querier(br, ip, skb);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 0a6f095bb0c9..7ddbe7ec81d6 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -111,7 +111,6 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
111/* largest possible L2 header, see br_nf_dev_queue_xmit() */ 111/* largest possible L2 header, see br_nf_dev_queue_xmit() */
112#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) 112#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
113 113
114#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
115struct brnf_frag_data { 114struct brnf_frag_data {
116 char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; 115 char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
117 u8 encap_size; 116 u8 encap_size;
@@ -121,7 +120,6 @@ struct brnf_frag_data {
121}; 120};
122 121
123static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); 122static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
124#endif
125 123
126static void nf_bridge_info_free(struct sk_buff *skb) 124static void nf_bridge_info_free(struct sk_buff *skb)
127{ 125{
@@ -189,10 +187,9 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
189 * expected format 187 * expected format
190 */ 188 */
191 189
192static int br_validate_ipv4(struct sk_buff *skb) 190static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
193{ 191{
194 const struct iphdr *iph; 192 const struct iphdr *iph;
195 struct net_device *dev = skb->dev;
196 u32 len; 193 u32 len;
197 194
198 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 195 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
@@ -213,13 +210,13 @@ static int br_validate_ipv4(struct sk_buff *skb)
213 210
214 len = ntohs(iph->tot_len); 211 len = ntohs(iph->tot_len);
215 if (skb->len < len) { 212 if (skb->len < len) {
216 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); 213 IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS);
217 goto drop; 214 goto drop;
218 } else if (len < (iph->ihl*4)) 215 } else if (len < (iph->ihl*4))
219 goto inhdr_error; 216 goto inhdr_error;
220 217
221 if (pskb_trim_rcsum(skb, len)) { 218 if (pskb_trim_rcsum(skb, len)) {
222 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); 219 IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS);
223 goto drop; 220 goto drop;
224 } 221 }
225 222
@@ -232,7 +229,7 @@ static int br_validate_ipv4(struct sk_buff *skb)
232 return 0; 229 return 0;
233 230
234inhdr_error: 231inhdr_error:
235 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); 232 IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
236drop: 233drop:
237 return -1; 234 return -1;
238} 235}
@@ -256,7 +253,7 @@ void nf_bridge_update_protocol(struct sk_buff *skb)
256 * don't, we use the neighbour framework to find out. In both cases, we make 253 * don't, we use the neighbour framework to find out. In both cases, we make
257 * sure that br_handle_frame_finish() is called afterwards. 254 * sure that br_handle_frame_finish() is called afterwards.
258 */ 255 */
259int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) 256int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb)
260{ 257{
261 struct neighbour *neigh; 258 struct neighbour *neigh;
262 struct dst_entry *dst; 259 struct dst_entry *dst;
@@ -273,7 +270,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
273 if (neigh->hh.hh_len) { 270 if (neigh->hh.hh_len) {
274 neigh_hh_bridge(&neigh->hh, skb); 271 neigh_hh_bridge(&neigh->hh, skb);
275 skb->dev = nf_bridge->physindev; 272 skb->dev = nf_bridge->physindev;
276 ret = br_handle_frame_finish(sk, skb); 273 ret = br_handle_frame_finish(net, sk, skb);
277 } else { 274 } else {
278 /* the neighbour function below overwrites the complete 275 /* the neighbour function below overwrites the complete
279 * MAC header, so we save the Ethernet source address and 276 * MAC header, so we save the Ethernet source address and
@@ -342,7 +339,7 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
342 * device, we proceed as if ip_route_input() succeeded. If it differs from the 339 * device, we proceed as if ip_route_input() succeeded. If it differs from the
343 * logical bridge port or if ip_route_output_key() fails we drop the packet. 340 * logical bridge port or if ip_route_output_key() fails we drop the packet.
344 */ 341 */
345static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) 342static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
346{ 343{
347 struct net_device *dev = skb->dev; 344 struct net_device *dev = skb->dev;
348 struct iphdr *iph = ip_hdr(skb); 345 struct iphdr *iph = ip_hdr(skb);
@@ -371,7 +368,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
371 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) 368 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
372 goto free_skb; 369 goto free_skb;
373 370
374 rt = ip_route_output(dev_net(dev), iph->daddr, 0, 371 rt = ip_route_output(net, iph->daddr, 0,
375 RT_TOS(iph->tos), 0); 372 RT_TOS(iph->tos), 0);
376 if (!IS_ERR(rt)) { 373 if (!IS_ERR(rt)) {
377 /* - Bridged-and-DNAT'ed traffic doesn't 374 /* - Bridged-and-DNAT'ed traffic doesn't
@@ -393,7 +390,7 @@ bridged_dnat:
393 nf_bridge_push_encap_header(skb); 390 nf_bridge_push_encap_header(skb);
394 NF_HOOK_THRESH(NFPROTO_BRIDGE, 391 NF_HOOK_THRESH(NFPROTO_BRIDGE,
395 NF_BR_PRE_ROUTING, 392 NF_BR_PRE_ROUTING,
396 sk, skb, skb->dev, NULL, 393 net, sk, skb, skb->dev, NULL,
397 br_nf_pre_routing_finish_bridge, 394 br_nf_pre_routing_finish_bridge,
398 1); 395 1);
399 return 0; 396 return 0;
@@ -413,7 +410,7 @@ bridged_dnat:
413 skb->dev = nf_bridge->physindev; 410 skb->dev = nf_bridge->physindev;
414 nf_bridge_update_protocol(skb); 411 nf_bridge_update_protocol(skb);
415 nf_bridge_push_encap_header(skb); 412 nf_bridge_push_encap_header(skb);
416 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, 413 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb,
417 skb->dev, NULL, 414 skb->dev, NULL,
418 br_handle_frame_finish, 1); 415 br_handle_frame_finish, 1);
419 416
@@ -464,7 +461,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb)
464 * receiving device) to make netfilter happy, the REDIRECT 461 * receiving device) to make netfilter happy, the REDIRECT
465 * target in particular. Save the original destination IP 462 * target in particular. Save the original destination IP
466 * address to be able to detect DNAT afterwards. */ 463 * address to be able to detect DNAT afterwards. */
467static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, 464static unsigned int br_nf_pre_routing(void *priv,
468 struct sk_buff *skb, 465 struct sk_buff *skb,
469 const struct nf_hook_state *state) 466 const struct nf_hook_state *state)
470{ 467{
@@ -486,7 +483,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
486 return NF_ACCEPT; 483 return NF_ACCEPT;
487 484
488 nf_bridge_pull_encap_header_rcsum(skb); 485 nf_bridge_pull_encap_header_rcsum(skb);
489 return br_nf_pre_routing_ipv6(ops, skb, state); 486 return br_nf_pre_routing_ipv6(priv, skb, state);
490 } 487 }
491 488
492 if (!brnf_call_iptables && !br->nf_call_iptables) 489 if (!brnf_call_iptables && !br->nf_call_iptables)
@@ -497,7 +494,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
497 494
498 nf_bridge_pull_encap_header_rcsum(skb); 495 nf_bridge_pull_encap_header_rcsum(skb);
499 496
500 if (br_validate_ipv4(skb)) 497 if (br_validate_ipv4(state->net, skb))
501 return NF_DROP; 498 return NF_DROP;
502 499
503 nf_bridge_put(skb->nf_bridge); 500 nf_bridge_put(skb->nf_bridge);
@@ -511,7 +508,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
511 508
512 skb->protocol = htons(ETH_P_IP); 509 skb->protocol = htons(ETH_P_IP);
513 510
514 NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, 511 NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
515 skb->dev, NULL, 512 skb->dev, NULL,
516 br_nf_pre_routing_finish); 513 br_nf_pre_routing_finish);
517 514
@@ -526,7 +523,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
526 * took place when the packet entered the bridge), but we 523 * took place when the packet entered the bridge), but we
527 * register an IPv4 PRE_ROUTING 'sabotage' hook that will 524 * register an IPv4 PRE_ROUTING 'sabotage' hook that will
528 * prevent this from happening. */ 525 * prevent this from happening. */
529static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, 526static unsigned int br_nf_local_in(void *priv,
530 struct sk_buff *skb, 527 struct sk_buff *skb,
531 const struct nf_hook_state *state) 528 const struct nf_hook_state *state)
532{ 529{
@@ -535,7 +532,7 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
535} 532}
536 533
537/* PF_BRIDGE/FORWARD *************************************************/ 534/* PF_BRIDGE/FORWARD *************************************************/
538static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) 535static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
539{ 536{
540 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 537 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
541 struct net_device *in; 538 struct net_device *in;
@@ -559,7 +556,7 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
559 } 556 }
560 nf_bridge_push_encap_header(skb); 557 nf_bridge_push_encap_header(skb);
561 558
562 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, sk, skb, 559 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb,
563 in, skb->dev, br_forward_finish, 1); 560 in, skb->dev, br_forward_finish, 1);
564 return 0; 561 return 0;
565} 562}
@@ -570,7 +567,7 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
570 * but we are still able to filter on the 'real' indev/outdev 567 * but we are still able to filter on the 'real' indev/outdev
571 * because of the physdev module. For ARP, indev and outdev are the 568 * because of the physdev module. For ARP, indev and outdev are the
572 * bridge ports. */ 569 * bridge ports. */
573static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, 570static unsigned int br_nf_forward_ip(void *priv,
574 struct sk_buff *skb, 571 struct sk_buff *skb,
575 const struct nf_hook_state *state) 572 const struct nf_hook_state *state)
576{ 573{
@@ -609,13 +606,13 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
609 } 606 }
610 607
611 if (pf == NFPROTO_IPV4) { 608 if (pf == NFPROTO_IPV4) {
612 if (br_validate_ipv4(skb)) 609 if (br_validate_ipv4(state->net, skb))
613 return NF_DROP; 610 return NF_DROP;
614 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; 611 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
615 } 612 }
616 613
617 if (pf == NFPROTO_IPV6) { 614 if (pf == NFPROTO_IPV6) {
618 if (br_validate_ipv6(skb)) 615 if (br_validate_ipv6(state->net, skb))
619 return NF_DROP; 616 return NF_DROP;
620 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; 617 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
621 } 618 }
@@ -626,14 +623,14 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
626 else 623 else
627 skb->protocol = htons(ETH_P_IPV6); 624 skb->protocol = htons(ETH_P_IPV6);
628 625
629 NF_HOOK(pf, NF_INET_FORWARD, NULL, skb, 626 NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb,
630 brnf_get_logical_dev(skb, state->in), 627 brnf_get_logical_dev(skb, state->in),
631 parent, br_nf_forward_finish); 628 parent, br_nf_forward_finish);
632 629
633 return NF_STOLEN; 630 return NF_STOLEN;
634} 631}
635 632
636static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, 633static unsigned int br_nf_forward_arp(void *priv,
637 struct sk_buff *skb, 634 struct sk_buff *skb,
638 const struct nf_hook_state *state) 635 const struct nf_hook_state *state)
639{ 636{
@@ -661,14 +658,13 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
661 return NF_ACCEPT; 658 return NF_ACCEPT;
662 } 659 }
663 *d = state->in; 660 *d = state->in;
664 NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb, 661 NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb,
665 state->in, state->out, br_nf_forward_finish); 662 state->in, state->out, br_nf_forward_finish);
666 663
667 return NF_STOLEN; 664 return NF_STOLEN;
668} 665}
669 666
670#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 667static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
671static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
672{ 668{
673 struct brnf_frag_data *data; 669 struct brnf_frag_data *data;
674 int err; 670 int err;
@@ -690,30 +686,26 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
690 __skb_push(skb, data->encap_size); 686 __skb_push(skb, data->encap_size);
691 687
692 nf_bridge_info_free(skb); 688 nf_bridge_info_free(skb);
693 return br_dev_queue_push_xmit(sk, skb); 689 return br_dev_queue_push_xmit(net, sk, skb);
694} 690}
695#endif
696 691
697#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) 692static int
698static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, 693br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
699 int (*output)(struct sock *, struct sk_buff *)) 694 int (*output)(struct net *, struct sock *, struct sk_buff *))
700{ 695{
701 unsigned int mtu = ip_skb_dst_mtu(skb); 696 unsigned int mtu = ip_skb_dst_mtu(skb);
702 struct iphdr *iph = ip_hdr(skb); 697 struct iphdr *iph = ip_hdr(skb);
703 struct rtable *rt = skb_rtable(skb);
704 struct net_device *dev = rt->dst.dev;
705 698
706 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || 699 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
707 (IPCB(skb)->frag_max_size && 700 (IPCB(skb)->frag_max_size &&
708 IPCB(skb)->frag_max_size > mtu))) { 701 IPCB(skb)->frag_max_size > mtu))) {
709 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 702 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
710 kfree_skb(skb); 703 kfree_skb(skb);
711 return -EMSGSIZE; 704 return -EMSGSIZE;
712 } 705 }
713 706
714 return ip_do_fragment(sk, skb, output); 707 return ip_do_fragment(net, sk, skb, output);
715} 708}
716#endif
717 709
718static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) 710static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
719{ 711{
@@ -722,7 +714,7 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
722 return 0; 714 return 0;
723} 715}
724 716
725static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) 717static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
726{ 718{
727 struct nf_bridge_info *nf_bridge; 719 struct nf_bridge_info *nf_bridge;
728 unsigned int mtu_reserved; 720 unsigned int mtu_reserved;
@@ -731,19 +723,19 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
731 723
732 if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { 724 if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
733 nf_bridge_info_free(skb); 725 nf_bridge_info_free(skb);
734 return br_dev_queue_push_xmit(sk, skb); 726 return br_dev_queue_push_xmit(net, sk, skb);
735 } 727 }
736 728
737 nf_bridge = nf_bridge_info_get(skb); 729 nf_bridge = nf_bridge_info_get(skb);
738 730
739#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
740 /* This is wrong! We should preserve the original fragment 731 /* This is wrong! We should preserve the original fragment
741 * boundaries by preserving frag_list rather than refragmenting. 732 * boundaries by preserving frag_list rather than refragmenting.
742 */ 733 */
743 if (skb->protocol == htons(ETH_P_IP)) { 734 if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) &&
735 skb->protocol == htons(ETH_P_IP)) {
744 struct brnf_frag_data *data; 736 struct brnf_frag_data *data;
745 737
746 if (br_validate_ipv4(skb)) 738 if (br_validate_ipv4(net, skb))
747 goto drop; 739 goto drop;
748 740
749 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; 741 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
@@ -760,15 +752,14 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
760 skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 752 skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
761 data->size); 753 data->size);
762 754
763 return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); 755 return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit);
764 } 756 }
765#endif 757 if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) &&
766#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 758 skb->protocol == htons(ETH_P_IPV6)) {
767 if (skb->protocol == htons(ETH_P_IPV6)) {
768 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); 759 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
769 struct brnf_frag_data *data; 760 struct brnf_frag_data *data;
770 761
771 if (br_validate_ipv6(skb)) 762 if (br_validate_ipv6(net, skb))
772 goto drop; 763 goto drop;
773 764
774 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; 765 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
@@ -783,21 +774,20 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
783 data->size); 774 data->size);
784 775
785 if (v6ops) 776 if (v6ops)
786 return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); 777 return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit);
787 778
788 kfree_skb(skb); 779 kfree_skb(skb);
789 return -EMSGSIZE; 780 return -EMSGSIZE;
790 } 781 }
791#endif
792 nf_bridge_info_free(skb); 782 nf_bridge_info_free(skb);
793 return br_dev_queue_push_xmit(sk, skb); 783 return br_dev_queue_push_xmit(net, sk, skb);
794 drop: 784 drop:
795 kfree_skb(skb); 785 kfree_skb(skb);
796 return 0; 786 return 0;
797} 787}
798 788
799/* PF_BRIDGE/POST_ROUTING ********************************************/ 789/* PF_BRIDGE/POST_ROUTING ********************************************/
800static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, 790static unsigned int br_nf_post_routing(void *priv,
801 struct sk_buff *skb, 791 struct sk_buff *skb,
802 const struct nf_hook_state *state) 792 const struct nf_hook_state *state)
803{ 793{
@@ -836,7 +826,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
836 else 826 else
837 skb->protocol = htons(ETH_P_IPV6); 827 skb->protocol = htons(ETH_P_IPV6);
838 828
839 NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb, 829 NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb,
840 NULL, realoutdev, 830 NULL, realoutdev,
841 br_nf_dev_queue_xmit); 831 br_nf_dev_queue_xmit);
842 832
@@ -846,7 +836,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
846/* IP/SABOTAGE *****************************************************/ 836/* IP/SABOTAGE *****************************************************/
847/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING 837/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
848 * for the second time. */ 838 * for the second time. */
849static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, 839static unsigned int ip_sabotage_in(void *priv,
850 struct sk_buff *skb, 840 struct sk_buff *skb,
851 const struct nf_hook_state *state) 841 const struct nf_hook_state *state)
852{ 842{
@@ -880,7 +870,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
880 skb->dev = nf_bridge->physindev; 870 skb->dev = nf_bridge->physindev;
881 871
882 nf_bridge->physoutdev = NULL; 872 nf_bridge->physoutdev = NULL;
883 br_handle_frame_finish(NULL, skb); 873 br_handle_frame_finish(dev_net(skb->dev), NULL, skb);
884} 874}
885 875
886static int br_nf_dev_xmit(struct sk_buff *skb) 876static int br_nf_dev_xmit(struct sk_buff *skb)
@@ -906,49 +896,42 @@ EXPORT_SYMBOL_GPL(br_netfilter_enable);
906static struct nf_hook_ops br_nf_ops[] __read_mostly = { 896static struct nf_hook_ops br_nf_ops[] __read_mostly = {
907 { 897 {
908 .hook = br_nf_pre_routing, 898 .hook = br_nf_pre_routing,
909 .owner = THIS_MODULE,
910 .pf = NFPROTO_BRIDGE, 899 .pf = NFPROTO_BRIDGE,
911 .hooknum = NF_BR_PRE_ROUTING, 900 .hooknum = NF_BR_PRE_ROUTING,
912 .priority = NF_BR_PRI_BRNF, 901 .priority = NF_BR_PRI_BRNF,
913 }, 902 },
914 { 903 {
915 .hook = br_nf_local_in, 904 .hook = br_nf_local_in,
916 .owner = THIS_MODULE,
917 .pf = NFPROTO_BRIDGE, 905 .pf = NFPROTO_BRIDGE,
918 .hooknum = NF_BR_LOCAL_IN, 906 .hooknum = NF_BR_LOCAL_IN,
919 .priority = NF_BR_PRI_BRNF, 907 .priority = NF_BR_PRI_BRNF,
920 }, 908 },
921 { 909 {
922 .hook = br_nf_forward_ip, 910 .hook = br_nf_forward_ip,
923 .owner = THIS_MODULE,
924 .pf = NFPROTO_BRIDGE, 911 .pf = NFPROTO_BRIDGE,
925 .hooknum = NF_BR_FORWARD, 912 .hooknum = NF_BR_FORWARD,
926 .priority = NF_BR_PRI_BRNF - 1, 913 .priority = NF_BR_PRI_BRNF - 1,
927 }, 914 },
928 { 915 {
929 .hook = br_nf_forward_arp, 916 .hook = br_nf_forward_arp,
930 .owner = THIS_MODULE,
931 .pf = NFPROTO_BRIDGE, 917 .pf = NFPROTO_BRIDGE,
932 .hooknum = NF_BR_FORWARD, 918 .hooknum = NF_BR_FORWARD,
933 .priority = NF_BR_PRI_BRNF, 919 .priority = NF_BR_PRI_BRNF,
934 }, 920 },
935 { 921 {
936 .hook = br_nf_post_routing, 922 .hook = br_nf_post_routing,
937 .owner = THIS_MODULE,
938 .pf = NFPROTO_BRIDGE, 923 .pf = NFPROTO_BRIDGE,
939 .hooknum = NF_BR_POST_ROUTING, 924 .hooknum = NF_BR_POST_ROUTING,
940 .priority = NF_BR_PRI_LAST, 925 .priority = NF_BR_PRI_LAST,
941 }, 926 },
942 { 927 {
943 .hook = ip_sabotage_in, 928 .hook = ip_sabotage_in,
944 .owner = THIS_MODULE,
945 .pf = NFPROTO_IPV4, 929 .pf = NFPROTO_IPV4,
946 .hooknum = NF_INET_PRE_ROUTING, 930 .hooknum = NF_INET_PRE_ROUTING,
947 .priority = NF_IP_PRI_FIRST, 931 .priority = NF_IP_PRI_FIRST,
948 }, 932 },
949 { 933 {
950 .hook = ip_sabotage_in, 934 .hook = ip_sabotage_in,
951 .owner = THIS_MODULE,
952 .pf = NFPROTO_IPV6, 935 .pf = NFPROTO_IPV6,
953 .hooknum = NF_INET_PRE_ROUTING, 936 .hooknum = NF_INET_PRE_ROUTING,
954 .priority = NF_IP6_PRI_FIRST, 937 .priority = NF_IP6_PRI_FIRST,
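Two mechanical cleanups run through br_netfilter_hooks.c. First, the #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4/IPV6) fences around the fragmentation paths become runtime-constant tests: both branches now always compile and type-check, and the compiler folds the disabled one away. Second, the .owner = THIS_MODULE initialisers vanish from the nf_hook_ops array, consistent with netfilter dropping per-hook module refcounting in the same development cycle. The IS_ENABLED shape:

	/* IS_ENABLED(x) is a compile-time 0/1, so the dead branch is
	 * eliminated but still parsed, catching config-dependent bitrot
	 */
	if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) &&
	    skb->protocol == htons(ETH_P_IP))
		return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit);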
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 77383bfe7ea3..d61f56efc8dc 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -100,10 +100,9 @@ bad:
100 return -1; 100 return -1;
101} 101}
102 102
103int br_validate_ipv6(struct sk_buff *skb) 103int br_validate_ipv6(struct net *net, struct sk_buff *skb)
104{ 104{
105 const struct ipv6hdr *hdr; 105 const struct ipv6hdr *hdr;
106 struct net_device *dev = skb->dev;
107 struct inet6_dev *idev = __in6_dev_get(skb->dev); 106 struct inet6_dev *idev = __in6_dev_get(skb->dev);
108 u32 pkt_len; 107 u32 pkt_len;
109 u8 ip6h_len = sizeof(struct ipv6hdr); 108 u8 ip6h_len = sizeof(struct ipv6hdr);
@@ -123,12 +122,12 @@ int br_validate_ipv6(struct sk_buff *skb)
123 122
124 if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { 123 if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
125 if (pkt_len + ip6h_len > skb->len) { 124 if (pkt_len + ip6h_len > skb->len) {
126 IP6_INC_STATS_BH(dev_net(dev), idev, 125 IP6_INC_STATS_BH(net, idev,
127 IPSTATS_MIB_INTRUNCATEDPKTS); 126 IPSTATS_MIB_INTRUNCATEDPKTS);
128 goto drop; 127 goto drop;
129 } 128 }
130 if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { 129 if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
131 IP6_INC_STATS_BH(dev_net(dev), idev, 130 IP6_INC_STATS_BH(net, idev,
132 IPSTATS_MIB_INDISCARDS); 131 IPSTATS_MIB_INDISCARDS);
133 goto drop; 132 goto drop;
134 } 133 }
@@ -143,7 +142,7 @@ int br_validate_ipv6(struct sk_buff *skb)
143 return 0; 142 return 0;
144 143
145inhdr_error: 144inhdr_error:
146 IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS); 145 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
147drop: 146drop:
148 return -1; 147 return -1;
149} 148}
@@ -161,7 +160,7 @@ br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb,
161 * for br_nf_pre_routing_finish(), same logic is used here but 160 * for br_nf_pre_routing_finish(), same logic is used here but
162 * equivalent IPv6 function ip6_route_input() called indirectly. 161 * equivalent IPv6 function ip6_route_input() called indirectly.
163 */ 162 */
164static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) 163static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struct sk_buff *skb)
165{ 164{
166 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 165 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
167 struct rtable *rt; 166 struct rtable *rt;
@@ -189,7 +188,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
189 nf_bridge_update_protocol(skb); 188 nf_bridge_update_protocol(skb);
190 nf_bridge_push_encap_header(skb); 189 nf_bridge_push_encap_header(skb);
191 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, 190 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
192 sk, skb, skb->dev, NULL, 191 net, sk, skb, skb->dev, NULL,
193 br_nf_pre_routing_finish_bridge, 192 br_nf_pre_routing_finish_bridge,
194 1); 193 1);
195 return 0; 194 return 0;
@@ -208,7 +207,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
208 skb->dev = nf_bridge->physindev; 207 skb->dev = nf_bridge->physindev;
209 nf_bridge_update_protocol(skb); 208 nf_bridge_update_protocol(skb);
210 nf_bridge_push_encap_header(skb); 209 nf_bridge_push_encap_header(skb);
211 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, 210 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb,
212 skb->dev, NULL, 211 skb->dev, NULL,
213 br_handle_frame_finish, 1); 212 br_handle_frame_finish, 1);
214 213
@@ -218,13 +217,13 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
218/* Replicate the checks that IPv6 does on packet reception and pass the packet 217/* Replicate the checks that IPv6 does on packet reception and pass the packet
219 * to ip6tables. 218 * to ip6tables.
220 */ 219 */
221unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, 220unsigned int br_nf_pre_routing_ipv6(void *priv,
222 struct sk_buff *skb, 221 struct sk_buff *skb,
223 const struct nf_hook_state *state) 222 const struct nf_hook_state *state)
224{ 223{
225 struct nf_bridge_info *nf_bridge; 224 struct nf_bridge_info *nf_bridge;
226 225
227 if (br_validate_ipv6(skb)) 226 if (br_validate_ipv6(state->net, skb))
228 return NF_DROP; 227 return NF_DROP;
229 228
230 nf_bridge_put(skb->nf_bridge); 229 nf_bridge_put(skb->nf_bridge);
@@ -237,7 +236,7 @@ unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
237 nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; 236 nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;
238 237
239 skb->protocol = htons(ETH_P_IPV6); 238 skb->protocol = htons(ETH_P_IPV6);
240 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, 239 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
241 skb->dev, NULL, 240 skb->dev, NULL,
242 br_nf_pre_routing_finish_ipv6); 241 br_nf_pre_routing_finish_ipv6);
243 242
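br_validate_ipv4() and br_validate_ipv6() take struct net explicitly so the SNMP counters are bumped in the namespace the hook actually ran in (state->net), rather than one re-derived from skb->dev, which the bridge path may have rewritten by this point. The substitution is uniform across both validators:

	/* old: namespace recomputed from whatever skb->dev is now */
	IP6_INC_STATS_BH(dev_net(skb->dev), idev, IPSTATS_MIB_INDISCARDS);

	/* new: namespace handed down from the netfilter hook state */
	IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS);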
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index ea748c93a07f..40197ff8918a 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -21,36 +21,35 @@
21#include "br_private.h" 21#include "br_private.h"
22#include "br_private_stp.h" 22#include "br_private_stp.h"
23 23
24static int br_get_num_vlan_infos(const struct net_port_vlans *pv, 24static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg,
25 u32 filter_mask) 25 u32 filter_mask)
26{ 26{
27 u16 vid_range_start = 0, vid_range_end = 0; 27 struct net_bridge_vlan *v;
28 u16 vid_range_flags = 0; 28 u16 vid_range_start = 0, vid_range_end = 0, vid_range_flags = 0;
29 u16 pvid, vid, flags; 29 u16 flags, pvid;
30 int num_vlans = 0; 30 int num_vlans = 0;
31 31
32 if (filter_mask & RTEXT_FILTER_BRVLAN)
33 return pv->num_vlans;
34
35 if (!(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) 32 if (!(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED))
36 return 0; 33 return 0;
37 34
38 /* Count number of vlan info's 35 pvid = br_get_pvid(vg);
39 */ 36 /* Count number of vlan infos */
40 pvid = br_get_pvid(pv); 37 list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
41 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
42 flags = 0; 38 flags = 0;
43 if (vid == pvid) 39 /* only a context, bridge vlan not activated */
40 if (!br_vlan_should_use(v))
41 continue;
42 if (v->vid == pvid)
44 flags |= BRIDGE_VLAN_INFO_PVID; 43 flags |= BRIDGE_VLAN_INFO_PVID;
45 44
46 if (test_bit(vid, pv->untagged_bitmap)) 45 if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
47 flags |= BRIDGE_VLAN_INFO_UNTAGGED; 46 flags |= BRIDGE_VLAN_INFO_UNTAGGED;
48 47
49 if (vid_range_start == 0) { 48 if (vid_range_start == 0) {
50 goto initvars; 49 goto initvars;
51 } else if ((vid - vid_range_end) == 1 && 50 } else if ((v->vid - vid_range_end) == 1 &&
52 flags == vid_range_flags) { 51 flags == vid_range_flags) {
53 vid_range_end = vid; 52 vid_range_end = v->vid;
54 continue; 53 continue;
55 } else { 54 } else {
56 if ((vid_range_end - vid_range_start) > 0) 55 if ((vid_range_end - vid_range_start) > 0)
@@ -59,8 +58,8 @@ static int br_get_num_vlan_infos(const struct net_port_vlans *pv,
59 num_vlans += 1; 58 num_vlans += 1;
60 } 59 }
61initvars: 60initvars:
62 vid_range_start = vid; 61 vid_range_start = v->vid;
63 vid_range_end = vid; 62 vid_range_end = v->vid;
64 vid_range_flags = flags; 63 vid_range_flags = flags;
65 } 64 }
66 65
@@ -74,28 +73,43 @@ initvars:
74 return num_vlans; 73 return num_vlans;
75} 74}
76 75
76static int br_get_num_vlan_infos(struct net_bridge_vlan_group *vg,
77 u32 filter_mask)
78{
79 int num_vlans;
80
81 if (!vg)
82 return 0;
83
84 if (filter_mask & RTEXT_FILTER_BRVLAN)
85 return vg->num_vlans;
86
87 rcu_read_lock();
88 num_vlans = __get_num_vlan_infos(vg, filter_mask);
89 rcu_read_unlock();
90
91 return num_vlans;
92}
93
77static size_t br_get_link_af_size_filtered(const struct net_device *dev, 94static size_t br_get_link_af_size_filtered(const struct net_device *dev,
78 u32 filter_mask) 95 u32 filter_mask)
79{ 96{
80 struct net_port_vlans *pv; 97 struct net_bridge_vlan_group *vg = NULL;
98 struct net_bridge_port *p;
99 struct net_bridge *br;
81 int num_vlan_infos; 100 int num_vlan_infos;
82 101
83 rcu_read_lock(); 102 rcu_read_lock();
84 if (br_port_exists(dev)) 103 if (br_port_exists(dev)) {
85 pv = nbp_get_vlan_info(br_port_get_rcu(dev)); 104 p = br_port_get_rcu(dev);
86 else if (dev->priv_flags & IFF_EBRIDGE) 105 vg = nbp_vlan_group_rcu(p);
87 pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); 106 } else if (dev->priv_flags & IFF_EBRIDGE) {
88 else 107 br = netdev_priv(dev);
89 pv = NULL; 108 vg = br_vlan_group_rcu(br);
90 if (pv) 109 }
91 num_vlan_infos = br_get_num_vlan_infos(pv, filter_mask); 110 num_vlan_infos = br_get_num_vlan_infos(vg, filter_mask);
92 else
93 num_vlan_infos = 0;
94 rcu_read_unlock(); 111 rcu_read_unlock();
95 112
96 if (!num_vlan_infos)
97 return 0;
98
99 /* Each VLAN is returned in bridge_vlan_info along with flags */ 113 /* Each VLAN is returned in bridge_vlan_info along with flags */
100 return num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info)); 114 return num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info));
101} 115}
@@ -113,6 +127,20 @@ static inline size_t br_port_info_size(void)
113 + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ 127 + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */
114 + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ 128 + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */
115 + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ 129 + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
130 + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
131 + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
132 + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
133 + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_COST */
134 + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_ID */
135 + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_NO */
136 + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_TOPOLOGY_CHANGE_ACK */
137 + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_CONFIG_PENDING */
138 + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_MESSAGE_AGE_TIMER */
139 + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_FORWARD_DELAY_TIMER */
140 + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */
141#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
142 + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */
143#endif
116 + 0; 144 + 0;
117} 145}
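
Every term in br_port_info_size() is an nla_total_size() of one attribute's payload, so the reply skb can be sized before any attribute is written. A small userspace sketch of that arithmetic, assuming only the classic 4-byte netlink attribute header and 4-byte alignment:

#include <stdio.h>
#include <stdint.h>

struct nlattr {
	uint16_t nla_len;
	uint16_t nla_type;
};

#define NLA_ALIGNTO	4
#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN	((int)NLA_ALIGN(sizeof(struct nlattr)))

/* per-attribute wire size: aligned header plus padded payload */
static int nla_total_size(int payload)
{
	return NLA_ALIGN(NLA_HDRLEN + payload);
}

int main(void)
{
	/* a u8 flag such as IFLA_BRPORT_PROXYARP still costs 8 bytes */
	printf("u8:  %d\n", nla_total_size(sizeof(uint8_t)));	/* 8 */
	/* a u64 timer such as IFLA_BRPORT_HOLD_TIMER costs 12 */
	printf("u64: %d\n", nla_total_size(sizeof(uint64_t)));	/* 12 */
	return 0;
}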
118 146
@@ -134,6 +162,7 @@ static int br_port_fill_attrs(struct sk_buff *skb,
134 const struct net_bridge_port *p) 162 const struct net_bridge_port *p)
135{ 163{
136 u8 mode = !!(p->flags & BR_HAIRPIN_MODE); 164 u8 mode = !!(p->flags & BR_HAIRPIN_MODE);
165 u64 timerval;
137 166
138 if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) || 167 if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) ||
139 nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) || 168 nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) ||
@@ -146,9 +175,36 @@ static int br_port_fill_attrs(struct sk_buff *skb,
146 nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) || 175 nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) ||
147 nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) || 176 nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) ||
148 nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI, 177 nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI,
149 !!(p->flags & BR_PROXYARP_WIFI))) 178 !!(p->flags & BR_PROXYARP_WIFI)) ||
179 nla_put(skb, IFLA_BRPORT_ROOT_ID, sizeof(struct ifla_bridge_id),
180 &p->designated_root) ||
181 nla_put(skb, IFLA_BRPORT_BRIDGE_ID, sizeof(struct ifla_bridge_id),
182 &p->designated_bridge) ||
183 nla_put_u16(skb, IFLA_BRPORT_DESIGNATED_PORT, p->designated_port) ||
184 nla_put_u16(skb, IFLA_BRPORT_DESIGNATED_COST, p->designated_cost) ||
185 nla_put_u16(skb, IFLA_BRPORT_ID, p->port_id) ||
186 nla_put_u16(skb, IFLA_BRPORT_NO, p->port_no) ||
187 nla_put_u8(skb, IFLA_BRPORT_TOPOLOGY_CHANGE_ACK,
188 p->topology_change_ack) ||
189 nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending))
190 return -EMSGSIZE;
191
192 timerval = br_timer_value(&p->message_age_timer);
193 if (nla_put_u64(skb, IFLA_BRPORT_MESSAGE_AGE_TIMER, timerval))
194 return -EMSGSIZE;
195 timerval = br_timer_value(&p->forward_delay_timer);
196 if (nla_put_u64(skb, IFLA_BRPORT_FORWARD_DELAY_TIMER, timerval))
197 return -EMSGSIZE;
198 timerval = br_timer_value(&p->hold_timer);
199 if (nla_put_u64(skb, IFLA_BRPORT_HOLD_TIMER, timerval))
150 return -EMSGSIZE; 200 return -EMSGSIZE;
151 201
202#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
203 if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER,
204 p->multicast_router))
205 return -EMSGSIZE;
206#endif
207
152 return 0; 208 return 0;
153} 209}
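
br_port_fill_attrs() keeps appending attributes and bails out with -EMSGSIZE as soon as one no longer fits; the sizes reserved above are what make that failure unlikely. A rough userspace sketch of what an nla_put()-style append does on the wire (the buffer type and error value are hypothetical stand-ins):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NLA_ALIGN(len)	(((len) + 3) & ~3)

struct nlattr {
	uint16_t nla_len;	/* header + payload, unpadded */
	uint16_t nla_type;
};

struct msg {
	unsigned char buf[64];
	int used;
};

/* append one attribute; -1 stands in for the kernel's -EMSGSIZE */
static int put_attr(struct msg *m, uint16_t type, const void *data, int len)
{
	int total = NLA_ALIGN((int)sizeof(struct nlattr) + len);
	struct nlattr *nla;

	if (m->used + total > (int)sizeof(m->buf))
		return -1;
	nla = (struct nlattr *)(m->buf + m->used);
	nla->nla_len = sizeof(struct nlattr) + len;
	nla->nla_type = type;
	memcpy(nla + 1, data, len);	/* payload follows the header */
	memset((unsigned char *)(nla + 1) + len, 0, total - nla->nla_len);
	m->used += total;
	return 0;
}

int main(void)
{
	struct msg m = { .used = 0 };
	uint64_t timerval = 12345;	/* e.g. a message-age timer value */

	if (put_attr(&m, 1, &timerval, sizeof(timerval)))
		return 1;
	printf("used=%d bytes\n", m.used);	/* 12 */
	return 0;
}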
154 210
@@ -185,31 +241,33 @@ nla_put_failure:
185} 241}
186 242
187static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb, 243static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb,
188 const struct net_port_vlans *pv) 244 struct net_bridge_vlan_group *vg)
189{ 245{
190 u16 vid_range_start = 0, vid_range_end = 0; 246 struct net_bridge_vlan *v;
191 u16 vid_range_flags = 0; 247 u16 vid_range_start = 0, vid_range_end = 0, vid_range_flags = 0;
192 u16 pvid, vid, flags; 248 u16 flags, pvid;
193 int err = 0; 249 int err = 0;
194 250
195 /* Pack IFLA_BRIDGE_VLAN_INFO's for every vlan 251 /* Pack IFLA_BRIDGE_VLAN_INFO's for every vlan
196 * and mark vlan info with begin and end flags 252 * and mark vlan info with begin and end flags
197 * if vlaninfo represents a range 253 * if vlaninfo represents a range
198 */ 254 */
199 pvid = br_get_pvid(pv); 255 pvid = br_get_pvid(vg);
200 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { 256 list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
201 flags = 0; 257 flags = 0;
202 if (vid == pvid) 258 if (!br_vlan_should_use(v))
259 continue;
260 if (v->vid == pvid)
203 flags |= BRIDGE_VLAN_INFO_PVID; 261 flags |= BRIDGE_VLAN_INFO_PVID;
204 262
205 if (test_bit(vid, pv->untagged_bitmap)) 263 if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
206 flags |= BRIDGE_VLAN_INFO_UNTAGGED; 264 flags |= BRIDGE_VLAN_INFO_UNTAGGED;
207 265
208 if (vid_range_start == 0) { 266 if (vid_range_start == 0) {
209 goto initvars; 267 goto initvars;
210 } else if ((vid - vid_range_end) == 1 && 268 } else if ((v->vid - vid_range_end) == 1 &&
211 flags == vid_range_flags) { 269 flags == vid_range_flags) {
212 vid_range_end = vid; 270 vid_range_end = v->vid;
213 continue; 271 continue;
214 } else { 272 } else {
215 err = br_fill_ifvlaninfo_range(skb, vid_range_start, 273 err = br_fill_ifvlaninfo_range(skb, vid_range_start,
@@ -220,8 +278,8 @@ static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb,
220 } 278 }
221 279
222initvars: 280initvars:
223 vid_range_start = vid; 281 vid_range_start = v->vid;
224 vid_range_end = vid; 282 vid_range_end = v->vid;
225 vid_range_flags = flags; 283 vid_range_flags = flags;
226 } 284 }
227 285
@@ -238,19 +296,23 @@ initvars:
238} 296}
239 297
240static int br_fill_ifvlaninfo(struct sk_buff *skb, 298static int br_fill_ifvlaninfo(struct sk_buff *skb,
241 const struct net_port_vlans *pv) 299 struct net_bridge_vlan_group *vg)
242{ 300{
243 struct bridge_vlan_info vinfo; 301 struct bridge_vlan_info vinfo;
244 u16 pvid, vid; 302 struct net_bridge_vlan *v;
303 u16 pvid;
304
305 pvid = br_get_pvid(vg);
306 list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
307 if (!br_vlan_should_use(v))
308 continue;
245 309
246 pvid = br_get_pvid(pv); 310 vinfo.vid = v->vid;
247 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
248 vinfo.vid = vid;
249 vinfo.flags = 0; 311 vinfo.flags = 0;
250 if (vid == pvid) 312 if (v->vid == pvid)
251 vinfo.flags |= BRIDGE_VLAN_INFO_PVID; 313 vinfo.flags |= BRIDGE_VLAN_INFO_PVID;
252 314
253 if (test_bit(vid, pv->untagged_bitmap)) 315 if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
254 vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; 316 vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED;
255 317
256 if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, 318 if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO,
@@ -269,11 +331,11 @@ nla_put_failure:
269 * Contains port and master info as well as carrier and bridge state. 331 * Contains port and master info as well as carrier and bridge state.
270 */ 332 */
271static int br_fill_ifinfo(struct sk_buff *skb, 333static int br_fill_ifinfo(struct sk_buff *skb,
272 const struct net_bridge_port *port, 334 struct net_bridge_port *port,
273 u32 pid, u32 seq, int event, unsigned int flags, 335 u32 pid, u32 seq, int event, unsigned int flags,
274 u32 filter_mask, const struct net_device *dev) 336 u32 filter_mask, const struct net_device *dev)
275{ 337{
276 const struct net_bridge *br; 338 struct net_bridge *br;
277 struct ifinfomsg *hdr; 339 struct ifinfomsg *hdr;
278 struct nlmsghdr *nlh; 340 struct nlmsghdr *nlh;
279 u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; 341 u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
@@ -320,26 +382,31 @@ static int br_fill_ifinfo(struct sk_buff *skb,
320 /* Check if the VID information is requested */ 382 /* Check if the VID information is requested */
321 if ((filter_mask & RTEXT_FILTER_BRVLAN) || 383 if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
322 (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { 384 (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
323 const struct net_port_vlans *pv; 385 struct net_bridge_vlan_group *vg;
324 struct nlattr *af; 386 struct nlattr *af;
325 int err; 387 int err;
326 388
389 /* RCU needed because of the VLAN locking rules (rcu || rtnl) */
390 rcu_read_lock();
327 if (port) 391 if (port)
328 pv = nbp_get_vlan_info(port); 392 vg = nbp_vlan_group_rcu(port);
329 else 393 else
330 pv = br_get_vlan_info(br); 394 vg = br_vlan_group_rcu(br);
331 395
332 if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) 396 if (!vg || !vg->num_vlans) {
397 rcu_read_unlock();
333 goto done; 398 goto done;
334 399 }
335 af = nla_nest_start(skb, IFLA_AF_SPEC); 400 af = nla_nest_start(skb, IFLA_AF_SPEC);
336 if (!af) 401 if (!af) {
402 rcu_read_unlock();
337 goto nla_put_failure; 403 goto nla_put_failure;
338 404 }
339 if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) 405 if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
340 err = br_fill_ifvlaninfo_compressed(skb, pv); 406 err = br_fill_ifvlaninfo_compressed(skb, vg);
341 else 407 else
342 err = br_fill_ifvlaninfo(skb, pv); 408 err = br_fill_ifvlaninfo(skb, vg);
409 rcu_read_unlock();
343 if (err) 410 if (err)
344 goto nla_put_failure; 411 goto nla_put_failure;
345 nla_nest_end(skb, af); 412 nla_nest_end(skb, af);
@@ -413,14 +480,14 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
413 switch (cmd) { 480 switch (cmd) {
414 case RTM_SETLINK: 481 case RTM_SETLINK:
415 if (p) { 482 if (p) {
483 /* if the MASTER flag is set this will act on the global
484 * per-VLAN entry as well
485 */
416 err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); 486 err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
417 if (err) 487 if (err)
418 break; 488 break;
419
420 if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
421 err = br_vlan_add(p->br, vinfo->vid,
422 vinfo->flags);
423 } else { 489 } else {
490 vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY;
424 err = br_vlan_add(br, vinfo->vid, vinfo->flags); 491 err = br_vlan_add(br, vinfo->vid, vinfo->flags);
425 } 492 }
426 break; 493 break;
@@ -462,6 +529,9 @@ static int br_afspec(struct net_bridge *br,
462 if (vinfo_start) 529 if (vinfo_start)
463 return -EINVAL; 530 return -EINVAL;
464 vinfo_start = vinfo; 531 vinfo_start = vinfo;
532 /* don't allow range of pvids */
533 if (vinfo_start->flags & BRIDGE_VLAN_INFO_PVID)
534 return -EINVAL;
465 continue; 535 continue;
466 } 536 }
467 537
@@ -507,6 +577,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
507 [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, 577 [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
508 [IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 }, 578 [IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 },
509 [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 }, 579 [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 },
580 [IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NLA_U8 },
510}; 581};
511 582
512/* Change the state of the port and notify spanning tree */ 583/* Change the state of the port and notify spanning tree */
@@ -578,6 +649,18 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
578 return err; 649 return err;
579 } 650 }
580 651
652 if (tb[IFLA_BRPORT_FLUSH])
653 br_fdb_delete_by_port(p->br, p, 0, 0);
654
655#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
656 if (tb[IFLA_BRPORT_MULTICAST_ROUTER]) {
657 u8 mcast_router = nla_get_u8(tb[IFLA_BRPORT_MULTICAST_ROUTER]);
658
659 err = br_multicast_set_port_router(p, mcast_router);
660 if (err)
661 return err;
662 }
663#endif
581 br_port_flags_change(p, old_flags ^ p->flags); 664 br_port_flags_change(p, old_flags ^ p->flags);
582 return 0; 665 return 0;
583} 666}
@@ -744,6 +827,27 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
744 [IFLA_BR_PRIORITY] = { .type = NLA_U16 }, 827 [IFLA_BR_PRIORITY] = { .type = NLA_U16 },
745 [IFLA_BR_VLAN_FILTERING] = { .type = NLA_U8 }, 828 [IFLA_BR_VLAN_FILTERING] = { .type = NLA_U8 },
746 [IFLA_BR_VLAN_PROTOCOL] = { .type = NLA_U16 }, 829 [IFLA_BR_VLAN_PROTOCOL] = { .type = NLA_U16 },
830 [IFLA_BR_GROUP_FWD_MASK] = { .type = NLA_U16 },
831 [IFLA_BR_GROUP_ADDR] = { .type = NLA_BINARY,
832 .len = ETH_ALEN },
833 [IFLA_BR_MCAST_ROUTER] = { .type = NLA_U8 },
834 [IFLA_BR_MCAST_SNOOPING] = { .type = NLA_U8 },
835 [IFLA_BR_MCAST_QUERY_USE_IFADDR] = { .type = NLA_U8 },
836 [IFLA_BR_MCAST_QUERIER] = { .type = NLA_U8 },
837 [IFLA_BR_MCAST_HASH_ELASTICITY] = { .type = NLA_U32 },
838 [IFLA_BR_MCAST_HASH_MAX] = { .type = NLA_U32 },
839 [IFLA_BR_MCAST_LAST_MEMBER_CNT] = { .type = NLA_U32 },
840 [IFLA_BR_MCAST_STARTUP_QUERY_CNT] = { .type = NLA_U32 },
841 [IFLA_BR_MCAST_LAST_MEMBER_INTVL] = { .type = NLA_U64 },
842 [IFLA_BR_MCAST_MEMBERSHIP_INTVL] = { .type = NLA_U64 },
843 [IFLA_BR_MCAST_QUERIER_INTVL] = { .type = NLA_U64 },
844 [IFLA_BR_MCAST_QUERY_INTVL] = { .type = NLA_U64 },
845 [IFLA_BR_MCAST_QUERY_RESPONSE_INTVL] = { .type = NLA_U64 },
846 [IFLA_BR_MCAST_STARTUP_QUERY_INTVL] = { .type = NLA_U64 },
847 [IFLA_BR_NF_CALL_IPTABLES] = { .type = NLA_U8 },
848 [IFLA_BR_NF_CALL_IP6TABLES] = { .type = NLA_U8 },
849 [IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 },
850 [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 },
747}; 851};
748 852
749static int br_changelink(struct net_device *brdev, struct nlattr *tb[], 853static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -774,9 +878,9 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
774 } 878 }
775 879
776 if (data[IFLA_BR_AGEING_TIME]) { 880 if (data[IFLA_BR_AGEING_TIME]) {
777 u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]); 881 err = br_set_ageing_time(br, nla_get_u32(data[IFLA_BR_AGEING_TIME]));
778 882 if (err)
779 br->ageing_time = clock_t_to_jiffies(ageing_time); 883 return err;
780 } 884 }
781 885
782 if (data[IFLA_BR_STP_STATE]) { 886 if (data[IFLA_BR_STP_STATE]) {
@@ -807,6 +911,158 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
807 if (err) 911 if (err)
808 return err; 912 return err;
809 } 913 }
914
915 if (data[IFLA_BR_VLAN_DEFAULT_PVID]) {
916 __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]);
917
918 err = __br_vlan_set_default_pvid(br, defpvid);
919 if (err)
920 return err;
921 }
922#endif
923
924 if (data[IFLA_BR_GROUP_FWD_MASK]) {
925 u16 fwd_mask = nla_get_u16(data[IFLA_BR_GROUP_FWD_MASK]);
926
927 if (fwd_mask & BR_GROUPFWD_RESTRICTED)
928 return -EINVAL;
929 br->group_fwd_mask = fwd_mask;
930 }
931
932 if (data[IFLA_BR_GROUP_ADDR]) {
933 u8 new_addr[ETH_ALEN];
934
935 if (nla_len(data[IFLA_BR_GROUP_ADDR]) != ETH_ALEN)
936 return -EINVAL;
937 memcpy(new_addr, nla_data(data[IFLA_BR_GROUP_ADDR]), ETH_ALEN);
938 if (!is_link_local_ether_addr(new_addr))
939 return -EINVAL;
940 if (new_addr[5] == 1 || /* 802.3x Pause address */
941 new_addr[5] == 2 || /* 802.3ad Slow protocols */
942 new_addr[5] == 3) /* 802.1X PAE address */
943 return -EINVAL;
944 spin_lock_bh(&br->lock);
945 memcpy(br->group_addr, new_addr, sizeof(br->group_addr));
946 spin_unlock_bh(&br->lock);
947 br->group_addr_set = true;
948 br_recalculate_fwd_mask(br);
949 }
950
951 if (data[IFLA_BR_FDB_FLUSH])
952 br_fdb_flush(br);
953
954#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
955 if (data[IFLA_BR_MCAST_ROUTER]) {
956 u8 multicast_router = nla_get_u8(data[IFLA_BR_MCAST_ROUTER]);
957
958 err = br_multicast_set_router(br, multicast_router);
959 if (err)
960 return err;
961 }
962
963 if (data[IFLA_BR_MCAST_SNOOPING]) {
964 u8 mcast_snooping = nla_get_u8(data[IFLA_BR_MCAST_SNOOPING]);
965
966 err = br_multicast_toggle(br, mcast_snooping);
967 if (err)
968 return err;
969 }
970
971 if (data[IFLA_BR_MCAST_QUERY_USE_IFADDR]) {
972 u8 val;
973
974 val = nla_get_u8(data[IFLA_BR_MCAST_QUERY_USE_IFADDR]);
975 br->multicast_query_use_ifaddr = !!val;
976 }
977
978 if (data[IFLA_BR_MCAST_QUERIER]) {
979 u8 mcast_querier = nla_get_u8(data[IFLA_BR_MCAST_QUERIER]);
980
981 err = br_multicast_set_querier(br, mcast_querier);
982 if (err)
983 return err;
984 }
985
986 if (data[IFLA_BR_MCAST_HASH_ELASTICITY]) {
987 u32 val = nla_get_u32(data[IFLA_BR_MCAST_HASH_ELASTICITY]);
988
989 br->hash_elasticity = val;
990 }
991
992 if (data[IFLA_BR_MCAST_HASH_MAX]) {
993 u32 hash_max = nla_get_u32(data[IFLA_BR_MCAST_HASH_MAX]);
994
995 err = br_multicast_set_hash_max(br, hash_max);
996 if (err)
997 return err;
998 }
999
1000 if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) {
1001 u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]);
1002
1003 br->multicast_last_member_count = val;
1004 }
1005
1006 if (data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]) {
1007 u32 val = nla_get_u32(data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]);
1008
1009 br->multicast_startup_query_count = val;
1010 }
1011
1012 if (data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]) {
1013 u64 val = nla_get_u64(data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]);
1014
1015 br->multicast_last_member_interval = clock_t_to_jiffies(val);
1016 }
1017
1018 if (data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]) {
1019 u64 val = nla_get_u64(data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]);
1020
1021 br->multicast_membership_interval = clock_t_to_jiffies(val);
1022 }
1023
1024 if (data[IFLA_BR_MCAST_QUERIER_INTVL]) {
1025 u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERIER_INTVL]);
1026
1027 br->multicast_querier_interval = clock_t_to_jiffies(val);
1028 }
1029
1030 if (data[IFLA_BR_MCAST_QUERY_INTVL]) {
1031 u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]);
1032
1033 br->multicast_query_interval = clock_t_to_jiffies(val);
1034 }
1035
1036 if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) {
1037 u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]);
1038
1039 br->multicast_query_response_interval = clock_t_to_jiffies(val);
1040 }
1041
1042 if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) {
1043 u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]);
1044
1045 br->multicast_startup_query_interval = clock_t_to_jiffies(val);
1046 }
1047#endif
1048#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
1049 if (data[IFLA_BR_NF_CALL_IPTABLES]) {
1050 u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_IPTABLES]);
1051
1052 br->nf_call_iptables = val ? true : false;
1053 }
1054
1055 if (data[IFLA_BR_NF_CALL_IP6TABLES]) {
1056 u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_IP6TABLES]);
1057
1058 br->nf_call_ip6tables = val ? true : false;
1059 }
1060
1061 if (data[IFLA_BR_NF_CALL_ARPTABLES]) {
1062 u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_ARPTABLES]);
1063
1064 br->nf_call_arptables = val ? true : false;
1065 }
810#endif 1066#endif
811 1067
812 return 0; 1068 return 0;
@@ -823,6 +1079,40 @@ static size_t br_get_size(const struct net_device *brdev)
823 nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_FILTERING */ 1079 nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_FILTERING */
824#ifdef CONFIG_BRIDGE_VLAN_FILTERING 1080#ifdef CONFIG_BRIDGE_VLAN_FILTERING
825 nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */ 1081 nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */
1082 nla_total_size(sizeof(u16)) + /* IFLA_BR_VLAN_DEFAULT_PVID */
1083#endif
1084 nla_total_size(sizeof(u16)) + /* IFLA_BR_GROUP_FWD_MASK */
1085 nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_ROOT_ID */
1086 nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_BRIDGE_ID */
1087 nla_total_size(sizeof(u16)) + /* IFLA_BR_ROOT_PORT */
1088 nla_total_size(sizeof(u32)) + /* IFLA_BR_ROOT_PATH_COST */
1089 nla_total_size(sizeof(u8)) + /* IFLA_BR_TOPOLOGY_CHANGE */
1090 nla_total_size(sizeof(u8)) + /* IFLA_BR_TOPOLOGY_CHANGE_DETECTED */
1091 nla_total_size(sizeof(u64)) + /* IFLA_BR_HELLO_TIMER */
1092 nla_total_size(sizeof(u64)) + /* IFLA_BR_TCN_TIMER */
1093 nla_total_size(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */
1094 nla_total_size(sizeof(u64)) + /* IFLA_BR_GC_TIMER */
1095 nla_total_size(ETH_ALEN) + /* IFLA_BR_GROUP_ADDR */
1096#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
1097 nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_ROUTER */
1098 nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_SNOOPING */
1099 nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERY_USE_IFADDR */
1100 nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERIER */
1101 nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_ELASTICITY */
1102 nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_MAX */
1103 nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_LAST_MEMBER_CNT */
1104 nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_STARTUP_QUERY_CNT */
1105 nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_LAST_MEMBER_INTVL */
1106 nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_MEMBERSHIP_INTVL */
1107 nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERIER_INTVL */
1108 nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */
1109 nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */
1110 nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */
1111#endif
1112#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
1113 nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */
1114 nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IP6TABLES */
1115 nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_ARPTABLES */
826#endif 1116#endif
827 0; 1117 0;
828} 1118}
@@ -837,6 +1127,20 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
837 u32 stp_enabled = br->stp_enabled; 1127 u32 stp_enabled = br->stp_enabled;
838 u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]; 1128 u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1];
839 u8 vlan_enabled = br_vlan_enabled(br); 1129 u8 vlan_enabled = br_vlan_enabled(br);
1130 u64 clockval;
1131
1132 clockval = br_timer_value(&br->hello_timer);
1133 if (nla_put_u64(skb, IFLA_BR_HELLO_TIMER, clockval))
1134 return -EMSGSIZE;
1135 clockval = br_timer_value(&br->tcn_timer);
1136 if (nla_put_u64(skb, IFLA_BR_TCN_TIMER, clockval))
1137 return -EMSGSIZE;
1138 clockval = br_timer_value(&br->topology_change_timer);
1139 if (nla_put_u64(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval))
1140 return -EMSGSIZE;
1141 clockval = br_timer_value(&br->gc_timer);
1142 if (nla_put_u64(skb, IFLA_BR_GC_TIMER, clockval))
1143 return -EMSGSIZE;
840 1144
841 if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) || 1145 if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) ||
842 nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) || 1146 nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) ||
@@ -844,38 +1148,76 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
844 nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) || 1148 nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) ||
845 nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) || 1149 nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) ||
846 nla_put_u16(skb, IFLA_BR_PRIORITY, priority) || 1150 nla_put_u16(skb, IFLA_BR_PRIORITY, priority) ||
847 nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled)) 1151 nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled) ||
1152 nla_put_u16(skb, IFLA_BR_GROUP_FWD_MASK, br->group_fwd_mask) ||
1153 nla_put(skb, IFLA_BR_BRIDGE_ID, sizeof(struct ifla_bridge_id),
1154 &br->bridge_id) ||
1155 nla_put(skb, IFLA_BR_ROOT_ID, sizeof(struct ifla_bridge_id),
1156 &br->designated_root) ||
1157 nla_put_u16(skb, IFLA_BR_ROOT_PORT, br->root_port) ||
1158 nla_put_u32(skb, IFLA_BR_ROOT_PATH_COST, br->root_path_cost) ||
1159 nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE, br->topology_change) ||
1160 nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
1161 br->topology_change_detected) ||
1162 nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr))
848 return -EMSGSIZE; 1163 return -EMSGSIZE;
849 1164
850#ifdef CONFIG_BRIDGE_VLAN_FILTERING 1165#ifdef CONFIG_BRIDGE_VLAN_FILTERING
851 if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto)) 1166 if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto) ||
1167 nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid))
1168 return -EMSGSIZE;
1169#endif
1170#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
1171 if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) ||
1172 nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING, !br->multicast_disabled) ||
1173 nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR,
1174 br->multicast_query_use_ifaddr) ||
1175 nla_put_u8(skb, IFLA_BR_MCAST_QUERIER, br->multicast_querier) ||
1176 nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY,
1177 br->hash_elasticity) ||
1178 nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) ||
1179 nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT,
1180 br->multicast_last_member_count) ||
1181 nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT,
1182 br->multicast_startup_query_count))
1183 return -EMSGSIZE;
1184
1185 clockval = jiffies_to_clock_t(br->multicast_last_member_interval);
1186 if (nla_put_u64(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval))
1187 return -EMSGSIZE;
1188 clockval = jiffies_to_clock_t(br->multicast_membership_interval);
1189 if (nla_put_u64(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval))
1190 return -EMSGSIZE;
1191 clockval = jiffies_to_clock_t(br->multicast_querier_interval);
1192 if (nla_put_u64(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval))
1193 return -EMSGSIZE;
1194 clockval = jiffies_to_clock_t(br->multicast_query_interval);
1195 if (nla_put_u64(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval))
1196 return -EMSGSIZE;
1197 clockval = jiffies_to_clock_t(br->multicast_query_response_interval);
1198 if (nla_put_u64(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval))
1199 return -EMSGSIZE;
1200 clockval = jiffies_to_clock_t(br->multicast_startup_query_interval);
1201 if (nla_put_u64(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval))
1202 return -EMSGSIZE;
1203#endif
1204#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
1205 if (nla_put_u8(skb, IFLA_BR_NF_CALL_IPTABLES,
1206 br->nf_call_iptables ? 1 : 0) ||
1207 nla_put_u8(skb, IFLA_BR_NF_CALL_IP6TABLES,
1208 br->nf_call_ip6tables ? 1 : 0) ||
1209 nla_put_u8(skb, IFLA_BR_NF_CALL_ARPTABLES,
1210 br->nf_call_arptables ? 1 : 0))
852 return -EMSGSIZE; 1211 return -EMSGSIZE;
853#endif 1212#endif
854 1213
855 return 0; 1214 return 0;
856} 1215}
857 1216
858static size_t br_get_link_af_size(const struct net_device *dev)
859{
860 struct net_port_vlans *pv;
861
862 if (br_port_exists(dev))
863 pv = nbp_get_vlan_info(br_port_get_rtnl(dev));
864 else if (dev->priv_flags & IFF_EBRIDGE)
865 pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev));
866 else
867 return 0;
868
869 if (!pv)
870 return 0;
871
872 /* Each VLAN is returned in bridge_vlan_info along with flags */
873 return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info));
874}
875 1217
876static struct rtnl_af_ops br_af_ops __read_mostly = { 1218static struct rtnl_af_ops br_af_ops __read_mostly = {
877 .family = AF_BRIDGE, 1219 .family = AF_BRIDGE,
878 .get_link_af_size = br_get_link_af_size, 1220 .get_link_af_size = br_get_link_af_size_filtered,
879}; 1221};
880 1222
881struct rtnl_link_ops br_link_ops __read_mostly = { 1223struct rtnl_link_ops br_link_ops __read_mostly = {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 213baf7aaa93..216018c76018 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -20,6 +20,7 @@
20#include <net/route.h> 20#include <net/route.h>
21#include <net/ip6_fib.h> 21#include <net/ip6_fib.h>
22#include <linux/if_vlan.h> 22#include <linux/if_vlan.h>
23#include <linux/rhashtable.h>
23 24
24#define BR_HASH_BITS 8 25#define BR_HASH_BITS 8
25#define BR_HASH_SIZE (1 << BR_HASH_BITS) 26#define BR_HASH_SIZE (1 << BR_HASH_BITS)
@@ -28,7 +29,6 @@
28 29
29#define BR_PORT_BITS 10 30#define BR_PORT_BITS 10
30#define BR_MAX_PORTS (1<<BR_PORT_BITS) 31#define BR_MAX_PORTS (1<<BR_PORT_BITS)
31#define BR_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID)
32 32
33#define BR_VERSION "2.3" 33#define BR_VERSION "2.3"
34 34
@@ -77,17 +77,61 @@ struct bridge_mcast_querier {
77}; 77};
78#endif 78#endif
79 79
80struct net_port_vlans { 80/**
81 u16 port_idx; 81 * struct net_bridge_vlan - per-vlan entry
82 u16 pvid; 82 *
83 * @vnode: rhashtable member
84 * @vid: VLAN id
85 * @flags: bridge vlan flags
86 * @br: if MASTER flag set, this points to a bridge struct
87 * @port: if MASTER flag unset, this points to a port struct
88 * @refcnt: if MASTER flag set, this is bumped for each port referencing it
89 * @brvlan: if MASTER flag unset, this points to the global per-VLAN context
90 * for this VLAN entry
91 * @vlist: sorted list of VLAN entries
92 * @rcu: used for entry destruction
93 *
94 * This structure is shared between the global per-VLAN entries contained in
95 * the bridge rhashtable and the local per-port per-VLAN entries contained in
96 * the port's rhashtable. The union entries should be interpreted depending on
97 * the entry flags that are set.
98 */
99struct net_bridge_vlan {
100 struct rhash_head vnode;
101 u16 vid;
102 u16 flags;
83 union { 103 union {
84 struct net_bridge_port *port; 104 struct net_bridge *br;
85 struct net_bridge *br; 105 struct net_bridge_port *port;
86 } parent; 106 };
107 union {
108 atomic_t refcnt;
109 struct net_bridge_vlan *brvlan;
110 };
111 struct list_head vlist;
112
87 struct rcu_head rcu; 113 struct rcu_head rcu;
88 unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; 114};
89 unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN]; 115
116/**
117 * struct net_bridge_vlan_group
118 *
119 * @vlan_hash: VLAN entry rhashtable
120 * @vlan_list: sorted VLAN entry list
121 * @num_vlans: number of total VLAN entries
122 * @pvid: PVID VLAN id
123 *
124 * IMPORTANT: Be careful when checking if there are VLAN entries using list
125 * primitives because the bridge can have entries in its list which
126 * are just for global context but not for filtering, i.e. they have
127 * the master flag set but not the brentry flag. If you have to check
128 * if there are "real" entries in the bridge, please test @num_vlans
129 */
130struct net_bridge_vlan_group {
131 struct rhashtable vlan_hash;
132 struct list_head vlan_list;
90 u16 num_vlans; 133 u16 num_vlans;
134 u16 pvid;
91}; 135};
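
The unions in net_bridge_vlan are discriminated by the entry flags: a MASTER entry belongs to the bridge and carries the refcount, while a port entry points back at its global context through brvlan. A compact userspace sketch of the pattern, with br_vlan_should_use() reduced to a single expression (all names here are hypothetical stand-ins for the kernel types):

#include <stdbool.h>
#include <stdio.h>

#define VLAN_INFO_MASTER	(1 << 0)	/* stand-in for BRIDGE_VLAN_INFO_MASTER */
#define VLAN_INFO_BRENTRY	(1 << 1)	/* stand-in for BRIDGE_VLAN_INFO_BRENTRY */

struct vlan_entry {
	unsigned short vid;
	unsigned short flags;
	union {
		int refcnt;			/* valid for master entries */
		struct vlan_entry *brvlan;	/* valid for port entries */
	};
};

static bool vlan_is_master(const struct vlan_entry *v)
{
	return v->flags & VLAN_INFO_MASTER;
}

/* br_vlan_should_use() in one expression: usable unless it is a
 * master entry that only provides global context */
static bool vlan_should_use(const struct vlan_entry *v)
{
	return !vlan_is_master(v) || (v->flags & VLAN_INFO_BRENTRY);
}

int main(void)
{
	struct vlan_entry master = { .vid = 10, .flags = VLAN_INFO_MASTER,
				     .refcnt = 1 };
	struct vlan_entry port = { .vid = 10, .flags = 0, .brvlan = &master };

	printf("master usable: %d\n", vlan_should_use(&master));	/* 0 */
	printf("port usable:   %d\n", vlan_should_use(&port));		/* 1 */
	return 0;
}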
92 136
93struct net_bridge_fdb_entry 137struct net_bridge_fdb_entry
@@ -185,7 +229,7 @@ struct net_bridge_port
185 struct netpoll *np; 229 struct netpoll *np;
186#endif 230#endif
187#ifdef CONFIG_BRIDGE_VLAN_FILTERING 231#ifdef CONFIG_BRIDGE_VLAN_FILTERING
188 struct net_port_vlans __rcu *vlan_info; 232 struct net_bridge_vlan_group __rcu *vlgrp;
189#endif 233#endif
190}; 234};
191 235
@@ -293,10 +337,10 @@ struct net_bridge
293 struct kobject *ifobj; 337 struct kobject *ifobj;
294 u32 auto_cnt; 338 u32 auto_cnt;
295#ifdef CONFIG_BRIDGE_VLAN_FILTERING 339#ifdef CONFIG_BRIDGE_VLAN_FILTERING
340 struct net_bridge_vlan_group __rcu *vlgrp;
296 u8 vlan_enabled; 341 u8 vlan_enabled;
297 __be16 vlan_proto; 342 __be16 vlan_proto;
298 u16 default_pvid; 343 u16 default_pvid;
299 struct net_port_vlans __rcu *vlan_info;
300#endif 344#endif
301}; 345};
302 346
@@ -344,6 +388,31 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
344 return !memcmp(&br->bridge_id, &br->designated_root, 8); 388 return !memcmp(&br->bridge_id, &br->designated_root, 8);
345} 389}
346 390
391/* check if a VLAN entry is global */
392static inline bool br_vlan_is_master(const struct net_bridge_vlan *v)
393{
394 return v->flags & BRIDGE_VLAN_INFO_MASTER;
395}
396
397/* check if a VLAN entry is used by the bridge */
398static inline bool br_vlan_is_brentry(const struct net_bridge_vlan *v)
399{
400 return v->flags & BRIDGE_VLAN_INFO_BRENTRY;
401}
402
403/* check if we should use the vlan entry, returns false if it's only context */
404static inline bool br_vlan_should_use(const struct net_bridge_vlan *v)
405{
406 if (br_vlan_is_master(v)) {
407 if (br_vlan_is_brentry(v))
408 return true;
409 else
410 return false;
411 }
412
413 return true;
414}
415
347/* br_device.c */ 416/* br_device.c */
348void br_dev_setup(struct net_device *dev); 417void br_dev_setup(struct net_device *dev);
349void br_dev_delete(struct net_device *dev, struct list_head *list); 418void br_dev_delete(struct net_device *dev, struct list_head *list);
@@ -413,10 +482,10 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
413 482
414/* br_forward.c */ 483/* br_forward.c */
415void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); 484void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb);
416int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb); 485int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb);
417void br_forward(const struct net_bridge_port *to, 486void br_forward(const struct net_bridge_port *to,
418 struct sk_buff *skb, struct sk_buff *skb0); 487 struct sk_buff *skb, struct sk_buff *skb0);
419int br_forward_finish(struct sock *sk, struct sk_buff *skb); 488int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
420void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast); 489void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast);
421void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, 490void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
422 struct sk_buff *skb2, bool unicast); 491 struct sk_buff *skb2, bool unicast);
@@ -434,7 +503,7 @@ void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
434void br_manage_promisc(struct net_bridge *br); 503void br_manage_promisc(struct net_bridge *br);
435 504
436/* br_input.c */ 505/* br_input.c */
437int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); 506int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
438rx_handler_result_t br_handle_frame(struct sk_buff **pskb); 507rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
439 508
440static inline bool br_rx_handler_check_rcu(const struct net_device *dev) 509static inline bool br_rx_handler_check_rcu(const struct net_device *dev)
@@ -601,18 +670,19 @@ static inline void br_mdb_uninit(void)
601 670
602/* br_vlan.c */ 671/* br_vlan.c */
603#ifdef CONFIG_BRIDGE_VLAN_FILTERING 672#ifdef CONFIG_BRIDGE_VLAN_FILTERING
604bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, 673bool br_allowed_ingress(const struct net_bridge *br,
605 struct sk_buff *skb, u16 *vid); 674 struct net_bridge_vlan_group *vg, struct sk_buff *skb,
606bool br_allowed_egress(struct net_bridge *br, const struct net_port_vlans *v, 675 u16 *vid);
676bool br_allowed_egress(struct net_bridge_vlan_group *vg,
607 const struct sk_buff *skb); 677 const struct sk_buff *skb);
608bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); 678bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid);
609struct sk_buff *br_handle_vlan(struct net_bridge *br, 679struct sk_buff *br_handle_vlan(struct net_bridge *br,
610 const struct net_port_vlans *v, 680 struct net_bridge_vlan_group *vg,
611 struct sk_buff *skb); 681 struct sk_buff *skb);
612int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); 682int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
613int br_vlan_delete(struct net_bridge *br, u16 vid); 683int br_vlan_delete(struct net_bridge *br, u16 vid);
614void br_vlan_flush(struct net_bridge *br); 684void br_vlan_flush(struct net_bridge *br);
615bool br_vlan_find(struct net_bridge *br, u16 vid); 685struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid);
616void br_recalculate_fwd_mask(struct net_bridge *br); 686void br_recalculate_fwd_mask(struct net_bridge *br);
617int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); 687int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
618int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); 688int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
@@ -620,22 +690,35 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto);
620int br_vlan_set_proto(struct net_bridge *br, unsigned long val); 690int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
621int br_vlan_init(struct net_bridge *br); 691int br_vlan_init(struct net_bridge *br);
622int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); 692int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
693int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
623int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); 694int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
624int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); 695int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
625void nbp_vlan_flush(struct net_bridge_port *port); 696void nbp_vlan_flush(struct net_bridge_port *port);
626bool nbp_vlan_find(struct net_bridge_port *port, u16 vid);
627int nbp_vlan_init(struct net_bridge_port *port); 697int nbp_vlan_init(struct net_bridge_port *port);
698int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask);
699
700static inline struct net_bridge_vlan_group *br_vlan_group(
701 const struct net_bridge *br)
702{
703 return rtnl_dereference(br->vlgrp);
704}
628 705
629static inline struct net_port_vlans *br_get_vlan_info( 706static inline struct net_bridge_vlan_group *nbp_vlan_group(
630 const struct net_bridge *br) 707 const struct net_bridge_port *p)
631{ 708{
632 return rcu_dereference_rtnl(br->vlan_info); 709 return rtnl_dereference(p->vlgrp);
633} 710}
634 711
635static inline struct net_port_vlans *nbp_get_vlan_info( 712static inline struct net_bridge_vlan_group *br_vlan_group_rcu(
636 const struct net_bridge_port *p) 713 const struct net_bridge *br)
637{ 714{
638 return rcu_dereference_rtnl(p->vlan_info); 715 return rcu_dereference(br->vlgrp);
716}
717
718static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu(
719 const struct net_bridge_port *p)
720{
721 return rcu_dereference(p->vlgrp);
639} 722}
640 723
641/* Since bridge now depends on 8021Q module, but the time bridge sees the 724/* Since bridge now depends on 8021Q module, but the time bridge sees the
@@ -645,9 +728,9 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid)
645{ 728{
646 int err = 0; 729 int err = 0;
647 730
648 if (skb_vlan_tag_present(skb)) 731 if (skb_vlan_tag_present(skb)) {
649 *vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK; 732 *vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK;
650 else { 733 } else {
651 *vid = 0; 734 *vid = 0;
652 err = -EINVAL; 735 err = -EINVAL;
653 } 736 }
@@ -655,13 +738,13 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid)
655 return err; 738 return err;
656} 739}
657 740
658static inline u16 br_get_pvid(const struct net_port_vlans *v) 741static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg)
659{ 742{
660 if (!v) 743 if (!vg)
661 return 0; 744 return 0;
662 745
663 smp_rmb(); 746 smp_rmb();
664 return v->pvid; 747 return vg->pvid;
665} 748}
666 749
667static inline int br_vlan_enabled(struct net_bridge *br) 750static inline int br_vlan_enabled(struct net_bridge *br)
@@ -669,16 +752,15 @@ static inline int br_vlan_enabled(struct net_bridge *br)
669 return br->vlan_enabled; 752 return br->vlan_enabled;
670} 753}
671#else 754#else
672static inline bool br_allowed_ingress(struct net_bridge *br, 755static inline bool br_allowed_ingress(const struct net_bridge *br,
673 struct net_port_vlans *v, 756 struct net_bridge_vlan_group *vg,
674 struct sk_buff *skb, 757 struct sk_buff *skb,
675 u16 *vid) 758 u16 *vid)
676{ 759{
677 return true; 760 return true;
678} 761}
679 762
680static inline bool br_allowed_egress(struct net_bridge *br, 763static inline bool br_allowed_egress(struct net_bridge_vlan_group *vg,
681 const struct net_port_vlans *v,
682 const struct sk_buff *skb) 764 const struct sk_buff *skb)
683{ 765{
684 return true; 766 return true;
@@ -691,7 +773,7 @@ static inline bool br_should_learn(struct net_bridge_port *p,
691} 773}
692 774
693static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, 775static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
694 const struct net_port_vlans *v, 776 struct net_bridge_vlan_group *vg,
695 struct sk_buff *skb) 777 struct sk_buff *skb)
696{ 778{
697 return skb; 779 return skb;
@@ -711,11 +793,6 @@ static inline void br_vlan_flush(struct net_bridge *br)
711{ 793{
712} 794}
713 795
714static inline bool br_vlan_find(struct net_bridge *br, u16 vid)
715{
716 return false;
717}
718
719static inline void br_recalculate_fwd_mask(struct net_bridge *br) 796static inline void br_recalculate_fwd_mask(struct net_bridge *br)
720{ 797{
721} 798}
@@ -739,22 +816,12 @@ static inline void nbp_vlan_flush(struct net_bridge_port *port)
739{ 816{
740} 817}
741 818
742static inline struct net_port_vlans *br_get_vlan_info( 819static inline struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg,
743 const struct net_bridge *br) 820 u16 vid)
744{
745 return NULL;
746}
747static inline struct net_port_vlans *nbp_get_vlan_info(
748 const struct net_bridge_port *p)
749{ 821{
750 return NULL; 822 return NULL;
751} 823}
752 824
753static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid)
754{
755 return false;
756}
757
758static inline int nbp_vlan_init(struct net_bridge_port *port) 825static inline int nbp_vlan_init(struct net_bridge_port *port)
759{ 826{
760 return 0; 827 return 0;
@@ -764,7 +831,8 @@ static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag)
764{ 831{
765 return 0; 832 return 0;
766} 833}
767static inline u16 br_get_pvid(const struct net_port_vlans *v) 834
835static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg)
768{ 836{
769 return 0; 837 return 0;
770} 838}
@@ -779,6 +847,37 @@ static inline int __br_vlan_filter_toggle(struct net_bridge *br,
779{ 847{
780 return -EOPNOTSUPP; 848 return -EOPNOTSUPP;
781} 849}
850
851static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
852 u32 filter_mask)
853{
854 return 0;
855}
856
857static inline struct net_bridge_vlan_group *br_vlan_group(
858 const struct net_bridge *br)
859{
860 return NULL;
861}
862
863static inline struct net_bridge_vlan_group *nbp_vlan_group(
864 const struct net_bridge_port *p)
865{
866 return NULL;
867}
868
869static inline struct net_bridge_vlan_group *br_vlan_group_rcu(
870 const struct net_bridge *br)
871{
872 return NULL;
873}
874
875static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu(
876 const struct net_bridge_port *p)
877{
878 return NULL;
879}
880
782#endif 881#endif
783 882
784struct nf_br_ops { 883struct nf_br_ops {
@@ -808,6 +907,7 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t);
808int br_set_forward_delay(struct net_bridge *br, unsigned long x); 907int br_set_forward_delay(struct net_bridge *br, unsigned long x);
809int br_set_hello_time(struct net_bridge *br, unsigned long x); 908int br_set_hello_time(struct net_bridge *br, unsigned long x);
810int br_set_max_age(struct net_bridge *br, unsigned long x); 909int br_set_max_age(struct net_bridge *br, unsigned long x);
910int br_set_ageing_time(struct net_bridge *br, u32 ageing_time);
811 911
812 912
813/* br_stp_if.c */ 913/* br_stp_if.c */
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index ed74ffaa851f..5f3f64553179 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -40,7 +40,8 @@ void br_log_state(const struct net_bridge_port *p)
40void br_set_state(struct net_bridge_port *p, unsigned int state) 40void br_set_state(struct net_bridge_port *p, unsigned int state)
41{ 41{
42 struct switchdev_attr attr = { 42 struct switchdev_attr attr = {
43 .id = SWITCHDEV_ATTR_PORT_STP_STATE, 43 .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE,
44 .flags = SWITCHDEV_F_DEFER,
44 .u.stp_state = state, 45 .u.stp_state = state,
45 }; 46 };
46 int err; 47 int err;
@@ -566,6 +567,29 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
566 567
567} 568}
568 569
570int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
571{
572 struct switchdev_attr attr = {
573 .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
574 .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
575 .u.ageing_time = ageing_time,
576 };
577 unsigned long t = clock_t_to_jiffies(ageing_time);
578 int err;
579
580 if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME)
581 return -ERANGE;
582
583 err = switchdev_port_attr_set(br->dev, &attr);
584 if (err)
585 return err;
586
587 br->ageing_time = t;
588 mod_timer(&br->gc_timer, jiffies);
589
590 return 0;
591}
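
br_set_ageing_time() converts the userspace clock_t value to jiffies before range-checking it and offering it to the hardware via switchdev. A simplified model of the conversion, assuming HZ=1000, USER_HZ=100, and HZ divisible by USER_HZ (the kernel handles the general case):

#include <stdio.h>

#define HZ	1000	/* assumed kernel tick rate */
#define USER_HZ	100	/* clock_t units exposed to userspace */

static unsigned long clock_t_to_jiffies(unsigned long x)
{
	return x * (HZ / USER_HZ);
}

static unsigned long jiffies_to_clock_t(unsigned long j)
{
	return j / (HZ / USER_HZ);
}

int main(void)
{
	/* 30000 clock_t ticks (300 s at USER_HZ=100) -> 300000 jiffies */
	unsigned long t = clock_t_to_jiffies(30000);

	printf("jiffies=%lu back=%lu\n", t, jiffies_to_clock_t(t));
	return 0;
}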
592
569void __br_set_forward_delay(struct net_bridge *br, unsigned long t) 593void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
570{ 594{
571 br->bridge_forward_delay = t; 595 br->bridge_forward_delay = t;
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 534fc4cd263e..5881fbc114a9 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -30,6 +30,12 @@
30 30
31#define LLC_RESERVE sizeof(struct llc_pdu_un) 31#define LLC_RESERVE sizeof(struct llc_pdu_un)
32 32
33static int br_send_bpdu_finish(struct net *net, struct sock *sk,
34 struct sk_buff *skb)
35{
36 return dev_queue_xmit(skb);
37}
38
33static void br_send_bpdu(struct net_bridge_port *p, 39static void br_send_bpdu(struct net_bridge_port *p,
34 const unsigned char *data, int length) 40 const unsigned char *data, int length)
35{ 41{
@@ -54,9 +60,9 @@ static void br_send_bpdu(struct net_bridge_port *p,
54 60
55 skb_reset_mac_header(skb); 61 skb_reset_mac_header(skb);
56 62
57 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, 63 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
58 NULL, skb->dev, 64 dev_net(p->dev), NULL, skb, NULL, skb->dev,
59 dev_queue_xmit_sk); 65 br_send_bpdu_finish);
60} 66}
61 67
62static inline void br_set_ticks(unsigned char *dest, int j) 68static inline void br_set_ticks(unsigned char *dest, int j)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 4ca449a16132..5396ff08af32 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -15,6 +15,7 @@
15#include <linux/kmod.h> 15#include <linux/kmod.h>
16#include <linux/etherdevice.h> 16#include <linux/etherdevice.h>
17#include <linux/rtnetlink.h> 17#include <linux/rtnetlink.h>
18#include <net/switchdev.h>
18 19
19#include "br_private.h" 20#include "br_private.h"
20#include "br_private_stp.h" 21#include "br_private_stp.h"
@@ -35,11 +36,22 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no)
35/* called under bridge lock */ 36/* called under bridge lock */
36void br_init_port(struct net_bridge_port *p) 37void br_init_port(struct net_bridge_port *p)
37{ 38{
39 struct switchdev_attr attr = {
40 .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
41 .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER,
42 .u.ageing_time = p->br->ageing_time,
43 };
44 int err;
45
38 p->port_id = br_make_port_id(p->priority, p->port_no); 46 p->port_id = br_make_port_id(p->priority, p->port_no);
39 br_become_designated_port(p); 47 br_become_designated_port(p);
40 br_set_state(p, BR_STATE_BLOCKING); 48 br_set_state(p, BR_STATE_BLOCKING);
41 p->topology_change_ack = 0; 49 p->topology_change_ack = 0;
42 p->config_pending = 0; 50 p->config_pending = 0;
51
52 err = switchdev_port_attr_set(p->dev, &attr);
53 if (err && err != -EOPNOTSUPP)
54 netdev_err(p->dev, "failed to set HW ageing time\n");
43} 55}
44 56
45/* called under bridge lock */ 57/* called under bridge lock */
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 4c97fc50fb70..8365bd53c421 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -102,8 +102,15 @@ static ssize_t ageing_time_show(struct device *d,
102 102
103static int set_ageing_time(struct net_bridge *br, unsigned long val) 103static int set_ageing_time(struct net_bridge *br, unsigned long val)
104{ 104{
105 br->ageing_time = clock_t_to_jiffies(val); 105 int ret;
106 return 0; 106
107 if (!rtnl_trylock())
108 return restart_syscall();
109
110 ret = br_set_ageing_time(br, val);
111 rtnl_unlock();
112
113 return ret;
107} 114}
108 115
109static ssize_t ageing_time_store(struct device *d, 116static ssize_t ageing_time_store(struct device *d,
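
The sysfs store path now takes rtnl with rtnl_trylock() and asks the caller to restart the syscall when the lock is contended, instead of writing br->ageing_time unlocked. A loose userspace analogue of that trylock-and-retry shape using pthreads (hypothetical names; compile with -pthread):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long ageing_time;

static int set_ageing_time(unsigned long val)
{
	/* contended lock: report back instead of blocking, like
	 * rtnl_trylock() + restart_syscall() */
	if (pthread_mutex_trylock(&cfg_lock))
		return -EAGAIN;

	ageing_time = val;	/* the update runs under the lock */
	pthread_mutex_unlock(&cfg_lock);
	return 0;
}

int main(void)
{
	while (set_ageing_time(30000) == -EAGAIN)
		;	/* the caller simply retries */
	printf("ageing_time=%lu\n", ageing_time);
	return 0;
}
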
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 5f5a02b49a99..1394da63614a 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -6,86 +6,205 @@
6 6
7#include "br_private.h" 7#include "br_private.h"
8 8
9static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid) 9static inline int br_vlan_cmp(struct rhashtable_compare_arg *arg,
10 const void *ptr)
10{ 11{
11 if (v->pvid == vid) 12 const struct net_bridge_vlan *vle = ptr;
13 u16 vid = *(u16 *)arg->key;
14
15 return vle->vid != vid;
16}
17
18static const struct rhashtable_params br_vlan_rht_params = {
19 .head_offset = offsetof(struct net_bridge_vlan, vnode),
20 .key_offset = offsetof(struct net_bridge_vlan, vid),
21 .key_len = sizeof(u16),
22 .nelem_hint = 3,
23 .locks_mul = 1,
24 .max_size = VLAN_N_VID,
25 .obj_cmpfn = br_vlan_cmp,
26 .automatic_shrinking = true,
27};
28
29static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid)
30{
31 return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params);
32}
33
34static void __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid)
35{
36 if (vg->pvid == vid)
12 return; 37 return;
13 38
14 smp_wmb(); 39 smp_wmb();
15 v->pvid = vid; 40 vg->pvid = vid;
16} 41}
17 42
18static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) 43static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
19{ 44{
20 if (v->pvid != vid) 45 if (vg->pvid != vid)
21 return; 46 return;
22 47
23 smp_wmb(); 48 smp_wmb();
24 v->pvid = 0; 49 vg->pvid = 0;
25} 50}
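
The pvid helpers publish the new value behind an smp_wmb() that pairs with the smp_rmb() in br_get_pvid(), so readers that observe the pvid also observe the writes that preceded it. A rough userspace analogue of that ordering using C11 release/acquire atomics (a sketch of the idea, not the kernel primitives):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned short pvid;

static void vlan_add_pvid(unsigned short vid)
{
	/* release store: earlier vlan-entry writes become visible
	 * before the new pvid does (the smp_wmb() side) */
	atomic_store_explicit(&pvid, vid, memory_order_release);
}

static unsigned short get_pvid(void)
{
	/* acquire load: pairs with the release store above
	 * (the smp_rmb() side) */
	return atomic_load_explicit(&pvid, memory_order_acquire);
}

int main(void)
{
	vlan_add_pvid(1);
	printf("pvid=%u\n", get_pvid());
	return 0;
}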
26 51
27static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) 52static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
28{ 53{
54 struct net_bridge_vlan_group *vg;
55
56 if (br_vlan_is_master(v))
57 vg = br_vlan_group(v->br);
58 else
59 vg = nbp_vlan_group(v->port);
60
29 if (flags & BRIDGE_VLAN_INFO_PVID) 61 if (flags & BRIDGE_VLAN_INFO_PVID)
30 __vlan_add_pvid(v, vid); 62 __vlan_add_pvid(vg, v->vid);
31 else 63 else
32 __vlan_delete_pvid(v, vid); 64 __vlan_delete_pvid(vg, v->vid);
33 65
34 if (flags & BRIDGE_VLAN_INFO_UNTAGGED) 66 if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
35 set_bit(vid, v->untagged_bitmap); 67 v->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
36 else 68 else
37 clear_bit(vid, v->untagged_bitmap); 69 v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED;
38} 70}
39 71
40static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, 72static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
41 u16 vid, u16 flags) 73 u16 vid, u16 flags)
42{ 74{
43 const struct net_device_ops *ops = dev->netdev_ops; 75 struct switchdev_obj_port_vlan v = {
76 .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
77 .flags = flags,
78 .vid_begin = vid,
79 .vid_end = vid,
80 };
44 int err; 81 int err;
45 82
46 /* If driver uses VLAN ndo ops, use 8021q to install vid 83 /* Try switchdev op first. In case it is not supported, fall back to
47 * on device, otherwise try switchdev ops to install vid. 84 * 8021q add.
48 */ 85 */
86 err = switchdev_port_obj_add(dev, &v.obj);
87 if (err == -EOPNOTSUPP)
88 return vlan_vid_add(dev, br->vlan_proto, vid);
89 return err;
90}
49 91
50 if (ops->ndo_vlan_rx_add_vid) { 92static void __vlan_add_list(struct net_bridge_vlan *v)
51 err = vlan_vid_add(dev, br->vlan_proto, vid); 93{
52 } else { 94 struct net_bridge_vlan_group *vg;
53 struct switchdev_obj vlan_obj = { 95 struct list_head *headp, *hpos;
54 .id = SWITCHDEV_OBJ_PORT_VLAN, 96 struct net_bridge_vlan *vent;
55 .u.vlan = {
56 .flags = flags,
57 .vid_begin = vid,
58 .vid_end = vid,
59 },
60 };
61 97
62 err = switchdev_port_obj_add(dev, &vlan_obj); 98 if (br_vlan_is_master(v))
63 if (err == -EOPNOTSUPP) 99 vg = br_vlan_group(v->br);
64 err = 0; 100 else
101 vg = nbp_vlan_group(v->port);
102
103 headp = &vg->vlan_list;
104 list_for_each_prev(hpos, headp) {
105 vent = list_entry(hpos, struct net_bridge_vlan, vlist);
106 if (v->vid < vent->vid)
107 continue;
108 else
109 break;
65 } 110 }
111 list_add_rcu(&v->vlist, hpos);
112}
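
__vlan_add_list() keeps vlan_list sorted by scanning from the tail with list_for_each_prev() and linking the new entry after the first node whose vid is not larger, which is cheap when VIDs arrive mostly in ascending order. A plain-C sketch of that tail-first insert on a circular doubly linked list (no RCU; types are hypothetical):

#include <stdio.h>

struct vlan {
	unsigned short vid;
	struct vlan *prev, *next;
};

static struct vlan head = { 0, &head, &head };	/* circular sentinel */

static void vlan_add_sorted(struct vlan *v)
{
	struct vlan *pos;

	/* scan backwards: most inserts land near the tail */
	for (pos = head.prev; pos != &head; pos = pos->prev)
		if (v->vid >= pos->vid)
			break;

	v->prev = pos;			/* link v right after pos */
	v->next = pos->next;
	pos->next->prev = v;
	pos->next = v;
}

int main(void)
{
	struct vlan a = { 10 }, b = { 30 }, c = { 20 }, *p;

	vlan_add_sorted(&a);
	vlan_add_sorted(&b);
	vlan_add_sorted(&c);	/* lands between 10 and 30 */
	for (p = head.next; p != &head; p = p->next)
		printf("%u ", p->vid);
	printf("\n");		/* 10 20 30 */
	return 0;
}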
66 113
67 return err; 114static void __vlan_del_list(struct net_bridge_vlan *v)
115{
116 list_del_rcu(&v->vlist);
68} 117}
69 118
70static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) 119static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
120 u16 vid)
71{ 121{
72 struct net_bridge_port *p = NULL; 122 struct switchdev_obj_port_vlan v = {
73 struct net_bridge *br; 123 .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
74 struct net_device *dev; 124 .vid_begin = vid,
125 .vid_end = vid,
126 };
75 int err; 127 int err;
76 128
77 if (test_bit(vid, v->vlan_bitmap)) { 129 /* Try switchdev op first. In case it is not supported, fall back to
78 __vlan_add_flags(v, vid, flags); 130 * 8021q del.
131 */
132 err = switchdev_port_obj_del(dev, &v.obj);
133 if (err == -EOPNOTSUPP) {
134 vlan_vid_del(dev, br->vlan_proto, vid);
79 return 0; 135 return 0;
80 } 136 }
137 return err;
138}
139
140/* Returns a master vlan; if it didn't exist it gets created. In all cases
141 * a reference is taken to the master vlan before returning.
142 */
143static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid)
144{
145 struct net_bridge_vlan_group *vg;
146 struct net_bridge_vlan *masterv;
147
148 vg = br_vlan_group(br);
149 masterv = br_vlan_find(vg, vid);
150 if (!masterv) {
151 /* missing global ctx, create it now */
152 if (br_vlan_add(br, vid, 0))
153 return NULL;
154 masterv = br_vlan_find(vg, vid);
155 if (WARN_ON(!masterv))
156 return NULL;
157 }
158 atomic_inc(&masterv->refcnt);
159
160 return masterv;
161}
162
163static void br_vlan_put_master(struct net_bridge_vlan *masterv)
164{
165 struct net_bridge_vlan_group *vg;
166
167 if (!br_vlan_is_master(masterv))
168 return;
81 169
82 if (v->port_idx) { 170 vg = br_vlan_group(masterv->br);
83 p = v->parent.port; 171 if (atomic_dec_and_test(&masterv->refcnt)) {
172 rhashtable_remove_fast(&vg->vlan_hash,
173 &masterv->vnode, br_vlan_rht_params);
174 __vlan_del_list(masterv);
175 kfree_rcu(masterv, rcu);
176 }
177}
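
br_vlan_get_master() and br_vlan_put_master() give the global per-VLAN context a get-or-create reference count, with the last put removing and freeing the entry. A compact userspace sketch of the same get/put lifetime using C11 atomics (hypothetical types; the kernel additionally defers the free through RCU and removes the entry from the rhashtable):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct master_vlan {
	unsigned short vid;
	atomic_int refcnt;
};

/* get-or-create: a missing global context is created first, and a
 * reference is always taken before returning */
static struct master_vlan *master_get(struct master_vlan **slot,
				      unsigned short vid)
{
	if (!*slot) {
		*slot = malloc(sizeof(**slot));
		if (!*slot)
			return NULL;
		(*slot)->vid = vid;
		atomic_init(&(*slot)->refcnt, 0);
	}
	atomic_fetch_add(&(*slot)->refcnt, 1);
	return *slot;
}

static void master_put(struct master_vlan **slot)
{
	/* the last put removes and frees the shared entry */
	if (atomic_fetch_sub(&(*slot)->refcnt, 1) == 1) {
		free(*slot);
		*slot = NULL;
	}
}

int main(void)
{
	struct master_vlan *slot = NULL;
	struct master_vlan *m = master_get(&slot, 10);

	printf("vid=%u refcnt=%d\n", m->vid, atomic_load(&m->refcnt));
	master_put(&slot);	/* frees the entry again */
	return 0;
}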
178
179/* This is the shared VLAN add function which works for both ports and bridge
180 * devices. There are four possible calls to this function in terms of the
181 * vlan entry type:
182 * 1. vlan is being added on a port (no master flags, global entry exists)
183 * 2. vlan is being added on a bridge (both master and brentry flags)
184 * 3. vlan is being added on a port, but a global entry didn't exist which
185 * is being created right now (master flag set, brentry flag unset), the
186 * global entry is used for global per-vlan features, but not for filtering
187 * 4. same as 3 but with both master and brentry flags set so the entry
188 * will be used for filtering in both the port and the bridge
189 */
190static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
191{
192 struct net_bridge_vlan *masterv = NULL;
193 struct net_bridge_port *p = NULL;
194 struct net_bridge_vlan_group *vg;
195 struct net_device *dev;
196 struct net_bridge *br;
197 int err;
198
199 if (br_vlan_is_master(v)) {
200 br = v->br;
201 dev = br->dev;
202 vg = br_vlan_group(br);
203 } else {
204 p = v->port;
84 br = p->br; 205 br = p->br;
85 dev = p->dev; 206 dev = p->dev;
86 } else { 207 vg = nbp_vlan_group(p);
87 br = v->parent.br;
88 dev = br->dev;
89 } 208 }
90 209
91 if (p) { 210 if (p) {
@@ -93,116 +212,140 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
93 * This ensures tagged traffic enters the bridge when 212 * This ensures tagged traffic enters the bridge when
94 * promiscuous mode is disabled by br_manage_promisc(). 213 * promiscuous mode is disabled by br_manage_promisc().
95 */ 214 */
96 err = __vlan_vid_add(dev, br, vid, flags); 215 err = __vlan_vid_add(dev, br, v->vid, flags);
97 if (err) 216 if (err)
98 return err; 217 goto out;
99 } 218
219 /* need to work on the master vlan too */
220 if (flags & BRIDGE_VLAN_INFO_MASTER) {
221 err = br_vlan_add(br, v->vid, flags |
222 BRIDGE_VLAN_INFO_BRENTRY);
223 if (err)
224 goto out_filt;
225 }
100 226
101 err = br_fdb_insert(br, p, dev->dev_addr, vid); 227 masterv = br_vlan_get_master(br, v->vid);
102 if (err) { 228 if (!masterv)
103 br_err(br, "failed insert local address into bridge " 229 goto out_filt;
104 "forwarding table\n"); 230 v->brvlan = masterv;
105 goto out_filt;
106 } 231 }
107 232
108 set_bit(vid, v->vlan_bitmap); 233 /* Add the dev mac and count the vlan only if it's usable */
109 v->num_vlans++; 234 if (br_vlan_should_use(v)) {
110 __vlan_add_flags(v, vid, flags); 235 err = br_fdb_insert(br, p, dev->dev_addr, v->vid);
236 if (err) {
237 br_err(br, "failed insert local address into bridge forwarding table\n");
238 goto out_filt;
239 }
240 vg->num_vlans++;
241 }
111 242
112 return 0; 243 err = rhashtable_lookup_insert_fast(&vg->vlan_hash, &v->vnode,
244 br_vlan_rht_params);
245 if (err)
246 goto out_fdb_insert;
113 247
114out_filt: 248 __vlan_add_list(v);
115 if (p) 249 __vlan_add_flags(v, flags);
116 vlan_vid_del(dev, br->vlan_proto, vid); 250out:
117 return err; 251 return err;
118}
119 252
120static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, 253out_fdb_insert:
121 u16 vid) 254 if (br_vlan_should_use(v)) {
122{ 255 br_fdb_find_delete_local(br, p, dev->dev_addr, v->vid);
123 const struct net_device_ops *ops = dev->netdev_ops; 256 vg->num_vlans--;
124 int err = 0; 257 }
125
126 /* If driver uses VLAN ndo ops, use 8021q to delete vid
127 * on device, otherwise try switchdev ops to delete vid.
128 */
129
130 if (ops->ndo_vlan_rx_kill_vid) {
131 vlan_vid_del(dev, br->vlan_proto, vid);
132 } else {
133 struct switchdev_obj vlan_obj = {
134 .id = SWITCHDEV_OBJ_PORT_VLAN,
135 .u.vlan = {
136 .vid_begin = vid,
137 .vid_end = vid,
138 },
139 };
140 258
141 err = switchdev_port_obj_del(dev, &vlan_obj); 259out_filt:
142 if (err == -EOPNOTSUPP) 260 if (p) {
143 err = 0; 261 __vlan_vid_del(dev, br, v->vid);
262 if (masterv) {
263 br_vlan_put_master(masterv);
264 v->brvlan = NULL;
265 }
144 } 266 }
145 267
146 return err; 268 goto out;
147} 269}
148 270
149static int __vlan_del(struct net_port_vlans *v, u16 vid) 271static int __vlan_del(struct net_bridge_vlan *v)
150{ 272{
151 if (!test_bit(vid, v->vlan_bitmap)) 273 struct net_bridge_vlan *masterv = v;
152 return -EINVAL; 274 struct net_bridge_vlan_group *vg;
153 275 struct net_bridge_port *p = NULL;
154 __vlan_delete_pvid(v, vid); 276 int err = 0;
155 clear_bit(vid, v->untagged_bitmap);
156 277
157 if (v->port_idx) { 278 if (br_vlan_is_master(v)) {
158 struct net_bridge_port *p = v->parent.port; 279 vg = br_vlan_group(v->br);
159 int err; 280 } else {
281 p = v->port;
282 vg = nbp_vlan_group(v->port);
283 masterv = v->brvlan;
284 }
160 285
161 err = __vlan_vid_del(p->dev, p->br, vid); 286 __vlan_delete_pvid(vg, v->vid);
287 if (p) {
288 err = __vlan_vid_del(p->dev, p->br, v->vid);
162 if (err) 289 if (err)
163 return err; 290 goto out;
164 } 291 }
165 292
166 clear_bit(vid, v->vlan_bitmap); 293 if (br_vlan_should_use(v)) {
167 v->num_vlans--; 294 v->flags &= ~BRIDGE_VLAN_INFO_BRENTRY;
168 if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) { 295 vg->num_vlans--;
169 if (v->port_idx) 296 }
170 RCU_INIT_POINTER(v->parent.port->vlan_info, NULL); 297
171 else 298 if (masterv != v) {
172 RCU_INIT_POINTER(v->parent.br->vlan_info, NULL); 299 rhashtable_remove_fast(&vg->vlan_hash, &v->vnode,
300 br_vlan_rht_params);
301 __vlan_del_list(v);
173 kfree_rcu(v, rcu); 302 kfree_rcu(v, rcu);
174 } 303 }
175 return 0; 304
305 br_vlan_put_master(masterv);
306out:
307 return err;
176} 308}
177 309
178static void __vlan_flush(struct net_port_vlans *v) 310static void __vlan_group_free(struct net_bridge_vlan_group *vg)
179{ 311{
180 smp_wmb(); 312 WARN_ON(!list_empty(&vg->vlan_list));
181 v->pvid = 0; 313 rhashtable_destroy(&vg->vlan_hash);
182 bitmap_zero(v->vlan_bitmap, VLAN_N_VID); 314 kfree(vg);
183 if (v->port_idx) 315}
184 RCU_INIT_POINTER(v->parent.port->vlan_info, NULL); 316
185 else 317static void __vlan_flush(struct net_bridge_vlan_group *vg)
186 RCU_INIT_POINTER(v->parent.br->vlan_info, NULL); 318{
187 kfree_rcu(v, rcu); 319 struct net_bridge_vlan *vlan, *tmp;
320
321 __vlan_delete_pvid(vg, vg->pvid);
322 list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist)
323 __vlan_del(vlan);
188} 324}
189 325
190struct sk_buff *br_handle_vlan(struct net_bridge *br, 326struct sk_buff *br_handle_vlan(struct net_bridge *br,
191 const struct net_port_vlans *pv, 327 struct net_bridge_vlan_group *vg,
192 struct sk_buff *skb) 328 struct sk_buff *skb)
193{ 329{
330 struct net_bridge_vlan *v;
194 u16 vid; 331 u16 vid;
195 332
196 /* If this packet was not filtered at input, let it pass */ 333 /* If this packet was not filtered at input, let it pass */
197 if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) 334 if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
198 goto out; 335 goto out;
199 336
200 /* Vlan filter table must be configured at this point. The 337 /* At this point, we know that the frame was filtered and contains
 338 * a valid vlan id. If the vlan id has the untagged flag set,
339 * send untagged; otherwise, send tagged.
340 */
341 br_vlan_get_tag(skb, &vid);
342 v = br_vlan_find(vg, vid);
343 /* Vlan entry must be configured at this point. The
 201 * only exception is when the bridge is set in promisc mode and the 344 * only exception is when the bridge is set in promisc mode and the
202 * packet is destined for the bridge device. In this case 345 * packet is destined for the bridge device. In this case
203 * pass the packet as is. 346 * pass the packet as is.
204 */ 347 */
205 if (!pv) { 348 if (!v || !br_vlan_should_use(v)) {
206 if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev) { 349 if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev) {
207 goto out; 350 goto out;
208 } else { 351 } else {
@@ -210,13 +353,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
210 return NULL; 353 return NULL;
211 } 354 }
212 } 355 }
213 356 if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
214 /* At this point, we know that the frame was filtered and contains
215 * a valid vlan id. If the vlan id is set in the untagged bitmap,
216 * send untagged; otherwise, send tagged.
217 */
218 br_vlan_get_tag(skb, &vid);
219 if (test_bit(vid, pv->untagged_bitmap))
220 skb->vlan_tci = 0; 357 skb->vlan_tci = 0;
221 358
222out: 359out:
@@ -224,29 +361,13 @@ out:
224} 361}
225 362
226/* Called under RCU */ 363/* Called under RCU */
227bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, 364static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto,
228 struct sk_buff *skb, u16 *vid) 365 struct sk_buff *skb, u16 *vid)
229{ 366{
367 const struct net_bridge_vlan *v;
230 bool tagged; 368 bool tagged;
231 __be16 proto;
232
233 /* If VLAN filtering is disabled on the bridge, all packets are
234 * permitted.
235 */
236 if (!br->vlan_enabled) {
237 BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
238 return true;
239 }
240
241 /* If there are no vlan in the permitted list, all packets are
242 * rejected.
243 */
244 if (!v)
245 goto drop;
246 369
247 BR_INPUT_SKB_CB(skb)->vlan_filtered = true; 370 BR_INPUT_SKB_CB(skb)->vlan_filtered = true;
248 proto = br->vlan_proto;
249
250 /* If vlan tx offload is disabled on bridge device and frame was 371 /* If vlan tx offload is disabled on bridge device and frame was
251 * sent from vlan device on the bridge device, it does not have 372 * sent from vlan device on the bridge device, it does not have
252 * HW accelerated vlan tag. 373 * HW accelerated vlan tag.
@@ -281,7 +402,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
281 } 402 }
282 403
283 if (!*vid) { 404 if (!*vid) {
284 u16 pvid = br_get_pvid(v); 405 u16 pvid = br_get_pvid(vg);
285 406
286 /* Frame had a tag with VID 0 or did not have a tag. 407 /* Frame had a tag with VID 0 or did not have a tag.
287 * See if pvid is set on this port. That tells us which 408 * See if pvid is set on this port. That tells us which
@@ -309,29 +430,43 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
309 } 430 }
310 431
311 /* Frame had a valid vlan tag. See if vlan is allowed */ 432 /* Frame had a valid vlan tag. See if vlan is allowed */
312 if (test_bit(*vid, v->vlan_bitmap)) 433 v = br_vlan_find(vg, *vid);
434 if (v && br_vlan_should_use(v))
313 return true; 435 return true;
314drop: 436drop:
315 kfree_skb(skb); 437 kfree_skb(skb);
316 return false; 438 return false;
317} 439}
318 440
441bool br_allowed_ingress(const struct net_bridge *br,
442 struct net_bridge_vlan_group *vg, struct sk_buff *skb,
443 u16 *vid)
444{
445 /* If VLAN filtering is disabled on the bridge, all packets are
446 * permitted.
447 */
448 if (!br->vlan_enabled) {
449 BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
450 return true;
451 }
452
453 return __allowed_ingress(vg, br->vlan_proto, skb, vid);
454}
455
319/* Called under RCU. */ 456/* Called under RCU. */
320bool br_allowed_egress(struct net_bridge *br, 457bool br_allowed_egress(struct net_bridge_vlan_group *vg,
321 const struct net_port_vlans *v,
322 const struct sk_buff *skb) 458 const struct sk_buff *skb)
323{ 459{
460 const struct net_bridge_vlan *v;
324 u16 vid; 461 u16 vid;
325 462
326 /* If this packet was not filtered at input, let it pass */ 463 /* If this packet was not filtered at input, let it pass */
327 if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) 464 if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
328 return true; 465 return true;
329 466
330 if (!v)
331 return false;
332
333 br_vlan_get_tag(skb, &vid); 467 br_vlan_get_tag(skb, &vid);
334 if (test_bit(vid, v->vlan_bitmap)) 468 v = br_vlan_find(vg, vid);
469 if (v && br_vlan_should_use(v))
335 return true; 470 return true;
336 471
337 return false; 472 return false;
@@ -340,29 +475,29 @@ bool br_allowed_egress(struct net_bridge *br,
340/* Called under RCU */ 475/* Called under RCU */
341bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) 476bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
342{ 477{
478 struct net_bridge_vlan_group *vg;
343 struct net_bridge *br = p->br; 479 struct net_bridge *br = p->br;
344 struct net_port_vlans *v;
345 480
346 /* If filtering was disabled at input, let it pass. */ 481 /* If filtering was disabled at input, let it pass. */
347 if (!br->vlan_enabled) 482 if (!br->vlan_enabled)
348 return true; 483 return true;
349 484
350 v = rcu_dereference(p->vlan_info); 485 vg = nbp_vlan_group_rcu(p);
351 if (!v) 486 if (!vg || !vg->num_vlans)
352 return false; 487 return false;
353 488
354 if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto) 489 if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto)
355 *vid = 0; 490 *vid = 0;
356 491
357 if (!*vid) { 492 if (!*vid) {
358 *vid = br_get_pvid(v); 493 *vid = br_get_pvid(vg);
359 if (!*vid) 494 if (!*vid)
360 return false; 495 return false;
361 496
362 return true; 497 return true;
363 } 498 }
364 499
365 if (test_bit(*vid, v->vlan_bitmap)) 500 if (br_vlan_find(vg, *vid))
366 return true; 501 return true;
367 502
368 return false; 503 return false;
@@ -373,31 +508,49 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
373 */ 508 */
374int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) 509int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
375{ 510{
376 struct net_port_vlans *pv = NULL; 511 struct net_bridge_vlan_group *vg;
377 int err; 512 struct net_bridge_vlan *vlan;
513 int ret;
378 514
379 ASSERT_RTNL(); 515 ASSERT_RTNL();
380 516
381 pv = rtnl_dereference(br->vlan_info); 517 vg = br_vlan_group(br);
382 if (pv) 518 vlan = br_vlan_find(vg, vid);
383 return __vlan_add(pv, vid, flags); 519 if (vlan) {
520 if (!br_vlan_is_brentry(vlan)) {
521 /* Trying to change flags of non-existent bridge vlan */
522 if (!(flags & BRIDGE_VLAN_INFO_BRENTRY))
523 return -EINVAL;
524 /* It was only kept for port vlans, now make it real */
525 ret = br_fdb_insert(br, NULL, br->dev->dev_addr,
526 vlan->vid);
527 if (ret) {
528 br_err(br, "failed insert local address into bridge forwarding table\n");
529 return ret;
530 }
531 atomic_inc(&vlan->refcnt);
532 vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
533 vg->num_vlans++;
534 }
535 __vlan_add_flags(vlan, flags);
536 return 0;
537 }
384 538
 385 /* Create port vlan information 539 vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
386 */ 540 if (!vlan)
387 pv = kzalloc(sizeof(*pv), GFP_KERNEL);
388 if (!pv)
389 return -ENOMEM; 541 return -ENOMEM;
390 542
391 pv->parent.br = br; 543 vlan->vid = vid;
392 err = __vlan_add(pv, vid, flags); 544 vlan->flags = flags | BRIDGE_VLAN_INFO_MASTER;
393 if (err) 545 vlan->flags &= ~BRIDGE_VLAN_INFO_PVID;
394 goto out; 546 vlan->br = br;
547 if (flags & BRIDGE_VLAN_INFO_BRENTRY)
548 atomic_set(&vlan->refcnt, 1);
549 ret = __vlan_add(vlan, flags);
550 if (ret)
551 kfree(vlan);
395 552
396 rcu_assign_pointer(br->vlan_info, pv); 553 return ret;
397 return 0;
398out:
399 kfree(pv);
400 return err;
401} 554}
402 555
403/* Must be protected by RTNL. 556/* Must be protected by RTNL.
@@ -405,49 +558,41 @@ out:
405 */ 558 */
406int br_vlan_delete(struct net_bridge *br, u16 vid) 559int br_vlan_delete(struct net_bridge *br, u16 vid)
407{ 560{
408 struct net_port_vlans *pv; 561 struct net_bridge_vlan_group *vg;
562 struct net_bridge_vlan *v;
409 563
410 ASSERT_RTNL(); 564 ASSERT_RTNL();
411 565
412 pv = rtnl_dereference(br->vlan_info); 566 vg = br_vlan_group(br);
413 if (!pv) 567 v = br_vlan_find(vg, vid);
414 return -EINVAL; 568 if (!v || !br_vlan_is_brentry(v))
569 return -ENOENT;
415 570
416 br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid); 571 br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid);
572 br_fdb_delete_by_port(br, NULL, vid, 0);
417 573
418 __vlan_del(pv, vid); 574 return __vlan_del(v);
419 return 0;
420} 575}
421 576
422void br_vlan_flush(struct net_bridge *br) 577void br_vlan_flush(struct net_bridge *br)
423{ 578{
424 struct net_port_vlans *pv; 579 struct net_bridge_vlan_group *vg;
425 580
426 ASSERT_RTNL(); 581 ASSERT_RTNL();
427 pv = rtnl_dereference(br->vlan_info);
428 if (!pv)
429 return;
430 582
431 __vlan_flush(pv); 583 vg = br_vlan_group(br);
584 __vlan_flush(vg);
585 RCU_INIT_POINTER(br->vlgrp, NULL);
586 synchronize_rcu();
587 __vlan_group_free(vg);
432} 588}
433 589
434bool br_vlan_find(struct net_bridge *br, u16 vid) 590struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid)
435{ 591{
436 struct net_port_vlans *pv; 592 if (!vg)
437 bool found = false; 593 return NULL;
438
439 rcu_read_lock();
440 pv = rcu_dereference(br->vlan_info);
441
442 if (!pv)
443 goto out;
444
445 if (test_bit(vid, pv->vlan_bitmap))
446 found = true;
447 594
448out: 595 return br_vlan_lookup(&vg->vlan_hash, vid);
449 rcu_read_unlock();
450 return found;
451} 596}
452 597
453/* Must be protected by RTNL. */ 598/* Must be protected by RTNL. */
@@ -505,21 +650,18 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
505{ 650{
506 int err = 0; 651 int err = 0;
507 struct net_bridge_port *p; 652 struct net_bridge_port *p;
508 struct net_port_vlans *pv; 653 struct net_bridge_vlan *vlan;
654 struct net_bridge_vlan_group *vg;
509 __be16 oldproto; 655 __be16 oldproto;
510 u16 vid, errvid;
511 656
512 if (br->vlan_proto == proto) 657 if (br->vlan_proto == proto)
513 return 0; 658 return 0;
514 659
515 /* Add VLANs for the new proto to the device filter. */ 660 /* Add VLANs for the new proto to the device filter. */
516 list_for_each_entry(p, &br->port_list, list) { 661 list_for_each_entry(p, &br->port_list, list) {
517 pv = rtnl_dereference(p->vlan_info); 662 vg = nbp_vlan_group(p);
518 if (!pv) 663 list_for_each_entry(vlan, &vg->vlan_list, vlist) {
519 continue; 664 err = vlan_vid_add(p->dev, proto, vlan->vid);
520
521 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
522 err = vlan_vid_add(p->dev, proto, vid);
523 if (err) 665 if (err)
524 goto err_filt; 666 goto err_filt;
525 } 667 }
@@ -533,28 +675,21 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
533 675
534 /* Delete VLANs for the old proto from the device filter. */ 676 /* Delete VLANs for the old proto from the device filter. */
535 list_for_each_entry(p, &br->port_list, list) { 677 list_for_each_entry(p, &br->port_list, list) {
536 pv = rtnl_dereference(p->vlan_info); 678 vg = nbp_vlan_group(p);
537 if (!pv) 679 list_for_each_entry(vlan, &vg->vlan_list, vlist)
538 continue; 680 vlan_vid_del(p->dev, oldproto, vlan->vid);
539
540 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
541 vlan_vid_del(p->dev, oldproto, vid);
542 } 681 }
543 682
544 return 0; 683 return 0;
545 684
546err_filt: 685err_filt:
547 errvid = vid; 686 list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist)
548 for_each_set_bit(vid, pv->vlan_bitmap, errvid) 687 vlan_vid_del(p->dev, proto, vlan->vid);
549 vlan_vid_del(p->dev, proto, vid);
550 688
551 list_for_each_entry_continue_reverse(p, &br->port_list, list) { 689 list_for_each_entry_continue_reverse(p, &br->port_list, list) {
552 pv = rtnl_dereference(p->vlan_info); 690 vg = nbp_vlan_group(p);
553 if (!pv) 691 list_for_each_entry(vlan, &vg->vlan_list, vlist)
554 continue; 692 vlan_vid_del(p->dev, proto, vlan->vid);
555
556 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
557 vlan_vid_del(p->dev, proto, vid);
558 } 693 }
559 694
560 return err; 695 return err;
@@ -576,9 +711,19 @@ int br_vlan_set_proto(struct net_bridge *br, unsigned long val)
576 return err; 711 return err;
577} 712}
578 713
579static bool vlan_default_pvid(struct net_port_vlans *pv, u16 vid) 714static bool vlan_default_pvid(struct net_bridge_vlan_group *vg, u16 vid)
580{ 715{
581 return pv && vid == pv->pvid && test_bit(vid, pv->untagged_bitmap); 716 struct net_bridge_vlan *v;
717
718 if (vid != vg->pvid)
719 return false;
720
721 v = br_vlan_lookup(&vg->vlan_hash, vid);
722 if (v && br_vlan_should_use(v) &&
723 (v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
724 return true;
725
726 return false;
582} 727}
583 728
584static void br_vlan_disable_default_pvid(struct net_bridge *br) 729static void br_vlan_disable_default_pvid(struct net_bridge *br)
@@ -589,24 +734,31 @@ static void br_vlan_disable_default_pvid(struct net_bridge *br)
589 /* Disable default_pvid on all ports where it is still 734 /* Disable default_pvid on all ports where it is still
590 * configured. 735 * configured.
591 */ 736 */
592 if (vlan_default_pvid(br_get_vlan_info(br), pvid)) 737 if (vlan_default_pvid(br_vlan_group(br), pvid))
593 br_vlan_delete(br, pvid); 738 br_vlan_delete(br, pvid);
594 739
595 list_for_each_entry(p, &br->port_list, list) { 740 list_for_each_entry(p, &br->port_list, list) {
596 if (vlan_default_pvid(nbp_get_vlan_info(p), pvid)) 741 if (vlan_default_pvid(nbp_vlan_group(p), pvid))
597 nbp_vlan_delete(p, pvid); 742 nbp_vlan_delete(p, pvid);
598 } 743 }
599 744
600 br->default_pvid = 0; 745 br->default_pvid = 0;
601} 746}
602 747
603static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) 748int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
604{ 749{
750 const struct net_bridge_vlan *pvent;
751 struct net_bridge_vlan_group *vg;
605 struct net_bridge_port *p; 752 struct net_bridge_port *p;
606 u16 old_pvid; 753 u16 old_pvid;
607 int err = 0; 754 int err = 0;
608 unsigned long *changed; 755 unsigned long *changed;
609 756
757 if (!pvid) {
758 br_vlan_disable_default_pvid(br);
759 return 0;
760 }
761
610 changed = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long), 762 changed = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
611 GFP_KERNEL); 763 GFP_KERNEL);
612 if (!changed) 764 if (!changed)
@@ -617,11 +769,14 @@ static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
617 /* Update default_pvid config only if we do not conflict with 769 /* Update default_pvid config only if we do not conflict with
618 * user configuration. 770 * user configuration.
619 */ 771 */
620 if ((!old_pvid || vlan_default_pvid(br_get_vlan_info(br), old_pvid)) && 772 vg = br_vlan_group(br);
621 !br_vlan_find(br, pvid)) { 773 pvent = br_vlan_find(vg, pvid);
774 if ((!old_pvid || vlan_default_pvid(vg, old_pvid)) &&
775 (!pvent || !br_vlan_should_use(pvent))) {
622 err = br_vlan_add(br, pvid, 776 err = br_vlan_add(br, pvid,
623 BRIDGE_VLAN_INFO_PVID | 777 BRIDGE_VLAN_INFO_PVID |
624 BRIDGE_VLAN_INFO_UNTAGGED); 778 BRIDGE_VLAN_INFO_UNTAGGED |
779 BRIDGE_VLAN_INFO_BRENTRY);
625 if (err) 780 if (err)
626 goto out; 781 goto out;
627 br_vlan_delete(br, old_pvid); 782 br_vlan_delete(br, old_pvid);
@@ -632,9 +787,10 @@ static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
632 /* Update default_pvid config only if we do not conflict with 787 /* Update default_pvid config only if we do not conflict with
633 * user configuration. 788 * user configuration.
634 */ 789 */
790 vg = nbp_vlan_group(p);
635 if ((old_pvid && 791 if ((old_pvid &&
636 !vlan_default_pvid(nbp_get_vlan_info(p), old_pvid)) || 792 !vlan_default_pvid(vg, old_pvid)) ||
637 nbp_vlan_find(p, pvid)) 793 br_vlan_find(vg, pvid))
638 continue; 794 continue;
639 795
640 err = nbp_vlan_add(p, pvid, 796 err = nbp_vlan_add(p, pvid,
@@ -668,7 +824,8 @@ err_port:
668 if (old_pvid) 824 if (old_pvid)
669 br_vlan_add(br, old_pvid, 825 br_vlan_add(br, old_pvid,
670 BRIDGE_VLAN_INFO_PVID | 826 BRIDGE_VLAN_INFO_PVID |
671 BRIDGE_VLAN_INFO_UNTAGGED); 827 BRIDGE_VLAN_INFO_UNTAGGED |
828 BRIDGE_VLAN_INFO_BRENTRY);
672 br_vlan_delete(br, pvid); 829 br_vlan_delete(br, pvid);
673 } 830 }
674 goto out; 831 goto out;
@@ -694,12 +851,7 @@ int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val)
694 err = -EPERM; 851 err = -EPERM;
695 goto unlock; 852 goto unlock;
696 } 853 }
697 854 err = __br_vlan_set_default_pvid(br, pvid);
698 if (!pvid)
699 br_vlan_disable_default_pvid(br);
700 else
701 err = __br_vlan_set_default_pvid(br, pvid);
702
703unlock: 855unlock:
704 rtnl_unlock(); 856 rtnl_unlock();
705 return err; 857 return err;
@@ -707,10 +859,68 @@ unlock:
707 859
708int br_vlan_init(struct net_bridge *br) 860int br_vlan_init(struct net_bridge *br)
709{ 861{
862 struct net_bridge_vlan_group *vg;
863 int ret = -ENOMEM;
864
865 vg = kzalloc(sizeof(*vg), GFP_KERNEL);
866 if (!vg)
867 goto out;
868 ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params);
869 if (ret)
870 goto err_rhtbl;
871 INIT_LIST_HEAD(&vg->vlan_list);
710 br->vlan_proto = htons(ETH_P_8021Q); 872 br->vlan_proto = htons(ETH_P_8021Q);
711 br->default_pvid = 1; 873 br->default_pvid = 1;
712 return br_vlan_add(br, 1, 874 rcu_assign_pointer(br->vlgrp, vg);
713 BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED); 875 ret = br_vlan_add(br, 1,
876 BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED |
877 BRIDGE_VLAN_INFO_BRENTRY);
878 if (ret)
879 goto err_vlan_add;
880
881out:
882 return ret;
883
884err_vlan_add:
885 rhashtable_destroy(&vg->vlan_hash);
886err_rhtbl:
887 kfree(vg);
888
889 goto out;
890}
891
892int nbp_vlan_init(struct net_bridge_port *p)
893{
894 struct net_bridge_vlan_group *vg;
895 int ret = -ENOMEM;
896
897 vg = kzalloc(sizeof(struct net_bridge_vlan_group), GFP_KERNEL);
898 if (!vg)
899 goto out;
900
901 ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params);
902 if (ret)
903 goto err_rhtbl;
904 INIT_LIST_HEAD(&vg->vlan_list);
905 rcu_assign_pointer(p->vlgrp, vg);
906 if (p->br->default_pvid) {
907 ret = nbp_vlan_add(p, p->br->default_pvid,
908 BRIDGE_VLAN_INFO_PVID |
909 BRIDGE_VLAN_INFO_UNTAGGED);
910 if (ret)
911 goto err_vlan_add;
912 }
913out:
914 return ret;
915
916err_vlan_add:
917 RCU_INIT_POINTER(p->vlgrp, NULL);
918 synchronize_rcu();
919 rhashtable_destroy(&vg->vlan_hash);
920err_rhtbl:
921 kfree(vg);
922
923 goto out;
714} 924}
715 925
716/* Must be protected by RTNL. 926/* Must be protected by RTNL.
@@ -718,35 +928,28 @@ int br_vlan_init(struct net_bridge *br)
718 */ 928 */
719int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) 929int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
720{ 930{
721 struct net_port_vlans *pv = NULL; 931 struct net_bridge_vlan *vlan;
722 int err; 932 int ret;
723 933
724 ASSERT_RTNL(); 934 ASSERT_RTNL();
725 935
726 pv = rtnl_dereference(port->vlan_info); 936 vlan = br_vlan_find(nbp_vlan_group(port), vid);
727 if (pv) 937 if (vlan) {
728 return __vlan_add(pv, vid, flags); 938 __vlan_add_flags(vlan, flags);
729 939 return 0;
 730 /* Create port vlan information
731 */
732 pv = kzalloc(sizeof(*pv), GFP_KERNEL);
733 if (!pv) {
734 err = -ENOMEM;
735 goto clean_up;
736 } 940 }
737 941
738 pv->port_idx = port->port_no; 942 vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
739 pv->parent.port = port; 943 if (!vlan)
740 err = __vlan_add(pv, vid, flags); 944 return -ENOMEM;
741 if (err)
742 goto clean_up;
743 945
744 rcu_assign_pointer(port->vlan_info, pv); 946 vlan->vid = vid;
745 return 0; 947 vlan->port = port;
948 ret = __vlan_add(vlan, flags);
949 if (ret)
950 kfree(vlan);
746 951
747clean_up: 952 return ret;
748 kfree(pv);
749 return err;
750} 953}
751 954
752/* Must be protected by RTNL. 955/* Must be protected by RTNL.
@@ -754,61 +957,28 @@ clean_up:
754 */ 957 */
755int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) 958int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
756{ 959{
757 struct net_port_vlans *pv; 960 struct net_bridge_vlan *v;
758 961
759 ASSERT_RTNL(); 962 ASSERT_RTNL();
760 963
761 pv = rtnl_dereference(port->vlan_info); 964 v = br_vlan_find(nbp_vlan_group(port), vid);
762 if (!pv) 965 if (!v)
763 return -EINVAL; 966 return -ENOENT;
764
765 br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid); 967 br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid);
766 br_fdb_delete_by_port(port->br, port, vid, 0); 968 br_fdb_delete_by_port(port->br, port, vid, 0);
767 969
768 return __vlan_del(pv, vid); 970 return __vlan_del(v);
769} 971}
770 972
771void nbp_vlan_flush(struct net_bridge_port *port) 973void nbp_vlan_flush(struct net_bridge_port *port)
772{ 974{
773 struct net_port_vlans *pv; 975 struct net_bridge_vlan_group *vg;
774 u16 vid;
775 976
776 ASSERT_RTNL(); 977 ASSERT_RTNL();
777 978
778 pv = rtnl_dereference(port->vlan_info); 979 vg = nbp_vlan_group(port);
779 if (!pv) 980 __vlan_flush(vg);
780 return; 981 RCU_INIT_POINTER(port->vlgrp, NULL);
781 982 synchronize_rcu();
782 for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) 983 __vlan_group_free(vg);
783 vlan_vid_del(port->dev, port->br->vlan_proto, vid);
784
785 __vlan_flush(pv);
786}
787
788bool nbp_vlan_find(struct net_bridge_port *port, u16 vid)
789{
790 struct net_port_vlans *pv;
791 bool found = false;
792
793 rcu_read_lock();
794 pv = rcu_dereference(port->vlan_info);
795
796 if (!pv)
797 goto out;
798
799 if (test_bit(vid, pv->vlan_bitmap))
800 found = true;
801
802out:
803 rcu_read_unlock();
804 return found;
805}
806
807int nbp_vlan_init(struct net_bridge_port *p)
808{
809 return p->br->default_pvid ?
810 nbp_vlan_add(p, p->br->default_pvid,
811 BRIDGE_VLAN_INFO_PVID |
812 BRIDGE_VLAN_INFO_UNTAGGED) :
813 0;
814} 984}
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 17f2e4bc2a29..0ad639a96142 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -180,7 +180,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
180{ 180{
181 const struct ebt_log_info *info = par->targinfo; 181 const struct ebt_log_info *info = par->targinfo;
182 struct nf_loginfo li; 182 struct nf_loginfo li;
183 struct net *net = dev_net(par->in ? par->in : par->out); 183 struct net *net = par->net;
184 184
185 li.type = NF_LOG_TYPE_LOG; 185 li.type = NF_LOG_TYPE_LOG;
186 li.u.log.level = info->loglevel; 186 li.u.log.level = info->loglevel;
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 59ac7952010d..54816150608e 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -24,7 +24,7 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
24{ 24{
25 const struct ebt_nflog_info *info = par->targinfo; 25 const struct ebt_nflog_info *info = par->targinfo;
26 struct nf_loginfo li; 26 struct nf_loginfo li;
27 struct net *net = dev_net(par->in ? par->in : par->out); 27 struct net *net = par->net;
28 28
29 li.type = NF_LOG_TYPE_ULOG; 29 li.type = NF_LOG_TYPE_ULOG;
30 li.u.ulog.copy_len = info->len; 30 li.u.ulog.copy_len = info->len;
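
Both log targets above stop guessing the namespace from whichever device pointer happens to be non-NULL and read it from the hook parameters instead. A small standalone model of why the explicit field is safer (struct names here are illustrative stand-ins, not the kernel's):

#include <stdio.h>
#include <stddef.h>

struct net { int id; };
struct device { struct net *net; };

/* old style: derive the namespace from in ?: out; breaks if both are NULL */
static struct net *net_from_devs(struct device *in, struct device *out)
{
	return (in ? in : out)->net;
}

/* new style: the hook core fills in an always-valid ->net up front */
struct hook_param {
	struct device *in, *out;
	struct net *net;
};

int main(void)
{
	struct net ns = { .id = 1 };
	struct device dev = { .net = &ns };
	struct hook_param par = { .in = NULL, .out = &dev, .net = &ns };

	printf("old lookup: netns %d\n", net_from_devs(par.in, par.out)->id);
	printf("new lookup: netns %d\n", par.net->id);
	return 0;
}
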
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index d2cdf5d6e98c..ec94c6f1ae88 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -50,10 +50,14 @@ static const struct ebt_table broute_table = {
50 50
51static int ebt_broute(struct sk_buff *skb) 51static int ebt_broute(struct sk_buff *skb)
52{ 52{
53 struct nf_hook_state state;
53 int ret; 54 int ret;
54 55
55 ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, 56 nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN,
56 dev_net(skb->dev)->xt.broute_table); 57 NFPROTO_BRIDGE, skb->dev, NULL, NULL,
58 dev_net(skb->dev), NULL);
59
60 ret = ebt_do_table(skb, &state, state.net->xt.broute_table);
57 if (ret == NF_DROP) 61 if (ret == NF_DROP)
58 return 1; /* route it */ 62 return 1; /* route it */
59 return 0; /* bridge it */ 63 return 0; /* bridge it */
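
ebt_broute() now builds a full hook state up front instead of passing the hook number and devices separately, so ebt_do_table() takes a single context argument. A compact model of that refactor, with simplified types standing in for struct nf_hook_state:

#include <stdio.h>
#include <string.h>

struct net { int id; };

/* a cut-down stand-in for struct nf_hook_state */
struct hook_state {
	unsigned int hook;
	const char *in_dev, *out_dev;
	struct net *net;
};

static void hook_state_init(struct hook_state *s, unsigned int hook,
			    const char *in, const char *out, struct net *net)
{
	memset(s, 0, sizeof(*s));
	s->hook = hook;
	s->in_dev = in;
	s->out_dev = out;
	s->net = net;
}

/* the table walker takes one state argument instead of four parameters */
static int do_table(const struct hook_state *s)
{
	printf("hook %u, in %s, netns %d\n", s->hook,
	       s->in_dev ? s->in_dev : "-", s->net->id);
	return 0;	/* bridge it */
}

int main(void)
{
	struct hook_state state;
	struct net ns = { .id = 1 };

	hook_state_init(&state, 5 /* broute */, "eth0", NULL, &ns);
	return do_table(&state);
}
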
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 8a3f63b2e807..32eccd101f26 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -57,39 +57,34 @@ static const struct ebt_table frame_filter = {
57}; 57};
58 58
59static unsigned int 59static unsigned int
60ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, 60ebt_in_hook(void *priv, struct sk_buff *skb,
61 const struct nf_hook_state *state) 61 const struct nf_hook_state *state)
62{ 62{
63 return ebt_do_table(ops->hooknum, skb, state->in, state->out, 63 return ebt_do_table(skb, state, state->net->xt.frame_filter);
64 dev_net(state->in)->xt.frame_filter);
65} 64}
66 65
67static unsigned int 66static unsigned int
68ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, 67ebt_out_hook(void *priv, struct sk_buff *skb,
69 const struct nf_hook_state *state) 68 const struct nf_hook_state *state)
70{ 69{
71 return ebt_do_table(ops->hooknum, skb, state->in, state->out, 70 return ebt_do_table(skb, state, state->net->xt.frame_filter);
72 dev_net(state->out)->xt.frame_filter);
73} 71}
74 72
75static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { 73static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
76 { 74 {
77 .hook = ebt_in_hook, 75 .hook = ebt_in_hook,
78 .owner = THIS_MODULE,
79 .pf = NFPROTO_BRIDGE, 76 .pf = NFPROTO_BRIDGE,
80 .hooknum = NF_BR_LOCAL_IN, 77 .hooknum = NF_BR_LOCAL_IN,
81 .priority = NF_BR_PRI_FILTER_BRIDGED, 78 .priority = NF_BR_PRI_FILTER_BRIDGED,
82 }, 79 },
83 { 80 {
84 .hook = ebt_in_hook, 81 .hook = ebt_in_hook,
85 .owner = THIS_MODULE,
86 .pf = NFPROTO_BRIDGE, 82 .pf = NFPROTO_BRIDGE,
87 .hooknum = NF_BR_FORWARD, 83 .hooknum = NF_BR_FORWARD,
88 .priority = NF_BR_PRI_FILTER_BRIDGED, 84 .priority = NF_BR_PRI_FILTER_BRIDGED,
89 }, 85 },
90 { 86 {
91 .hook = ebt_out_hook, 87 .hook = ebt_out_hook,
92 .owner = THIS_MODULE,
93 .pf = NFPROTO_BRIDGE, 88 .pf = NFPROTO_BRIDGE,
94 .hooknum = NF_BR_LOCAL_OUT, 89 .hooknum = NF_BR_LOCAL_OUT,
95 .priority = NF_BR_PRI_FILTER_OTHER, 90 .priority = NF_BR_PRI_FILTER_OTHER,
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index c5ef5b1ab678..ec55358f00c8 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -57,39 +57,34 @@ static struct ebt_table frame_nat = {
57}; 57};
58 58
59static unsigned int 59static unsigned int
60ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, 60ebt_nat_in(void *priv, struct sk_buff *skb,
61 const struct nf_hook_state *state) 61 const struct nf_hook_state *state)
62{ 62{
63 return ebt_do_table(ops->hooknum, skb, state->in, state->out, 63 return ebt_do_table(skb, state, state->net->xt.frame_nat);
64 dev_net(state->in)->xt.frame_nat);
65} 64}
66 65
67static unsigned int 66static unsigned int
68ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, 67ebt_nat_out(void *priv, struct sk_buff *skb,
69 const struct nf_hook_state *state) 68 const struct nf_hook_state *state)
70{ 69{
71 return ebt_do_table(ops->hooknum, skb, state->in, state->out, 70 return ebt_do_table(skb, state, state->net->xt.frame_nat);
72 dev_net(state->out)->xt.frame_nat);
73} 71}
74 72
75static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { 73static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
76 { 74 {
77 .hook = ebt_nat_out, 75 .hook = ebt_nat_out,
78 .owner = THIS_MODULE,
79 .pf = NFPROTO_BRIDGE, 76 .pf = NFPROTO_BRIDGE,
80 .hooknum = NF_BR_LOCAL_OUT, 77 .hooknum = NF_BR_LOCAL_OUT,
81 .priority = NF_BR_PRI_NAT_DST_OTHER, 78 .priority = NF_BR_PRI_NAT_DST_OTHER,
82 }, 79 },
83 { 80 {
84 .hook = ebt_nat_out, 81 .hook = ebt_nat_out,
85 .owner = THIS_MODULE,
86 .pf = NFPROTO_BRIDGE, 82 .pf = NFPROTO_BRIDGE,
87 .hooknum = NF_BR_POST_ROUTING, 83 .hooknum = NF_BR_POST_ROUTING,
88 .priority = NF_BR_PRI_NAT_SRC, 84 .priority = NF_BR_PRI_NAT_SRC,
89 }, 85 },
90 { 86 {
91 .hook = ebt_nat_in, 87 .hook = ebt_nat_in,
92 .owner = THIS_MODULE,
93 .pf = NFPROTO_BRIDGE, 88 .pf = NFPROTO_BRIDGE,
94 .hooknum = NF_BR_PRE_ROUTING, 89 .hooknum = NF_BR_PRE_ROUTING,
95 .priority = NF_BR_PRI_NAT_DST_BRIDGED, 90 .priority = NF_BR_PRI_NAT_DST_BRIDGED,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 48b6b01295de..f46ca417bf2d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -183,10 +183,11 @@ struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry)
183} 183}
184 184
185/* Do some firewalling */ 185/* Do some firewalling */
186unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, 186unsigned int ebt_do_table(struct sk_buff *skb,
187 const struct net_device *in, const struct net_device *out, 187 const struct nf_hook_state *state,
188 struct ebt_table *table) 188 struct ebt_table *table)
189{ 189{
190 unsigned int hook = state->hook;
190 int i, nentries; 191 int i, nentries;
191 struct ebt_entry *point; 192 struct ebt_entry *point;
192 struct ebt_counter *counter_base, *cb_base; 193 struct ebt_counter *counter_base, *cb_base;
@@ -199,8 +200,9 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
199 struct xt_action_param acpar; 200 struct xt_action_param acpar;
200 201
201 acpar.family = NFPROTO_BRIDGE; 202 acpar.family = NFPROTO_BRIDGE;
202 acpar.in = in; 203 acpar.net = state->net;
203 acpar.out = out; 204 acpar.in = state->in;
205 acpar.out = state->out;
204 acpar.hotdrop = false; 206 acpar.hotdrop = false;
205 acpar.hooknum = hook; 207 acpar.hooknum = hook;
206 208
@@ -220,7 +222,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
220 base = private->entries; 222 base = private->entries;
221 i = 0; 223 i = 0;
222 while (i < nentries) { 224 while (i < nentries) {
223 if (ebt_basic_match(point, skb, in, out)) 225 if (ebt_basic_match(point, skb, state->in, state->out))
224 goto letscontinue; 226 goto letscontinue;
225 227
226 if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) 228 if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0)
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index a343e62442b1..62f6b1b19589 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -65,31 +65,29 @@ int nft_bridge_ip6hdr_validate(struct sk_buff *skb)
65EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate); 65EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate);
66 66
67static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt, 67static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
68 const struct nf_hook_ops *ops,
69 struct sk_buff *skb, 68 struct sk_buff *skb,
70 const struct nf_hook_state *state) 69 const struct nf_hook_state *state)
71{ 70{
72 if (nft_bridge_iphdr_validate(skb)) 71 if (nft_bridge_iphdr_validate(skb))
73 nft_set_pktinfo_ipv4(pkt, ops, skb, state); 72 nft_set_pktinfo_ipv4(pkt, skb, state);
74 else 73 else
75 nft_set_pktinfo(pkt, ops, skb, state); 74 nft_set_pktinfo(pkt, skb, state);
76} 75}
77 76
78static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, 77static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
79 const struct nf_hook_ops *ops,
80 struct sk_buff *skb, 78 struct sk_buff *skb,
81 const struct nf_hook_state *state) 79 const struct nf_hook_state *state)
82{ 80{
83#if IS_ENABLED(CONFIG_IPV6) 81#if IS_ENABLED(CONFIG_IPV6)
84 if (nft_bridge_ip6hdr_validate(skb) && 82 if (nft_bridge_ip6hdr_validate(skb) &&
85 nft_set_pktinfo_ipv6(pkt, ops, skb, state) == 0) 83 nft_set_pktinfo_ipv6(pkt, skb, state) == 0)
86 return; 84 return;
87#endif 85#endif
88 nft_set_pktinfo(pkt, ops, skb, state); 86 nft_set_pktinfo(pkt, skb, state);
89} 87}
90 88
91static unsigned int 89static unsigned int
92nft_do_chain_bridge(const struct nf_hook_ops *ops, 90nft_do_chain_bridge(void *priv,
93 struct sk_buff *skb, 91 struct sk_buff *skb,
94 const struct nf_hook_state *state) 92 const struct nf_hook_state *state)
95{ 93{
@@ -97,17 +95,17 @@ nft_do_chain_bridge(const struct nf_hook_ops *ops,
97 95
98 switch (eth_hdr(skb)->h_proto) { 96 switch (eth_hdr(skb)->h_proto) {
99 case htons(ETH_P_IP): 97 case htons(ETH_P_IP):
100 nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, state); 98 nft_bridge_set_pktinfo_ipv4(&pkt, skb, state);
101 break; 99 break;
102 case htons(ETH_P_IPV6): 100 case htons(ETH_P_IPV6):
103 nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, state); 101 nft_bridge_set_pktinfo_ipv6(&pkt, skb, state);
104 break; 102 break;
105 default: 103 default:
106 nft_set_pktinfo(&pkt, ops, skb, state); 104 nft_set_pktinfo(&pkt, skb, state);
107 break; 105 break;
108 } 106 }
109 107
110 return nft_do_chain(&pkt, ops); 108 return nft_do_chain(&pkt, priv);
111} 109}
112 110
113static struct nft_af_info nft_af_bridge __read_mostly = { 111static struct nft_af_info nft_af_bridge __read_mostly = {
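
With the ops argument gone, nft_do_chain_bridge() keys the pktinfo setup purely off the Ethernet protocol field. A trivial model of that dispatch (the ethertype values are the standard ones; the layer strings are illustrative):

#include <stdio.h>
#include <stdint.h>

#define ETH_P_IP   0x0800
#define ETH_P_IPV6 0x86DD

struct pktinfo { const char *l3; };

/* pick the layer-3 initializer from the ethertype, as the bridge hook does */
static void set_pktinfo(struct pktinfo *pkt, uint16_t proto)
{
	switch (proto) {
	case ETH_P_IP:
		pkt->l3 = "ipv4";
		break;
	case ETH_P_IPV6:
		pkt->l3 = "ipv6";
		break;
	default:
		pkt->l3 = "raw";	/* fall back to generic setup */
		break;
	}
}

int main(void)
{
	struct pktinfo pkt;

	set_pktinfo(&pkt, ETH_P_IPV6);
	printf("pktinfo initialized for %s\n", pkt.l3);
	return 0;
}
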
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 858d848564ee..fdba3d9fbff3 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -261,7 +261,6 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
261 const struct nft_pktinfo *pkt) 261 const struct nft_pktinfo *pkt)
262{ 262{
263 struct nft_reject *priv = nft_expr_priv(expr); 263 struct nft_reject *priv = nft_expr_priv(expr);
264 struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
265 const unsigned char *dest = eth_hdr(pkt->skb)->h_dest; 264 const unsigned char *dest = eth_hdr(pkt->skb)->h_dest;
266 265
267 if (is_broadcast_ether_addr(dest) || 266 if (is_broadcast_ether_addr(dest) ||
@@ -273,16 +272,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
273 switch (priv->type) { 272 switch (priv->type) {
274 case NFT_REJECT_ICMP_UNREACH: 273 case NFT_REJECT_ICMP_UNREACH:
275 nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, 274 nft_reject_br_send_v4_unreach(pkt->skb, pkt->in,
276 pkt->ops->hooknum, 275 pkt->hook,
277 priv->icmp_code); 276 priv->icmp_code);
278 break; 277 break;
279 case NFT_REJECT_TCP_RST: 278 case NFT_REJECT_TCP_RST:
280 nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, 279 nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in,
281 pkt->ops->hooknum); 280 pkt->hook);
282 break; 281 break;
283 case NFT_REJECT_ICMPX_UNREACH: 282 case NFT_REJECT_ICMPX_UNREACH:
284 nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, 283 nft_reject_br_send_v4_unreach(pkt->skb, pkt->in,
285 pkt->ops->hooknum, 284 pkt->hook,
286 nft_reject_icmp_code(priv->icmp_code)); 285 nft_reject_icmp_code(priv->icmp_code));
287 break; 286 break;
288 } 287 }
@@ -290,17 +289,17 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
290 case htons(ETH_P_IPV6): 289 case htons(ETH_P_IPV6):
291 switch (priv->type) { 290 switch (priv->type) {
292 case NFT_REJECT_ICMP_UNREACH: 291 case NFT_REJECT_ICMP_UNREACH:
293 nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, 292 nft_reject_br_send_v6_unreach(pkt->net, pkt->skb,
294 pkt->ops->hooknum, 293 pkt->in, pkt->hook,
295 priv->icmp_code); 294 priv->icmp_code);
296 break; 295 break;
297 case NFT_REJECT_TCP_RST: 296 case NFT_REJECT_TCP_RST:
298 nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in, 297 nft_reject_br_send_v6_tcp_reset(pkt->net, pkt->skb,
299 pkt->ops->hooknum); 298 pkt->in, pkt->hook);
300 break; 299 break;
301 case NFT_REJECT_ICMPX_UNREACH: 300 case NFT_REJECT_ICMPX_UNREACH:
302 nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, 301 nft_reject_br_send_v6_unreach(pkt->net, pkt->skb,
303 pkt->ops->hooknum, 302 pkt->in, pkt->hook,
304 nft_reject_icmpv6_code(priv->icmp_code)); 303 nft_reject_icmpv6_code(priv->icmp_code));
305 break; 304 break;
306 } 305 }
diff --git a/net/can/bcm.c b/net/can/bcm.c
index a1ba6875c2a2..6863310d6973 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -96,7 +96,7 @@ struct bcm_op {
96 canid_t can_id; 96 canid_t can_id;
97 u32 flags; 97 u32 flags;
98 unsigned long frames_abs, frames_filtered; 98 unsigned long frames_abs, frames_filtered;
99 struct timeval ival1, ival2; 99 struct bcm_timeval ival1, ival2;
100 struct hrtimer timer, thrtimer; 100 struct hrtimer timer, thrtimer;
101 struct tasklet_struct tsklet, thrtsklet; 101 struct tasklet_struct tsklet, thrtsklet;
102 ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; 102 ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg;
@@ -131,6 +131,11 @@ static inline struct bcm_sock *bcm_sk(const struct sock *sk)
131 return (struct bcm_sock *)sk; 131 return (struct bcm_sock *)sk;
132} 132}
133 133
134static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv)
135{
136 return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC);
137}
138
134#define CFSIZ sizeof(struct can_frame) 139#define CFSIZ sizeof(struct can_frame)
135#define OPSIZ sizeof(struct bcm_op) 140#define OPSIZ sizeof(struct bcm_op)
136#define MHSIZ sizeof(struct bcm_msg_head) 141#define MHSIZ sizeof(struct bcm_msg_head)
@@ -953,8 +958,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
953 op->count = msg_head->count; 958 op->count = msg_head->count;
954 op->ival1 = msg_head->ival1; 959 op->ival1 = msg_head->ival1;
955 op->ival2 = msg_head->ival2; 960 op->ival2 = msg_head->ival2;
956 op->kt_ival1 = timeval_to_ktime(msg_head->ival1); 961 op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1);
957 op->kt_ival2 = timeval_to_ktime(msg_head->ival2); 962 op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2);
958 963
959 /* disable an active timer due to zero values? */ 964 /* disable an active timer due to zero values? */
960 if (!op->kt_ival1.tv64 && !op->kt_ival2.tv64) 965 if (!op->kt_ival1.tv64 && !op->kt_ival2.tv64)
@@ -1134,8 +1139,8 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
1134 /* set timer value */ 1139 /* set timer value */
1135 op->ival1 = msg_head->ival1; 1140 op->ival1 = msg_head->ival1;
1136 op->ival2 = msg_head->ival2; 1141 op->ival2 = msg_head->ival2;
1137 op->kt_ival1 = timeval_to_ktime(msg_head->ival1); 1142 op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1);
1138 op->kt_ival2 = timeval_to_ktime(msg_head->ival2); 1143 op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2);
1139 1144
1140 /* disable an active timer due to zero value? */ 1145 /* disable an active timer due to zero value? */
1141 if (!op->kt_ival1.tv64) 1146 if (!op->kt_ival1.tv64)
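
bcm.c drops struct timeval for its own fixed-layout bcm_timeval and converts it with the helper added above. A standalone version of the same arithmetic, assuming the two-long layout of the uapi struct:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_USEC 1000LL
#define NSEC_PER_SEC  1000000000LL

/* mirrors the uapi layout: plain longs, independent of struct timeval */
struct bcm_timeval {
	long tv_sec;
	long tv_usec;
};

/* same math as bcm_timeval_to_ktime(): ktime_set(sec, usec * 1000) */
static int64_t bcm_timeval_to_ns(struct bcm_timeval tv)
{
	return (int64_t)tv.tv_sec * NSEC_PER_SEC +
	       tv.tv_usec * NSEC_PER_USEC;
}

int main(void)
{
	struct bcm_timeval ival = { .tv_sec = 1, .tv_usec = 500000 };

	printf("interval = %lld ns\n", (long long)bcm_timeval_to_ns(ival));
	return 0;
}
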
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index ba6eb17226da..10d87753ed87 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/ceph/decode.h> 9#include <linux/ceph/decode.h>
10#include <linux/ceph/auth.h> 10#include <linux/ceph/auth.h>
11#include <linux/ceph/libceph.h>
11#include <linux/ceph/messenger.h> 12#include <linux/ceph/messenger.h>
12 13
13#include "crypto.h" 14#include "crypto.h"
@@ -279,6 +280,15 @@ bad:
279 return -EINVAL; 280 return -EINVAL;
280} 281}
281 282
283static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au)
284{
285 ceph_crypto_key_destroy(&au->session_key);
286 if (au->buf) {
287 ceph_buffer_put(au->buf);
288 au->buf = NULL;
289 }
290}
291
282static int ceph_x_build_authorizer(struct ceph_auth_client *ac, 292static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
283 struct ceph_x_ticket_handler *th, 293 struct ceph_x_ticket_handler *th,
284 struct ceph_x_authorizer *au) 294 struct ceph_x_authorizer *au)
@@ -297,7 +307,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
297 ceph_crypto_key_destroy(&au->session_key); 307 ceph_crypto_key_destroy(&au->session_key);
298 ret = ceph_crypto_key_clone(&au->session_key, &th->session_key); 308 ret = ceph_crypto_key_clone(&au->session_key, &th->session_key);
299 if (ret) 309 if (ret)
300 return ret; 310 goto out_au;
301 311
302 maxlen = sizeof(*msg_a) + sizeof(msg_b) + 312 maxlen = sizeof(*msg_a) + sizeof(msg_b) +
303 ceph_x_encrypt_buflen(ticket_blob_len); 313 ceph_x_encrypt_buflen(ticket_blob_len);
@@ -309,8 +319,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
309 if (!au->buf) { 319 if (!au->buf) {
310 au->buf = ceph_buffer_new(maxlen, GFP_NOFS); 320 au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
311 if (!au->buf) { 321 if (!au->buf) {
312 ceph_crypto_key_destroy(&au->session_key); 322 ret = -ENOMEM;
313 return -ENOMEM; 323 goto out_au;
314 } 324 }
315 } 325 }
316 au->service = th->service; 326 au->service = th->service;
@@ -340,7 +350,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
340 ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b), 350 ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b),
341 p, end - p); 351 p, end - p);
342 if (ret < 0) 352 if (ret < 0)
343 goto out_buf; 353 goto out_au;
344 p += ret; 354 p += ret;
345 au->buf->vec.iov_len = p - au->buf->vec.iov_base; 355 au->buf->vec.iov_len = p - au->buf->vec.iov_base;
346 dout(" built authorizer nonce %llx len %d\n", au->nonce, 356 dout(" built authorizer nonce %llx len %d\n", au->nonce,
@@ -348,9 +358,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
348 BUG_ON(au->buf->vec.iov_len > maxlen); 358 BUG_ON(au->buf->vec.iov_len > maxlen);
349 return 0; 359 return 0;
350 360
351out_buf: 361out_au:
352 ceph_buffer_put(au->buf); 362 ceph_x_authorizer_cleanup(au);
353 au->buf = NULL;
354 return ret; 363 return ret;
355} 364}
356 365
@@ -624,8 +633,7 @@ static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
624{ 633{
625 struct ceph_x_authorizer *au = (void *)a; 634 struct ceph_x_authorizer *au = (void *)a;
626 635
627 ceph_crypto_key_destroy(&au->session_key); 636 ceph_x_authorizer_cleanup(au);
628 ceph_buffer_put(au->buf);
629 kfree(au); 637 kfree(au);
630} 638}
631 639
@@ -653,8 +661,7 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
653 remove_ticket_handler(ac, th); 661 remove_ticket_handler(ac, th);
654 } 662 }
655 663
656 if (xi->auth_authorizer.buf) 664 ceph_x_authorizer_cleanup(&xi->auth_authorizer);
657 ceph_buffer_put(xi->auth_authorizer.buf);
658 665
659 kfree(ac->private); 666 kfree(ac->private);
660 ac->private = NULL; 667 ac->private = NULL;
@@ -691,8 +698,10 @@ static int ceph_x_sign_message(struct ceph_auth_handshake *auth,
691 struct ceph_msg *msg) 698 struct ceph_msg *msg)
692{ 699{
693 int ret; 700 int ret;
694 if (!auth->authorizer) 701
702 if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
695 return 0; 703 return 0;
704
696 ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, 705 ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
697 msg, &msg->footer.sig); 706 msg, &msg->footer.sig);
698 if (ret < 0) 707 if (ret < 0)
@@ -707,8 +716,9 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
707 __le64 sig_check; 716 __le64 sig_check;
708 int ret; 717 int ret;
709 718
710 if (!auth->authorizer) 719 if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
711 return 0; 720 return 0;
721
712 ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, 722 ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
713 msg, &sig_check); 723 msg, &sig_check);
714 if (ret < 0) 724 if (ret < 0)
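
auth_x.c folds three slightly different teardown paths into one ceph_x_authorizer_cleanup() that NULLs what it frees, so it is safe to call from every error label and again at destroy time. A userspace model of that idempotent-cleanup pattern (the member types are stand-ins, not the ceph structs):

#include <stdio.h>
#include <stdlib.h>

struct authorizer {
	char *session_key;	/* stands in for the cloned crypto key */
	char *buf;		/* stands in for the refcounted buffer */
};

/* one cleanup helper; NULLing each member makes a second call a no-op */
static void authorizer_cleanup(struct authorizer *au)
{
	free(au->session_key);
	au->session_key = NULL;
	if (au->buf) {
		free(au->buf);
		au->buf = NULL;
	}
}

int main(void)
{
	struct authorizer au = { 0 };

	au.session_key = malloc(16);
	au.buf = malloc(64);

	authorizer_cleanup(&au);	/* error path */
	authorizer_cleanup(&au);	/* destroy path: nothing double-freed */
	puts("cleanup is idempotent");
	return 0;
}
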
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 54a00d66509e..bcbec33c6a14 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -245,6 +245,8 @@ enum {
245 Opt_nocrc, 245 Opt_nocrc,
246 Opt_cephx_require_signatures, 246 Opt_cephx_require_signatures,
247 Opt_nocephx_require_signatures, 247 Opt_nocephx_require_signatures,
248 Opt_cephx_sign_messages,
249 Opt_nocephx_sign_messages,
248 Opt_tcp_nodelay, 250 Opt_tcp_nodelay,
249 Opt_notcp_nodelay, 251 Opt_notcp_nodelay,
250}; 252};
@@ -267,6 +269,8 @@ static match_table_t opt_tokens = {
267 {Opt_nocrc, "nocrc"}, 269 {Opt_nocrc, "nocrc"},
268 {Opt_cephx_require_signatures, "cephx_require_signatures"}, 270 {Opt_cephx_require_signatures, "cephx_require_signatures"},
269 {Opt_nocephx_require_signatures, "nocephx_require_signatures"}, 271 {Opt_nocephx_require_signatures, "nocephx_require_signatures"},
272 {Opt_cephx_sign_messages, "cephx_sign_messages"},
273 {Opt_nocephx_sign_messages, "nocephx_sign_messages"},
270 {Opt_tcp_nodelay, "tcp_nodelay"}, 274 {Opt_tcp_nodelay, "tcp_nodelay"},
271 {Opt_notcp_nodelay, "notcp_nodelay"}, 275 {Opt_notcp_nodelay, "notcp_nodelay"},
272 {-1, NULL} 276 {-1, NULL}
@@ -318,7 +322,7 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) {
318 goto out; 322 goto out;
319 } 323 }
320 324
321 ckey = ukey->payload.data; 325 ckey = ukey->payload.data[0];
322 err = ceph_crypto_key_clone(dst, ckey); 326 err = ceph_crypto_key_clone(dst, ckey);
323 if (err) 327 if (err)
324 goto out_key; 328 goto out_key;
@@ -491,6 +495,12 @@ ceph_parse_options(char *options, const char *dev_name,
491 case Opt_nocephx_require_signatures: 495 case Opt_nocephx_require_signatures:
492 opt->flags |= CEPH_OPT_NOMSGAUTH; 496 opt->flags |= CEPH_OPT_NOMSGAUTH;
493 break; 497 break;
498 case Opt_cephx_sign_messages:
499 opt->flags &= ~CEPH_OPT_NOMSGSIGN;
500 break;
501 case Opt_nocephx_sign_messages:
502 opt->flags |= CEPH_OPT_NOMSGSIGN;
503 break;
494 504
495 case Opt_tcp_nodelay: 505 case Opt_tcp_nodelay:
496 opt->flags |= CEPH_OPT_TCP_NODELAY; 506 opt->flags |= CEPH_OPT_TCP_NODELAY;
@@ -534,6 +544,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
534 seq_puts(m, "nocrc,"); 544 seq_puts(m, "nocrc,");
535 if (opt->flags & CEPH_OPT_NOMSGAUTH) 545 if (opt->flags & CEPH_OPT_NOMSGAUTH)
536 seq_puts(m, "nocephx_require_signatures,"); 546 seq_puts(m, "nocephx_require_signatures,");
547 if (opt->flags & CEPH_OPT_NOMSGSIGN)
548 seq_puts(m, "nocephx_sign_messages,");
537 if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) 549 if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
538 seq_puts(m, "notcp_nodelay,"); 550 seq_puts(m, "notcp_nodelay,");
539 551
@@ -596,11 +608,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
596 if (ceph_test_opt(client, MYIP)) 608 if (ceph_test_opt(client, MYIP))
597 myaddr = &client->options->my_addr; 609 myaddr = &client->options->my_addr;
598 610
599 ceph_messenger_init(&client->msgr, myaddr, 611 ceph_messenger_init(&client->msgr, myaddr);
600 client->supported_features,
601 client->required_features,
602 ceph_test_opt(client, NOCRC),
603 ceph_test_opt(client, TCP_NODELAY));
604 612
605 /* subsystems */ 613 /* subsystems */
606 err = ceph_monc_init(&client->monc, client); 614 err = ceph_monc_init(&client->monc, client);
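
The new cephx_sign_messages/nocephx_sign_messages pair follows the existing ceph option convention of storing only the negative form as a flag bit, with signing enabled by default. A minimal parser model (the flag value is illustrative):

#include <stdio.h>
#include <string.h>

#define OPT_NOMSGSIGN (1u << 0)	/* models CEPH_OPT_NOMSGSIGN */

/* paired mount options toggle one "negative" bit; default is signing on */
static void parse_flag(unsigned int *flags, const char *token)
{
	if (!strcmp(token, "cephx_sign_messages"))
		*flags &= ~OPT_NOMSGSIGN;
	else if (!strcmp(token, "nocephx_sign_messages"))
		*flags |= OPT_NOMSGSIGN;
}

int main(void)
{
	unsigned int flags = 0;	/* default: sign messages */

	parse_flag(&flags, "nocephx_sign_messages");
	printf("signing: %s\n", (flags & OPT_NOMSGSIGN) ? "off" : "on");
	parse_flag(&flags, "cephx_sign_messages");
	printf("signing: %s\n", (flags & OPT_NOMSGSIGN) ? "off" : "on");
	return 0;
}
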
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 4440edcce0d6..42e8649c6e79 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -537,7 +537,7 @@ static int ceph_key_preparse(struct key_preparsed_payload *prep)
537 if (ret < 0) 537 if (ret < 0)
538 goto err_ckey; 538 goto err_ckey;
539 539
540 prep->payload[0] = ckey; 540 prep->payload.data[0] = ckey;
541 prep->quotalen = datalen; 541 prep->quotalen = datalen;
542 return 0; 542 return 0;
543 543
@@ -549,14 +549,14 @@ err:
549 549
550static void ceph_key_free_preparse(struct key_preparsed_payload *prep) 550static void ceph_key_free_preparse(struct key_preparsed_payload *prep)
551{ 551{
552 struct ceph_crypto_key *ckey = prep->payload[0]; 552 struct ceph_crypto_key *ckey = prep->payload.data[0];
553 ceph_crypto_key_destroy(ckey); 553 ceph_crypto_key_destroy(ckey);
554 kfree(ckey); 554 kfree(ckey);
555} 555}
556 556
557static void ceph_key_destroy(struct key *key) 557static void ceph_key_destroy(struct key *key)
558{ 558{
559 struct ceph_crypto_key *ckey = key->payload.data; 559 struct ceph_crypto_key *ckey = key->payload.data[0];
560 560
561 ceph_crypto_key_destroy(ckey); 561 ceph_crypto_key_destroy(ckey);
562 kfree(ckey); 562 kfree(ckey);
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index d1498224c49d..2e9cab09f37b 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -16,8 +16,10 @@ struct ceph_crypto_key {
16 16
17static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) 17static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
18{ 18{
19 if (key) 19 if (key) {
20 kfree(key->key); 20 kfree(key->key);
21 key->key = NULL;
22 }
21} 23}
22 24
23int ceph_crypto_key_clone(struct ceph_crypto_key *dst, 25int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b9b0e3b5da49..9981039ef4ff 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -509,7 +509,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
509 return ret; 509 return ret;
510 } 510 }
511 511
512 if (con->msgr->tcp_nodelay) { 512 if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) {
513 int optval = 1; 513 int optval = 1;
514 514
515 ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, 515 ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
@@ -637,9 +637,6 @@ static int con_close_socket(struct ceph_connection *con)
637static void ceph_msg_remove(struct ceph_msg *msg) 637static void ceph_msg_remove(struct ceph_msg *msg)
638{ 638{
639 list_del_init(&msg->list_head); 639 list_del_init(&msg->list_head);
640 BUG_ON(msg->con == NULL);
641 msg->con->ops->put(msg->con);
642 msg->con = NULL;
643 640
644 ceph_msg_put(msg); 641 ceph_msg_put(msg);
645} 642}
@@ -662,15 +659,14 @@ static void reset_connection(struct ceph_connection *con)
662 659
663 if (con->in_msg) { 660 if (con->in_msg) {
664 BUG_ON(con->in_msg->con != con); 661 BUG_ON(con->in_msg->con != con);
665 con->in_msg->con = NULL;
666 ceph_msg_put(con->in_msg); 662 ceph_msg_put(con->in_msg);
667 con->in_msg = NULL; 663 con->in_msg = NULL;
668 con->ops->put(con);
669 } 664 }
670 665
671 con->connect_seq = 0; 666 con->connect_seq = 0;
672 con->out_seq = 0; 667 con->out_seq = 0;
673 if (con->out_msg) { 668 if (con->out_msg) {
669 BUG_ON(con->out_msg->con != con);
674 ceph_msg_put(con->out_msg); 670 ceph_msg_put(con->out_msg);
675 con->out_msg = NULL; 671 con->out_msg = NULL;
676 } 672 }
@@ -1205,7 +1201,7 @@ static void prepare_write_message_footer(struct ceph_connection *con)
1205 con->out_kvec[v].iov_base = &m->footer; 1201 con->out_kvec[v].iov_base = &m->footer;
1206 if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { 1202 if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
1207 if (con->ops->sign_message) 1203 if (con->ops->sign_message)
1208 con->ops->sign_message(con, m); 1204 con->ops->sign_message(m);
1209 else 1205 else
1210 m->footer.sig = 0; 1206 m->footer.sig = 0;
1211 con->out_kvec[v].iov_len = sizeof(m->footer); 1207 con->out_kvec[v].iov_len = sizeof(m->footer);
@@ -1432,7 +1428,8 @@ static int prepare_write_connect(struct ceph_connection *con)
1432 dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, 1428 dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
1433 con->connect_seq, global_seq, proto); 1429 con->connect_seq, global_seq, proto);
1434 1430
1435 con->out_connect.features = cpu_to_le64(con->msgr->supported_features); 1431 con->out_connect.features =
1432 cpu_to_le64(from_msgr(con->msgr)->supported_features);
1436 con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); 1433 con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
1437 con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); 1434 con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
1438 con->out_connect.global_seq = cpu_to_le32(global_seq); 1435 con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1527,7 +1524,7 @@ static int write_partial_message_data(struct ceph_connection *con)
1527{ 1524{
1528 struct ceph_msg *msg = con->out_msg; 1525 struct ceph_msg *msg = con->out_msg;
1529 struct ceph_msg_data_cursor *cursor = &msg->cursor; 1526 struct ceph_msg_data_cursor *cursor = &msg->cursor;
1530 bool do_datacrc = !con->msgr->nocrc; 1527 bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
1531 u32 crc; 1528 u32 crc;
1532 1529
1533 dout("%s %p msg %p\n", __func__, con, msg); 1530 dout("%s %p msg %p\n", __func__, con, msg);
@@ -1552,8 +1549,8 @@ static int write_partial_message_data(struct ceph_connection *con)
1552 bool need_crc; 1549 bool need_crc;
1553 int ret; 1550 int ret;
1554 1551
1555 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, 1552 page = ceph_msg_data_next(cursor, &page_offset, &length,
1556 &last_piece); 1553 &last_piece);
1557 ret = ceph_tcp_sendpage(con->sock, page, page_offset, 1554 ret = ceph_tcp_sendpage(con->sock, page, page_offset,
1558 length, !last_piece); 1555 length, !last_piece);
1559 if (ret <= 0) { 1556 if (ret <= 0) {
@@ -1564,7 +1561,7 @@ static int write_partial_message_data(struct ceph_connection *con)
1564 } 1561 }
1565 if (do_datacrc && cursor->need_crc) 1562 if (do_datacrc && cursor->need_crc)
1566 crc = ceph_crc32c_page(crc, page, page_offset, length); 1563 crc = ceph_crc32c_page(crc, page, page_offset, length);
1567 need_crc = ceph_msg_data_advance(&msg->cursor, (size_t)ret); 1564 need_crc = ceph_msg_data_advance(cursor, (size_t)ret);
1568 } 1565 }
1569 1566
1570 dout("%s %p msg %p done\n", __func__, con, msg); 1567 dout("%s %p msg %p done\n", __func__, con, msg);
@@ -2005,8 +2002,8 @@ static int process_banner(struct ceph_connection *con)
2005 2002
2006static int process_connect(struct ceph_connection *con) 2003static int process_connect(struct ceph_connection *con)
2007{ 2004{
2008 u64 sup_feat = con->msgr->supported_features; 2005 u64 sup_feat = from_msgr(con->msgr)->supported_features;
2009 u64 req_feat = con->msgr->required_features; 2006 u64 req_feat = from_msgr(con->msgr)->required_features;
2010 u64 server_feat = ceph_sanitize_features( 2007 u64 server_feat = ceph_sanitize_features(
2011 le64_to_cpu(con->in_reply.features)); 2008 le64_to_cpu(con->in_reply.features));
2012 int ret; 2009 int ret;
@@ -2232,7 +2229,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
2232{ 2229{
2233 struct ceph_msg *msg = con->in_msg; 2230 struct ceph_msg *msg = con->in_msg;
2234 struct ceph_msg_data_cursor *cursor = &msg->cursor; 2231 struct ceph_msg_data_cursor *cursor = &msg->cursor;
2235 const bool do_datacrc = !con->msgr->nocrc; 2232 bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
2236 struct page *page; 2233 struct page *page;
2237 size_t page_offset; 2234 size_t page_offset;
2238 size_t length; 2235 size_t length;
@@ -2246,8 +2243,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
2246 if (do_datacrc) 2243 if (do_datacrc)
2247 crc = con->in_data_crc; 2244 crc = con->in_data_crc;
2248 while (cursor->resid) { 2245 while (cursor->resid) {
2249 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, 2246 page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
2250 NULL);
2251 ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); 2247 ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
2252 if (ret <= 0) { 2248 if (ret <= 0) {
2253 if (do_datacrc) 2249 if (do_datacrc)
@@ -2258,7 +2254,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
2258 2254
2259 if (do_datacrc) 2255 if (do_datacrc)
2260 crc = ceph_crc32c_page(crc, page, page_offset, ret); 2256 crc = ceph_crc32c_page(crc, page, page_offset, ret);
2261 (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret); 2257 (void) ceph_msg_data_advance(cursor, (size_t)ret);
2262 } 2258 }
2263 if (do_datacrc) 2259 if (do_datacrc)
2264 con->in_data_crc = crc; 2260 con->in_data_crc = crc;
@@ -2278,7 +2274,7 @@ static int read_partial_message(struct ceph_connection *con)
2278 int end; 2274 int end;
2279 int ret; 2275 int ret;
2280 unsigned int front_len, middle_len, data_len; 2276 unsigned int front_len, middle_len, data_len;
2281 bool do_datacrc = !con->msgr->nocrc; 2277 bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
2282 bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH); 2278 bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH);
2283 u64 seq; 2279 u64 seq;
2284 u32 crc; 2280 u32 crc;
@@ -2423,7 +2419,7 @@ static int read_partial_message(struct ceph_connection *con)
2423 } 2419 }
2424 2420
2425 if (need_sign && con->ops->check_message_signature && 2421 if (need_sign && con->ops->check_message_signature &&
2426 con->ops->check_message_signature(con, m)) { 2422 con->ops->check_message_signature(m)) {
2427 pr_err("read_partial_message %p signature check failed\n", m); 2423 pr_err("read_partial_message %p signature check failed\n", m);
2428 return -EBADMSG; 2424 return -EBADMSG;
2429 } 2425 }
@@ -2438,13 +2434,10 @@ static int read_partial_message(struct ceph_connection *con)
2438 */ 2434 */
2439static void process_message(struct ceph_connection *con) 2435static void process_message(struct ceph_connection *con)
2440{ 2436{
2441 struct ceph_msg *msg; 2437 struct ceph_msg *msg = con->in_msg;
2442 2438
2443 BUG_ON(con->in_msg->con != con); 2439 BUG_ON(con->in_msg->con != con);
2444 con->in_msg->con = NULL;
2445 msg = con->in_msg;
2446 con->in_msg = NULL; 2440 con->in_msg = NULL;
2447 con->ops->put(con);
2448 2441
2449 /* if first message, set peer_name */ 2442 /* if first message, set peer_name */
2450 if (con->peer_name.type == 0) 2443 if (con->peer_name.type == 0)
@@ -2677,7 +2670,7 @@ more:
2677 if (ret <= 0) { 2670 if (ret <= 0) {
2678 switch (ret) { 2671 switch (ret) {
2679 case -EBADMSG: 2672 case -EBADMSG:
2680 con->error_msg = "bad crc"; 2673 con->error_msg = "bad crc/signature";
2681 /* fall through */ 2674 /* fall through */
2682 case -EBADE: 2675 case -EBADE:
2683 ret = -EIO; 2676 ret = -EIO;
@@ -2918,10 +2911,8 @@ static void con_fault(struct ceph_connection *con)
2918 2911
2919 if (con->in_msg) { 2912 if (con->in_msg) {
2920 BUG_ON(con->in_msg->con != con); 2913 BUG_ON(con->in_msg->con != con);
2921 con->in_msg->con = NULL;
2922 ceph_msg_put(con->in_msg); 2914 ceph_msg_put(con->in_msg);
2923 con->in_msg = NULL; 2915 con->in_msg = NULL;
2924 con->ops->put(con);
2925 } 2916 }
2926 2917
2927 /* Requeue anything that hasn't been acked */ 2918 /* Requeue anything that hasn't been acked */
@@ -2952,15 +2943,8 @@ static void con_fault(struct ceph_connection *con)
2952 * initialize a new messenger instance 2943 * initialize a new messenger instance
2953 */ 2944 */
2954void ceph_messenger_init(struct ceph_messenger *msgr, 2945void ceph_messenger_init(struct ceph_messenger *msgr,
2955 struct ceph_entity_addr *myaddr, 2946 struct ceph_entity_addr *myaddr)
2956 u64 supported_features,
2957 u64 required_features,
2958 bool nocrc,
2959 bool tcp_nodelay)
2960{ 2947{
2961 msgr->supported_features = supported_features;
2962 msgr->required_features = required_features;
2963
2964 spin_lock_init(&msgr->global_seq_lock); 2948 spin_lock_init(&msgr->global_seq_lock);
2965 2949
2966 if (myaddr) 2950 if (myaddr)
@@ -2970,8 +2954,6 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
2970 msgr->inst.addr.type = 0; 2954 msgr->inst.addr.type = 0;
2971 get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); 2955 get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
2972 encode_my_addr(msgr); 2956 encode_my_addr(msgr);
2973 msgr->nocrc = nocrc;
2974 msgr->tcp_nodelay = tcp_nodelay;
2975 2957
2976 atomic_set(&msgr->stopping, 0); 2958 atomic_set(&msgr->stopping, 0);
2977 write_pnet(&msgr->net, get_net(current->nsproxy->net_ns)); 2959 write_pnet(&msgr->net, get_net(current->nsproxy->net_ns));
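
The hunks above strip the feature masks and the nocrc/tcp_nodelay knobs out of struct ceph_messenger itself; every user now reaches back to the owning client's options via from_msgr(), as the prepare_write_connect, CRC, and process_connect hunks earlier in this file show. A minimal sketch of the accessor pattern this relies on -- treat the exact definitions as illustrative, the real ones live in the libceph headers:

    /* the messenger is embedded in the client, so a container_of()
     * walk recovers the owning struct and, through it, the options */
    static inline struct ceph_client *from_msgr(struct ceph_messenger *msgr)
    {
            return container_of(msgr, struct ceph_client, msgr);
    }

    /* ceph_test_opt(client, NOCRC) then reduces to a flag test */
    #define ceph_test_opt(client, opt) \
            (!!((client)->options->flags & CEPH_OPT_##opt))
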
@@ -2986,6 +2968,15 @@ void ceph_messenger_fini(struct ceph_messenger *msgr)
2986} 2968}
2987EXPORT_SYMBOL(ceph_messenger_fini); 2969EXPORT_SYMBOL(ceph_messenger_fini);
2988 2970
2971static void msg_con_set(struct ceph_msg *msg, struct ceph_connection *con)
2972{
2973 if (msg->con)
2974 msg->con->ops->put(msg->con);
2975
2976 msg->con = con ? con->ops->get(con) : NULL;
2977 BUG_ON(msg->con != con);
2978}
2979
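
msg_con_set() is the pivot of this commit: the open-coded msg->con get/put pairs deleted above (ceph_msg_remove(), reset_connection(), process_message(), con_fault(), ceph_msg_revoke()) collapse into this one helper, and ceph_msg_release() below clears the field on the final put, so a message pins its connection for exactly as long as it points at it. A sketch of the intended call pattern, not a verbatim excerpt:

    msg_con_set(msg, con);   /* take a con ref via con->ops->get() */
    /* ... the message owns a connection reference here ... */
    msg_con_set(msg, NULL);  /* drop it via con->ops->put() */

The BUG_ON(msg->con != con) documents the contract that ops->get() hands back its argument once the reference is held.
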
2989static void clear_standby(struct ceph_connection *con) 2980static void clear_standby(struct ceph_connection *con)
2990{ 2981{
2991 /* come back from STANDBY? */ 2982 /* come back from STANDBY? */
@@ -3017,9 +3008,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
3017 return; 3008 return;
3018 } 3009 }
3019 3010
3020 BUG_ON(msg->con != NULL); 3011 msg_con_set(msg, con);
3021 msg->con = con->ops->get(con);
3022 BUG_ON(msg->con == NULL);
3023 3012
3024 BUG_ON(!list_empty(&msg->list_head)); 3013 BUG_ON(!list_empty(&msg->list_head));
3025 list_add_tail(&msg->list_head, &con->out_queue); 3014 list_add_tail(&msg->list_head, &con->out_queue);
@@ -3047,16 +3036,15 @@ void ceph_msg_revoke(struct ceph_msg *msg)
3047{ 3036{
3048 struct ceph_connection *con = msg->con; 3037 struct ceph_connection *con = msg->con;
3049 3038
3050 if (!con) 3039 if (!con) {
3040 dout("%s msg %p null con\n", __func__, msg);
3051 return; /* Message not in our possession */ 3041 return; /* Message not in our possession */
3042 }
3052 3043
3053 mutex_lock(&con->mutex); 3044 mutex_lock(&con->mutex);
3054 if (!list_empty(&msg->list_head)) { 3045 if (!list_empty(&msg->list_head)) {
3055 dout("%s %p msg %p - was on queue\n", __func__, con, msg); 3046 dout("%s %p msg %p - was on queue\n", __func__, con, msg);
3056 list_del_init(&msg->list_head); 3047 list_del_init(&msg->list_head);
3057 BUG_ON(msg->con == NULL);
3058 msg->con->ops->put(msg->con);
3059 msg->con = NULL;
3060 msg->hdr.seq = 0; 3048 msg->hdr.seq = 0;
3061 3049
3062 ceph_msg_put(msg); 3050 ceph_msg_put(msg);
@@ -3080,16 +3068,13 @@ void ceph_msg_revoke(struct ceph_msg *msg)
3080 */ 3068 */
3081void ceph_msg_revoke_incoming(struct ceph_msg *msg) 3069void ceph_msg_revoke_incoming(struct ceph_msg *msg)
3082{ 3070{
3083 struct ceph_connection *con; 3071 struct ceph_connection *con = msg->con;
3084 3072
3085 BUG_ON(msg == NULL); 3073 if (!con) {
3086 if (!msg->con) {
3087 dout("%s msg %p null con\n", __func__, msg); 3074 dout("%s msg %p null con\n", __func__, msg);
3088
3089 return; /* Message not in our possession */ 3075 return; /* Message not in our possession */
3090 } 3076 }
3091 3077
3092 con = msg->con;
3093 mutex_lock(&con->mutex); 3078 mutex_lock(&con->mutex);
3094 if (con->in_msg == msg) { 3079 if (con->in_msg == msg) {
3095 unsigned int front_len = le32_to_cpu(con->in_hdr.front_len); 3080 unsigned int front_len = le32_to_cpu(con->in_hdr.front_len);
@@ -3335,9 +3320,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
3335 } 3320 }
3336 if (msg) { 3321 if (msg) {
3337 BUG_ON(*skip); 3322 BUG_ON(*skip);
3323 msg_con_set(msg, con);
3338 con->in_msg = msg; 3324 con->in_msg = msg;
3339 con->in_msg->con = con->ops->get(con);
3340 BUG_ON(con->in_msg->con == NULL);
3341 } else { 3325 } else {
3342 /* 3326 /*
3343 * Null message pointer means either we should skip 3327 * Null message pointer means either we should skip
@@ -3384,6 +3368,8 @@ static void ceph_msg_release(struct kref *kref)
3384 dout("%s %p\n", __func__, m); 3368 dout("%s %p\n", __func__, m);
3385 WARN_ON(!list_empty(&m->list_head)); 3369 WARN_ON(!list_empty(&m->list_head));
3386 3370
3371 msg_con_set(m, NULL);
3372
3387 /* drop middle, data, if any */ 3373 /* drop middle, data, if any */
3388 if (m->middle) { 3374 if (m->middle) {
3389 ceph_buffer_put(m->middle); 3375 ceph_buffer_put(m->middle);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 80b94e37c94a..f8f235930d88 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -120,11 +120,13 @@ static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
120} 120}
121#endif /* CONFIG_BLOCK */ 121#endif /* CONFIG_BLOCK */
122 122
123#define osd_req_op_data(oreq, whch, typ, fld) \ 123#define osd_req_op_data(oreq, whch, typ, fld) \
124 ({ \ 124({ \
125 BUG_ON(whch >= (oreq)->r_num_ops); \ 125 struct ceph_osd_request *__oreq = (oreq); \
126 &(oreq)->r_ops[whch].typ.fld; \ 126 unsigned int __whch = (whch); \
127 }) 127 BUG_ON(__whch >= __oreq->r_num_ops); \
128 &__oreq->r_ops[__whch].typ.fld; \
129})
128 130
129static struct ceph_osd_data * 131static struct ceph_osd_data *
130osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which) 132osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
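
The macro rewrite is about argument hygiene: the old body expanded whch twice (once in the BUG_ON(), once in the array index), so an argument with side effects was evaluated twice and the bounds check could test a different value than the one actually used. The ({ ... }) statement-expression form with __oreq/__whch locals evaluates each argument exactly once, as a function would. A contrived example of the bug class the old form allowed:

    /* with the old macro this increments i twice, and the BUG_ON()
     * checks a different value than the one used as the index */
    struct ceph_osd_data *osd_data =
            osd_req_op_data(req, i++, extent, osd_data);
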
@@ -285,6 +287,7 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
285 switch (op->op) { 287 switch (op->op) {
286 case CEPH_OSD_OP_READ: 288 case CEPH_OSD_OP_READ:
287 case CEPH_OSD_OP_WRITE: 289 case CEPH_OSD_OP_WRITE:
290 case CEPH_OSD_OP_WRITEFULL:
288 ceph_osd_data_release(&op->extent.osd_data); 291 ceph_osd_data_release(&op->extent.osd_data);
289 break; 292 break;
290 case CEPH_OSD_OP_CALL: 293 case CEPH_OSD_OP_CALL:
@@ -485,13 +488,14 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
485 size_t payload_len = 0; 488 size_t payload_len = 0;
486 489
487 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && 490 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
488 opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE); 491 opcode != CEPH_OSD_OP_WRITEFULL && opcode != CEPH_OSD_OP_ZERO &&
492 opcode != CEPH_OSD_OP_TRUNCATE);
489 493
490 op->extent.offset = offset; 494 op->extent.offset = offset;
491 op->extent.length = length; 495 op->extent.length = length;
492 op->extent.truncate_size = truncate_size; 496 op->extent.truncate_size = truncate_size;
493 op->extent.truncate_seq = truncate_seq; 497 op->extent.truncate_seq = truncate_seq;
494 if (opcode == CEPH_OSD_OP_WRITE) 498 if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL)
495 payload_len += length; 499 payload_len += length;
496 500
497 op->payload_len = payload_len; 501 op->payload_len = payload_len;
@@ -670,9 +674,11 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
670 break; 674 break;
671 case CEPH_OSD_OP_READ: 675 case CEPH_OSD_OP_READ:
672 case CEPH_OSD_OP_WRITE: 676 case CEPH_OSD_OP_WRITE:
677 case CEPH_OSD_OP_WRITEFULL:
673 case CEPH_OSD_OP_ZERO: 678 case CEPH_OSD_OP_ZERO:
674 case CEPH_OSD_OP_TRUNCATE: 679 case CEPH_OSD_OP_TRUNCATE:
675 if (src->op == CEPH_OSD_OP_WRITE) 680 if (src->op == CEPH_OSD_OP_WRITE ||
681 src->op == CEPH_OSD_OP_WRITEFULL)
676 request_data_len = src->extent.length; 682 request_data_len = src->extent.length;
677 dst->extent.offset = cpu_to_le64(src->extent.offset); 683 dst->extent.offset = cpu_to_le64(src->extent.offset);
678 dst->extent.length = cpu_to_le64(src->extent.length); 684 dst->extent.length = cpu_to_le64(src->extent.length);
@@ -681,7 +687,8 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
681 dst->extent.truncate_seq = 687 dst->extent.truncate_seq =
682 cpu_to_le32(src->extent.truncate_seq); 688 cpu_to_le32(src->extent.truncate_seq);
683 osd_data = &src->extent.osd_data; 689 osd_data = &src->extent.osd_data;
684 if (src->op == CEPH_OSD_OP_WRITE) 690 if (src->op == CEPH_OSD_OP_WRITE ||
691 src->op == CEPH_OSD_OP_WRITEFULL)
685 ceph_osdc_msg_data_add(req->r_request, osd_data); 692 ceph_osdc_msg_data_add(req->r_request, osd_data);
686 else 693 else
687 ceph_osdc_msg_data_add(req->r_reply, osd_data); 694 ceph_osdc_msg_data_add(req->r_reply, osd_data);
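
CEPH_OSD_OP_WRITEFULL -- a write that replaces the object's entire contents -- is threaded through the same extent paths as CEPH_OSD_OP_WRITE in all three hunks: data released on teardown, length counted as outgoing payload, and pages attached to the request rather than the reply message. A hedged sketch of setting one up through the existing helper, argument order as in the osd_req_op_extent_init() definition above; offset is 0 because the whole object is replaced:

    osd_req_op_extent_init(req, 0 /* which */, CEPH_OSD_OP_WRITEFULL,
                           0 /* offset */, len, 0, 0 /* truncate unused */);
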
@@ -1745,8 +1752,7 @@ static void complete_request(struct ceph_osd_request *req)
1745 * handle osd op reply. either call the callback if it is specified, 1752 * handle osd op reply. either call the callback if it is specified,
1746 * or do the completion to wake up the waiting thread. 1753 * or do the completion to wake up the waiting thread.
1747 */ 1754 */
1748static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, 1755static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1749 struct ceph_connection *con)
1750{ 1756{
1751 void *p, *end; 1757 void *p, *end;
1752 struct ceph_osd_request *req; 1758 struct ceph_osd_request *req;
@@ -2802,7 +2808,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
2802 ceph_osdc_handle_map(osdc, msg); 2808 ceph_osdc_handle_map(osdc, msg);
2803 break; 2809 break;
2804 case CEPH_MSG_OSD_OPREPLY: 2810 case CEPH_MSG_OSD_OPREPLY:
2805 handle_reply(osdc, msg, con); 2811 handle_reply(osdc, msg);
2806 break; 2812 break;
2807 case CEPH_MSG_WATCH_NOTIFY: 2813 case CEPH_MSG_WATCH_NOTIFY:
2808 handle_watch_notify(osdc, msg); 2814 handle_watch_notify(osdc, msg);
@@ -2844,9 +2850,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2844 goto out; 2850 goto out;
2845 } 2851 }
2846 2852
2847 if (req->r_reply->con)
2848 dout("%s revoking msg %p from old con %p\n", __func__,
2849 req->r_reply, req->r_reply->con);
2850 ceph_msg_revoke_incoming(req->r_reply); 2853 ceph_msg_revoke_incoming(req->r_reply);
2851 2854
2852 if (front_len > req->r_reply->front_alloc_len) { 2855 if (front_len > req->r_reply->front_alloc_len) {
@@ -2973,17 +2976,19 @@ static int invalidate_authorizer(struct ceph_connection *con)
2973 return ceph_monc_validate_auth(&osdc->client->monc); 2976 return ceph_monc_validate_auth(&osdc->client->monc);
2974} 2977}
2975 2978
2976static int sign_message(struct ceph_connection *con, struct ceph_msg *msg) 2979static int osd_sign_message(struct ceph_msg *msg)
2977{ 2980{
2978 struct ceph_osd *o = con->private; 2981 struct ceph_osd *o = msg->con->private;
2979 struct ceph_auth_handshake *auth = &o->o_auth; 2982 struct ceph_auth_handshake *auth = &o->o_auth;
2983
2980 return ceph_auth_sign_message(auth, msg); 2984 return ceph_auth_sign_message(auth, msg);
2981} 2985}
2982 2986
2983static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg) 2987static int osd_check_message_signature(struct ceph_msg *msg)
2984{ 2988{
2985 struct ceph_osd *o = con->private; 2989 struct ceph_osd *o = msg->con->private;
2986 struct ceph_auth_handshake *auth = &o->o_auth; 2990 struct ceph_auth_handshake *auth = &o->o_auth;
2991
2987 return ceph_auth_check_message_signature(auth, msg); 2992 return ceph_auth_check_message_signature(auth, msg);
2988} 2993}
2989 2994
@@ -2995,7 +3000,7 @@ static const struct ceph_connection_operations osd_con_ops = {
2995 .verify_authorizer_reply = verify_authorizer_reply, 3000 .verify_authorizer_reply = verify_authorizer_reply,
2996 .invalidate_authorizer = invalidate_authorizer, 3001 .invalidate_authorizer = invalidate_authorizer,
2997 .alloc_msg = alloc_msg, 3002 .alloc_msg = alloc_msg,
2998 .sign_message = sign_message, 3003 .sign_message = osd_sign_message,
2999 .check_message_signature = check_message_signature, 3004 .check_message_signature = osd_check_message_signature,
3000 .fault = osd_reset, 3005 .fault = osd_reset,
3001}; 3006};
diff --git a/net/core/dev.c b/net/core/dev.c
index 6bb6470f5b7b..ae00b894e675 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -99,6 +99,7 @@
99#include <linux/rtnetlink.h> 99#include <linux/rtnetlink.h>
100#include <linux/stat.h> 100#include <linux/stat.h>
101#include <net/dst.h> 101#include <net/dst.h>
102#include <net/dst_metadata.h>
102#include <net/pkt_sched.h> 103#include <net/pkt_sched.h>
103#include <net/checksum.h> 104#include <net/checksum.h>
104#include <net/xfrm.h> 105#include <net/xfrm.h>
@@ -682,6 +683,32 @@ int dev_get_iflink(const struct net_device *dev)
682EXPORT_SYMBOL(dev_get_iflink); 683EXPORT_SYMBOL(dev_get_iflink);
683 684
684/** 685/**
686 * dev_fill_metadata_dst - Retrieve tunnel egress information.
687 * @dev: targeted interface
688 * @skb: The packet.
689 *
 690 * For better visibility of tunnel traffic, OVS needs to retrieve
 691 * the egress tunnel information for a packet. The following API
 692 * allows a caller to get this info.

693 */
694int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
695{
696 struct ip_tunnel_info *info;
697
698 if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
699 return -EINVAL;
700
701 info = skb_tunnel_info_unclone(skb);
702 if (!info)
703 return -ENOMEM;
704 if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
705 return -EINVAL;
706
707 return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
708}
709EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
710
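
dev_fill_metadata_dst() itself only validates that the skb carries an unshared, TX-direction ip_tunnel_info and then defers to the device's ndo_fill_metadata_dst hook; tunnel drivers implement the hook to fill in the egress parameters. A sketch of the caller side, error values as in the helper above:

    int err = dev_fill_metadata_dst(dev, skb);
    if (!err)
            info = skb_tunnel_info(skb);    /* egress params now populated */
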
711/**
685 * __dev_get_by_name - find a device by its name 712 * __dev_get_by_name - find a device by its name
686 * @net: the applicable net namespace 713 * @net: the applicable net namespace
687 * @name: name to find 714 * @name: name to find
@@ -2376,17 +2403,20 @@ static void skb_warn_bad_offload(const struct sk_buff *skb)
2376{ 2403{
2377 static const netdev_features_t null_features = 0; 2404 static const netdev_features_t null_features = 0;
2378 struct net_device *dev = skb->dev; 2405 struct net_device *dev = skb->dev;
2379 const char *driver = ""; 2406 const char *name = "";
2380 2407
2381 if (!net_ratelimit()) 2408 if (!net_ratelimit())
2382 return; 2409 return;
2383 2410
2384 if (dev && dev->dev.parent) 2411 if (dev) {
2385 driver = dev_driver_string(dev->dev.parent); 2412 if (dev->dev.parent)
2386 2413 name = dev_driver_string(dev->dev.parent);
2414 else
2415 name = netdev_name(dev);
2416 }
2387 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d " 2417 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
2388 "gso_type=%d ip_summed=%d\n", 2418 "gso_type=%d ip_summed=%d\n",
2389 driver, dev ? &dev->features : &null_features, 2419 name, dev ? &dev->features : &null_features,
2390 skb->sk ? &skb->sk->sk_route_caps : &null_features, 2420 skb->sk ? &skb->sk->sk_route_caps : &null_features,
2391 skb->len, skb->data_len, skb_shinfo(skb)->gso_size, 2421 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
2392 skb_shinfo(skb)->gso_type, skb->ip_summed); 2422 skb_shinfo(skb)->gso_type, skb->ip_summed);
@@ -2915,9 +2945,11 @@ EXPORT_SYMBOL(xmit_recursion);
2915 2945
2916/** 2946/**
2917 * dev_loopback_xmit - loop back @skb 2947 * dev_loopback_xmit - loop back @skb
2948 * @net: network namespace this loopback is happening in
2949 * @sk: sk needed to be a netfilter okfn
2918 * @skb: buffer to transmit 2950 * @skb: buffer to transmit
2919 */ 2951 */
2920int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) 2952int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
2921{ 2953{
2922 skb_reset_mac_header(skb); 2954 skb_reset_mac_header(skb);
2923 __skb_pull(skb, skb_network_offset(skb)); 2955 __skb_pull(skb, skb_network_offset(skb));
@@ -2972,6 +3004,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
2972 new_index = skb_tx_hash(dev, skb); 3004 new_index = skb_tx_hash(dev, skb);
2973 3005
2974 if (queue_index != new_index && sk && 3006 if (queue_index != new_index && sk &&
3007 sk_fullsock(sk) &&
2975 rcu_access_pointer(sk->sk_dst_cache)) 3008 rcu_access_pointer(sk->sk_dst_cache))
2976 sk_tx_queue_set(sk, new_index); 3009 sk_tx_queue_set(sk, new_index);
2977 3010
@@ -3143,11 +3176,11 @@ out:
3143 return rc; 3176 return rc;
3144} 3177}
3145 3178
3146int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb) 3179int dev_queue_xmit(struct sk_buff *skb)
3147{ 3180{
3148 return __dev_queue_xmit(skb, NULL); 3181 return __dev_queue_xmit(skb, NULL);
3149} 3182}
3150EXPORT_SYMBOL(dev_queue_xmit_sk); 3183EXPORT_SYMBOL(dev_queue_xmit);
3151 3184
3152int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) 3185int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
3153{ 3186{
@@ -3668,6 +3701,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3668 case TC_ACT_QUEUED: 3701 case TC_ACT_QUEUED:
3669 kfree_skb(skb); 3702 kfree_skb(skb);
3670 return NULL; 3703 return NULL;
3704 case TC_ACT_REDIRECT:
3705 /* skb_mac_header check was done by cls/act_bpf, so
3706 * we can safely push the L2 header back before
3707 * redirecting to another netdev
3708 */
3709 __skb_push(skb, skb->mac_len);
3710 skb_do_redirect(skb);
3711 return NULL;
3671 default: 3712 default:
3672 break; 3713 break;
3673 } 3714 }
@@ -3982,13 +4023,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
3982 * NET_RX_SUCCESS: no congestion 4023 * NET_RX_SUCCESS: no congestion
3983 * NET_RX_DROP: packet was dropped 4024 * NET_RX_DROP: packet was dropped
3984 */ 4025 */
3985int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb) 4026int netif_receive_skb(struct sk_buff *skb)
3986{ 4027{
3987 trace_netif_receive_skb_entry(skb); 4028 trace_netif_receive_skb_entry(skb);
3988 4029
3989 return netif_receive_skb_internal(skb); 4030 return netif_receive_skb_internal(skb);
3990} 4031}
3991EXPORT_SYMBOL(netif_receive_skb_sk); 4032EXPORT_SYMBOL(netif_receive_skb);
3992 4033
3993/* Network device is going away, flush any packets still pending 4034/* Network device is going away, flush any packets still pending
3994 * Called with irqs disabled. 4035 * Called with irqs disabled.
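
These renames (netif_receive_skb_sk() back to netif_receive_skb(), dev_queue_xmit_sk() back to dev_queue_xmit()) undo wrappers that existed only so the functions could serve as netfilter continuations; now that the okfn prototype carries the net namespace and socket explicitly -- which is also why dev_loopback_xmit() above grew its net/sk parameters -- the suffixed variants have no reason to exist. The okfn shape assumed throughout, as a sketch:

    /* 4.4-era netfilter continuation signature */
    int (*okfn)(struct net *net, struct sock *sk, struct sk_buff *skb);
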
@@ -4857,8 +4898,7 @@ struct netdev_adjacent {
4857 struct rcu_head rcu; 4898 struct rcu_head rcu;
4858}; 4899};
4859 4900
4860static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, 4901static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
4861 struct net_device *adj_dev,
4862 struct list_head *adj_list) 4902 struct list_head *adj_list)
4863{ 4903{
4864 struct netdev_adjacent *adj; 4904 struct netdev_adjacent *adj;
@@ -4884,7 +4924,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
4884{ 4924{
4885 ASSERT_RTNL(); 4925 ASSERT_RTNL();
4886 4926
4887 return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper); 4927 return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
4888} 4928}
4889EXPORT_SYMBOL(netdev_has_upper_dev); 4929EXPORT_SYMBOL(netdev_has_upper_dev);
4890 4930
@@ -5146,7 +5186,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
5146 struct netdev_adjacent *adj; 5186 struct netdev_adjacent *adj;
5147 int ret; 5187 int ret;
5148 5188
5149 adj = __netdev_find_adj(dev, adj_dev, dev_list); 5189 adj = __netdev_find_adj(adj_dev, dev_list);
5150 5190
5151 if (adj) { 5191 if (adj) {
5152 adj->ref_nr++; 5192 adj->ref_nr++;
@@ -5202,7 +5242,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
5202{ 5242{
5203 struct netdev_adjacent *adj; 5243 struct netdev_adjacent *adj;
5204 5244
5205 adj = __netdev_find_adj(dev, adj_dev, dev_list); 5245 adj = __netdev_find_adj(adj_dev, dev_list);
5206 5246
5207 if (!adj) { 5247 if (!adj) {
5208 pr_err("tried to remove device %s from %s\n", 5248 pr_err("tried to remove device %s from %s\n",
@@ -5323,10 +5363,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,
5323 return -EBUSY; 5363 return -EBUSY;
5324 5364
5325 /* To prevent loops, check if dev is not upper device to upper_dev. */ 5365 /* To prevent loops, check if dev is not upper device to upper_dev. */
5326 if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) 5366 if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
5327 return -EBUSY; 5367 return -EBUSY;
5328 5368
5329 if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper)) 5369 if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
5330 return -EEXIST; 5370 return -EEXIST;
5331 5371
5332 if (master && netdev_master_upper_dev_get(dev)) 5372 if (master && netdev_master_upper_dev_get(dev))
@@ -5336,6 +5376,12 @@ static int __netdev_upper_dev_link(struct net_device *dev,
5336 changeupper_info.master = master; 5376 changeupper_info.master = master;
5337 changeupper_info.linking = true; 5377 changeupper_info.linking = true;
5338 5378
5379 ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
5380 &changeupper_info.info);
5381 ret = notifier_to_errno(ret);
5382 if (ret)
5383 return ret;
5384
5339 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, 5385 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
5340 master); 5386 master);
5341 if (ret) 5387 if (ret)
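
NETDEV_PRECHANGEUPPER fires before any adjacency state is touched, and the notifier_to_errno() conversion lets any listener veto the link; the unlink hunk below broadcasts the same event but ignores the return value, since an unlink must not fail. A minimal sketch of a driver-side veto -- the handler and is_foo_dev() are hypothetical:

    static int foo_netdev_event(struct notifier_block *nb,
                                unsigned long event, void *ptr)
    {
            struct net_device *dev = netdev_notifier_info_to_dev(ptr);

            if (event == NETDEV_PRECHANGEUPPER && is_foo_dev(dev))
                    return notifier_from_errno(-EOPNOTSUPP);
            return NOTIFY_DONE;
    }
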
@@ -5478,6 +5524,9 @@ void netdev_upper_dev_unlink(struct net_device *dev,
5478 changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; 5524 changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
5479 changeupper_info.linking = false; 5525 changeupper_info.linking = false;
5480 5526
5527 call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
5528 &changeupper_info.info);
5529
5481 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); 5530 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
5482 5531
5483 /* Here is the tricky part. We must remove all dev's lower 5532 /* Here is the tricky part. We must remove all dev's lower
@@ -5604,7 +5653,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
5604 5653
5605 if (!lower_dev) 5654 if (!lower_dev)
5606 return NULL; 5655 return NULL;
5607 lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower); 5656 lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
5608 if (!lower) 5657 if (!lower)
5609 return NULL; 5658 return NULL;
5610 5659
@@ -6242,6 +6291,48 @@ static void rollback_registered(struct net_device *dev)
6242 list_del(&single); 6291 list_del(&single);
6243} 6292}
6244 6293
6294static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
6295 struct net_device *upper, netdev_features_t features)
6296{
6297 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
6298 netdev_features_t feature;
6299 int feature_bit;
6300
6301 for_each_netdev_feature(&upper_disables, feature_bit) {
6302 feature = __NETIF_F_BIT(feature_bit);
6303 if (!(upper->wanted_features & feature)
6304 && (features & feature)) {
6305 netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n",
6306 &feature, upper->name);
6307 features &= ~feature;
6308 }
6309 }
6310
6311 return features;
6312}
6313
6314static void netdev_sync_lower_features(struct net_device *upper,
6315 struct net_device *lower, netdev_features_t features)
6316{
6317 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
6318 netdev_features_t feature;
6319 int feature_bit;
6320
6321 for_each_netdev_feature(&upper_disables, feature_bit) {
6322 feature = __NETIF_F_BIT(feature_bit);
6323 if (!(features & feature) && (lower->features & feature)) {
6324 netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
6325 &feature, lower->name);
6326 lower->wanted_features &= ~feature;
6327 netdev_update_features(lower);
6328
6329 if (unlikely(lower->features & feature))
6330 netdev_WARN(upper, "failed to disable %pNF on %s!\n",
6331 &feature, lower->name);
6332 }
6333 }
6334}
6335
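
Both helpers iterate the NETIF_F_UPPER_DISABLES mask: features that, once turned off on a stacked device such as a bond or bridge, must also be forced off on every device below it. At this point the set is effectively just LRO -- a sketch of the definition this code assumes, with the authoritative one in netdev_features.h:

    /* features that are off on an upper device must be kept off
     * on all lower devices too */
    #define NETIF_F_UPPER_DISABLES  NETIF_F_LRO

for_each_netdev_feature() walks the set bits, so the loops generalize unchanged if more features join the mask.
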
6245static netdev_features_t netdev_fix_features(struct net_device *dev, 6336static netdev_features_t netdev_fix_features(struct net_device *dev,
6246 netdev_features_t features) 6337 netdev_features_t features)
6247{ 6338{
@@ -6311,8 +6402,10 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
6311 6402
6312int __netdev_update_features(struct net_device *dev) 6403int __netdev_update_features(struct net_device *dev)
6313{ 6404{
6405 struct net_device *upper, *lower;
6314 netdev_features_t features; 6406 netdev_features_t features;
6315 int err = 0; 6407 struct list_head *iter;
6408 int err = -1;
6316 6409
6317 ASSERT_RTNL(); 6410 ASSERT_RTNL();
6318 6411
@@ -6324,26 +6417,42 @@ int __netdev_update_features(struct net_device *dev)
6324 /* driver might be less strict about feature dependencies */ 6417 /* driver might be less strict about feature dependencies */
6325 features = netdev_fix_features(dev, features); 6418 features = netdev_fix_features(dev, features);
6326 6419
 6420 /* some features can't be enabled if they're off on an upper device */
6421 netdev_for_each_upper_dev_rcu(dev, upper, iter)
6422 features = netdev_sync_upper_features(dev, upper, features);
6423
6327 if (dev->features == features) 6424 if (dev->features == features)
6328 return 0; 6425 goto sync_lower;
6329 6426
6330 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", 6427 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
6331 &dev->features, &features); 6428 &dev->features, &features);
6332 6429
6333 if (dev->netdev_ops->ndo_set_features) 6430 if (dev->netdev_ops->ndo_set_features)
6334 err = dev->netdev_ops->ndo_set_features(dev, features); 6431 err = dev->netdev_ops->ndo_set_features(dev, features);
6432 else
6433 err = 0;
6335 6434
6336 if (unlikely(err < 0)) { 6435 if (unlikely(err < 0)) {
6337 netdev_err(dev, 6436 netdev_err(dev,
6338 "set_features() failed (%d); wanted %pNF, left %pNF\n", 6437 "set_features() failed (%d); wanted %pNF, left %pNF\n",
6339 err, &features, &dev->features); 6438 err, &features, &dev->features);
6439 /* return non-0 since some features might have changed and
6440 * it's better to fire a spurious notification than miss it
6441 */
6340 return -1; 6442 return -1;
6341 } 6443 }
6342 6444
6445sync_lower:
6446 /* some features must be disabled on lower devices when disabled
6447 * on an upper device (think: bonding master or bridge)
6448 */
6449 netdev_for_each_lower_dev(dev, lower, iter)
6450 netdev_sync_lower_features(dev, lower, features);
6451
6343 if (!err) 6452 if (!err)
6344 dev->features = features; 6453 dev->features = features;
6345 6454
6346 return 1; 6455 return err < 0 ? 0 : 1;
6347} 6456}
6348 6457
6349/** 6458/**
diff --git a/net/core/dst.c b/net/core/dst.c
index 0771c8cb9307..e6dc77252fe9 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -144,12 +144,12 @@ loop:
144 mutex_unlock(&dst_gc_mutex); 144 mutex_unlock(&dst_gc_mutex);
145} 145}
146 146
147int dst_discard_sk(struct sock *sk, struct sk_buff *skb) 147int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
148{ 148{
149 kfree_skb(skb); 149 kfree_skb(skb);
150 return 0; 150 return 0;
151} 151}
152EXPORT_SYMBOL(dst_discard_sk); 152EXPORT_SYMBOL(dst_discard_out);
153 153
154const u32 dst_default_metrics[RTAX_MAX + 1] = { 154const u32 dst_default_metrics[RTAX_MAX + 1] = {
155 /* This initializer is needed to force linker to place this variable 155 /* This initializer is needed to force linker to place this variable
@@ -177,7 +177,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
177 dst->xfrm = NULL; 177 dst->xfrm = NULL;
178#endif 178#endif
179 dst->input = dst_discard; 179 dst->input = dst_discard;
180 dst->output = dst_discard_sk; 180 dst->output = dst_discard_out;
181 dst->error = 0; 181 dst->error = 0;
182 dst->obsolete = initial_obsolete; 182 dst->obsolete = initial_obsolete;
183 dst->header_len = 0; 183 dst->header_len = 0;
@@ -224,7 +224,7 @@ static void ___dst_free(struct dst_entry *dst)
224 */ 224 */
225 if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { 225 if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
226 dst->input = dst_discard; 226 dst->input = dst_discard;
227 dst->output = dst_discard_sk; 227 dst->output = dst_discard_out;
228 } 228 }
229 dst->obsolete = DST_OBSOLETE_DEAD; 229 dst->obsolete = DST_OBSOLETE_DEAD;
230} 230}
@@ -306,7 +306,7 @@ void dst_release(struct dst_entry *dst)
306 if (unlikely(newrefcnt < 0)) 306 if (unlikely(newrefcnt < 0))
307 net_warn_ratelimited("%s: dst:%p refcnt:%d\n", 307 net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
308 __func__, dst, newrefcnt); 308 __func__, dst, newrefcnt);
309 if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) 309 if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE))
310 call_rcu(&dst->rcu_head, dst_destroy_rcu); 310 call_rcu(&dst->rcu_head, dst_destroy_rcu);
311 } 311 }
312} 312}
@@ -352,7 +352,7 @@ static struct dst_ops md_dst_ops = {
352 .family = AF_UNSPEC, 352 .family = AF_UNSPEC,
353}; 353};
354 354
355static int dst_md_discard_sk(struct sock *sk, struct sk_buff *skb) 355static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
356{ 356{
357 WARN_ONCE(1, "Attempting to call output on metadata dst\n"); 357 WARN_ONCE(1, "Attempting to call output on metadata dst\n");
358 kfree_skb(skb); 358 kfree_skb(skb);
@@ -375,7 +375,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen)
375 DST_METADATA | DST_NOCACHE | DST_NOCOUNT); 375 DST_METADATA | DST_NOCACHE | DST_NOCOUNT);
376 376
377 dst->input = dst_md_discard; 377 dst->input = dst_md_discard;
378 dst->output = dst_md_discard_sk; 378 dst->output = dst_md_discard_out;
379 379
380 memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); 380 memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
381} 381}
@@ -430,7 +430,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
430 430
431 if (!unregister) { 431 if (!unregister) {
432 dst->input = dst_discard; 432 dst->input = dst_discard;
433 dst->output = dst_discard_sk; 433 dst->output = dst_discard_out;
434 } else { 434 } else {
435 dst->dev = dev_net(dst->dev)->loopback_dev; 435 dst->dev = dev_net(dst->dev)->loopback_dev;
436 dev_hold(dst->dev); 436 dev_hold(dst->dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index b495ab1797fa..29edf74846fc 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1284,7 +1284,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
1284 1284
1285 gstrings.len = ret; 1285 gstrings.len = ret;
1286 1286
1287 data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); 1287 data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER);
1288 if (!data) 1288 if (!data)
1289 return -ENOMEM; 1289 return -ENOMEM;
1290 1290
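
kcalloc() is used here for its overflow check, not for the zeroing: gstrings.len comes back from the driver, and in the old call the multiplication happened at the call site, where it could wrap before kmalloc() ever saw it, yielding an undersized buffer. Illustratively:

    /* on 32-bit, len * ETH_GSTRING_LEN can wrap for a huge len */
    data = kmalloc(len * ETH_GSTRING_LEN, GFP_USER);  /* old: may wrap */

    /* kcalloc() validates the multiplication and fails cleanly */
    data = kcalloc(len, ETH_GSTRING_LEN, GFP_USER);
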
diff --git a/net/core/filter.c b/net/core/filter.c
index 05a04ea87172..672eefbfbe99 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -49,16 +49,17 @@
49#include <net/sch_generic.h> 49#include <net/sch_generic.h>
50#include <net/cls_cgroup.h> 50#include <net/cls_cgroup.h>
51#include <net/dst_metadata.h> 51#include <net/dst_metadata.h>
52#include <net/dst.h>
52 53
53/** 54/**
54 * sk_filter - run a packet through a socket filter 55 * sk_filter - run a packet through a socket filter
55 * @sk: sock associated with &sk_buff 56 * @sk: sock associated with &sk_buff
56 * @skb: buffer to filter 57 * @skb: buffer to filter
57 * 58 *
58 * Run the filter code and then cut skb->data to correct size returned by 59 * Run the eBPF program and then cut skb->data to correct size returned by
59 * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller 60 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
60 * than pkt_len we keep whole skb->data. This is the socket level 61 * than pkt_len we keep whole skb->data. This is the socket level
61 * wrapper to SK_RUN_FILTER. It returns 0 if the packet should 62 * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
62 * be accepted or -EPERM if the packet should be tossed. 63 * be accepted or -EPERM if the packet should be tossed.
63 * 64 *
64 */ 65 */
@@ -82,7 +83,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
82 rcu_read_lock(); 83 rcu_read_lock();
83 filter = rcu_dereference(sk->sk_filter); 84 filter = rcu_dereference(sk->sk_filter);
84 if (filter) { 85 if (filter) {
85 unsigned int pkt_len = SK_RUN_FILTER(filter, skb); 86 unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
86 87
87 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; 88 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
88 } 89 }
@@ -148,12 +149,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
148 return raw_smp_processor_id(); 149 return raw_smp_processor_id();
149} 150}
150 151
151/* note that this only generates 32-bit random numbers */
152static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
153{
154 return prandom_u32();
155}
156
157static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, 152static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
158 struct bpf_insn *insn_buf) 153 struct bpf_insn *insn_buf)
159{ 154{
@@ -312,7 +307,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
312 *insn = BPF_EMIT_CALL(__get_raw_cpu_id); 307 *insn = BPF_EMIT_CALL(__get_raw_cpu_id);
313 break; 308 break;
314 case SKF_AD_OFF + SKF_AD_RANDOM: 309 case SKF_AD_OFF + SKF_AD_RANDOM:
315 *insn = BPF_EMIT_CALL(__get_random_u32); 310 *insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
311 bpf_user_rnd_init_once();
316 break; 312 break;
317 } 313 }
318 break; 314 break;
@@ -1001,7 +997,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
1001 int err; 997 int err;
1002 998
1003 fp->bpf_func = NULL; 999 fp->bpf_func = NULL;
1004 fp->jited = false; 1000 fp->jited = 0;
1005 1001
1006 err = bpf_check_classic(fp->insns, fp->len); 1002 err = bpf_check_classic(fp->insns, fp->len);
1007 if (err) { 1003 if (err) {
@@ -1083,16 +1079,18 @@ EXPORT_SYMBOL_GPL(bpf_prog_create);
1083 * @pfp: the unattached filter that is created 1079 * @pfp: the unattached filter that is created
1084 * @fprog: the filter program 1080 * @fprog: the filter program
1085 * @trans: post-classic verifier transformation handler 1081 * @trans: post-classic verifier transformation handler
1082 * @save_orig: save classic BPF program
1086 * 1083 *
1087 * This function effectively does the same as bpf_prog_create(), only 1084 * This function effectively does the same as bpf_prog_create(), only
1088 * that it builds up its insns buffer from user space provided buffer. 1085 * that it builds up its insns buffer from user space provided buffer.
1089 * It also allows for passing a bpf_aux_classic_check_t handler. 1086 * It also allows for passing a bpf_aux_classic_check_t handler.
1090 */ 1087 */
1091int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, 1088int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
1092 bpf_aux_classic_check_t trans) 1089 bpf_aux_classic_check_t trans, bool save_orig)
1093{ 1090{
1094 unsigned int fsize = bpf_classic_proglen(fprog); 1091 unsigned int fsize = bpf_classic_proglen(fprog);
1095 struct bpf_prog *fp; 1092 struct bpf_prog *fp;
1093 int err;
1096 1094
1097 /* Make sure new filter is there and in the right amounts. */ 1095 /* Make sure new filter is there and in the right amounts. */
1098 if (fprog->filter == NULL) 1096 if (fprog->filter == NULL)
@@ -1108,12 +1106,16 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
1108 } 1106 }
1109 1107
1110 fp->len = fprog->len; 1108 fp->len = fprog->len;
1111 /* Since unattached filters are not copied back to user
1112 * space through sk_get_filter(), we do not need to hold
1113 * a copy here, and can spare us the work.
1114 */
1115 fp->orig_prog = NULL; 1109 fp->orig_prog = NULL;
1116 1110
1111 if (save_orig) {
1112 err = bpf_prog_store_orig_filter(fp, fprog);
1113 if (err) {
1114 __bpf_prog_free(fp);
1115 return -ENOMEM;
1116 }
1117 }
1118
1117 /* bpf_prepare_filter() already takes care of freeing 1119 /* bpf_prepare_filter() already takes care of freeing
1118 * memory in case something goes wrong. 1120 * memory in case something goes wrong.
1119 */ 1121 */
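
The new save_orig flag lets a caller of bpf_prog_create_from_user() keep the classic instructions around (via bpf_prog_store_orig_filter()), which is what makes the program dumpable later; the sk_get_filter() hunk below returns -EACCES for programs that kept no original. A hedged usage sketch:

    /* build an unattached filter and retain the classic insns so
     * they can be reported back to user space later */
    err = bpf_prog_create_from_user(&prog, &fprog, trans, true);
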
@@ -1404,9 +1406,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
1404 if (unlikely(!dev)) 1406 if (unlikely(!dev))
1405 return -EINVAL; 1407 return -EINVAL;
1406 1408
1407 if (unlikely(!(dev->flags & IFF_UP)))
1408 return -EINVAL;
1409
1410 skb2 = skb_clone(skb, GFP_ATOMIC); 1409 skb2 = skb_clone(skb, GFP_ATOMIC);
1411 if (unlikely(!skb2)) 1410 if (unlikely(!skb2))
1412 return -ENOMEM; 1411 return -ENOMEM;
@@ -1415,6 +1414,7 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
1415 return dev_forward_skb(dev, skb2); 1414 return dev_forward_skb(dev, skb2);
1416 1415
1417 skb2->dev = dev; 1416 skb2->dev = dev;
1417 skb_sender_cpu_clear(skb2);
1418 return dev_queue_xmit(skb2); 1418 return dev_queue_xmit(skb2);
1419} 1419}
1420 1420
@@ -1427,6 +1427,49 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
1427 .arg3_type = ARG_ANYTHING, 1427 .arg3_type = ARG_ANYTHING,
1428}; 1428};
1429 1429
1430struct redirect_info {
1431 u32 ifindex;
1432 u32 flags;
1433};
1434
1435static DEFINE_PER_CPU(struct redirect_info, redirect_info);
1436static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
1437{
1438 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
1439
1440 ri->ifindex = ifindex;
1441 ri->flags = flags;
1442 return TC_ACT_REDIRECT;
1443}
1444
1445int skb_do_redirect(struct sk_buff *skb)
1446{
1447 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
1448 struct net_device *dev;
1449
1450 dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
1451 ri->ifindex = 0;
1452 if (unlikely(!dev)) {
1453 kfree_skb(skb);
1454 return -EINVAL;
1455 }
1456
1457 if (BPF_IS_REDIRECT_INGRESS(ri->flags))
1458 return dev_forward_skb(dev, skb);
1459
1460 skb->dev = dev;
1461 skb_sender_cpu_clear(skb);
1462 return dev_queue_xmit(skb);
1463}
1464
1465const struct bpf_func_proto bpf_redirect_proto = {
1466 .func = bpf_redirect,
1467 .gpl_only = false,
1468 .ret_type = RET_INTEGER,
1469 .arg1_type = ARG_ANYTHING,
1470 .arg2_type = ARG_ANYTHING,
1471};
1472
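
bpf_redirect() deliberately does almost nothing in the helper itself: it stashes the target ifindex and flags in per-cpu redirect_info and returns TC_ACT_REDIRECT, and the actual forwarding happens exactly once after the program finishes, when handle_ing() (hunk in net/core/dev.c above) pushes the MAC header back and calls skb_do_redirect(). A hedged sketch of a cls_bpf program using it -- SEC() comes from the usual BPF helper headers and IFINDEX_TARGET is a placeholder:

    SEC("classifier")
    int redirect_prog(struct __sk_buff *skb)
    {
            /* flags 0 = egress on the target; an ingress flag would
             * make skb_do_redirect() use dev_forward_skb() instead */
            return bpf_redirect(IFINDEX_TARGET, 0);
    }
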
1430static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) 1473static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1431{ 1474{
1432 return task_get_classid((struct sk_buff *) (unsigned long) r1); 1475 return task_get_classid((struct sk_buff *) (unsigned long) r1);
@@ -1439,6 +1482,25 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
1439 .arg1_type = ARG_PTR_TO_CTX, 1482 .arg1_type = ARG_PTR_TO_CTX,
1440}; 1483};
1441 1484
1485static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1486{
1487#ifdef CONFIG_IP_ROUTE_CLASSID
1488 const struct dst_entry *dst;
1489
1490 dst = skb_dst((struct sk_buff *) (unsigned long) r1);
1491 if (dst)
1492 return dst->tclassid;
1493#endif
1494 return 0;
1495}
1496
1497static const struct bpf_func_proto bpf_get_route_realm_proto = {
1498 .func = bpf_get_route_realm,
1499 .gpl_only = false,
1500 .ret_type = RET_INTEGER,
1501 .arg1_type = ARG_PTR_TO_CTX,
1502};
1503
1442static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) 1504static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
1443{ 1505{
1444 struct sk_buff *skb = (struct sk_buff *) (long) r1; 1506 struct sk_buff *skb = (struct sk_buff *) (long) r1;
@@ -1579,7 +1641,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
1579 case BPF_FUNC_ktime_get_ns: 1641 case BPF_FUNC_ktime_get_ns:
1580 return &bpf_ktime_get_ns_proto; 1642 return &bpf_ktime_get_ns_proto;
1581 case BPF_FUNC_trace_printk: 1643 case BPF_FUNC_trace_printk:
1582 return bpf_get_trace_printk_proto(); 1644 if (capable(CAP_SYS_ADMIN))
1645 return bpf_get_trace_printk_proto();
1583 default: 1646 default:
1584 return NULL; 1647 return NULL;
1585 } 1648 }
@@ -1607,6 +1670,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
1607 return &bpf_skb_get_tunnel_key_proto; 1670 return &bpf_skb_get_tunnel_key_proto;
1608 case BPF_FUNC_skb_set_tunnel_key: 1671 case BPF_FUNC_skb_set_tunnel_key:
1609 return bpf_get_skb_set_tunnel_key_proto(); 1672 return bpf_get_skb_set_tunnel_key_proto();
1673 case BPF_FUNC_redirect:
1674 return &bpf_redirect_proto;
1675 case BPF_FUNC_get_route_realm:
1676 return &bpf_get_route_realm_proto;
1610 default: 1677 default:
1611 return sk_filter_func_proto(func_id); 1678 return sk_filter_func_proto(func_id);
1612 } 1679 }
@@ -1632,6 +1699,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type)
1632static bool sk_filter_is_valid_access(int off, int size, 1699static bool sk_filter_is_valid_access(int off, int size,
1633 enum bpf_access_type type) 1700 enum bpf_access_type type)
1634{ 1701{
1702 if (off == offsetof(struct __sk_buff, tc_classid))
1703 return false;
1704
1635 if (type == BPF_WRITE) { 1705 if (type == BPF_WRITE) {
1636 switch (off) { 1706 switch (off) {
1637 case offsetof(struct __sk_buff, cb[0]) ... 1707 case offsetof(struct __sk_buff, cb[0]) ...
@@ -1648,10 +1718,14 @@ static bool sk_filter_is_valid_access(int off, int size,
1648static bool tc_cls_act_is_valid_access(int off, int size, 1718static bool tc_cls_act_is_valid_access(int off, int size,
1649 enum bpf_access_type type) 1719 enum bpf_access_type type)
1650{ 1720{
1721 if (off == offsetof(struct __sk_buff, tc_classid))
1722 return type == BPF_WRITE ? true : false;
1723
1651 if (type == BPF_WRITE) { 1724 if (type == BPF_WRITE) {
1652 switch (off) { 1725 switch (off) {
1653 case offsetof(struct __sk_buff, mark): 1726 case offsetof(struct __sk_buff, mark):
1654 case offsetof(struct __sk_buff, tc_index): 1727 case offsetof(struct __sk_buff, tc_index):
1728 case offsetof(struct __sk_buff, priority):
1655 case offsetof(struct __sk_buff, cb[0]) ... 1729 case offsetof(struct __sk_buff, cb[0]) ...
1656 offsetof(struct __sk_buff, cb[4]): 1730 offsetof(struct __sk_buff, cb[4]):
1657 break; 1731 break;
@@ -1664,7 +1738,8 @@ static bool tc_cls_act_is_valid_access(int off, int size,
1664 1738
1665static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, 1739static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
1666 int src_reg, int ctx_off, 1740 int src_reg, int ctx_off,
1667 struct bpf_insn *insn_buf) 1741 struct bpf_insn *insn_buf,
1742 struct bpf_prog *prog)
1668{ 1743{
1669 struct bpf_insn *insn = insn_buf; 1744 struct bpf_insn *insn = insn_buf;
1670 1745
@@ -1693,8 +1768,12 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
1693 case offsetof(struct __sk_buff, priority): 1768 case offsetof(struct __sk_buff, priority):
1694 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); 1769 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
1695 1770
1696 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, 1771 if (type == BPF_WRITE)
1697 offsetof(struct sk_buff, priority)); 1772 *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
1773 offsetof(struct sk_buff, priority));
1774 else
1775 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
1776 offsetof(struct sk_buff, priority));
1698 break; 1777 break;
1699 1778
1700 case offsetof(struct __sk_buff, ingress_ifindex): 1779 case offsetof(struct __sk_buff, ingress_ifindex):
@@ -1751,6 +1830,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
1751 offsetof(struct __sk_buff, cb[4]): 1830 offsetof(struct __sk_buff, cb[4]):
1752 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); 1831 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
1753 1832
1833 prog->cb_access = 1;
1754 ctx_off -= offsetof(struct __sk_buff, cb[0]); 1834 ctx_off -= offsetof(struct __sk_buff, cb[0]);
1755 ctx_off += offsetof(struct sk_buff, cb); 1835 ctx_off += offsetof(struct sk_buff, cb);
1756 ctx_off += offsetof(struct qdisc_skb_cb, data); 1836 ctx_off += offsetof(struct qdisc_skb_cb, data);
@@ -1760,6 +1840,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
1760 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); 1840 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
1761 break; 1841 break;
1762 1842
1843 case offsetof(struct __sk_buff, tc_classid):
1844 ctx_off -= offsetof(struct __sk_buff, tc_classid);
1845 ctx_off += offsetof(struct sk_buff, cb);
1846 ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
1847 WARN_ON(type != BPF_WRITE);
1848 *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
1849 break;
1850
1763 case offsetof(struct __sk_buff, tc_index): 1851 case offsetof(struct __sk_buff, tc_index):
1764#ifdef CONFIG_NET_SCHED 1852#ifdef CONFIG_NET_SCHED
1765 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); 1853 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
@@ -1854,9 +1942,13 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
1854 goto out; 1942 goto out;
1855 1943
1856 /* We're copying the filter that has been originally attached, 1944 /* We're copying the filter that has been originally attached,
1857 * so no conversion/decode needed anymore. 1945 * so no conversion/decode needed anymore. eBPF programs that
1946 * have no original program cannot be dumped through this.
1858 */ 1947 */
1948 ret = -EACCES;
1859 fprog = filter->prog->orig_prog; 1949 fprog = filter->prog->orig_prog;
1950 if (!fprog)
1951 goto out;
1860 1952
1861 ret = fprog->len; 1953 ret = fprog->len;
1862 if (!len) 1954 if (!len)
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index dfb1a9ca0835..299cfc24d888 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -180,7 +180,7 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
180} 180}
181EXPORT_SYMBOL(lwtunnel_cmp_encap); 181EXPORT_SYMBOL(lwtunnel_cmp_encap);
182 182
183int lwtunnel_output(struct sock *sk, struct sk_buff *skb) 183int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
184{ 184{
185 struct dst_entry *dst = skb_dst(skb); 185 struct dst_entry *dst = skb_dst(skb);
186 const struct lwtunnel_encap_ops *ops; 186 const struct lwtunnel_encap_ops *ops;
@@ -199,7 +199,7 @@ int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
199 rcu_read_lock(); 199 rcu_read_lock();
200 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 200 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
201 if (likely(ops && ops->output)) 201 if (likely(ops && ops->output))
202 ret = ops->output(sk, skb); 202 ret = ops->output(net, sk, skb);
203 rcu_read_unlock(); 203 rcu_read_unlock();
204 204
205 if (ret == -EOPNOTSUPP) 205 if (ret == -EOPNOTSUPP)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2b515ba7e94f..e6af42da28d9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -857,7 +857,7 @@ static void neigh_probe(struct neighbour *neigh)
857 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue); 857 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
858 /* keep skb alive even if arp_queue overflows */ 858 /* keep skb alive even if arp_queue overflows */
859 if (skb) 859 if (skb)
860 skb = skb_copy(skb, GFP_ATOMIC); 860 skb = skb_clone(skb, GFP_ATOMIC);
861 write_unlock(&neigh->lock); 861 write_unlock(&neigh->lock);
862 neigh->ops->solicit(neigh, skb); 862 neigh->ops->solicit(neigh, skb);
863 atomic_inc(&neigh->probes); 863 atomic_inc(&neigh->probes);
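
neigh_probe() only needs the queued skb to stay alive across the unlock so solicit() can read addresses out of it; skb_clone() shares the (here read-only) data buffer and duplicates just the sk_buff metadata, where skb_copy() duplicated the full packet. The distinction in miniature:

    struct sk_buff *c = skb_clone(skb, GFP_ATOMIC); /* shared data, cheap */
    struct sk_buff *k = skb_copy(skb, GFP_ATOMIC);  /* private data, only
                                                     * needed by writers */
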
@@ -2235,14 +2235,53 @@ static void neigh_update_notify(struct neighbour *neigh)
2235 __neigh_notify(neigh, RTM_NEWNEIGH, 0); 2235 __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2236} 2236}
2237 2237
2238static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2239{
2240 struct net_device *master;
2241
2242 if (!master_idx)
2243 return false;
2244
2245 master = netdev_master_upper_dev_get(dev);
2246 if (!master || master->ifindex != master_idx)
2247 return true;
2248
2249 return false;
2250}
2251
2252static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2253{
2254 if (filter_idx && dev->ifindex != filter_idx)
2255 return true;
2256
2257 return false;
2258}
2259
2238static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2260static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2239 struct netlink_callback *cb) 2261 struct netlink_callback *cb)
2240{ 2262{
2241 struct net *net = sock_net(skb->sk); 2263 struct net *net = sock_net(skb->sk);
2264 const struct nlmsghdr *nlh = cb->nlh;
2265 struct nlattr *tb[NDA_MAX + 1];
2242 struct neighbour *n; 2266 struct neighbour *n;
2243 int rc, h, s_h = cb->args[1]; 2267 int rc, h, s_h = cb->args[1];
2244 int idx, s_idx = idx = cb->args[2]; 2268 int idx, s_idx = idx = cb->args[2];
2245 struct neigh_hash_table *nht; 2269 struct neigh_hash_table *nht;
2270 int filter_master_idx = 0, filter_idx = 0;
2271 unsigned int flags = NLM_F_MULTI;
2272 int err;
2273
2274 err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL);
2275 if (!err) {
2276 if (tb[NDA_IFINDEX])
2277 filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
2278
2279 if (tb[NDA_MASTER])
2280 filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
2281
2282 if (filter_idx || filter_master_idx)
2283 flags |= NLM_F_DUMP_FILTERED;
2284 }
2246 2285
2247 rcu_read_lock_bh(); 2286 rcu_read_lock_bh();
2248 nht = rcu_dereference_bh(tbl->nht); 2287 nht = rcu_dereference_bh(tbl->nht);
@@ -2255,12 +2294,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2255 n = rcu_dereference_bh(n->next)) { 2294 n = rcu_dereference_bh(n->next)) {
2256 if (!net_eq(dev_net(n->dev), net)) 2295 if (!net_eq(dev_net(n->dev), net))
2257 continue; 2296 continue;
2297 if (neigh_ifindex_filtered(n->dev, filter_idx))
2298 continue;
2299 if (neigh_master_filtered(n->dev, filter_master_idx))
2300 continue;
2258 if (idx < s_idx) 2301 if (idx < s_idx)
2259 goto next; 2302 goto next;
2260 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, 2303 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2261 cb->nlh->nlmsg_seq, 2304 cb->nlh->nlmsg_seq,
2262 RTM_NEWNEIGH, 2305 RTM_NEWNEIGH,
2263 NLM_F_MULTI) < 0) { 2306 flags) < 0) {
2264 rc = -1; 2307 rc = -1;
2265 goto out; 2308 goto out;
2266 } 2309 }
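
The dump handler now parses filter attributes off the request and marks filtered replies with NLM_F_DUMP_FILTERED, so user space can tell kernel-side filtering from a full dump it must post-filter itself. A hedged sketch of the request-side payload layout; real code must also fill seq/pid and align attributes:

    struct {
            struct nlmsghdr nlh;
            struct ndmsg    ndm;
            struct nlattr   nla;
            __u32           master_ifindex;         /* placeholder */
    } req = {
            .nlh.nlmsg_len   = sizeof(req),
            .nlh.nlmsg_type  = RTM_GETNEIGH,
            .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
            .nla.nla_type    = NDA_MASTER,
            .nla.nla_len     = NLA_HDRLEN + sizeof(__u32),
    };
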
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 830f8a7c1cb1..f88a62ab019d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -471,7 +471,7 @@ static ssize_t phys_switch_id_show(struct device *dev,
 
 	if (dev_isalive(netdev)) {
 		struct switchdev_attr attr = {
-			.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+			.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
 			.flags = SWITCHDEV_F_NO_RECURSE,
 		};
 
@@ -1003,15 +1003,12 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
 }
 
 #ifdef CONFIG_XPS
-static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 {
 	struct net_device *dev = queue->dev;
-	int i;
-
-	for (i = 0; i < dev->num_tx_queues; i++)
-		if (queue == &dev->_tx[i])
-			break;
+	unsigned int i;
 
+	i = queue - dev->_tx;
 	BUG_ON(i >= dev->num_tx_queues);
 
 	return i;
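The O(n) scan can go away because ->_tx is one contiguous array of netdev_queue, so the index falls out of pointer arithmetic:

	/* queue == &dev->_tx[i]  implies  i == queue - dev->_tx,
	 * turning the old O(num_tx_queues) loop into an O(1) subtraction;
	 * the BUG_ON() keeps the old bounds check.
	 */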
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 8bdada242a7d..94acfc89ad97 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -140,7 +140,7 @@ static void queue_process(struct work_struct *work)
  * case. Further, we test the poll_owner to avoid recursion on UP
  * systems where the lock doesn't exist.
  */
-static int poll_one_napi(struct napi_struct *napi, int budget)
+static void poll_one_napi(struct napi_struct *napi)
 {
 	int work = 0;
 
@@ -149,33 +149,33 @@ static int poll_one_napi(struct napi_struct *napi, int budget)
 	 * holding the napi->poll_lock.
 	 */
 	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
-		return budget;
+		return;
 
 	/* If we set this bit but see that it has already been set,
 	 * that indicates that napi has been disabled and we need
 	 * to abort this operation
 	 */
 	if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
-		goto out;
+		return;
 
-	work = napi->poll(napi, budget);
-	WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
+	/* We explicitly pass the polling call a budget of 0 to
+	 * indicate that we are clearing the Tx path only.
+	 */
+	work = napi->poll(napi, 0);
+	WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll);
 	trace_napi_poll(napi);
 
 	clear_bit(NAPI_STATE_NPSVC, &napi->state);
-
-out:
-	return budget - work;
 }
 
-static void poll_napi(struct net_device *dev, int budget)
+static void poll_napi(struct net_device *dev)
 {
 	struct napi_struct *napi;
 
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
 		if (napi->poll_owner != smp_processor_id() &&
 		    spin_trylock(&napi->poll_lock)) {
-			budget = poll_one_napi(napi, budget);
+			poll_one_napi(napi);
 			spin_unlock(&napi->poll_lock);
 		}
 	}
@@ -185,7 +185,6 @@ static void netpoll_poll_dev(struct net_device *dev)
 {
 	const struct net_device_ops *ops;
 	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
-	int budget = 0;
 
 	/* Don't do any rx activity if the dev_lock mutex is held
 	 * the dev_open/close paths use this to block netpoll activity
@@ -208,7 +207,7 @@ static void netpoll_poll_dev(struct net_device *dev)
 	/* Process pending work on NIC */
 	ops->ndo_poll_controller(dev);
 
-	poll_napi(dev, budget);
+	poll_napi(dev);
 
 	up(&ni->dev_lock);
 
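The budget-0 convention above leans on the NAPI contract that ->poll() never does more work than its budget allows. A purely illustrative driver poll under that assumption (example_clean_tx() and example_rx_one() are hypothetical helpers):

	static int example_napi_poll(struct napi_struct *napi, int budget)
	{
		int done = 0;

		example_clean_tx(napi);		/* Tx completion: not budgeted */

		while (done < budget && example_rx_one(napi))
			done++;			/* Rx work: bounded by budget */

		if (done < budget)
			napi_complete(napi);
		return done;			/* must never exceed budget */
	}

With budget == 0 such a poll cleans the Tx path and returns 0, which is exactly what the tightened WARN_ONCE() in poll_one_napi() now checks.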
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index 4eab4a94a59d..703cf76aa7c2 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -58,7 +58,7 @@
  *   jneq #0x0, drop_ieee1588   ; for PTP_GEN_BIT and drop these
  *   ldh [18]                   ; reload payload
  *   and #0xf                   ; mask PTP_CLASS_VMASK
- *   or #0x70                   ; PTP_CLASS_VLAN|PTP_CLASS_L2
+ *   or #0xc0                   ; PTP_CLASS_VLAN|PTP_CLASS_L2
  *   ret a                      ; return PTP class
  *
  * ; PTP over UDP over IPv4 over 802.1Q over Ethernet
@@ -73,7 +73,7 @@
  *   jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ?
  *   ldh [x + 26]               ; load payload
  *   and #0xf                   ; mask PTP_CLASS_VMASK
- *   or #0x50                   ; PTP_CLASS_VLAN|PTP_CLASS_IPV4
+ *   or #0x90                   ; PTP_CLASS_VLAN|PTP_CLASS_IPV4
  *   ret a                      ; return PTP class
  *   drop_8021q_ipv4: ret #0x0  ; PTP_CLASS_NONE
  *
@@ -86,7 +86,7 @@
  *   jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ?
  *   ldh [66]                   ; load payload
  *   and #0xf                   ; mask PTP_CLASS_VMASK
- *   or #0x60                   ; PTP_CLASS_VLAN|PTP_CLASS_IPV6
+ *   or #0xa0                   ; PTP_CLASS_VLAN|PTP_CLASS_IPV6
  *   ret a                      ; return PTP class
  *   drop_8021q_ipv6: ret #0x0  ; PTP_CLASS_NONE
  *
@@ -98,7 +98,7 @@
  *   jneq #0x0, drop_ieee1588   ; for PTP_GEN_BIT and drop these
  *   ldh [14]                   ; reload payload
  *   and #0xf                   ; mask PTP_CLASS_VMASK
- *   or #0x30                   ; PTP_CLASS_L2
+ *   or #0x40                   ; PTP_CLASS_L2
  *   ret a                      ; return PTP class
  *   drop_ieee1588: ret #0x0    ; PTP_CLASS_NONE
  */
@@ -150,7 +150,7 @@ void __init ptp_classifier_init(void)
 		{ 0x15,  0, 35, 0x00000000 },
 		{ 0x28,  0,  0, 0x00000012 },
 		{ 0x54,  0,  0, 0x0000000f },
-		{ 0x44,  0,  0, 0x00000070 },
+		{ 0x44,  0,  0, 0x000000c0 },
 		{ 0x16,  0,  0, 0x00000000 },
 		{ 0x15,  0, 12, 0x00000800 },
 		{ 0x30,  0,  0, 0x0000001b },
@@ -162,7 +162,7 @@ void __init ptp_classifier_init(void)
 		{ 0x15,  0,  4, 0x0000013f },
 		{ 0x48,  0,  0, 0x0000001a },
 		{ 0x54,  0,  0, 0x0000000f },
-		{ 0x44,  0,  0, 0x00000050 },
+		{ 0x44,  0,  0, 0x00000090 },
 		{ 0x16,  0,  0, 0x00000000 },
 		{ 0x06,  0,  0, 0x00000000 },
 		{ 0x15,  0,  8, 0x000086dd },
@@ -172,7 +172,7 @@ void __init ptp_classifier_init(void)
 		{ 0x15,  0,  4, 0x0000013f },
 		{ 0x28,  0,  0, 0x00000042 },
 		{ 0x54,  0,  0, 0x0000000f },
-		{ 0x44,  0,  0, 0x00000060 },
+		{ 0x44,  0,  0, 0x000000a0 },
 		{ 0x16,  0,  0, 0x00000000 },
 		{ 0x06,  0,  0, 0x00000000 },
 		{ 0x15,  0,  7, 0x000088f7 },
@@ -181,7 +181,7 @@ void __init ptp_classifier_init(void)
 		{ 0x15,  0,  4, 0x00000000 },
 		{ 0x28,  0,  0, 0x0000000e },
 		{ 0x54,  0,  0, 0x0000000f },
-		{ 0x44,  0,  0, 0x00000030 },
+		{ 0x44,  0,  0, 0x00000040 },
 		{ 0x16,  0,  0, 0x00000000 },
 		{ 0x06,  0,  0, 0x00000000 },
 	};
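The new OR immediates decompose into the PTP_CLASS_* flag values assumed here (per the include/linux/ptp_classify.h definitions of the same series):

	#define PTP_CLASS_IPV4	0x10
	#define PTP_CLASS_IPV6	0x20
	#define PTP_CLASS_L2	0x40
	#define PTP_CLASS_VLAN	0x80
	/* hence 0xc0 = VLAN|L2, 0x90 = VLAN|IPV4, 0xa0 = VLAN|IPV6, 0x40 = L2 */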
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index b42f0e26f89e..5d26056b6d8f 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -37,90 +37,16 @@
 int sysctl_max_syn_backlog = 256;
 EXPORT_SYMBOL(sysctl_max_syn_backlog);
 
-int reqsk_queue_alloc(struct request_sock_queue *queue,
-		      unsigned int nr_table_entries)
+void reqsk_queue_alloc(struct request_sock_queue *queue)
 {
-	size_t lopt_size = sizeof(struct listen_sock);
-	struct listen_sock *lopt = NULL;
+	spin_lock_init(&queue->rskq_lock);
 
-	nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
-	nr_table_entries = max_t(u32, nr_table_entries, 8);
-	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
-	lopt_size += nr_table_entries * sizeof(struct request_sock *);
+	spin_lock_init(&queue->fastopenq.lock);
+	queue->fastopenq.rskq_rst_head = NULL;
+	queue->fastopenq.rskq_rst_tail = NULL;
+	queue->fastopenq.qlen = 0;
 
-	if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
-		lopt = kzalloc(lopt_size, GFP_KERNEL |
-					  __GFP_NOWARN |
-					  __GFP_NORETRY);
-	if (!lopt)
-		lopt = vzalloc(lopt_size);
-	if (!lopt)
-		return -ENOMEM;
-
-	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
-	spin_lock_init(&queue->syn_wait_lock);
 	queue->rskq_accept_head = NULL;
-	lopt->nr_table_entries = nr_table_entries;
-	lopt->max_qlen_log = ilog2(nr_table_entries);
-
-	spin_lock_bh(&queue->syn_wait_lock);
-	queue->listen_opt = lopt;
-	spin_unlock_bh(&queue->syn_wait_lock);
-
-	return 0;
-}
-
-void __reqsk_queue_destroy(struct request_sock_queue *queue)
-{
-	/* This is an error recovery path only, no locking needed */
-	kvfree(queue->listen_opt);
-}
-
-static inline struct listen_sock *reqsk_queue_yank_listen_sk(
-	struct request_sock_queue *queue)
-{
-	struct listen_sock *lopt;
-
-	spin_lock_bh(&queue->syn_wait_lock);
-	lopt = queue->listen_opt;
-	queue->listen_opt = NULL;
-	spin_unlock_bh(&queue->syn_wait_lock);
-
-	return lopt;
-}
-
-void reqsk_queue_destroy(struct request_sock_queue *queue)
-{
-	/* make all the listen_opt local to us */
-	struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
-
-	if (listen_sock_qlen(lopt) != 0) {
-		unsigned int i;
-
-		for (i = 0; i < lopt->nr_table_entries; i++) {
-			struct request_sock *req;
-
-			spin_lock_bh(&queue->syn_wait_lock);
-			while ((req = lopt->syn_table[i]) != NULL) {
-				lopt->syn_table[i] = req->dl_next;
-				/* Because of following del_timer_sync(),
-				 * we must release the spinlock here
-				 * or risk a dead lock.
-				 */
-				spin_unlock_bh(&queue->syn_wait_lock);
-				atomic_inc(&lopt->qlen_dec);
-				if (del_timer_sync(&req->rsk_timer))
-					reqsk_put(req);
-				reqsk_put(req);
-				spin_lock_bh(&queue->syn_wait_lock);
-			}
-			spin_unlock_bh(&queue->syn_wait_lock);
-		}
-	}
-
-	if (WARN_ON(listen_sock_qlen(lopt) != 0))
-		pr_err("qlen %u\n", listen_sock_qlen(lopt));
-	kvfree(lopt);
 }
 
 /*
@@ -174,7 +100,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 	struct sock *lsk = req->rsk_listener;
 	struct fastopen_queue *fastopenq;
 
-	fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq;
+	fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq;
 
 	tcp_sk(sk)->fastopen_rsk = NULL;
 	spin_lock_bh(&fastopenq->lock);
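Worth noting: the fastopen queue is now embedded in struct request_sock_queue rather than allocated separately, which is why reqsk_fastopen_remove() takes its address with '&'. A simplified sketch of the assumed layout (fields beyond those initialized above omitted):

	struct request_sock_queue {
		spinlock_t		rskq_lock;
		struct request_sock	*rskq_accept_head;
		struct fastopen_queue	fastopenq;	/* was a pointer */
	};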
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 0ec48403ed68..34ba7a08876d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -96,7 +96,7 @@ int rtnl_is_locked(void)
 EXPORT_SYMBOL(rtnl_is_locked);
 
 #ifdef CONFIG_PROVE_LOCKING
-int lockdep_rtnl_is_held(void)
+bool lockdep_rtnl_is_held(void)
 {
 	return lockdep_is_held(&rtnl_mutex);
 }
@@ -497,7 +497,8 @@ void rtnl_af_unregister(struct rtnl_af_ops *ops)
 }
 EXPORT_SYMBOL_GPL(rtnl_af_unregister);
 
-static size_t rtnl_link_get_af_size(const struct net_device *dev)
+static size_t rtnl_link_get_af_size(const struct net_device *dev,
+				    u32 ext_filter_mask)
 {
 	struct rtnl_af_ops *af_ops;
 	size_t size;
@@ -509,7 +510,7 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev)
 		if (af_ops->get_link_af_size) {
 			/* AF_* + nested data */
 			size += nla_total_size(sizeof(struct nlattr)) +
-				af_ops->get_link_af_size(dev);
+				af_ops->get_link_af_size(dev, ext_filter_mask);
 		}
 	}
 
@@ -837,7 +838,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
 			 /* IFLA_VF_STATS_BROADCAST */
 			 nla_total_size(sizeof(__u64)) +
 			 /* IFLA_VF_STATS_MULTICAST */
-			 nla_total_size(sizeof(__u64)));
+			 nla_total_size(sizeof(__u64)) +
+			 nla_total_size(sizeof(struct ifla_vf_trust)));
 		return size;
 	} else
 		return 0;
@@ -900,7 +902,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
 	       + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
 	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
-	       + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
+	       + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
 	       + nla_total_size(1); /* IFLA_PROTO_DOWN */
@@ -1025,7 +1027,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
 {
 	int err;
 	struct switchdev_attr attr = {
-		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
 		.flags = SWITCHDEV_F_NO_RECURSE,
 	};
 
@@ -1043,15 +1045,156 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
+static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
+					      struct net_device *dev)
+{
+	const struct rtnl_link_stats64 *stats;
+	struct rtnl_link_stats64 temp;
+	struct nlattr *attr;
+
+	stats = dev_get_stats(dev, &temp);
+
+	attr = nla_reserve(skb, IFLA_STATS,
+			   sizeof(struct rtnl_link_stats));
+	if (!attr)
+		return -EMSGSIZE;
+
+	copy_rtnl_link_stats(nla_data(attr), stats);
+
+	attr = nla_reserve(skb, IFLA_STATS64,
+			   sizeof(struct rtnl_link_stats64));
+	if (!attr)
+		return -EMSGSIZE;
+
+	copy_rtnl_link_stats64(nla_data(attr), stats);
+
+	return 0;
+}
+
+static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
+					       struct net_device *dev,
+					       int vfs_num,
+					       struct nlattr *vfinfo)
+{
+	struct ifla_vf_rss_query_en vf_rss_query_en;
+	struct ifla_vf_link_state vf_linkstate;
+	struct ifla_vf_spoofchk vf_spoofchk;
+	struct ifla_vf_tx_rate vf_tx_rate;
+	struct ifla_vf_stats vf_stats;
+	struct ifla_vf_trust vf_trust;
+	struct ifla_vf_vlan vf_vlan;
+	struct ifla_vf_rate vf_rate;
+	struct nlattr *vf, *vfstats;
+	struct ifla_vf_mac vf_mac;
+	struct ifla_vf_info ivi;
+
+	/* Not all SR-IOV capable drivers support the
+	 * spoofcheck and "RSS query enable" query.  Preset to
+	 * -1 so the user space tool can detect that the driver
+	 * didn't report anything.
+	 */
+	ivi.spoofchk = -1;
+	ivi.rss_query_en = -1;
+	ivi.trusted = -1;
+	memset(ivi.mac, 0, sizeof(ivi.mac));
+	/* The default value for VF link state is "auto"
+	 * IFLA_VF_LINK_STATE_AUTO which equals zero
+	 */
+	ivi.linkstate = 0;
+	if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
+		return 0;
+
+	vf_mac.vf =
+		vf_vlan.vf =
+		vf_rate.vf =
+		vf_tx_rate.vf =
+		vf_spoofchk.vf =
+		vf_linkstate.vf =
+		vf_rss_query_en.vf =
+		vf_trust.vf = ivi.vf;
+
+	memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+	vf_vlan.vlan = ivi.vlan;
+	vf_vlan.qos = ivi.qos;
+	vf_tx_rate.rate = ivi.max_tx_rate;
+	vf_rate.min_tx_rate = ivi.min_tx_rate;
+	vf_rate.max_tx_rate = ivi.max_tx_rate;
+	vf_spoofchk.setting = ivi.spoofchk;
+	vf_linkstate.link_state = ivi.linkstate;
+	vf_rss_query_en.setting = ivi.rss_query_en;
+	vf_trust.setting = ivi.trusted;
+	vf = nla_nest_start(skb, IFLA_VF_INFO);
+	if (!vf) {
+		nla_nest_cancel(skb, vfinfo);
+		return -EMSGSIZE;
+	}
+	if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
+	    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+	    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+		    &vf_rate) ||
+	    nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
+		    &vf_tx_rate) ||
+	    nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
+		    &vf_spoofchk) ||
+	    nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
+		    &vf_linkstate) ||
+	    nla_put(skb, IFLA_VF_RSS_QUERY_EN,
+		    sizeof(vf_rss_query_en),
+		    &vf_rss_query_en) ||
+	    nla_put(skb, IFLA_VF_TRUST,
+		    sizeof(vf_trust), &vf_trust))
+		return -EMSGSIZE;
+	memset(&vf_stats, 0, sizeof(vf_stats));
+	if (dev->netdev_ops->ndo_get_vf_stats)
+		dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
+						  &vf_stats);
+	vfstats = nla_nest_start(skb, IFLA_VF_STATS);
+	if (!vfstats) {
+		nla_nest_cancel(skb, vf);
+		nla_nest_cancel(skb, vfinfo);
+		return -EMSGSIZE;
+	}
+	if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
+			vf_stats.rx_packets) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
+			vf_stats.tx_packets) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
+			vf_stats.rx_bytes) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
+			vf_stats.tx_bytes) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
+			vf_stats.broadcast) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
+			vf_stats.multicast))
+		return -EMSGSIZE;
+	nla_nest_end(skb, vfstats);
+	nla_nest_end(skb, vf);
+	return 0;
+}
+
+static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
+{
+	struct rtnl_link_ifmap map = {
+		.mem_start   = dev->mem_start,
+		.mem_end     = dev->mem_end,
+		.base_addr   = dev->base_addr,
+		.irq         = dev->irq,
+		.dma         = dev->dma,
+		.port        = dev->if_port,
+	};
+	if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
 			    unsigned int flags, u32 ext_filter_mask)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
-	struct rtnl_link_stats64 temp;
-	const struct rtnl_link_stats64 *stats;
-	struct nlattr *attr, *af_spec;
+	struct nlattr *af_spec;
 	struct rtnl_af_ops *af_ops;
 	struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
 
@@ -1094,18 +1237,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	    nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
 		goto nla_put_failure;
 
-	if (1) {
-		struct rtnl_link_ifmap map = {
-			.mem_start   = dev->mem_start,
-			.mem_end     = dev->mem_end,
-			.base_addr   = dev->base_addr,
-			.irq         = dev->irq,
-			.dma         = dev->dma,
-			.port        = dev->if_port,
-		};
-		if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
-			goto nla_put_failure;
-	}
+	if (rtnl_fill_link_ifmap(skb, dev))
+		goto nla_put_failure;
 
 	if (dev->addr_len) {
 		if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
@@ -1122,122 +1255,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (rtnl_phys_switch_id_fill(skb, dev))
 		goto nla_put_failure;
 
-	attr = nla_reserve(skb, IFLA_STATS,
-			sizeof(struct rtnl_link_stats));
-	if (attr == NULL)
+	if (rtnl_fill_stats(skb, dev))
 		goto nla_put_failure;
 
-	stats = dev_get_stats(dev, &temp);
-	copy_rtnl_link_stats(nla_data(attr), stats);
-
-	attr = nla_reserve(skb, IFLA_STATS64,
-			sizeof(struct rtnl_link_stats64));
-	if (attr == NULL)
-		goto nla_put_failure;
-	copy_rtnl_link_stats64(nla_data(attr), stats);
-
 	if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
 	    nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
 		goto nla_put_failure;
 
-	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
-	    && (ext_filter_mask & RTEXT_FILTER_VF)) {
+	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
+	    ext_filter_mask & RTEXT_FILTER_VF) {
 		int i;
-
-		struct nlattr *vfinfo, *vf, *vfstats;
+		struct nlattr *vfinfo;
 		int num_vfs = dev_num_vf(dev->dev.parent);
 
 		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
 		if (!vfinfo)
 			goto nla_put_failure;
 		for (i = 0; i < num_vfs; i++) {
-			struct ifla_vf_info ivi;
-			struct ifla_vf_mac vf_mac;
-			struct ifla_vf_vlan vf_vlan;
-			struct ifla_vf_rate vf_rate;
-			struct ifla_vf_tx_rate vf_tx_rate;
-			struct ifla_vf_spoofchk vf_spoofchk;
-			struct ifla_vf_link_state vf_linkstate;
-			struct ifla_vf_rss_query_en vf_rss_query_en;
-			struct ifla_vf_stats vf_stats;
-
-			/*
-			 * Not all SR-IOV capable drivers support the
-			 * spoofcheck and "RSS query enable" query.  Preset to
-			 * -1 so the user space tool can detect that the driver
-			 * didn't report anything.
-			 */
-			ivi.spoofchk = -1;
-			ivi.rss_query_en = -1;
-			memset(ivi.mac, 0, sizeof(ivi.mac));
-			/* The default value for VF link state is "auto"
-			 * IFLA_VF_LINK_STATE_AUTO which equals zero
-			 */
-			ivi.linkstate = 0;
-			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
-				break;
-			vf_mac.vf =
-				vf_vlan.vf =
-				vf_rate.vf =
-				vf_tx_rate.vf =
-				vf_spoofchk.vf =
-				vf_linkstate.vf =
-				vf_rss_query_en.vf = ivi.vf;
-
-			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
-			vf_vlan.vlan = ivi.vlan;
-			vf_vlan.qos = ivi.qos;
-			vf_tx_rate.rate = ivi.max_tx_rate;
-			vf_rate.min_tx_rate = ivi.min_tx_rate;
-			vf_rate.max_tx_rate = ivi.max_tx_rate;
-			vf_spoofchk.setting = ivi.spoofchk;
-			vf_linkstate.link_state = ivi.linkstate;
-			vf_rss_query_en.setting = ivi.rss_query_en;
-			vf = nla_nest_start(skb, IFLA_VF_INFO);
-			if (!vf) {
-				nla_nest_cancel(skb, vfinfo);
-				goto nla_put_failure;
-			}
-			if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
-			    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
-			    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
-				    &vf_rate) ||
-			    nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
-				    &vf_tx_rate) ||
-			    nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
-				    &vf_spoofchk) ||
-			    nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
-				    &vf_linkstate) ||
-			    nla_put(skb, IFLA_VF_RSS_QUERY_EN,
-				    sizeof(vf_rss_query_en),
-				    &vf_rss_query_en))
-				goto nla_put_failure;
-			memset(&vf_stats, 0, sizeof(vf_stats));
-			if (dev->netdev_ops->ndo_get_vf_stats)
-				dev->netdev_ops->ndo_get_vf_stats(dev, i,
-								  &vf_stats);
-			vfstats = nla_nest_start(skb, IFLA_VF_STATS);
-			if (!vfstats) {
-				nla_nest_cancel(skb, vf);
-				nla_nest_cancel(skb, vfinfo);
-				goto nla_put_failure;
-			}
-			if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
-					vf_stats.rx_packets) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
-					vf_stats.tx_packets) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
-					vf_stats.rx_bytes) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
-					vf_stats.tx_bytes) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
-					vf_stats.broadcast) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
-					vf_stats.multicast))
+			if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
 				goto nla_put_failure;
-			nla_nest_end(skb, vfstats);
-			nla_nest_end(skb, vf);
 		}
+
 		nla_nest_end(skb, vfinfo);
 	}
 
@@ -1272,7 +1310,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		if (!(af = nla_nest_start(skb, af_ops->family)))
 			goto nla_put_failure;
 
-		err = af_ops->fill_link_af(skb, dev);
+		err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
 
 		/*
 		 * Caller may return ENODATA to indicate that there
@@ -1347,6 +1385,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 	[IFLA_VF_LINK_STATE]	= { .len = sizeof(struct ifla_vf_link_state) },
 	[IFLA_VF_RSS_QUERY_EN]	= { .len = sizeof(struct ifla_vf_rss_query_en) },
 	[IFLA_VF_STATS]		= { .type = NLA_NESTED },
+	[IFLA_VF_TRUST]		= { .len = sizeof(struct ifla_vf_trust) },
 };
 
 static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = {
@@ -1586,6 +1625,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
 		return err;
 	}
 
+	if (tb[IFLA_VF_TRUST]) {
+		struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]);
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_trust)
+			err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting);
+		if (err < 0)
+			return err;
+	}
+
 	return err;
 }
 
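The attribute payload do_setvfinfo() consumes here is the fixed-size struct from the uapi header (include/uapi/linux/if_link.h, added alongside this change):

	struct ifla_vf_trust {
		__u32 vf;	/* VF index */
		__u32 setting;	/* 1 = trusted, 0 = not trusted */
	};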
@@ -3443,4 +3492,3 @@ void __init rtnetlink_init(void)
 	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
 	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
 }
-
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fab4599ba8b2..152b9c70e252 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -414,7 +414,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
 	len += NET_SKB_PAD;
 
 	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
-	    (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 		if (!skb)
 			goto skb_fail;
@@ -481,7 +481,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	len += NET_SKB_PAD + NET_IP_ALIGN;
 
 	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
-	    (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 		if (!skb)
 			goto skb_fail;
@@ -4268,7 +4268,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 		return NULL;
 	}
 
-	memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+	memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len,
+		2 * ETH_ALEN);
 	skb->mac_header += VLAN_HLEN;
 	return skb;
 }
@@ -4452,7 +4453,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 		return NULL;
 
 	gfp_head = gfp_mask;
-	if (gfp_head & __GFP_WAIT)
+	if (gfp_head & __GFP_DIRECT_RECLAIM)
 		gfp_head |= __GFP_REPEAT;
 
 	*errcode = -ENOBUFS;
@@ -4467,7 +4468,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 
 	while (order) {
 		if (npages >= 1 << order) {
-			page = alloc_pages((gfp_mask & ~__GFP_WAIT) |
+			page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
 					   __GFP_COMP |
 					   __GFP_NOWARN |
 					   __GFP_NORETRY,
diff --git a/net/core/sock.c b/net/core/sock.c
index 3307c02244d3..1e4dd54bfb5a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -422,13 +422,25 @@ static void sock_warn_obsolete_bsdism(const char *name)
 	}
 }
 
+static bool sock_needs_netstamp(const struct sock *sk)
+{
+	switch (sk->sk_family) {
+	case AF_UNSPEC:
+	case AF_UNIX:
+		return false;
+	default:
+		return true;
+	}
+}
+
 #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
 
 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
 {
 	if (sk->sk_flags & flags) {
 		sk->sk_flags &= ~flags;
-		if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
+		if (sock_needs_netstamp(sk) &&
+		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
 			net_disable_timestamp();
 	}
 }
@@ -988,6 +1000,10 @@ set_rcvbuf:
 					 sk->sk_max_pacing_rate);
 		break;
 
+	case SO_INCOMING_CPU:
+		sk->sk_incoming_cpu = val;
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1578,7 +1594,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		if (newsk->sk_prot->sockets_allocated)
 			sk_sockets_allocated_inc(newsk);
 
-		if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+		if (sock_needs_netstamp(sk) &&
+		    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
 			net_enable_timestamp();
 	}
 out:
@@ -1639,6 +1656,28 @@ void sock_wfree(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sock_wfree);
 
+void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
+{
+	skb_orphan(skb);
+	skb->sk = sk;
+#ifdef CONFIG_INET
+	if (unlikely(!sk_fullsock(sk))) {
+		skb->destructor = sock_edemux;
+		sock_hold(sk);
+		return;
+	}
+#endif
+	skb->destructor = sock_wfree;
+	skb_set_hash_from_sk(skb, sk);
+	/*
+	 * We used to take a refcount on sk, but following operation
+	 * is enough to guarantee sk_free() won't free this sock until
+	 * all in-flight packets are completed
+	 */
+	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+}
+EXPORT_SYMBOL(skb_set_owner_w);
+
 void skb_orphan_partial(struct sk_buff *skb)
 {
 	/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
@@ -1852,6 +1891,32 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
 }
 EXPORT_SYMBOL(sock_alloc_send_skb);
 
+int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
+		   struct sockcm_cookie *sockc)
+{
+	struct cmsghdr *cmsg;
+
+	for_each_cmsghdr(cmsg, msg) {
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+		if (cmsg->cmsg_level != SOL_SOCKET)
+			continue;
+		switch (cmsg->cmsg_type) {
+		case SO_MARK:
+			if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+				return -EPERM;
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
+				return -EINVAL;
+			sockc->mark = *(u32 *)CMSG_DATA(cmsg);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(sock_cmsg_send);
+
 /* On 32bit arches, an skb frag is limited to 2^15 */
 #define SKB_FRAG_PAGE_ORDER	get_order(32768)
 
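For illustration, a minimal userspace counterpart to sock_cmsg_send() (a sketch only; the caller needs CAP_NET_ADMIN in the socket's user namespace, matching the ns_capable() check above):

	#include <stdint.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <sys/uio.h>

	static ssize_t sendmsg_with_mark(int fd, const void *buf, size_t len,
					 uint32_t mark)
	{
		char cbuf[CMSG_SPACE(sizeof(uint32_t))];
		struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
		struct msghdr msg = {
			.msg_iov	= &iov,
			.msg_iovlen	= 1,
			.msg_control	= cbuf,
			.msg_controllen	= sizeof(cbuf),
		};
		struct cmsghdr *cmsg;

		memset(cbuf, 0, sizeof(cbuf));
		cmsg = CMSG_FIRSTHDR(&msg);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SO_MARK;	/* parsed by sock_cmsg_send() */
		cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t));
		memcpy(CMSG_DATA(cmsg), &mark, sizeof(mark));
		return sendmsg(fd, &msg, 0);
	}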
@@ -1879,8 +1944,10 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
 
 		pfrag->offset = 0;
 		if (SKB_FRAG_PAGE_ORDER) {
-			pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
-					  __GFP_NOWARN | __GFP_NORETRY,
+			/* Avoid direct reclaim but allow kswapd to wake */
+			pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
+						  __GFP_COMP | __GFP_NOWARN |
+						  __GFP_NORETRY,
 						  SKB_FRAG_PAGE_ORDER);
 			if (likely(pfrag->page)) {
 				pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
@@ -2353,6 +2420,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	sk->sk_max_pacing_rate = ~0U;
 	sk->sk_pacing_rate = ~0U;
+	sk->sk_incoming_cpu = -1;
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)
@@ -2479,7 +2547,8 @@ void sock_enable_timestamp(struct sock *sk, int flag)
 		 * time stamping, but time stamping might have been on
 		 * already because of the other one
 		 */
-		if (!(previous_flags & SK_FLAGS_TIMESTAMP))
+		if (sock_needs_netstamp(sk) &&
+		    !(previous_flags & SK_FLAGS_TIMESTAMP))
 			net_enable_timestamp();
 	}
 }
@@ -2758,7 +2827,7 @@ static int req_prot_init(const struct proto *prot)
 
 	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
 					   rsk_prot->obj_size, 0,
-					   0, NULL);
+					   prot->slab_flags, NULL);
 
 	if (!rsk_prot->slab) {
 		pr_crit("%s: Can't create request sock SLAB cache!\n",
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 817622f3dbb7..0c1d58d43f67 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -1,3 +1,5 @@
+/* License: GPL */
+
 #include <linux/mutex.h>
 #include <linux/socket.h>
 #include <linux/skbuff.h>
@@ -323,14 +325,4 @@ static int __init sock_diag_init(void)
 	BUG_ON(!broadcast_wq);
 	return register_pernet_subsys(&diag_net_ops);
 }
-
-static void __exit sock_diag_exit(void)
-{
-	unregister_pernet_subsys(&diag_net_ops);
-	destroy_workqueue(broadcast_wq);
-}
-
-module_init(sock_diag_init);
-module_exit(sock_diag_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG);
+device_initcall(sock_diag_init);
diff --git a/net/core/tso.c b/net/core/tso.c
index 630b30b4fb53..5dca7ce8ee9f 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -1,4 +1,5 @@
 #include <linux/export.h>
+#include <linux/if_vlan.h>
 #include <net/ip.h>
 #include <net/tso.h>
 #include <asm/unaligned.h>
@@ -14,18 +15,24 @@ EXPORT_SYMBOL(tso_count_descs);
 void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
 		   int size, bool is_last)
 {
-	struct iphdr *iph;
 	struct tcphdr *tcph;
 	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 	int mac_hdr_len = skb_network_offset(skb);
 
 	memcpy(hdr, skb->data, hdr_len);
-	iph = (struct iphdr *)(hdr + mac_hdr_len);
-	iph->id = htons(tso->ip_id);
-	iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+	if (!tso->ipv6) {
+		struct iphdr *iph = (void *)(hdr + mac_hdr_len);
+
+		iph->id = htons(tso->ip_id);
+		iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+		tso->ip_id++;
+	} else {
+		struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len);
+
+		iph->payload_len = htons(size + tcp_hdrlen(skb));
+	}
 	tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
 	put_unaligned_be32(tso->tcp_seq, &tcph->seq);
-	tso->ip_id++;
 
 	if (!is_last) {
 		/* Clear all special flags for not last packet */
@@ -61,6 +68,7 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso)
 	tso->ip_id = ntohs(ip_hdr(skb)->id);
 	tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
 	tso->next_frag_idx = 0;
+	tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6);
 
 	/* Build first data */
 	tso->size = skb_headlen(skb) - hdr_len;
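The length bookkeeping the two branches implement, for a segment carrying "size" payload bytes:

	/* IPv4: tot_len     = size + IP header + TCP header (and tso->ip_id
	 *                     advances once per segment);
	 * IPv6: payload_len = size + TCP header only, since the fixed 40-byte
	 *                     IPv6 header is excluded and there is no ID field.
	 */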
diff --git a/net/core/utils.c b/net/core/utils.c
index 3dffce953c39..3d17ca8b4744 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -348,52 +348,3 @@ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
 	}
 }
 EXPORT_SYMBOL(inet_proto_csum_replace_by_diff);
-
-struct __net_random_once_work {
-	struct work_struct work;
-	struct static_key *key;
-};
-
-static void __net_random_once_deferred(struct work_struct *w)
-{
-	struct __net_random_once_work *work =
-		container_of(w, struct __net_random_once_work, work);
-	BUG_ON(!static_key_enabled(work->key));
-	static_key_slow_dec(work->key);
-	kfree(work);
-}
-
-static void __net_random_once_disable_jump(struct static_key *key)
-{
-	struct __net_random_once_work *w;
-
-	w = kmalloc(sizeof(*w), GFP_ATOMIC);
-	if (!w)
-		return;
-
-	INIT_WORK(&w->work, __net_random_once_deferred);
-	w->key = key;
-	schedule_work(&w->work);
-}
-
-bool __net_get_random_once(void *buf, int nbytes, bool *done,
-			   struct static_key *once_key)
-{
-	static DEFINE_SPINLOCK(lock);
-	unsigned long flags;
-
-	spin_lock_irqsave(&lock, flags);
-	if (*done) {
-		spin_unlock_irqrestore(&lock, flags);
-		return false;
-	}
-
-	get_random_bytes(buf, nbytes);
-	*done = true;
-	spin_unlock_irqrestore(&lock, flags);
-
-	__net_random_once_disable_jump(once_key);
-
-	return true;
-}
-EXPORT_SYMBOL(__net_get_random_once);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 5b21f6f88e97..4f6c1862dfd2 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -13,6 +13,7 @@
  * You should have received a copy of the GNU General Public License along with
  * this program; if not, see <http://www.gnu.org/licenses/>.
  *
+ * Description: Data Center Bridging netlink interface
  * Author: Lucy Liu <lucy.liu@intel.com>
  */
 
@@ -24,7 +25,7 @@
 #include <linux/dcbnl.h>
 #include <net/dcbevent.h>
 #include <linux/rtnetlink.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <net/sock.h>
 
 /* Data Center Bridging (DCB) is a collection of Ethernet enhancements
@@ -48,10 +49,6 @@
  * features for capable devices.
  */
 
-MODULE_AUTHOR("Lucy Liu, <lucy.liu@intel.com>");
-MODULE_DESCRIPTION("Data Center Bridging netlink interface");
-MODULE_LICENSE("GPL");
-
 /**************** DCB attribute policies *************************************/
 
 /* DCB netlink attributes policy */
@@ -1935,19 +1932,6 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del)
 }
 EXPORT_SYMBOL(dcb_ieee_delapp);
 
-static void dcb_flushapp(void)
-{
-	struct dcb_app_type *app;
-	struct dcb_app_type *tmp;
-
-	spin_lock_bh(&dcb_lock);
-	list_for_each_entry_safe(app, tmp, &dcb_app_list, list) {
-		list_del(&app->list);
-		kfree(app);
-	}
-	spin_unlock_bh(&dcb_lock);
-}
-
 static int __init dcbnl_init(void)
 {
 	INIT_LIST_HEAD(&dcb_app_list);
@@ -1957,12 +1941,4 @@ static int __init dcbnl_init(void)
 
 	return 0;
 }
-module_init(dcbnl_init);
-
-static void __exit dcbnl_exit(void)
-{
-	rtnl_unregister(PF_UNSPEC, RTM_GETDCB);
-	rtnl_unregister(PF_UNSPEC, RTM_SETDCB);
-	dcb_flushapp();
-}
-module_exit(dcbnl_exit);
+device_initcall(dcbnl_init);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index bebc735f5afc..b0e28d24e1a7 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -229,7 +229,7 @@ void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb);
 int dccp_retransmit_skb(struct sock *sk);
 
 void dccp_send_ack(struct sock *sk);
-void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 			 struct request_sock *rsk);
 
 void dccp_send_sync(struct sock *sk, const u64 seq,
@@ -270,15 +270,17 @@ int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
 
 int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 
-struct sock *dccp_create_openreq_child(struct sock *sk,
+struct sock *dccp_create_openreq_child(const struct sock *sk,
 				       const struct request_sock *req,
 				       const struct sk_buff *skb);
 
 int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
 
-struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
+struct sock *dccp_v4_request_recv_sock(const struct sock *sk, struct sk_buff *skb,
 				       struct request_sock *req,
-				       struct dst_entry *dst);
+				       struct dst_entry *dst,
+				       struct request_sock *req_unhash,
+				       bool *own_req);
 struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 			    struct request_sock *req);
 
@@ -293,7 +295,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
 void dccp_destroy_sock(struct sock *sk);
 
 void dccp_close(struct sock *sk, long timeout);
-struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
+struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst,
 				   struct request_sock *req);
 
 int dccp_connect(struct sock *sk);
@@ -325,13 +327,13 @@ void dccp_send_close(struct sock *sk, const int active);
 int dccp_invalid_packet(struct sk_buff *skb);
 u32 dccp_sample_rtt(struct sock *sk, long delta);
 
-static inline int dccp_bad_service_code(const struct sock *sk,
+static inline bool dccp_bad_service_code(const struct sock *sk,
 					const __be32 service)
 {
 	const struct dccp_sock *dp = dccp_sk(sk);
 
 	if (dp->dccps_service == service)
-		return 0;
+		return false;
 	return !dccp_list_has_service(dp->dccps_service_list, service);
 }
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ccf4c5629b3c..5684e14932bd 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -208,7 +208,6 @@ void dccp_req_err(struct sock *sk, u64 seq)
 
 	if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
-		reqsk_put(req);
 	} else {
 		/*
 		 * Still in RESPOND, just remove it silently.
@@ -218,6 +217,7 @@ void dccp_req_err(struct sock *sk, u64 seq)
 		 */
 		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
 	}
+	reqsk_put(req);
 }
 EXPORT_SYMBOL(dccp_req_err);
 
@@ -390,9 +390,12 @@ static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
  *
  * This is the equivalent of TCP's tcp_v4_syn_recv_sock
  */
-struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
+struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
+				       struct sk_buff *skb,
 				       struct request_sock *req,
-				       struct dst_entry *dst)
+				       struct dst_entry *dst,
+				       struct request_sock *req_unhash,
+				       bool *own_req)
 {
 	struct inet_request_sock *ireq;
 	struct inet_sock *newinet;
@@ -425,7 +428,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	if (__inet_inherit_port(sk, newsk) < 0)
 		goto put_and_exit;
-	__inet_hash_nolisten(newsk, NULL);
+	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
 
 	return newsk;
 
@@ -443,36 +446,6 @@ put_and_exit:
 }
 EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
 
-static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
-{
-	const struct dccp_hdr *dh = dccp_hdr(skb);
-	const struct iphdr *iph = ip_hdr(skb);
-	struct sock *nsk;
-	/* Find possible connection requests. */
-	struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport,
-						       iph->saddr, iph->daddr);
-	if (req) {
-		nsk = dccp_check_req(sk, skb, req);
-		if (!nsk)
-			reqsk_put(req);
-		return nsk;
-	}
-	nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo,
-				      iph->saddr, dh->dccph_sport,
-				      iph->daddr, dh->dccph_dport,
-				      inet_iif(skb));
-	if (nsk != NULL) {
-		if (nsk->sk_state != DCCP_TIME_WAIT) {
-			bh_lock_sock(nsk);
-			return nsk;
-		}
-		inet_twsk_put(inet_twsk(nsk));
-		return NULL;
-	}
-
-	return sk;
-}
-
 static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 					    struct sk_buff *skb)
 {
@@ -498,7 +471,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 	return &rt->dst;
 }
 
-static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
+static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req)
 {
 	int err = -1;
 	struct sk_buff *skb;
@@ -527,7 +500,7 @@ out:
 	return err;
 }
 
-static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
+static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
 {
 	int err;
 	const struct iphdr *rxiph;
@@ -624,7 +597,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = inet_reqsk_alloc(&dccp_request_sock_ops, sk);
+	req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true);
 	if (req == NULL)
 		goto drop;
 
@@ -704,18 +677,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 	 * NOTE: the check for the packet types is done in
 	 *	 dccp_rcv_state_process
 	 */
-	if (sk->sk_state == DCCP_LISTEN) {
-		struct sock *nsk = dccp_v4_hnd_req(sk, skb);
-
-		if (nsk == NULL)
-			goto discard;
-
-		if (nsk != sk) {
-			if (dccp_child_process(sk, nsk, skb))
-				goto reset;
-			return 0;
-		}
-	}
 
 	if (dccp_rcv_state_process(sk, skb, dh, skb->len))
 		goto reset;
@@ -723,7 +684,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 reset:
 	dccp_v4_ctl_send_reset(sk, skb);
-discard:
 	kfree_skb(skb);
 	return 0;
 }
@@ -841,15 +801,10 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 					      DCCP_SKB_CB(skb)->dccpd_ack_seq);
 	}
 
-	/* Step 2:
-	 *	Look up flow ID in table and get corresponding socket */
+lookup:
 	sk = __inet_lookup_skb(&dccp_hashinfo, skb,
 			       dh->dccph_sport, dh->dccph_dport);
-	/*
-	 * Step 2:
-	 *	If no socket ...
-	 */
-	if (sk == NULL) {
+	if (!sk) {
 		dccp_pr_debug("failed to look up flow ID in table and "
 			      "get corresponding socket\n");
 		goto no_dccp_socket;
@@ -867,6 +822,31 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 		goto no_dccp_socket;
 	}
 
+	if (sk->sk_state == DCCP_NEW_SYN_RECV) {
+		struct request_sock *req = inet_reqsk(sk);
+		struct sock *nsk = NULL;
+
+		sk = req->rsk_listener;
+		if (likely(sk->sk_state == DCCP_LISTEN)) {
+			nsk = dccp_check_req(sk, skb, req);
+		} else {
+			inet_csk_reqsk_queue_drop_and_put(sk, req);
+			goto lookup;
+		}
+		if (!nsk) {
+			reqsk_put(req);
+			goto discard_it;
+		}
+		if (nsk == sk) {
+			sock_hold(sk);
+			reqsk_put(req);
+		} else if (dccp_child_process(sk, nsk, skb)) {
+			dccp_v4_ctl_send_reset(sk, skb);
+			goto discard_it;
+		} else {
+			return 0;
+		}
+	}
 	/*
 	 * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
 	 *	o if MinCsCov = 0, only packets with CsCov = 0 are accepted
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5165571f397a..db5fc2440a23 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -181,7 +181,7 @@ out:
 }
 
 
-static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
+static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -234,7 +234,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
 	kfree_skb(inet_rsk(req)->pktopts);
 }
 
-static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
+static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
 {
 	const struct ipv6hdr *rxip6h;
 	struct sk_buff *skb;
@@ -290,37 +290,6 @@ static struct request_sock_ops dccp6_request_sock_ops = {
 	.syn_ack_timeout = dccp_syn_ack_timeout,
 };
 
-static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
-{
-	const struct dccp_hdr *dh = dccp_hdr(skb);
-	const struct ipv6hdr *iph = ipv6_hdr(skb);
-	struct request_sock *req;
-	struct sock *nsk;
-
-	req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr,
-				   &iph->daddr, inet6_iif(skb));
-	if (req) {
-		nsk = dccp_check_req(sk, skb, req);
-		if (!nsk)
-			reqsk_put(req);
-		return nsk;
-	}
-	nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo,
-					 &iph->saddr, dh->dccph_sport,
-					 &iph->daddr, ntohs(dh->dccph_dport),
-					 inet6_iif(skb));
-	if (nsk != NULL) {
-		if (nsk->sk_state != DCCP_TIME_WAIT) {
-			bh_lock_sock(nsk);
-			return nsk;
-		}
-		inet_twsk_put(inet_twsk(nsk));
-		return NULL;
-	}
-
-	return sk;
-}
-
 static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct request_sock *req;
@@ -350,7 +319,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk);
+	req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
 	if (req == NULL)
 		goto drop;
 
@@ -398,7 +367,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (dccp_v6_send_response(sk, req))
 		goto drop_and_free;
 
-	inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
 	return 0;
 
 drop_and_free:
@@ -408,13 +377,16 @@ drop:
408 return -1; 377 return -1;
409} 378}
410 379
411static struct sock *dccp_v6_request_recv_sock(struct sock *sk, 380static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
412 struct sk_buff *skb, 381 struct sk_buff *skb,
413 struct request_sock *req, 382 struct request_sock *req,
414 struct dst_entry *dst) 383 struct dst_entry *dst,
384 struct request_sock *req_unhash,
385 bool *own_req)
415{ 386{
416 struct inet_request_sock *ireq = inet_rsk(req); 387 struct inet_request_sock *ireq = inet_rsk(req);
417 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 388 struct ipv6_pinfo *newnp;
389 const struct ipv6_pinfo *np = inet6_sk(sk);
418 struct inet_sock *newinet; 390 struct inet_sock *newinet;
419 struct dccp6_sock *newdp6; 391 struct dccp6_sock *newdp6;
420 struct sock *newsk; 392 struct sock *newsk;
@@ -423,7 +395,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
423 /* 395 /*
424 * v6 mapped 396 * v6 mapped
425 */ 397 */
426 newsk = dccp_v4_request_recv_sock(sk, skb, req, dst); 398 newsk = dccp_v4_request_recv_sock(sk, skb, req, dst,
399 req_unhash, own_req);
427 if (newsk == NULL) 400 if (newsk == NULL)
428 return NULL; 401 return NULL;
429 402
@@ -462,22 +435,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
462 if (sk_acceptq_is_full(sk)) 435 if (sk_acceptq_is_full(sk))
463 goto out_overflow; 436 goto out_overflow;
464 437
465 if (dst == NULL) { 438 if (!dst) {
466 struct in6_addr *final_p, final;
467 struct flowi6 fl6; 439 struct flowi6 fl6;
468 440
469 memset(&fl6, 0, sizeof(fl6)); 441 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP);
470 fl6.flowi6_proto = IPPROTO_DCCP; 442 if (!dst)
471 fl6.daddr = ireq->ir_v6_rmt_addr;
472 final_p = fl6_update_dst(&fl6, np->opt, &final);
473 fl6.saddr = ireq->ir_v6_loc_addr;
474 fl6.flowi6_oif = sk->sk_bound_dev_if;
475 fl6.fl6_dport = ireq->ir_rmt_port;
476 fl6.fl6_sport = htons(ireq->ir_num);
477 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
478
479 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
480 if (IS_ERR(dst))
481 goto out; 443 goto out;
482 } 444 }
483 445
@@ -515,15 +477,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
515 /* Clone RX bits */ 477 /* Clone RX bits */
516 newnp->rxopt.all = np->rxopt.all; 478 newnp->rxopt.all = np->rxopt.all;
517 479
518 /* Clone pktoptions received with SYN */
519 newnp->pktoptions = NULL; 480 newnp->pktoptions = NULL;
520 if (ireq->pktopts != NULL) {
521 newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
522 consume_skb(ireq->pktopts);
523 ireq->pktopts = NULL;
524 if (newnp->pktoptions)
525 skb_set_owner_r(newnp->pktoptions, newsk);
526 }
527 newnp->opt = NULL; 481 newnp->opt = NULL;
528 newnp->mcast_oif = inet6_iif(skb); 482 newnp->mcast_oif = inet6_iif(skb);
529 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 483 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
@@ -552,7 +506,15 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
552 dccp_done(newsk); 506 dccp_done(newsk);
553 goto out; 507 goto out;
554 } 508 }
555 __inet_hash(newsk, NULL); 509 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
510 /* Clone pktoptions received with SYN, if we own the req */
511 if (*own_req && ireq->pktopts) {
512 newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
513 consume_skb(ireq->pktopts);
514 ireq->pktopts = NULL;
515 if (newnp->pktoptions)
516 skb_set_owner_r(newnp->pktoptions, newsk);
517 }
556 518
557 return newsk; 519 return newsk;
558 520
@@ -651,24 +613,6 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
651 * NOTE: the check for the packet types is done in 613 * NOTE: the check for the packet types is done in
652 * dccp_rcv_state_process 614 * dccp_rcv_state_process
653 */ 615 */
654 if (sk->sk_state == DCCP_LISTEN) {
655 struct sock *nsk = dccp_v6_hnd_req(sk, skb);
656
657 if (nsk == NULL)
658 goto discard;
659 /*
660 * Queue it on the new socket if the new socket is active,
661 * otherwise we just shortcircuit this and continue with
662 * the new socket..
663 */
664 if (nsk != sk) {
665 if (dccp_child_process(sk, nsk, skb))
666 goto reset;
667 if (opt_skb != NULL)
668 __kfree_skb(opt_skb);
669 return 0;
670 }
671 }
672 616
673 if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) 617 if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
674 goto reset; 618 goto reset;
@@ -715,16 +659,11 @@ static int dccp_v6_rcv(struct sk_buff *skb)
715 else 659 else
716 DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); 660 DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
717 661
718 /* Step 2: 662lookup:
719 * Look up flow ID in table and get corresponding socket */
720 sk = __inet6_lookup_skb(&dccp_hashinfo, skb, 663 sk = __inet6_lookup_skb(&dccp_hashinfo, skb,
721 dh->dccph_sport, dh->dccph_dport, 664 dh->dccph_sport, dh->dccph_dport,
722 inet6_iif(skb)); 665 inet6_iif(skb));
723 /* 666 if (!sk) {
724 * Step 2:
725 * If no socket ...
726 */
727 if (sk == NULL) {
728 dccp_pr_debug("failed to look up flow ID in table and " 667 dccp_pr_debug("failed to look up flow ID in table and "
729 "get corresponding socket\n"); 668 "get corresponding socket\n");
730 goto no_dccp_socket; 669 goto no_dccp_socket;
@@ -742,6 +681,31 @@ static int dccp_v6_rcv(struct sk_buff *skb)
742 goto no_dccp_socket; 681 goto no_dccp_socket;
743 } 682 }
744 683
684 if (sk->sk_state == DCCP_NEW_SYN_RECV) {
685 struct request_sock *req = inet_reqsk(sk);
686 struct sock *nsk = NULL;
687
688 sk = req->rsk_listener;
689 if (likely(sk->sk_state == DCCP_LISTEN)) {
690 nsk = dccp_check_req(sk, skb, req);
691 } else {
692 inet_csk_reqsk_queue_drop_and_put(sk, req);
693 goto lookup;
694 }
695 if (!nsk) {
696 reqsk_put(req);
697 goto discard_it;
698 }
699 if (nsk == sk) {
700 sock_hold(sk);
701 reqsk_put(req);
702 } else if (dccp_child_process(sk, nsk, skb)) {
703 dccp_v6_ctl_send_reset(sk, skb);
704 goto discard_it;
705 } else {
706 return 0;
707 }
708 }
745 /* 709 /*
746 * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage 710 * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
747 * o if MinCsCov = 0, only packets with CsCov = 0 are accepted 711 * o if MinCsCov = 0, only packets with CsCov = 0 are accepted
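Beyond the mirrored DCCP_NEW_SYN_RECV handling, the v6 path also swaps roughly a dozen lines of hand-rolled flowi6 setup for one helper. A sketch of the resulting pattern (kernel context assumed; the call is the one in the hunk):

	if (!dst) {
		struct flowi6 fl6;

		/* inet6_csk_route_req() fills fl6 from the request sock
		 * (addresses, ports, oif), classifies the flow and performs
		 * the route lookup, replacing the removed open-coded
		 * memset()/fl6_update_dst()/ip6_dst_lookup_flow() sequence.
		 */
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP);
		if (!dst)
			goto out;
	}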
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 838f524cf11a..1994f8af646b 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -72,7 +72,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
 	dccp_done(sk);
 }
 
-struct sock *dccp_create_openreq_child(struct sock *sk,
+struct sock *dccp_create_openreq_child(const struct sock *sk,
 				       const struct request_sock *req,
 				       const struct sk_buff *skb)
 {
@@ -143,6 +143,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 {
 	struct sock *child = NULL;
 	struct dccp_request_sock *dreq = dccp_rsk(req);
+	bool own_req;
 
 	/* Check for retransmitted REQUEST */
 	if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
@@ -182,14 +183,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (dccp_parse_options(sk, dreq, skb))
 		goto drop;
 
-	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
-	if (child == NULL)
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
+							 req, &own_req);
+	if (!child)
 		goto listen_overflow;
 
-	inet_csk_reqsk_queue_drop(sk, req);
-	inet_csk_reqsk_queue_add(sk, req, child);
-out:
-	return child;
+	return inet_csk_complete_hashdance(sk, child, req, own_req);
+
 listen_overflow:
 	dccp_pr_debug("listen_overflow!\n");
 	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
@@ -198,7 +198,7 @@ drop:
 		req->rsk_ops->send_reset(sk, skb);
 
 	inet_csk_reqsk_queue_drop(sk, req);
-	goto out;
+	return NULL;
 }
 
 EXPORT_SYMBOL_GPL(dccp_check_req);
@@ -236,7 +236,7 @@ int dccp_child_process(struct sock *parent, struct sock *child,
 
 EXPORT_SYMBOL_GPL(dccp_child_process);
 
-void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 			 struct request_sock *rsk)
 {
 	DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state");
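The drop-plus-add pair at the end of dccp_check_req() collapses into inet_csk_complete_hashdance(). The own_req flag, filled in by ->syn_recv_sock() via inet_ehash_nolisten(), records whether this caller actually inserted the child into the hash table, i.e. won any race with another CPU processing the same request. A sketch of the resulting tail:

	/* Sketch of the new tail of dccp_check_req(); own_req is set by
	 * ->syn_recv_sock() when the child is hashed via inet_ehash_nolisten().
	 */
	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
							 req, &own_req);
	if (!child)
		goto listen_overflow;

	/* Unhashes the reqsk and queues the child to the accept queue, but
	 * only when own_req says this path created the winning child.
	 */
	return inet_csk_complete_hashdance(sk, child, req, own_req);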
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 0248e8a3460c..4ce912e691d0 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -390,7 +390,7 @@ int dccp_retransmit_skb(struct sock *sk)
 	return dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC));
 }
 
-struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
+struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst,
 				   struct request_sock *req)
 {
 	struct dccp_hdr *dh;
@@ -398,13 +398,18 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	const u32 dccp_header_size = sizeof(struct dccp_hdr) +
 				     sizeof(struct dccp_hdr_ext) +
 				     sizeof(struct dccp_hdr_response);
-	struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1,
-					   GFP_ATOMIC);
-	if (skb == NULL)
+	struct sk_buff *skb;
+
+	/* sk is marked const to clearly express we dont hold socket lock.
+	 * sock_wmalloc() will atomically change sk->sk_wmem_alloc,
+	 * it is safe to promote sk to non const.
+	 */
+	skb = sock_wmalloc((struct sock *)sk, MAX_DCCP_HEADER, 1,
+			   GFP_ATOMIC);
+	if (!skb)
 		return NULL;
 
-	/* Reserve space for headers. */
-	skb_reserve(skb, sk->sk_prot->max_header);
+	skb_reserve(skb, MAX_DCCP_HEADER);
 
 	skb_dst_set(skb, dst_clone(dst));
 
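The const qualifier on sk documents that dccp_make_response() runs without the socket lock; the cast back to non-const is confined to sock_wmalloc(), whose only write is an atomic update of sk_wmem_alloc. The same pattern, in miniature (illustrative helper, not from the tree):

/* Illustrative: a lockless sender that only ever touches sk atomics. */
static struct sk_buff *ctl_skb_alloc(const struct sock *sk, gfp_t gfp)
{
	/* Safe promotion: sock_wmalloc() only does an atomic charge of
	 * the skb against sk->sk_wmem_alloc; no locked state is touched.
	 */
	return sock_wmalloc((struct sock *)sk, MAX_DCCP_HEADER, 1, gfp);
}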
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index d8346d0eadeb..3d3fda05b32d 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 #include <linux/kfifo.h>
 #include <linux/vmalloc.h>
+#include <linux/time64.h>
 #include <linux/gfp.h>
 #include <net/net_namespace.h>
 
@@ -47,20 +48,20 @@ static struct {
 	struct kfifo	  fifo;
 	spinlock_t	  lock;
 	wait_queue_head_t wait;
-	struct timespec	  tstart;
+	struct timespec64 tstart;
 } dccpw;
 
 static void printl(const char *fmt, ...)
 {
 	va_list args;
 	int len;
-	struct timespec now;
+	struct timespec64 now;
 	char tbuf[256];
 
 	va_start(args, fmt);
-	getnstimeofday(&now);
+	getnstimeofday64(&now);
 
-	now = timespec_sub(now, dccpw.tstart);
+	now = timespec64_sub(now, dccpw.tstart);
 
 	len = sprintf(tbuf, "%lu.%06lu ",
 		      (unsigned long) now.tv_sec,
@@ -110,7 +111,7 @@ static struct jprobe dccp_send_probe = {
 static int dccpprobe_open(struct inode *inode, struct file *file)
 {
 	kfifo_reset(&dccpw.fifo);
-	getnstimeofday(&dccpw.tstart);
+	getnstimeofday64(&dccpw.tstart);
 	return 0;
 }
 
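struct timespec carries a 32-bit tv_sec on 32-bit architectures and overflows in 2038; timespec64 and the *64 accessors do not. The conversion pattern above, reduced to its essentials (hypothetical helper name):

/* Hypothetical helper showing the y2038-safe replacement pattern. */
static struct timespec64 elapsed_since(const struct timespec64 *tstart)
{
	struct timespec64 now;

	getnstimeofday64(&now);			/* was: getnstimeofday() */
	return timespec64_sub(now, *tstart);	/* was: timespec_sub()   */
}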
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 4507b188fc51..482730cd8a56 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -194,7 +194,7 @@ static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb)
 	return err;
 }
 
-static int dn_neigh_output_packet(struct sock *sk, struct sk_buff *skb)
+static int dn_neigh_output_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct dn_route *rt = (struct dn_route *)dst;
@@ -246,8 +246,9 @@ static int dn_long_output(struct neighbour *neigh, struct sock *sk,
 
 	skb_reset_network_header(skb);
 
-	return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb,
-		       NULL, neigh->dev, dn_neigh_output_packet);
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
+		       &init_net, sk, skb, NULL, neigh->dev,
+		       dn_neigh_output_packet);
 }
 
 /*
@@ -286,8 +287,9 @@ static int dn_short_output(struct neighbour *neigh, struct sock *sk,
 
 	skb_reset_network_header(skb);
 
-	return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb,
-		       NULL, neigh->dev, dn_neigh_output_packet);
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
+		       &init_net, sk, skb, NULL, neigh->dev,
+		       dn_neigh_output_packet);
 }
 
 /*
@@ -327,11 +329,12 @@ static int dn_phase3_output(struct neighbour *neigh, struct sock *sk,
 
 	skb_reset_network_header(skb);
 
-	return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb,
-		       NULL, neigh->dev, dn_neigh_output_packet);
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
+		       &init_net, sk, skb, NULL, neigh->dev,
+		       dn_neigh_output_packet);
 }
 
-int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb)
+int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct dn_route *rt = (struct dn_route *) dst;
@@ -375,7 +378,7 @@ void dn_neigh_pointopoint_hello(struct sk_buff *skb)
 /*
  * Ethernet router hello message received
  */
-int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb)
+int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data;
 
@@ -437,7 +440,7 @@ int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb)
 /*
  * Endnode hello message received
  */
-int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb)
+int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data;
 	struct neighbour *neigh;
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index a321eac9fd0c..7ac086d5c0c0 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -714,7 +714,8 @@ out:
 	return ret;
 }
 
-static int dn_nsp_rx_packet(struct sock *sk2, struct sk_buff *skb)
+static int dn_nsp_rx_packet(struct net *net, struct sock *sk2,
+			    struct sk_buff *skb)
 {
 	struct dn_skb_cb *cb = DN_SKB_CB(skb);
 	struct sock *sk = NULL;
@@ -814,8 +815,8 @@ free_out:
 
 int dn_nsp_rx(struct sk_buff *skb)
 {
-	return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, NULL, skb,
-		       skb->dev, NULL,
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN,
+		       &init_net, NULL, skb, skb->dev, NULL,
 		       dn_nsp_rx_packet);
 }
 
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 1aaa51ebbda6..849805e7af52 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -85,7 +85,7 @@ static void dn_nsp_send(struct sk_buff *skb)
 	if (dst) {
 try_again:
 		skb_dst_set(skb, dst);
-		dst_output(skb);
+		dst_output(&init_net, skb->sk, skb);
 		return;
 	}
 
@@ -582,7 +582,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg,
 	 * associations.
 	 */
 	skb_dst_set(skb, dst_clone(dst));
-	dst_output(skb);
+	dst_output(&init_net, skb->sk, skb);
 }
 
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 03227ffd19ce..607a14f20d88 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -512,7 +512,7 @@ static int dn_return_long(struct sk_buff *skb)
 *
 * Returns: result of input function if route is found, error code otherwise
 */
-static int dn_route_rx_packet(struct sock *sk, struct sk_buff *skb)
+static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dn_skb_cb *cb;
 	int err;
@@ -573,8 +573,8 @@ static int dn_route_rx_long(struct sk_buff *skb)
 	ptr++;
 	cb->hops = *ptr++; /* Visit Count */
 
-	return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb,
-		       skb->dev, NULL,
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
+		       &init_net, NULL, skb, skb->dev, NULL,
 		       dn_route_rx_packet);
 
 drop_it:
@@ -601,8 +601,8 @@ static int dn_route_rx_short(struct sk_buff *skb)
 	ptr += 2;
 	cb->hops = *ptr & 0x3f;
 
-	return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb,
-		       skb->dev, NULL,
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
+		       &init_net, NULL, skb, skb->dev, NULL,
 		       dn_route_rx_packet);
 
 drop_it:
@@ -610,7 +610,7 @@ drop_it:
 	return NET_RX_DROP;
 }
 
-static int dn_route_discard(struct sock *sk, struct sk_buff *skb)
+static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	/*
 	 * I know we drop the packet here, but thats considered success in
@@ -620,7 +620,7 @@ static int dn_route_discard(struct sock *sk, struct sk_buff *skb)
 	return NET_RX_SUCCESS;
 }
 
-static int dn_route_ptp_hello(struct sock *sk, struct sk_buff *skb)
+static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	dn_dev_hello(skb);
 	dn_neigh_pointopoint_hello(skb);
@@ -706,22 +706,22 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
 		switch (flags & DN_RT_CNTL_MSK) {
 		case DN_RT_PKT_HELO:
 			return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
-				       NULL, skb, skb->dev, NULL,
+				       &init_net, NULL, skb, skb->dev, NULL,
 				       dn_route_ptp_hello);
 
 		case DN_RT_PKT_L1RT:
 		case DN_RT_PKT_L2RT:
 			return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE,
-				       NULL, skb, skb->dev, NULL,
+				       &init_net, NULL, skb, skb->dev, NULL,
 				       dn_route_discard);
 		case DN_RT_PKT_ERTH:
 			return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
-				       NULL, skb, skb->dev, NULL,
+				       &init_net, NULL, skb, skb->dev, NULL,
 				       dn_neigh_router_hello);
 
 		case DN_RT_PKT_EEDH:
 			return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
-				       NULL, skb, skb->dev, NULL,
+				       &init_net, NULL, skb, skb->dev, NULL,
 				       dn_neigh_endnode_hello);
 		}
 	} else {
@@ -744,7 +744,7 @@ out:
 	return NET_RX_DROP;
 }
 
-static int dn_output(struct sock *sk, struct sk_buff *skb)
+static int dn_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct dn_route *rt = (struct dn_route *)dst;
@@ -770,8 +770,8 @@ static int dn_output(struct sock *sk, struct sk_buff *skb)
 		cb->rt_flags |= DN_RT_F_IE;
 	cb->hops = 0;
 
-	return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, sk, skb,
-		       NULL, dev,
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT,
+		       &init_net, sk, skb, NULL, dev,
 		       dn_to_neigh_output);
 
 error:
@@ -789,9 +789,7 @@ static int dn_forward(struct sk_buff *skb)
 	struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr);
 	struct dn_route *rt;
 	int header_len;
-#ifdef CONFIG_NETFILTER
 	struct net_device *dev = skb->dev;
-#endif
 
 	if (skb->pkt_type != PACKET_HOST)
 		goto drop;
@@ -819,8 +817,8 @@ static int dn_forward(struct sk_buff *skb)
 	if (rt->rt_flags & RTCF_DOREDIRECT)
 		cb->rt_flags |= DN_RT_F_IE;
 
-	return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, NULL, skb,
-		       dev, skb->dev,
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD,
+		       &init_net, NULL, skb, dev, skb->dev,
 		       dn_to_neigh_output);
 
 drop:
@@ -832,7 +830,7 @@ drop:
 * Used to catch bugs. This should never normally get
 * called.
 */
-static int dn_rt_bug_sk(struct sock *sk, struct sk_buff *skb)
+static int dn_rt_bug_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dn_skb_cb *cb = DN_SKB_CB(skb);
 
@@ -1469,7 +1467,7 @@ make_route:
 
 	rt->n = neigh;
 	rt->dst.lastuse = jiffies;
-	rt->dst.output = dn_rt_bug_sk;
+	rt->dst.output = dn_rt_bug_out;
 	switch (res.type) {
 	case RTN_UNICAST:
 		rt->dst.input = dn_forward;
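All of the DECnet churn above is one mechanical change: NF_HOOK() now takes the network namespace explicitly, and every okfn gains a struct net first argument instead of re-deriving it from skb or sk. DECnet is not namespace-aware, so its call sites pin &init_net. The shape of the new contract (illustrative okfn and call site):

/* Illustrative okfn with the new (net, sk, skb) signature. */
static int example_okfn(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	/* net arrives as an argument now; okfns used to dig it out of
	 * skb->dev or sk themselves.
	 */
	return 0;
}

	/* Matching call site; DECnet always runs in init_net: */
	NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT,
		&init_net, sk, skb, NULL, dev, example_okfn);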
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index af34fc9bdf69..85f2fdc360c2 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb)
 }
 
 
-static unsigned int dnrmg_hook(const struct nf_hook_ops *ops,
+static unsigned int dnrmg_hook(void *priv,
 			       struct sk_buff *skb,
 			       const struct nf_hook_state *state)
 {
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 31cd4fd75486..c79b85eb4d4c 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -122,7 +122,7 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
 				goto bad_option_value;
 
 			kdebug("dns error no. = %lu", derrno);
-			prep->type_data[0] = ERR_PTR(-derrno);
+			prep->payload.data[dns_key_error] = ERR_PTR(-derrno);
 			continue;
 		}
 
@@ -137,8 +137,8 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
 
 	/* don't cache the result if we're caching an error saying there's no
 	 * result */
-	if (prep->type_data[0]) {
-		kleave(" = 0 [h_error %ld]", PTR_ERR(prep->type_data[0]));
+	if (prep->payload.data[dns_key_error]) {
+		kleave(" = 0 [h_error %ld]", PTR_ERR(prep->payload.data[dns_key_error]));
 		return 0;
 	}
 
@@ -155,7 +155,7 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
 	memcpy(upayload->data, data, result_len);
 	upayload->data[result_len] = '\0';
 
-	prep->payload[0] = upayload;
+	prep->payload.data[dns_key_data] = upayload;
 	kleave(" = 0");
 	return 0;
 }
@@ -167,7 +167,7 @@ static void dns_resolver_free_preparse(struct key_preparsed_payload *prep)
 {
 	pr_devel("==>%s()\n", __func__);
 
-	kfree(prep->payload[0]);
+	kfree(prep->payload.data[dns_key_data]);
 }
 
 /*
@@ -223,10 +223,10 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data)
 */
 static void dns_resolver_describe(const struct key *key, struct seq_file *m)
 {
-	int err = key->type_data.x[0];
-
 	seq_puts(m, key->description);
 	if (key_is_instantiated(key)) {
+		int err = PTR_ERR(key->payload.data[dns_key_error]);
+
 		if (err)
 			seq_printf(m, ": %d", err);
 		else
@@ -241,8 +241,10 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m)
 static long dns_resolver_read(const struct key *key,
 			      char __user *buffer, size_t buflen)
 {
-	if (key->type_data.x[0])
-		return key->type_data.x[0];
+	int err = PTR_ERR(key->payload.data[dns_key_error]);
+
+	if (err)
+		return err;
 
 	return user_read(key, buffer, buflen);
 }
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 39d2c39bdf87..4677b6fa6dda 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -70,7 +70,7 @@ int dns_query(const char *type, const char *name, size_t namelen,
 	      const char *options, char **_result, time_t *_expiry)
 {
 	struct key *rkey;
-	struct user_key_payload *upayload;
+	const struct user_key_payload *upayload;
 	const struct cred *saved_cred;
 	size_t typelen, desclen;
 	char *desc, *cp;
@@ -137,12 +137,11 @@ int dns_query(const char *type, const char *name, size_t namelen,
 		goto put;
 
 	/* If the DNS server gave an error, return that to the caller */
-	ret = rkey->type_data.x[0];
+	ret = PTR_ERR(rkey->payload.data[dns_key_error]);
 	if (ret)
 		goto put;
 
-	upayload = rcu_dereference_protected(rkey->payload.data,
-					     lockdep_is_held(&rkey->sem));
+	upayload = user_key_payload(rkey);
 	len = upayload->datalen;
 
 	ret = -ENOMEM;
diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h
index 7af1ed39c009..0c570d40e4d6 100644
--- a/net/dns_resolver/internal.h
+++ b/net/dns_resolver/internal.h
@@ -23,6 +23,14 @@
 #include <linux/sched.h>
 
 /*
+ * Layout of key payload words.
+ */
+enum {
+	dns_key_data,
+	dns_key_error,
+};
+
+/*
  * dns_key.c
  */
 extern const struct cred *dns_resolver_cache;
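The old type_data/payload fields of struct key give way to a single payload.data[] array indexed by per-key-type enum constants, with errors stored as ERR_PTR-encoded pointers. The read side, as used throughout dns_key.c (illustrative helper):

/* Illustrative: recovering the cached lookup error from a dns key. */
static long dns_key_cached_error(const struct key *key)
{
	/* A NULL slot decodes to 0, i.e. "no error was cached". */
	return PTR_ERR(key->payload.data[dns_key_error]);
}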
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index c59fa5d9c22c..1eba07feb34a 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -22,6 +22,7 @@
 #include <linux/of_platform.h>
 #include <linux/of_net.h>
 #include <linux/sysfs.h>
+#include <linux/phy_fixed.h>
 #include "dsa_priv.h"
 
 char dsa_driver_version[] = "0.1";
@@ -305,7 +306,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
 	if (ret < 0)
 		goto out;
 
-	ds->slave_mii_bus = mdiobus_alloc();
+	ds->slave_mii_bus = devm_mdiobus_alloc(parent);
 	if (ds->slave_mii_bus == NULL) {
 		ret = -ENOMEM;
 		goto out;
@@ -314,7 +315,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
 
 	ret = mdiobus_register(ds->slave_mii_bus);
 	if (ret < 0)
-		goto out_free;
+		goto out;
 
 
 	/*
@@ -326,8 +327,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
 
 		ret = dsa_slave_create(ds, parent, i, pd->port_names[i]);
 		if (ret < 0) {
-			netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s)\n",
-				   index, i, pd->port_names[i]);
+			netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
+				   index, i, pd->port_names[i], ret);
 			ret = 0;
 		}
 	}
@@ -367,10 +368,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
 
 	return ret;
 
-out_free:
-	mdiobus_free(ds->slave_mii_bus);
 out:
-	kfree(ds);
 	return ret;
 }
 
@@ -400,7 +398,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
 	/*
 	 * Allocate and initialise switch state.
 	 */
-	ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL);
+	ds = devm_kzalloc(parent, sizeof(*ds) + drv->priv_size, GFP_KERNEL);
 	if (ds == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -420,10 +418,47 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
 
 static void dsa_switch_destroy(struct dsa_switch *ds)
 {
+	struct device_node *port_dn;
+	struct phy_device *phydev;
+	struct dsa_chip_data *cd = ds->pd;
+	int port;
+
 #ifdef CONFIG_NET_DSA_HWMON
 	if (ds->hwmon_dev)
 		hwmon_device_unregister(ds->hwmon_dev);
 #endif
+
+	/* Disable configuration of the CPU and DSA ports */
+	for (port = 0; port < DSA_MAX_PORTS; port++) {
+		if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
+			continue;
+
+		port_dn = cd->port_dn[port];
+		if (of_phy_is_fixed_link(port_dn)) {
+			phydev = of_phy_find_device(port_dn);
+			if (phydev) {
+				int addr = phydev->addr;
+
+				phy_device_free(phydev);
+				of_node_put(port_dn);
+				fixed_phy_del(addr);
+			}
+		}
+	}
+
+	/* Destroy network devices for physical switch ports. */
+	for (port = 0; port < DSA_MAX_PORTS; port++) {
+		if (!(ds->phys_port_mask & (1 << port)))
+			continue;
+
+		if (!ds->ports[port])
+			continue;
+
+		unregister_netdev(ds->ports[port]);
+		free_netdev(ds->ports[port]);
+	}
+
+	mdiobus_unregister(ds->slave_mii_bus);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -802,10 +837,11 @@ static inline void dsa_of_remove(struct device *dev)
 }
 #endif
 
-static void dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
-			  struct device *parent, struct dsa_platform_data *pd)
+static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
+			 struct device *parent, struct dsa_platform_data *pd)
 {
 	int i;
+	unsigned configured = 0;
 
 	dst->pd = pd;
 	dst->master_netdev = dev;
@@ -825,9 +861,17 @@ static void dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
 		dst->ds[i] = ds;
 		if (ds->drv->poll_link != NULL)
 			dst->link_poll_needed = 1;
+
+		++configured;
 	}
 
 	/*
+	 * If no switch was found, exit cleanly
+	 */
+	if (!configured)
+		return -EPROBE_DEFER;
+
+	/*
 	 * If we use a tagging format that doesn't have an ethertype
 	 * field, make sure that all packets from this point on get
 	 * sent to the tag format's receive function.
@@ -843,6 +887,8 @@ static void dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
 		dst->link_poll_timer.expires = round_jiffies(jiffies + HZ);
 		add_timer(&dst->link_poll_timer);
 	}
+
+	return 0;
 }
 
 static int dsa_probe(struct platform_device *pdev)
@@ -883,7 +929,7 @@ static int dsa_probe(struct platform_device *pdev)
 		goto out;
 	}
 
-	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+	dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL);
 	if (dst == NULL) {
 		dev_put(dev);
 		ret = -ENOMEM;
@@ -892,7 +938,9 @@ static int dsa_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, dst);
 
-	dsa_setup_dst(dst, dev, &pdev->dev, pd);
+	ret = dsa_setup_dst(dst, dev, &pdev->dev, pd);
+	if (ret)
+		goto out;
 
 	return 0;
 
@@ -914,7 +962,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
 	for (i = 0; i < dst->pd->nr_chips; i++) {
 		struct dsa_switch *ds = dst->ds[i];
 
-		if (ds != NULL)
+		if (ds)
 			dsa_switch_destroy(ds);
 	}
 }
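The kzalloc()/mdiobus_alloc() to devm_* conversions are what let out_free: and the kfree() calls disappear: anything allocated against the parent device is released automatically on probe failure or unbind. The pattern in miniature (illustrative probe, not from the tree):

/* Illustrative devm-style probe: no kfree() on any error path. */
static int example_probe(struct platform_device *pdev)
{
	struct dsa_switch_tree *dst;

	dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;	/* nothing to unwind, devm owns it */

	platform_set_drvdata(pdev, dst);
	return 0;
}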
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 7d91f4612ac0..7bc787b095c8 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -242,17 +242,15 @@ static int dsa_bridge_check_vlan_range(struct dsa_switch *ds,
 }
 
 static int dsa_slave_port_vlan_add(struct net_device *dev,
-				   struct switchdev_obj *obj)
+				   const struct switchdev_obj_port_vlan *vlan,
+				   struct switchdev_trans *trans)
 {
-	struct switchdev_obj_vlan *vlan = &obj->u.vlan;
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
-	u16 vid;
 	int err;
 
-	switch (obj->trans) {
-	case SWITCHDEV_TRANS_PREPARE:
-		if (!ds->drv->port_vlan_add || !ds->drv->port_pvid_set)
+	if (switchdev_trans_ph_prepare(trans)) {
+		if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add)
 			return -EOPNOTSUPP;
 
 		/* If the requested port doesn't belong to the same bridge as
@@ -263,50 +261,35 @@ static int dsa_slave_port_vlan_add(struct net_device *dev,
 					       vlan->vid_end);
 		if (err)
 			return err;
-		break;
-	case SWITCHDEV_TRANS_COMMIT:
-		for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
-			err = ds->drv->port_vlan_add(ds, p->port, vid,
-						     vlan->flags &
-						     BRIDGE_VLAN_INFO_UNTAGGED);
-			if (!err && vlan->flags & BRIDGE_VLAN_INFO_PVID)
-				err = ds->drv->port_pvid_set(ds, p->port, vid);
-			if (err)
-				return err;
-		}
-		break;
-	default:
-		return -EOPNOTSUPP;
+
+		err = ds->drv->port_vlan_prepare(ds, p->port, vlan, trans);
+		if (err)
+			return err;
+	} else {
+		err = ds->drv->port_vlan_add(ds, p->port, vlan, trans);
+		if (err)
+			return err;
 	}
 
 	return 0;
 }
 
 static int dsa_slave_port_vlan_del(struct net_device *dev,
-				   struct switchdev_obj *obj)
+				   const struct switchdev_obj_port_vlan *vlan)
 {
-	struct switchdev_obj_vlan *vlan = &obj->u.vlan;
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
-	u16 vid;
-	int err;
 
 	if (!ds->drv->port_vlan_del)
 		return -EOPNOTSUPP;
 
-	for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
-		err = ds->drv->port_vlan_del(ds, p->port, vid);
-		if (err)
-			return err;
-	}
-
-	return 0;
+	return ds->drv->port_vlan_del(ds, p->port, vlan);
 }
 
 static int dsa_slave_port_vlan_dump(struct net_device *dev,
-				    struct switchdev_obj *obj)
+				    struct switchdev_obj_port_vlan *vlan,
+				    switchdev_obj_dump_cb_t *cb)
 {
-	struct switchdev_obj_vlan *vlan = &obj->u.vlan;
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
 	DECLARE_BITMAP(members, DSA_MAX_PORTS);
@@ -338,7 +321,7 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev,
 		if (test_bit(p->port, untagged))
 			vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
 
-		err = obj->cb(dev, obj);
+		err = cb(&vlan->obj);
 		if (err)
 			break;
 	}
@@ -347,65 +330,48 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev,
 }
 
 static int dsa_slave_port_fdb_add(struct net_device *dev,
-				  struct switchdev_obj *obj)
+				  const struct switchdev_obj_port_fdb *fdb,
+				  struct switchdev_trans *trans)
 {
-	struct switchdev_obj_fdb *fdb = &obj->u.fdb;
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
-	int ret = -EOPNOTSUPP;
+	int ret;
 
-	if (obj->trans == SWITCHDEV_TRANS_PREPARE)
-		ret = ds->drv->port_fdb_add ? 0 : -EOPNOTSUPP;
-	else if (obj->trans == SWITCHDEV_TRANS_COMMIT)
-		ret = ds->drv->port_fdb_add(ds, p->port, fdb->addr, fdb->vid);
+	if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add)
+		return -EOPNOTSUPP;
+
+	if (switchdev_trans_ph_prepare(trans))
+		ret = ds->drv->port_fdb_prepare(ds, p->port, fdb, trans);
+	else
+		ret = ds->drv->port_fdb_add(ds, p->port, fdb, trans);
 
 	return ret;
 }
 
 static int dsa_slave_port_fdb_del(struct net_device *dev,
-				  struct switchdev_obj *obj)
+				  const struct switchdev_obj_port_fdb *fdb)
 {
-	struct switchdev_obj_fdb *fdb = &obj->u.fdb;
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
 	int ret = -EOPNOTSUPP;
 
 	if (ds->drv->port_fdb_del)
-		ret = ds->drv->port_fdb_del(ds, p->port, fdb->addr, fdb->vid);
+		ret = ds->drv->port_fdb_del(ds, p->port, fdb);
 
 	return ret;
 }
 
 static int dsa_slave_port_fdb_dump(struct net_device *dev,
-				   struct switchdev_obj *obj)
+				   struct switchdev_obj_port_fdb *fdb,
+				   switchdev_obj_dump_cb_t *cb)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
-	unsigned char addr[ETH_ALEN] = { 0 };
-	u16 vid = 0;
-	int ret;
-
-	if (!ds->drv->port_fdb_getnext)
-		return -EOPNOTSUPP;
-
-	for (;;) {
-		bool is_static;
 
-		ret = ds->drv->port_fdb_getnext(ds, p->port, addr, &vid,
-						&is_static);
-		if (ret < 0)
-			break;
-
-		obj->u.fdb.addr = addr;
-		obj->u.fdb.vid = vid;
-		obj->u.fdb.ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
-
-		ret = obj->cb(dev, obj);
-		if (ret < 0)
-			break;
-	}
+	if (ds->drv->port_fdb_dump)
+		return ds->drv->port_fdb_dump(ds, p->port, fdb, cb);
 
-	return ret == -ENOENT ? 0 : ret;
+	return -EOPNOTSUPP;
 }
 
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@ -456,15 +422,16 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state)
 }
 
 static int dsa_slave_port_attr_set(struct net_device *dev,
-				   struct switchdev_attr *attr)
+				   const struct switchdev_attr *attr,
+				   struct switchdev_trans *trans)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
 	int ret;
 
 	switch (attr->id) {
-	case SWITCHDEV_ATTR_PORT_STP_STATE:
-		if (attr->trans == SWITCHDEV_TRANS_PREPARE)
+	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+		if (switchdev_trans_ph_prepare(trans))
 			ret = ds->drv->port_stp_update ? 0 : -EOPNOTSUPP;
 		else
 			ret = ds->drv->port_stp_update(ds, p->port,
@@ -479,7 +446,8 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
 }
 
 static int dsa_slave_port_obj_add(struct net_device *dev,
-				  struct switchdev_obj *obj)
+				  const struct switchdev_obj *obj,
+				  struct switchdev_trans *trans)
 {
 	int err;
 
@@ -489,11 +457,15 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 	 */
 
 	switch (obj->id) {
-	case SWITCHDEV_OBJ_PORT_FDB:
-		err = dsa_slave_port_fdb_add(dev, obj);
+	case SWITCHDEV_OBJ_ID_PORT_FDB:
+		err = dsa_slave_port_fdb_add(dev,
+					     SWITCHDEV_OBJ_PORT_FDB(obj),
+					     trans);
 		break;
-	case SWITCHDEV_OBJ_PORT_VLAN:
-		err = dsa_slave_port_vlan_add(dev, obj);
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		err = dsa_slave_port_vlan_add(dev,
+					      SWITCHDEV_OBJ_PORT_VLAN(obj),
+					      trans);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -504,16 +476,18 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 }
 
 static int dsa_slave_port_obj_del(struct net_device *dev,
-				  struct switchdev_obj *obj)
+				  const struct switchdev_obj *obj)
 {
 	int err;
 
 	switch (obj->id) {
-	case SWITCHDEV_OBJ_PORT_FDB:
-		err = dsa_slave_port_fdb_del(dev, obj);
+	case SWITCHDEV_OBJ_ID_PORT_FDB:
+		err = dsa_slave_port_fdb_del(dev,
+					     SWITCHDEV_OBJ_PORT_FDB(obj));
 		break;
-	case SWITCHDEV_OBJ_PORT_VLAN:
-		err = dsa_slave_port_vlan_del(dev, obj);
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		err = dsa_slave_port_vlan_del(dev,
+					      SWITCHDEV_OBJ_PORT_VLAN(obj));
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -524,16 +498,21 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
 }
 
 static int dsa_slave_port_obj_dump(struct net_device *dev,
-				   struct switchdev_obj *obj)
+				   struct switchdev_obj *obj,
+				   switchdev_obj_dump_cb_t *cb)
 {
 	int err;
 
 	switch (obj->id) {
-	case SWITCHDEV_OBJ_PORT_FDB:
-		err = dsa_slave_port_fdb_dump(dev, obj);
+	case SWITCHDEV_OBJ_ID_PORT_FDB:
+		err = dsa_slave_port_fdb_dump(dev,
+					      SWITCHDEV_OBJ_PORT_FDB(obj),
+					      cb);
 		break;
-	case SWITCHDEV_OBJ_PORT_VLAN:
-		err = dsa_slave_port_vlan_dump(dev, obj);
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		err = dsa_slave_port_vlan_dump(dev,
+					       SWITCHDEV_OBJ_PORT_VLAN(obj),
+					       cb);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -587,7 +566,7 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
 	struct dsa_switch *ds = p->parent;
 
 	switch (attr->id) {
-	case SWITCHDEV_ATTR_PORT_PARENT_ID:
+	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
 		attr->u.ppid.id_len = sizeof(ds->index);
 		memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len);
 		break;
@@ -967,6 +946,10 @@ static const struct switchdev_ops dsa_slave_switchdev_ops = {
 	.switchdev_port_obj_dump	= dsa_slave_port_obj_dump,
 };
 
+static struct device_type dsa_type = {
+	.name	= "dsa",
+};
+
 static void dsa_slave_adjust_link(struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -1015,8 +998,10 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p,
 	struct dsa_switch *ds = p->parent;
 
 	p->phy = ds->slave_mii_bus->phy_map[addr];
-	if (!p->phy)
+	if (!p->phy) {
+		netdev_err(slave_dev, "no phy at %d\n", addr);
 		return -ENODEV;
+	}
 
 	/* Use already configured phy mode */
 	if (p->phy_interface == PHY_INTERFACE_MODE_NA)
@@ -1050,7 +1035,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
 	 */
 	ret = of_phy_register_fixed_link(port_dn);
 	if (ret) {
-		netdev_err(slave_dev, "failed to register fixed PHY\n");
+		netdev_err(slave_dev, "failed to register fixed PHY: %d\n", ret);
 		return ret;
 	}
 	phy_is_fixed = true;
@@ -1061,17 +1046,20 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
 		phy_flags = ds->drv->get_phy_flags(ds, p->port);
 
 	if (phy_dn) {
-		ret = of_mdio_parse_addr(&slave_dev->dev, phy_dn);
+		int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn);
+
 		/* If this PHY address is part of phys_mii_mask, which means
 		 * that we need to divert reads and writes to/from it, then we
 		 * want to bind this device using the slave MII bus created by
 		 * DSA to make that happen.
 		 */
-		if (!phy_is_fixed && ret >= 0 &&
-		    (ds->phys_mii_mask & (1 << ret))) {
-			ret = dsa_slave_phy_connect(p, slave_dev, ret);
-			if (ret)
+		if (!phy_is_fixed && phy_id >= 0 &&
+		    (ds->phys_mii_mask & (1 << phy_id))) {
+			ret = dsa_slave_phy_connect(p, slave_dev, phy_id);
+			if (ret) {
+				netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret);
 				return ret;
+			}
 		} else {
 			p->phy = of_phy_connect(slave_dev, phy_dn,
 						dsa_slave_adjust_link,
@@ -1088,8 +1076,10 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
 	 */
 	if (!p->phy) {
 		ret = dsa_slave_phy_connect(p, slave_dev, p->port);
-		if (ret)
+		if (ret) {
+			netdev_err(slave_dev, "failed to connect to port %d: %d\n", p->port, ret);
 			return ret;
+		}
 	} else {
 		netdev_info(slave_dev, "attached PHY at address %d [%s]\n",
 			    p->phy->addr, p->phy->drv->name);
@@ -1155,6 +1145,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 	slave_dev->priv_flags |= IFF_NO_QUEUE;
 	slave_dev->netdev_ops = &dsa_slave_netdev_ops;
 	slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
+	SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
 
 	netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
 				 NULL);
@@ -1200,6 +1191,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 
 	ret = dsa_slave_phy_setup(p, slave_dev);
 	if (ret) {
+		netdev_err(master, "error %d setting up slave phy\n", ret);
 		free_netdev(slave_dev);
 		return ret;
 	}
@@ -1253,7 +1245,7 @@ int dsa_slave_netdevice_event(struct notifier_block *unused,
 		goto out;
 
 	err = dsa_slave_master_changed(dev);
-	if (err)
+	if (err && err != -EOPNOTSUPP)
 		netdev_warn(dev, "failed to reflect master change\n");
 
 	break;
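The SWITCHDEV_TRANS_PREPARE/COMMIT switch statements become a proper two-phase transaction: switchdev_trans_ph_prepare() selects the phase, drivers validate and reserve in prepare (where failing is allowed), and apply in commit (where it is not). Skeleton of the contract as the new DSA code uses it (illustrative wrapper; the ops are the ones from the hunks):

/* Illustrative two-phase wrapper around the DSA vlan ops. */
static int example_vlan_op(struct dsa_switch *ds, int port,
			   const struct switchdev_obj_port_vlan *vlan,
			   struct switchdev_trans *trans)
{
	if (switchdev_trans_ph_prepare(trans))
		/* Phase 1: check support, validate, reserve; may fail. */
		return ds->drv->port_vlan_prepare(ds, port, vlan, trans);

	/* Phase 2: commit; an error here indicates a driver bug. */
	return ds->drv->port_vlan_add(ds, port, vlan, trans);
}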
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index d850fdc828f9..9e63f252a89e 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -127,7 +127,7 @@ u32 eth_get_headlen(void *data, unsigned int len)
 	struct flow_keys keys;
 
 	/* this should never happen, but better safe than sorry */
-	if (len < sizeof(*eth))
+	if (unlikely(len < sizeof(*eth)))
 		return len;
 
 	/* parse any remaining L2/L3 headers, check for L4 */
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index ea339fa94c27..b4e17a7c0df0 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -7,6 +7,15 @@
 #include <net/inet_frag.h>
 #include <net/6lowpan.h>
 
+typedef unsigned __bitwise__ lowpan_rx_result;
+#define RX_CONTINUE		((__force lowpan_rx_result) 0u)
+#define RX_DROP_UNUSABLE	((__force lowpan_rx_result) 1u)
+#define RX_DROP			((__force lowpan_rx_result) 2u)
+#define RX_QUEUED		((__force lowpan_rx_result) 3u)
+
+#define LOWPAN_DISPATCH_FRAG1	0xc0
+#define LOWPAN_DISPATCH_FRAGN	0xe0
+
 struct lowpan_create_arg {
 	u16 tag;
 	u16 d_size;
@@ -40,7 +49,7 @@ static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
 
 /* private device info */
 struct lowpan_dev_info {
-	struct net_device	*real_dev; /* real WPAN device ptr */
+	struct net_device	*wdev; /* wpan device ptr */
 	u16			fragment_tag;
 };
 
@@ -62,4 +71,7 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *dev,
 			 const void *_saddr, unsigned int len);
 netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev);
 
+int lowpan_iphc_decompress(struct sk_buff *skb);
+lowpan_rx_result lowpan_rx_h_ipv6(struct sk_buff *skb);
+
 #endif /* __IEEE802154_6LOWPAN_I_H__ */
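lowpan_rx_result is a sparse-checked (__bitwise) result type in the style of mac80211's ieee80211_rx_result, so handlers cannot silently mix it with plain integers. How a receive handler is expected to use it (illustrative handler; the typedef, values and dispatch constants are from the header above, and the 5-bit dispatch mask is an assumption based on RFC 4944 framing):

/* Illustrative rx handler using the new result type. */
static lowpan_rx_result example_rx_h_frag(struct sk_buff *skb)
{
	u8 dispatch = skb->data[0] & 0xf8;	/* top 5 bits: dispatch type */

	if (dispatch != LOWPAN_DISPATCH_FRAG1 &&
	    dispatch != LOWPAN_DISPATCH_FRAGN)
		return RX_CONTINUE;		/* not ours, try next handler */

	/* ... hand off to reassembly here ... */
	return RX_QUEUED;
}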
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 953b1c49f5d1..20c49c724ba0 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -61,7 +61,7 @@ static struct header_ops lowpan_header_ops = {
 static struct lock_class_key lowpan_tx_busylock;
 static struct lock_class_key lowpan_netdev_xmit_lock_key;
 
-static void lowpan_set_lockdep_class_one(struct net_device *dev,
+static void lowpan_set_lockdep_class_one(struct net_device *ldev,
 					 struct netdev_queue *txq,
 					 void *_unused)
 {
@@ -69,35 +69,47 @@ static void lowpan_set_lockdep_class_one(struct net_device *dev,
 			  &lowpan_netdev_xmit_lock_key);
 }
 
-static int lowpan_dev_init(struct net_device *dev)
+static int lowpan_dev_init(struct net_device *ldev)
 {
-	netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL);
-	dev->qdisc_tx_busylock = &lowpan_tx_busylock;
+	netdev_for_each_tx_queue(ldev, lowpan_set_lockdep_class_one, NULL);
+	ldev->qdisc_tx_busylock = &lowpan_tx_busylock;
+	return 0;
+}
+
+static int lowpan_open(struct net_device *dev)
+{
+	if (!open_count)
+		lowpan_rx_init();
+	open_count++;
+	return 0;
+}
+
+static int lowpan_stop(struct net_device *dev)
+{
+	open_count--;
+	if (!open_count)
+		lowpan_rx_exit();
 	return 0;
 }
 
 static const struct net_device_ops lowpan_netdev_ops = {
 	.ndo_init		= lowpan_dev_init,
 	.ndo_start_xmit		= lowpan_xmit,
+	.ndo_open		= lowpan_open,
+	.ndo_stop		= lowpan_stop,
 };
 
-static void lowpan_setup(struct net_device *dev)
+static void lowpan_setup(struct net_device *ldev)
 {
-	dev->addr_len		= IEEE802154_ADDR_LEN;
-	memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN);
-	dev->type		= ARPHRD_6LOWPAN;
-	/* Frame Control + Sequence Number + Address fields + Security Header */
-	dev->hard_header_len	= 2 + 1 + 20 + 14;
-	dev->needed_tailroom	= 2; /* FCS */
-	dev->mtu		= IPV6_MIN_MTU;
-	dev->priv_flags		|= IFF_NO_QUEUE;
-	dev->flags		= IFF_BROADCAST | IFF_MULTICAST;
-	dev->watchdog_timeo	= 0;
-
-	dev->netdev_ops		= &lowpan_netdev_ops;
-	dev->header_ops		= &lowpan_header_ops;
-	dev->destructor		= free_netdev;
-	dev->features		|= NETIF_F_NETNS_LOCAL;
+	memset(ldev->broadcast, 0xff, IEEE802154_ADDR_LEN);
+	/* We need an ipv6hdr as minimum len when calling xmit */
+	ldev->hard_header_len	= sizeof(struct ipv6hdr);
+	ldev->flags		= IFF_BROADCAST | IFF_MULTICAST;
+
+	ldev->netdev_ops	= &lowpan_netdev_ops;
+	ldev->header_ops	= &lowpan_header_ops;
+	ldev->destructor	= free_netdev;
+	ldev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
 static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -109,10 +121,10 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[])
 	return 0;
 }
 
-static int lowpan_newlink(struct net *src_net, struct net_device *dev,
+static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
 			  struct nlattr *tb[], struct nlattr *data[])
 {
-	struct net_device *real_dev;
+	struct net_device *wdev;
 	int ret;
 
 	ASSERT_RTNL();
@@ -120,58 +132,56 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
 	pr_debug("adding new link\n");
 
 	if (!tb[IFLA_LINK] ||
-	    !net_eq(dev_net(dev), &init_net))
+	    !net_eq(dev_net(ldev), &init_net))
 		return -EINVAL;
-	/* find and hold real wpan device */
-	real_dev = dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK]));
-	if (!real_dev)
+	/* find and hold wpan device */
+	wdev = dev_get_by_index(dev_net(ldev), nla_get_u32(tb[IFLA_LINK]));
+	if (!wdev)
 		return -ENODEV;
-	if (real_dev->type != ARPHRD_IEEE802154) {
-		dev_put(real_dev);
+	if (wdev->type != ARPHRD_IEEE802154) {
+		dev_put(wdev);
 		return -EINVAL;
 	}
 
-	if (real_dev->ieee802154_ptr->lowpan_dev) {
-		dev_put(real_dev);
+	if (wdev->ieee802154_ptr->lowpan_dev) {
+		dev_put(wdev);
 		return -EBUSY;
 	}
 
-	lowpan_dev_info(dev)->real_dev = real_dev;
+	lowpan_dev_info(ldev)->wdev = wdev;
 	/* Set the lowpan hardware address to the wpan hardware address. */
-	memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN);
+	memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN);
+	/* We need headroom for possible wpan_dev_hard_header call and tailroom
143 lowpan_netdev_setup(dev, LOWPAN_LLTYPE_IEEE802154); 155 * for encryption/fcs handling. The lowpan interface will replace
144 156 * the IPv6 header with 6LoWPAN header. At worst case the 6LoWPAN
145 ret = register_netdevice(dev); 157 * header has LOWPAN_IPHC_MAX_HEADER_LEN more bytes than the IPv6
158 * header.
159 */
160 ldev->needed_headroom = LOWPAN_IPHC_MAX_HEADER_LEN +
161 wdev->needed_headroom;
162 ldev->needed_tailroom = wdev->needed_tailroom;
163
164 lowpan_netdev_setup(ldev, LOWPAN_LLTYPE_IEEE802154);
165
166 ret = register_netdevice(ldev);
146 if (ret < 0) { 167 if (ret < 0) {
147 dev_put(real_dev); 168 dev_put(wdev);
148 return ret; 169 return ret;
149 } 170 }
150 171
151 real_dev->ieee802154_ptr->lowpan_dev = dev; 172 wdev->ieee802154_ptr->lowpan_dev = ldev;
152 if (!open_count)
153 lowpan_rx_init();
154
155 open_count++;
156
157 return 0; 173 return 0;
158} 174}
159 175
160static void lowpan_dellink(struct net_device *dev, struct list_head *head) 176static void lowpan_dellink(struct net_device *ldev, struct list_head *head)
161{ 177{
162 struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); 178 struct net_device *wdev = lowpan_dev_info(ldev)->wdev;
163 struct net_device *real_dev = lowpan_dev->real_dev;
164 179
165 ASSERT_RTNL(); 180 ASSERT_RTNL();
166 181
167 open_count--; 182 wdev->ieee802154_ptr->lowpan_dev = NULL;
168 183 unregister_netdevice(ldev);
169 if (!open_count) 184 dev_put(wdev);
170 lowpan_rx_exit();
171
172 real_dev->ieee802154_ptr->lowpan_dev = NULL;
173 unregister_netdevice(dev);
174 dev_put(real_dev);
175} 185}
176 186
177static struct rtnl_link_ops lowpan_link_ops __read_mostly = { 187static struct rtnl_link_ops lowpan_link_ops __read_mostly = {
@@ -196,9 +206,9 @@ static inline void lowpan_netlink_fini(void)
196static int lowpan_device_event(struct notifier_block *unused, 206static int lowpan_device_event(struct notifier_block *unused,
197 unsigned long event, void *ptr) 207 unsigned long event, void *ptr)
198{ 208{
199 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 209 struct net_device *wdev = netdev_notifier_info_to_dev(ptr);
200 210
201 if (dev->type != ARPHRD_IEEE802154) 211 if (wdev->type != ARPHRD_IEEE802154)
202 goto out; 212 goto out;
203 213
204 switch (event) { 214 switch (event) {
@@ -207,8 +217,8 @@ static int lowpan_device_event(struct notifier_block *unused,
 207 * also delete possible lowpan interfaces which belong 217 * also delete possible lowpan interfaces which belong
208 * to the wpan interface. 218 * to the wpan interface.
209 */ 219 */
210 if (dev->ieee802154_ptr && dev->ieee802154_ptr->lowpan_dev) 220 if (wdev->ieee802154_ptr->lowpan_dev)
211 lowpan_dellink(dev->ieee802154_ptr->lowpan_dev, NULL); 221 lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL);
212 break; 222 break;
213 default: 223 default:
214 break; 224 break;
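Editor's note: moving open_count out of newlink/dellink and into ndo_open/ndo_stop above means the 802.15.4 packet handler is only registered while at least one lowpan interface is actually up. A standalone model of that first-open/last-stop refcount follows; all names are hypothetical, not the kernel API.

#include <stdio.h>

static int open_count;

static void rx_init(void) { puts("register 802.15.4 packet handler"); }
static void rx_exit(void) { puts("unregister 802.15.4 packet handler"); }

static int lowpan_open(void)
{
	if (!open_count)
		rx_init();
	open_count++;
	return 0;
}

static int lowpan_stop(void)
{
	open_count--;
	if (!open_count)
		rx_exit();
	return 0;
}

int main(void)
{
	lowpan_open();	/* first interface up: handler registered */
	lowpan_open();	/* second interface up: no-op */
	lowpan_stop();
	lowpan_stop();	/* last interface down: handler unregistered */
	return 0;
}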
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 214d44aef35b..6b437e8760d3 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -32,21 +32,10 @@
32 32
33static const char lowpan_frags_cache_name[] = "lowpan-frags"; 33static const char lowpan_frags_cache_name[] = "lowpan-frags";
34 34
35struct lowpan_frag_info {
36 u16 d_tag;
37 u16 d_size;
38 u8 d_offset;
39};
40
41static struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb)
42{
43 return (struct lowpan_frag_info *)skb->cb;
44}
45
46static struct inet_frags lowpan_frags; 35static struct inet_frags lowpan_frags;
47 36
48static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, 37static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
49 struct sk_buff *prev, struct net_device *dev); 38 struct sk_buff *prev, struct net_device *ldev);
50 39
51static unsigned int lowpan_hash_frag(u16 tag, u16 d_size, 40static unsigned int lowpan_hash_frag(u16 tag, u16 d_size,
52 const struct ieee802154_addr *saddr, 41 const struct ieee802154_addr *saddr,
@@ -111,7 +100,7 @@ out:
111} 100}
112 101
113static inline struct lowpan_frag_queue * 102static inline struct lowpan_frag_queue *
114fq_find(struct net *net, const struct lowpan_frag_info *frag_info, 103fq_find(struct net *net, const struct lowpan_802154_cb *cb,
115 const struct ieee802154_addr *src, 104 const struct ieee802154_addr *src,
116 const struct ieee802154_addr *dst) 105 const struct ieee802154_addr *dst)
117{ 106{
@@ -121,12 +110,12 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
121 struct netns_ieee802154_lowpan *ieee802154_lowpan = 110 struct netns_ieee802154_lowpan *ieee802154_lowpan =
122 net_ieee802154_lowpan(net); 111 net_ieee802154_lowpan(net);
123 112
124 arg.tag = frag_info->d_tag; 113 arg.tag = cb->d_tag;
125 arg.d_size = frag_info->d_size; 114 arg.d_size = cb->d_size;
126 arg.src = src; 115 arg.src = src;
127 arg.dst = dst; 116 arg.dst = dst;
128 117
129 hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst); 118 hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst);
130 119
131 q = inet_frag_find(&ieee802154_lowpan->frags, 120 q = inet_frag_find(&ieee802154_lowpan->frags,
132 &lowpan_frags, &arg, hash); 121 &lowpan_frags, &arg, hash);
@@ -138,17 +127,17 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
138} 127}
139 128
140static int lowpan_frag_queue(struct lowpan_frag_queue *fq, 129static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
141 struct sk_buff *skb, const u8 frag_type) 130 struct sk_buff *skb, u8 frag_type)
142{ 131{
143 struct sk_buff *prev, *next; 132 struct sk_buff *prev, *next;
144 struct net_device *dev; 133 struct net_device *ldev;
145 int end, offset; 134 int end, offset;
146 135
147 if (fq->q.flags & INET_FRAG_COMPLETE) 136 if (fq->q.flags & INET_FRAG_COMPLETE)
148 goto err; 137 goto err;
149 138
150 offset = lowpan_cb(skb)->d_offset << 3; 139 offset = lowpan_802154_cb(skb)->d_offset << 3;
151 end = lowpan_cb(skb)->d_size; 140 end = lowpan_802154_cb(skb)->d_size;
152 141
153 /* Is this the final fragment? */ 142 /* Is this the final fragment? */
154 if (offset + skb->len == end) { 143 if (offset + skb->len == end) {
@@ -174,13 +163,16 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
174 * this fragment, right? 163 * this fragment, right?
175 */ 164 */
176 prev = fq->q.fragments_tail; 165 prev = fq->q.fragments_tail;
177 if (!prev || lowpan_cb(prev)->d_offset < lowpan_cb(skb)->d_offset) { 166 if (!prev ||
167 lowpan_802154_cb(prev)->d_offset <
168 lowpan_802154_cb(skb)->d_offset) {
178 next = NULL; 169 next = NULL;
179 goto found; 170 goto found;
180 } 171 }
181 prev = NULL; 172 prev = NULL;
182 for (next = fq->q.fragments; next != NULL; next = next->next) { 173 for (next = fq->q.fragments; next != NULL; next = next->next) {
183 if (lowpan_cb(next)->d_offset >= lowpan_cb(skb)->d_offset) 174 if (lowpan_802154_cb(next)->d_offset >=
175 lowpan_802154_cb(skb)->d_offset)
184 break; /* bingo! */ 176 break; /* bingo! */
185 prev = next; 177 prev = next;
186 } 178 }
@@ -195,18 +187,15 @@ found:
195 else 187 else
196 fq->q.fragments = skb; 188 fq->q.fragments = skb;
197 189
198 dev = skb->dev; 190 ldev = skb->dev;
199 if (dev) 191 if (ldev)
200 skb->dev = NULL; 192 skb->dev = NULL;
201 193
202 fq->q.stamp = skb->tstamp; 194 fq->q.stamp = skb->tstamp;
203 if (frag_type == LOWPAN_DISPATCH_FRAG1) { 195 if (frag_type == LOWPAN_DISPATCH_FRAG1)
204 /* Calculate uncomp. 6lowpan header to estimate full size */
205 fq->q.meat += lowpan_uncompress_size(skb, NULL);
206 fq->q.flags |= INET_FRAG_FIRST_IN; 196 fq->q.flags |= INET_FRAG_FIRST_IN;
207 } else { 197
208 fq->q.meat += skb->len; 198 fq->q.meat += skb->len;
209 }
210 add_frag_mem_limit(fq->q.net, skb->truesize); 199 add_frag_mem_limit(fq->q.net, skb->truesize);
211 200
212 if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && 201 if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
@@ -215,7 +204,7 @@ found:
215 unsigned long orefdst = skb->_skb_refdst; 204 unsigned long orefdst = skb->_skb_refdst;
216 205
217 skb->_skb_refdst = 0UL; 206 skb->_skb_refdst = 0UL;
218 res = lowpan_frag_reasm(fq, prev, dev); 207 res = lowpan_frag_reasm(fq, prev, ldev);
219 skb->_skb_refdst = orefdst; 208 skb->_skb_refdst = orefdst;
220 return res; 209 return res;
221 } 210 }
@@ -235,7 +224,7 @@ err:
235 * the last and the first frames arrived and all the bits are here. 224 * the last and the first frames arrived and all the bits are here.
236 */ 225 */
237static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, 226static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
238 struct net_device *dev) 227 struct net_device *ldev)
239{ 228{
240 struct sk_buff *fp, *head = fq->q.fragments; 229 struct sk_buff *fp, *head = fq->q.fragments;
241 int sum_truesize; 230 int sum_truesize;
@@ -313,7 +302,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
313 sub_frag_mem_limit(fq->q.net, sum_truesize); 302 sub_frag_mem_limit(fq->q.net, sum_truesize);
314 303
315 head->next = NULL; 304 head->next = NULL;
316 head->dev = dev; 305 head->dev = ldev;
317 head->tstamp = fq->q.stamp; 306 head->tstamp = fq->q.stamp;
318 307
319 fq->q.fragments = NULL; 308 fq->q.fragments = NULL;
@@ -325,24 +314,87 @@ out_oom:
325 return -1; 314 return -1;
326} 315}
327 316
328static int lowpan_get_frag_info(struct sk_buff *skb, const u8 frag_type, 317static int lowpan_frag_rx_handlers_result(struct sk_buff *skb,
329 struct lowpan_frag_info *frag_info) 318 lowpan_rx_result res)
319{
320 switch (res) {
321 case RX_QUEUED:
322 return NET_RX_SUCCESS;
323 case RX_CONTINUE:
324 /* nobody cared about this packet */
325 net_warn_ratelimited("%s: received unknown dispatch\n",
326 __func__);
327
328 /* fall-through */
329 default:
330 /* all others failure */
331 return NET_RX_DROP;
332 }
333}
334
335static lowpan_rx_result lowpan_frag_rx_h_iphc(struct sk_buff *skb)
336{
337 int ret;
338
339 if (!lowpan_is_iphc(*skb_network_header(skb)))
340 return RX_CONTINUE;
341
342 ret = lowpan_iphc_decompress(skb);
343 if (ret < 0)
344 return RX_DROP;
345
346 return RX_QUEUED;
347}
348
349static int lowpan_invoke_frag_rx_handlers(struct sk_buff *skb)
350{
351 lowpan_rx_result res;
352
353#define CALL_RXH(rxh) \
354 do { \
355 res = rxh(skb); \
356 if (res != RX_CONTINUE) \
357 goto rxh_next; \
358 } while (0)
359
360 /* likely at first */
361 CALL_RXH(lowpan_frag_rx_h_iphc);
362 CALL_RXH(lowpan_rx_h_ipv6);
363
364rxh_next:
365 return lowpan_frag_rx_handlers_result(skb, res);
366#undef CALL_RXH
367}
368
369#define LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK 0x07
370#define LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT 8
371
372static int lowpan_get_cb(struct sk_buff *skb, u8 frag_type,
373 struct lowpan_802154_cb *cb)
330{ 374{
331 bool fail; 375 bool fail;
332 u8 pattern = 0, low = 0; 376 u8 high = 0, low = 0;
333 __be16 d_tag = 0; 377 __be16 d_tag = 0;
334 378
335 fail = lowpan_fetch_skb(skb, &pattern, 1); 379 fail = lowpan_fetch_skb(skb, &high, 1);
336 fail |= lowpan_fetch_skb(skb, &low, 1); 380 fail |= lowpan_fetch_skb(skb, &low, 1);
337 frag_info->d_size = (pattern & 7) << 8 | low; 381 /* remove the dispatch value and use first three bits as high value
382 * for the datagram size
383 */
384 cb->d_size = (high & LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK) <<
385 LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT | low;
338 fail |= lowpan_fetch_skb(skb, &d_tag, 2); 386 fail |= lowpan_fetch_skb(skb, &d_tag, 2);
339 frag_info->d_tag = ntohs(d_tag); 387 cb->d_tag = ntohs(d_tag);
340 388
341 if (frag_type == LOWPAN_DISPATCH_FRAGN) { 389 if (frag_type == LOWPAN_DISPATCH_FRAGN) {
342 fail |= lowpan_fetch_skb(skb, &frag_info->d_offset, 1); 390 fail |= lowpan_fetch_skb(skb, &cb->d_offset, 1);
343 } else { 391 } else {
344 skb_reset_network_header(skb); 392 skb_reset_network_header(skb);
345 frag_info->d_offset = 0; 393 cb->d_offset = 0;
394 /* check if datagram_size has ipv6hdr on FRAG1 */
395 fail |= cb->d_size < sizeof(struct ipv6hdr);
396 /* check if we can dereference the dispatch value */
397 fail |= !skb->len;
346 } 398 }
347 399
348 if (unlikely(fail)) 400 if (unlikely(fail))
@@ -351,27 +403,33 @@ static int lowpan_get_frag_info(struct sk_buff *skb, const u8 frag_type,
351 return 0; 403 return 0;
352} 404}
353 405
354int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) 406int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
355{ 407{
356 struct lowpan_frag_queue *fq; 408 struct lowpan_frag_queue *fq;
357 struct net *net = dev_net(skb->dev); 409 struct net *net = dev_net(skb->dev);
358 struct lowpan_frag_info *frag_info = lowpan_cb(skb); 410 struct lowpan_802154_cb *cb = lowpan_802154_cb(skb);
359 struct ieee802154_addr source, dest; 411 struct ieee802154_hdr hdr;
360 int err; 412 int err;
361 413
362 source = mac_cb(skb)->source; 414 if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
363 dest = mac_cb(skb)->dest; 415 goto err;
364 416
365 err = lowpan_get_frag_info(skb, frag_type, frag_info); 417 err = lowpan_get_cb(skb, frag_type, cb);
366 if (err < 0) 418 if (err < 0)
367 goto err; 419 goto err;
368 420
369 if (frag_info->d_size > IPV6_MIN_MTU) { 421 if (frag_type == LOWPAN_DISPATCH_FRAG1) {
422 err = lowpan_invoke_frag_rx_handlers(skb);
423 if (err == NET_RX_DROP)
424 goto err;
425 }
426
427 if (cb->d_size > IPV6_MIN_MTU) {
370 net_warn_ratelimited("lowpan_frag_rcv: datagram size exceeds MTU\n"); 428 net_warn_ratelimited("lowpan_frag_rcv: datagram size exceeds MTU\n");
371 goto err; 429 goto err;
372 } 430 }
373 431
374 fq = fq_find(net, frag_info, &source, &dest); 432 fq = fq_find(net, cb, &hdr.source, &hdr.dest);
375 if (fq != NULL) { 433 if (fq != NULL) {
376 int ret; 434 int ret;
377 435
@@ -387,7 +445,6 @@ err:
387 kfree_skb(skb); 445 kfree_skb(skb);
388 return -1; 446 return -1;
389} 447}
390EXPORT_SYMBOL(lowpan_frag_rcv);
391 448
392#ifdef CONFIG_SYSCTL 449#ifdef CONFIG_SYSCTL
393static int zero; 450static int zero;
@@ -523,14 +580,19 @@ static int __net_init lowpan_frags_init_net(struct net *net)
523{ 580{
524 struct netns_ieee802154_lowpan *ieee802154_lowpan = 581 struct netns_ieee802154_lowpan *ieee802154_lowpan =
525 net_ieee802154_lowpan(net); 582 net_ieee802154_lowpan(net);
583 int res;
526 584
527 ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; 585 ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
528 ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; 586 ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
529 ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; 587 ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
530 588
531 inet_frags_init_net(&ieee802154_lowpan->frags); 589 res = inet_frags_init_net(&ieee802154_lowpan->frags);
532 590 if (res)
533 return lowpan_frags_ns_sysctl_register(net); 591 return res;
592 res = lowpan_frags_ns_sysctl_register(net);
593 if (res)
594 inet_frags_uninit_net(&ieee802154_lowpan->frags);
595 return res;
534} 596}
535 597
536static void __net_exit lowpan_frags_exit_net(struct net *net) 598static void __net_exit lowpan_frags_exit_net(struct net *net)
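Editor's note: lowpan_get_cb() above splits the first two header bytes into dispatch bits and an 11-bit datagram size, then reads the big-endian tag and, for FRAGN, the offset in 8-octet units (RFC 4944, section 5.3). A standalone decode with made-up header bytes:

#include <stdint.h>
#include <stdio.h>

#define LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK	0x07
#define LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT	8

int main(void)
{
	/* 0xe5 = FRAGN dispatch (0xe0) with datagram-size high bits 0b101;
	 * all values below are made up for illustration
	 */
	const uint8_t hdr[] = { 0xe5, 0x34, 0x12, 0x2a, 0x10 };
	uint16_t d_size, d_tag;
	uint8_t d_offset;

	d_size = (hdr[0] & LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK) <<
		 LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT | hdr[1];
	d_tag = (uint16_t)((hdr[2] << 8) | hdr[3]);	/* big endian on the wire */
	d_offset = hdr[4];				/* units of 8 octets */

	printf("size=%u tag=%u offset=%u bytes\n",
	       d_size, d_tag, d_offset * 8u);
	/* prints: size=1332 tag=4650 offset=128 bytes */
	return 0;
}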
diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c
index 12e10201d263..ef185dd4110d 100644
--- a/net/ieee802154/6lowpan/rx.c
+++ b/net/ieee802154/6lowpan/rx.c
@@ -11,126 +11,307 @@
11#include <linux/if_arp.h> 11#include <linux/if_arp.h>
12 12
13#include <net/6lowpan.h> 13#include <net/6lowpan.h>
14#include <net/mac802154.h>
14#include <net/ieee802154_netdev.h> 15#include <net/ieee802154_netdev.h>
15 16
16#include "6lowpan_i.h" 17#include "6lowpan_i.h"
17 18
18static int lowpan_give_skb_to_device(struct sk_buff *skb, 19#define LOWPAN_DISPATCH_FIRST 0xc0
19 struct net_device *dev) 20#define LOWPAN_DISPATCH_FRAG_MASK 0xf8
21
22#define LOWPAN_DISPATCH_NALP 0x00
23#define LOWPAN_DISPATCH_ESC 0x40
24#define LOWPAN_DISPATCH_HC1 0x42
25#define LOWPAN_DISPATCH_DFF 0x43
26#define LOWPAN_DISPATCH_BC0 0x50
27#define LOWPAN_DISPATCH_MESH 0x80
28
29static int lowpan_give_skb_to_device(struct sk_buff *skb)
20{ 30{
21 skb->dev = dev->ieee802154_ptr->lowpan_dev;
22 skb->protocol = htons(ETH_P_IPV6); 31 skb->protocol = htons(ETH_P_IPV6);
23 skb->pkt_type = PACKET_HOST; 32 skb->dev->stats.rx_packets++;
33 skb->dev->stats.rx_bytes += skb->len;
24 34
25 return netif_rx(skb); 35 return netif_rx(skb);
26} 36}
27 37
28static int 38static int lowpan_rx_handlers_result(struct sk_buff *skb, lowpan_rx_result res)
29iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr)
30{ 39{
31 u8 iphc0, iphc1; 40 switch (res) {
32 struct ieee802154_addr_sa sa, da; 41 case RX_CONTINUE:
33 void *sap, *dap; 42 /* nobody cared about this packet */
43 net_warn_ratelimited("%s: received unknown dispatch\n",
44 __func__);
34 45
35 raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); 46 /* fall-through */
36 /* at least two bytes will be used for the encoding */ 47 case RX_DROP_UNUSABLE:
37 if (skb->len < 2) 48 kfree_skb(skb);
38 return -EINVAL;
39 49
40 if (lowpan_fetch_skb_u8(skb, &iphc0)) 50 /* fall-through */
41 return -EINVAL; 51 case RX_DROP:
52 return NET_RX_DROP;
53 case RX_QUEUED:
54 return lowpan_give_skb_to_device(skb);
55 default:
56 break;
57 }
42 58
43 if (lowpan_fetch_skb_u8(skb, &iphc1)) 59 return NET_RX_DROP;
44 return -EINVAL; 60}
45 61
46 ieee802154_addr_to_sa(&sa, &hdr->source); 62static inline bool lowpan_is_frag1(u8 dispatch)
47 ieee802154_addr_to_sa(&da, &hdr->dest); 63{
64 return (dispatch & LOWPAN_DISPATCH_FRAG_MASK) == LOWPAN_DISPATCH_FRAG1;
65}
48 66
49 if (sa.addr_type == IEEE802154_ADDR_SHORT) 67static inline bool lowpan_is_fragn(u8 dispatch)
50 sap = &sa.short_addr; 68{
51 else 69 return (dispatch & LOWPAN_DISPATCH_FRAG_MASK) == LOWPAN_DISPATCH_FRAGN;
52 sap = &sa.hwaddr; 70}
53 71
54 if (da.addr_type == IEEE802154_ADDR_SHORT) 72static lowpan_rx_result lowpan_rx_h_frag(struct sk_buff *skb)
55 dap = &da.short_addr; 73{
56 else 74 int ret;
57 dap = &da.hwaddr;
58 75
59 return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, 76 if (!(lowpan_is_frag1(*skb_network_header(skb)) ||
60 IEEE802154_ADDR_LEN, dap, da.addr_type, 77 lowpan_is_fragn(*skb_network_header(skb))))
61 IEEE802154_ADDR_LEN, iphc0, iphc1); 78 return RX_CONTINUE;
79
80 ret = lowpan_frag_rcv(skb, *skb_network_header(skb) &
81 LOWPAN_DISPATCH_FRAG_MASK);
82 if (ret == 1)
83 return RX_QUEUED;
84
85 /* Packet is freed by lowpan_frag_rcv on error or put into the frag
86 * bucket.
87 */
88 return RX_DROP;
62} 89}
63 90
64static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, 91int lowpan_iphc_decompress(struct sk_buff *skb)
65 struct packet_type *pt, struct net_device *orig_dev)
66{ 92{
67 struct ieee802154_hdr hdr; 93 struct ieee802154_hdr hdr;
94
95 if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
96 return -EINVAL;
97
98 return lowpan_header_decompress(skb, skb->dev, &hdr.dest, &hdr.source);
99}
100
101static lowpan_rx_result lowpan_rx_h_iphc(struct sk_buff *skb)
102{
68 int ret; 103 int ret;
69 104
70 if (dev->type != ARPHRD_IEEE802154 || 105 if (!lowpan_is_iphc(*skb_network_header(skb)))
71 !dev->ieee802154_ptr->lowpan_dev) 106 return RX_CONTINUE;
72 goto drop;
73 107
 74 skb = skb_share_check(skb, GFP_ATOMIC); 108 /* Setting datagram_size to zero indicates non-frag handling
75 if (!skb) 109 * while doing lowpan_header_decompress.
110 */
111 lowpan_802154_cb(skb)->d_size = 0;
112
113 ret = lowpan_iphc_decompress(skb);
114 if (ret < 0)
115 return RX_DROP_UNUSABLE;
116
117 return RX_QUEUED;
118}
119
120lowpan_rx_result lowpan_rx_h_ipv6(struct sk_buff *skb)
121{
122 if (!lowpan_is_ipv6(*skb_network_header(skb)))
123 return RX_CONTINUE;
124
125 /* Pull off the 1-byte of 6lowpan header. */
126 skb_pull(skb, 1);
127 return RX_QUEUED;
128}
129
130static inline bool lowpan_is_esc(u8 dispatch)
131{
132 return dispatch == LOWPAN_DISPATCH_ESC;
133}
134
135static lowpan_rx_result lowpan_rx_h_esc(struct sk_buff *skb)
136{
137 if (!lowpan_is_esc(*skb_network_header(skb)))
138 return RX_CONTINUE;
139
140 net_warn_ratelimited("%s: %s\n", skb->dev->name,
141 "6LoWPAN ESC not supported\n");
142
143 return RX_DROP_UNUSABLE;
144}
145
146static inline bool lowpan_is_hc1(u8 dispatch)
147{
148 return dispatch == LOWPAN_DISPATCH_HC1;
149}
150
151static lowpan_rx_result lowpan_rx_h_hc1(struct sk_buff *skb)
152{
153 if (!lowpan_is_hc1(*skb_network_header(skb)))
154 return RX_CONTINUE;
155
156 net_warn_ratelimited("%s: %s\n", skb->dev->name,
157 "6LoWPAN HC1 not supported\n");
158
159 return RX_DROP_UNUSABLE;
160}
161
162static inline bool lowpan_is_dff(u8 dispatch)
163{
164 return dispatch == LOWPAN_DISPATCH_DFF;
165}
166
167static lowpan_rx_result lowpan_rx_h_dff(struct sk_buff *skb)
168{
169 if (!lowpan_is_dff(*skb_network_header(skb)))
170 return RX_CONTINUE;
171
172 net_warn_ratelimited("%s: %s\n", skb->dev->name,
173 "6LoWPAN DFF not supported\n");
174
175 return RX_DROP_UNUSABLE;
176}
177
178static inline bool lowpan_is_bc0(u8 dispatch)
179{
180 return dispatch == LOWPAN_DISPATCH_BC0;
181}
182
183static lowpan_rx_result lowpan_rx_h_bc0(struct sk_buff *skb)
184{
185 if (!lowpan_is_bc0(*skb_network_header(skb)))
186 return RX_CONTINUE;
187
188 net_warn_ratelimited("%s: %s\n", skb->dev->name,
189 "6LoWPAN BC0 not supported\n");
190
191 return RX_DROP_UNUSABLE;
192}
193
194static inline bool lowpan_is_mesh(u8 dispatch)
195{
196 return (dispatch & LOWPAN_DISPATCH_FIRST) == LOWPAN_DISPATCH_MESH;
197}
198
199static lowpan_rx_result lowpan_rx_h_mesh(struct sk_buff *skb)
200{
201 if (!lowpan_is_mesh(*skb_network_header(skb)))
202 return RX_CONTINUE;
203
204 net_warn_ratelimited("%s: %s\n", skb->dev->name,
205 "6LoWPAN MESH not supported\n");
206
207 return RX_DROP_UNUSABLE;
208}
209
210static int lowpan_invoke_rx_handlers(struct sk_buff *skb)
211{
212 lowpan_rx_result res;
213
214#define CALL_RXH(rxh) \
215 do { \
216 res = rxh(skb); \
217 if (res != RX_CONTINUE) \
218 goto rxh_next; \
219 } while (0)
220
221 /* likely at first */
222 CALL_RXH(lowpan_rx_h_iphc);
223 CALL_RXH(lowpan_rx_h_frag);
224 CALL_RXH(lowpan_rx_h_ipv6);
225 CALL_RXH(lowpan_rx_h_esc);
226 CALL_RXH(lowpan_rx_h_hc1);
227 CALL_RXH(lowpan_rx_h_dff);
228 CALL_RXH(lowpan_rx_h_bc0);
229 CALL_RXH(lowpan_rx_h_mesh);
230
231rxh_next:
232 return lowpan_rx_handlers_result(skb, res);
233#undef CALL_RXH
234}
235
236static inline bool lowpan_is_nalp(u8 dispatch)
237{
238 return (dispatch & LOWPAN_DISPATCH_FIRST) == LOWPAN_DISPATCH_NALP;
239}
240
 241/* Look up reserved dispatch values at:
242 * https://www.iana.org/assignments/_6lowpan-parameters/_6lowpan-parameters.xhtml#_6lowpan-parameters-1
243 *
244 * Last Updated: 2015-01-22
245 */
246static inline bool lowpan_is_reserved(u8 dispatch)
247{
248 return ((dispatch >= 0x44 && dispatch <= 0x4F) ||
249 (dispatch >= 0x51 && dispatch <= 0x5F) ||
250 (dispatch >= 0xc8 && dispatch <= 0xdf) ||
251 (dispatch >= 0xe8 && dispatch <= 0xff));
252}
253
254/* lowpan_rx_h_check checks on generic 6LoWPAN requirements
255 * in MAC and 6LoWPAN header.
256 *
257 * Don't manipulate the skb here, it could be shared buffer.
258 */
259static inline bool lowpan_rx_h_check(struct sk_buff *skb)
260{
261 __le16 fc = ieee802154_get_fc_from_skb(skb);
262
263 /* check on ieee802154 conform 6LoWPAN header */
264 if (!ieee802154_is_data(fc) ||
265 !ieee802154_is_intra_pan(fc))
266 return false;
267
268 /* check if we can dereference the dispatch */
269 if (unlikely(!skb->len))
270 return false;
271
272 if (lowpan_is_nalp(*skb_network_header(skb)) ||
273 lowpan_is_reserved(*skb_network_header(skb)))
274 return false;
275
276 return true;
277}
278
279static int lowpan_rcv(struct sk_buff *skb, struct net_device *wdev,
280 struct packet_type *pt, struct net_device *orig_wdev)
281{
282 struct net_device *ldev;
283
284 if (wdev->type != ARPHRD_IEEE802154 ||
285 skb->pkt_type == PACKET_OTHERHOST ||
286 !lowpan_rx_h_check(skb))
76 goto drop; 287 goto drop;
77 288
78 if (!netif_running(dev)) 289 ldev = wdev->ieee802154_ptr->lowpan_dev;
79 goto drop_skb; 290 if (!ldev || !netif_running(ldev))
291 goto drop;
80 292
81 if (skb->pkt_type == PACKET_OTHERHOST) 293 /* Replacing skb->dev and followed rx handlers will manipulate skb. */
82 goto drop_skb; 294 skb = skb_share_check(skb, GFP_ATOMIC);
295 if (!skb)
296 goto out;
297 skb->dev = ldev;
83 298
 84 if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) 299 /* When receiving frag1 it's likely that we manipulate the buffer.
 85 goto drop_skb; 300 * When receiving iphc we manipulate the data buffer. So we need
86 301 * to unshare the buffer.
87 /* check that it's our buffer */ 302 */
88 if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { 303 if (lowpan_is_frag1(*skb_network_header(skb)) ||
89 /* Pull off the 1-byte of 6lowpan header. */ 304 lowpan_is_iphc(*skb_network_header(skb))) {
90 skb_pull(skb, 1); 305 skb = skb_unshare(skb, GFP_ATOMIC);
91 return lowpan_give_skb_to_device(skb, dev); 306 if (!skb)
92 } else { 307 goto out;
93 switch (skb->data[0] & 0xe0) {
94 case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */
95 ret = iphc_decompress(skb, &hdr);
96 if (ret < 0)
97 goto drop_skb;
98
99 return lowpan_give_skb_to_device(skb, dev);
100 case LOWPAN_DISPATCH_FRAG1: /* first fragment header */
101 ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1);
102 if (ret == 1) {
103 ret = iphc_decompress(skb, &hdr);
104 if (ret < 0)
105 goto drop_skb;
106
107 return lowpan_give_skb_to_device(skb, dev);
108 } else if (ret == -1) {
109 return NET_RX_DROP;
110 } else {
111 return NET_RX_SUCCESS;
112 }
113 case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */
114 ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN);
115 if (ret == 1) {
116 ret = iphc_decompress(skb, &hdr);
117 if (ret < 0)
118 goto drop_skb;
119
120 return lowpan_give_skb_to_device(skb, dev);
121 } else if (ret == -1) {
122 return NET_RX_DROP;
123 } else {
124 return NET_RX_SUCCESS;
125 }
126 default:
127 break;
128 }
129 } 308 }
130 309
131drop_skb: 310 return lowpan_invoke_rx_handlers(skb);
132 kfree_skb(skb); 311
133drop: 312drop:
313 kfree_skb(skb);
314out:
134 return NET_RX_DROP; 315 return NET_RX_DROP;
135} 316}
136 317
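Editor's note: the rewritten rx path above replaces the old dispatch switch with a chain of handlers that each test the first byte and return RX_CONTINUE to pass the frame along. A standalone model of the CALL_RXH chain; the two dispatch values (0x41 for uncompressed IPv6, 011xxxxx for IPHC) follow RFC 4944/RFC 6282, everything else is simplified.

#include <stdint.h>
#include <stdio.h>

enum rx_result { RX_CONTINUE, RX_DROP, RX_QUEUED };

static enum rx_result rx_h_iphc(uint8_t dispatch)
{
	/* IPHC dispatch pattern is 011xxxxx (0x60..0x7f) */
	return (dispatch & 0xe0) == 0x60 ? RX_QUEUED : RX_CONTINUE;
}

static enum rx_result rx_h_ipv6(uint8_t dispatch)
{
	return dispatch == 0x41 ? RX_QUEUED : RX_CONTINUE; /* uncompressed */
}

static enum rx_result invoke_rx_handlers(uint8_t dispatch)
{
	enum rx_result res;

#define CALL_RXH(rxh)				\
	do {					\
		res = rxh(dispatch);		\
		if (res != RX_CONTINUE)		\
			goto rxh_next;		\
	} while (0)

	CALL_RXH(rx_h_iphc);	/* most likely dispatch first */
	CALL_RXH(rx_h_ipv6);

rxh_next:
	return res;
#undef CALL_RXH
}

int main(void)
{
	printf("%d %d %d\n", invoke_rx_handlers(0x7a),
	       invoke_rx_handlers(0x41), invoke_rx_handlers(0x00));
	/* prints "2 2 0": IPHC queued, IPv6 queued, NALP falls through */
	return 0;
}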
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index f6263fc12340..d4353faced35 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -10,9 +10,13 @@
10 10
11#include <net/6lowpan.h> 11#include <net/6lowpan.h>
12#include <net/ieee802154_netdev.h> 12#include <net/ieee802154_netdev.h>
13#include <net/mac802154.h>
13 14
14#include "6lowpan_i.h" 15#include "6lowpan_i.h"
15 16
17#define LOWPAN_FRAG1_HEAD_SIZE 0x4
18#define LOWPAN_FRAGN_HEAD_SIZE 0x5
19
16/* don't save pan id, it's intra pan */ 20/* don't save pan id, it's intra pan */
17struct lowpan_addr { 21struct lowpan_addr {
18 u8 mode; 22 u8 mode;
@@ -36,7 +40,14 @@ lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb)
36 sizeof(struct lowpan_addr_info)); 40 sizeof(struct lowpan_addr_info));
37} 41}
38 42
39int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, 43/* This callback will be called from AF_PACKET and IPv6 stack, the AF_PACKET
44 * sockets gives an 8 byte array for addresses only!
45 *
46 * TODO I think AF_PACKET DGRAM (sending/receiving) RAW (sending) makes no
 47 * sense here. We should disable it; the right use case would be AF_INET6
48 * RAW/DGRAM sockets.
49 */
50int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev,
40 unsigned short type, const void *_daddr, 51 unsigned short type, const void *_daddr,
41 const void *_saddr, unsigned int len) 52 const void *_saddr, unsigned int len)
42{ 53{
@@ -51,7 +62,7 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *dev,
51 return 0; 62 return 0;
52 63
53 if (!saddr) 64 if (!saddr)
54 saddr = dev->dev_addr; 65 saddr = ldev->dev_addr;
55 66
56 raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); 67 raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8);
57 raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); 68 raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8);
@@ -71,28 +82,33 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *dev,
71 82
72static struct sk_buff* 83static struct sk_buff*
73lowpan_alloc_frag(struct sk_buff *skb, int size, 84lowpan_alloc_frag(struct sk_buff *skb, int size,
74 const struct ieee802154_hdr *master_hdr) 85 const struct ieee802154_hdr *master_hdr, bool frag1)
75{ 86{
76 struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; 87 struct net_device *wdev = lowpan_dev_info(skb->dev)->wdev;
77 struct sk_buff *frag; 88 struct sk_buff *frag;
78 int rc; 89 int rc;
79 90
80 frag = alloc_skb(real_dev->hard_header_len + 91 frag = alloc_skb(wdev->needed_headroom + wdev->needed_tailroom + size,
81 real_dev->needed_tailroom + size,
82 GFP_ATOMIC); 92 GFP_ATOMIC);
83 93
84 if (likely(frag)) { 94 if (likely(frag)) {
85 frag->dev = real_dev; 95 frag->dev = wdev;
86 frag->priority = skb->priority; 96 frag->priority = skb->priority;
87 skb_reserve(frag, real_dev->hard_header_len); 97 skb_reserve(frag, wdev->needed_headroom);
88 skb_reset_network_header(frag); 98 skb_reset_network_header(frag);
89 *mac_cb(frag) = *mac_cb(skb); 99 *mac_cb(frag) = *mac_cb(skb);
90 100
91 rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, 101 if (frag1) {
92 &master_hdr->source, size); 102 memcpy(skb_put(frag, skb->mac_len),
93 if (rc < 0) { 103 skb_mac_header(skb), skb->mac_len);
94 kfree_skb(frag); 104 } else {
95 return ERR_PTR(rc); 105 rc = wpan_dev_hard_header(frag, wdev,
106 &master_hdr->dest,
107 &master_hdr->source, size);
108 if (rc < 0) {
109 kfree_skb(frag);
110 return ERR_PTR(rc);
111 }
96 } 112 }
97 } else { 113 } else {
98 frag = ERR_PTR(-ENOMEM); 114 frag = ERR_PTR(-ENOMEM);
@@ -104,13 +120,13 @@ lowpan_alloc_frag(struct sk_buff *skb, int size,
104static int 120static int
105lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, 121lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr,
106 u8 *frag_hdr, int frag_hdrlen, 122 u8 *frag_hdr, int frag_hdrlen,
107 int offset, int len) 123 int offset, int len, bool frag1)
108{ 124{
109 struct sk_buff *frag; 125 struct sk_buff *frag;
110 126
111 raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); 127 raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen);
112 128
113 frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); 129 frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr, frag1);
114 if (IS_ERR(frag)) 130 if (IS_ERR(frag))
115 return PTR_ERR(frag); 131 return PTR_ERR(frag);
116 132
@@ -123,19 +139,17 @@ lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr,
123} 139}
124 140
125static int 141static int
126lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, 142lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *ldev,
127 const struct ieee802154_hdr *wpan_hdr) 143 const struct ieee802154_hdr *wpan_hdr, u16 dgram_size,
144 u16 dgram_offset)
128{ 145{
129 u16 dgram_size, dgram_offset;
130 __be16 frag_tag; 146 __be16 frag_tag;
131 u8 frag_hdr[5]; 147 u8 frag_hdr[5];
132 int frag_cap, frag_len, payload_cap, rc; 148 int frag_cap, frag_len, payload_cap, rc;
133 int skb_unprocessed, skb_offset; 149 int skb_unprocessed, skb_offset;
134 150
135 dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - 151 frag_tag = htons(lowpan_dev_info(ldev)->fragment_tag);
136 skb->mac_len; 152 lowpan_dev_info(ldev)->fragment_tag++;
137 frag_tag = htons(lowpan_dev_info(dev)->fragment_tag);
138 lowpan_dev_info(dev)->fragment_tag++;
139 153
140 frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); 154 frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07);
141 frag_hdr[1] = dgram_size & 0xff; 155 frag_hdr[1] = dgram_size & 0xff;
@@ -151,7 +165,8 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev,
151 165
152 rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, 166 rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr,
153 LOWPAN_FRAG1_HEAD_SIZE, 0, 167 LOWPAN_FRAG1_HEAD_SIZE, 0,
154 frag_len + skb_network_header_len(skb)); 168 frag_len + skb_network_header_len(skb),
169 true);
155 if (rc) { 170 if (rc) {
156 pr_debug("%s unable to send FRAG1 packet (tag: %d)", 171 pr_debug("%s unable to send FRAG1 packet (tag: %d)",
157 __func__, ntohs(frag_tag)); 172 __func__, ntohs(frag_tag));
@@ -172,7 +187,7 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev,
172 187
173 rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, 188 rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr,
174 LOWPAN_FRAGN_HEAD_SIZE, skb_offset, 189 LOWPAN_FRAGN_HEAD_SIZE, skb_offset,
175 frag_len); 190 frag_len, false);
176 if (rc) { 191 if (rc) {
177 pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n", 192 pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n",
178 __func__, ntohs(frag_tag), skb_offset); 193 __func__, ntohs(frag_tag), skb_offset);
@@ -180,6 +195,8 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev,
180 } 195 }
181 } while (skb_unprocessed > frag_cap); 196 } while (skb_unprocessed > frag_cap);
182 197
198 ldev->stats.tx_packets++;
199 ldev->stats.tx_bytes += dgram_size;
183 consume_skb(skb); 200 consume_skb(skb);
184 return NET_XMIT_SUCCESS; 201 return NET_XMIT_SUCCESS;
185 202
@@ -188,9 +205,10 @@ err:
188 return rc; 205 return rc;
189} 206}
190 207
191static int lowpan_header(struct sk_buff *skb, struct net_device *dev) 208static int lowpan_header(struct sk_buff *skb, struct net_device *ldev,
209 u16 *dgram_size, u16 *dgram_offset)
192{ 210{
193 struct wpan_dev *wpan_dev = lowpan_dev_info(dev)->real_dev->ieee802154_ptr; 211 struct wpan_dev *wpan_dev = lowpan_dev_info(ldev)->wdev->ieee802154_ptr;
194 struct ieee802154_addr sa, da; 212 struct ieee802154_addr sa, da;
195 struct ieee802154_mac_cb *cb = mac_cb_init(skb); 213 struct ieee802154_mac_cb *cb = mac_cb_init(skb);
196 struct lowpan_addr_info info; 214 struct lowpan_addr_info info;
@@ -202,7 +220,10 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
202 daddr = &info.daddr.u.extended_addr; 220 daddr = &info.daddr.u.extended_addr;
203 saddr = &info.saddr.u.extended_addr; 221 saddr = &info.saddr.u.extended_addr;
204 222
205 lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); 223 *dgram_size = skb->len;
224 lowpan_header_compress(skb, ldev, daddr, saddr);
225 /* dgram_offset = (saved bytes after compression) + lowpan header len */
226 *dgram_offset = (*dgram_size - skb->len) + skb_network_header_len(skb);
206 227
207 cb->type = IEEE802154_FC_TYPE_DATA; 228 cb->type = IEEE802154_FC_TYPE_DATA;
208 229
@@ -217,7 +238,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
217 /* if the destination address is the broadcast address, use the 238 /* if the destination address is the broadcast address, use the
218 * corresponding short address 239 * corresponding short address
219 */ 240 */
220 if (lowpan_is_addr_broadcast((const u8 *)daddr)) { 241 if (!memcmp(daddr, ldev->broadcast, EUI64_ADDR_LEN)) {
221 da.mode = IEEE802154_ADDR_SHORT; 242 da.mode = IEEE802154_ADDR_SHORT;
222 da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); 243 da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
223 cb->ackreq = false; 244 cb->ackreq = false;
@@ -227,17 +248,20 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
227 cb->ackreq = wpan_dev->ackreq; 248 cb->ackreq = wpan_dev->ackreq;
228 } 249 }
229 250
230 return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, 251 return wpan_dev_hard_header(skb, lowpan_dev_info(ldev)->wdev, &da, &sa,
231 ETH_P_IPV6, (void *)&da, (void *)&sa, 0); 252 0);
232} 253}
233 254
234netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) 255netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
235{ 256{
236 struct ieee802154_hdr wpan_hdr; 257 struct ieee802154_hdr wpan_hdr;
237 int max_single, ret; 258 int max_single, ret;
259 u16 dgram_size, dgram_offset;
238 260
239 pr_debug("package xmit\n"); 261 pr_debug("package xmit\n");
240 262
263 WARN_ON_ONCE(skb->len > IPV6_MIN_MTU);
264
241 /* We must take a copy of the skb before we modify/replace the ipv6 265 /* We must take a copy of the skb before we modify/replace the ipv6
242 * header as the header could be used elsewhere 266 * header as the header could be used elsewhere
243 */ 267 */
@@ -245,7 +269,7 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
245 if (!skb) 269 if (!skb)
246 return NET_XMIT_DROP; 270 return NET_XMIT_DROP;
247 271
248 ret = lowpan_header(skb, dev); 272 ret = lowpan_header(skb, ldev, &dgram_size, &dgram_offset);
249 if (ret < 0) { 273 if (ret < 0) {
250 kfree_skb(skb); 274 kfree_skb(skb);
251 return NET_XMIT_DROP; 275 return NET_XMIT_DROP;
@@ -259,13 +283,16 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
259 max_single = ieee802154_max_payload(&wpan_hdr); 283 max_single = ieee802154_max_payload(&wpan_hdr);
260 284
261 if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { 285 if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) {
262 skb->dev = lowpan_dev_info(dev)->real_dev; 286 skb->dev = lowpan_dev_info(ldev)->wdev;
287 ldev->stats.tx_packets++;
288 ldev->stats.tx_bytes += dgram_size;
263 return dev_queue_xmit(skb); 289 return dev_queue_xmit(skb);
264 } else { 290 } else {
265 netdev_tx_t rc; 291 netdev_tx_t rc;
266 292
267 pr_debug("frame is too big, fragmentation is needed\n"); 293 pr_debug("frame is too big, fragmentation is needed\n");
268 rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); 294 rc = lowpan_xmit_fragmented(skb, ldev, &wpan_hdr, dgram_size,
295 dgram_offset);
269 296
270 return rc < 0 ? NET_XMIT_DROP : rc; 297 return rc < 0 ? NET_XMIT_DROP : rc;
271 } 298 }
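Editor's note: lowpan_xmit_fragmented() above builds the 4-byte FRAG1 and 5-byte FRAGN headers by splitting the 11-bit datagram size across the dispatch byte. A standalone encode with made-up size/tag/offset values:

#include <stdint.h>
#include <stdio.h>

#define LOWPAN_DISPATCH_FRAG1	0xc0
#define LOWPAN_DISPATCH_FRAGN	0xe0

int main(void)
{
	uint16_t dgram_size = 1280, frag_tag = 42;
	uint8_t frag_hdr[5];

	/* FRAG1: 4 bytes, no offset field */
	frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07);
	frag_hdr[1] = dgram_size & 0xff;
	frag_hdr[2] = frag_tag >> 8;	/* tag is big endian on the wire */
	frag_hdr[3] = frag_tag & 0xff;
	printf("FRAG1: %02x %02x %02x %02x\n",
	       frag_hdr[0], frag_hdr[1], frag_hdr[2], frag_hdr[3]);

	/* FRAGN: same 4 bytes plus datagram_offset in units of 8 octets */
	frag_hdr[0] = LOWPAN_DISPATCH_FRAGN | ((dgram_size >> 8) & 0x07);
	frag_hdr[4] = 96 / 8;	/* this fragment starts at byte 96 */
	printf("FRAGN: %02x %02x %02x %02x %02x\n",
	       frag_hdr[0], frag_hdr[1], frag_hdr[2], frag_hdr[3],
	       frag_hdr[4]);
	return 0;
}

Feeding these bytes into the decode sketch after the reassembly.c diff round-trips the same size, tag, and offset.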
diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig
index 1370d5b0041b..188135bcb803 100644
--- a/net/ieee802154/Kconfig
+++ b/net/ieee802154/Kconfig
@@ -12,6 +12,11 @@ menuconfig IEEE802154
12 12
13if IEEE802154 13if IEEE802154
14 14
15config IEEE802154_NL802154_EXPERIMENTAL
16 bool "IEEE 802.15.4 experimental netlink support"
17 ---help---
18 Adds experimental netlink support for nl802154.
19
15config IEEE802154_SOCKET 20config IEEE802154_SOCKET
16 tristate "IEEE 802.15.4 socket interface" 21 tristate "IEEE 802.15.4 socket interface"
17 default y 22 default y
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index b0248e934230..c35fdfa6d04e 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -95,6 +95,18 @@ cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx)
95 return result; 95 return result;
96} 96}
97 97
98struct wpan_phy *wpan_phy_idx_to_wpan_phy(int wpan_phy_idx)
99{
100 struct cfg802154_registered_device *rdev;
101
102 ASSERT_RTNL();
103
104 rdev = cfg802154_rdev_by_wpan_phy_idx(wpan_phy_idx);
105 if (!rdev)
106 return NULL;
107 return &rdev->wpan_phy;
108}
109
98struct wpan_phy * 110struct wpan_phy *
99wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size) 111wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size)
100{ 112{
diff --git a/net/ieee802154/core.h b/net/ieee802154/core.h
index f3e95580caee..231fade959f3 100644
--- a/net/ieee802154/core.h
+++ b/net/ieee802154/core.h
@@ -42,5 +42,6 @@ extern int cfg802154_rdev_list_generation;
42void cfg802154_dev_free(struct cfg802154_registered_device *rdev); 42void cfg802154_dev_free(struct cfg802154_registered_device *rdev);
43struct cfg802154_registered_device * 43struct cfg802154_registered_device *
44cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx); 44cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx);
45struct wpan_phy *wpan_phy_idx_to_wpan_phy(int wpan_phy_idx);
45 46
46#endif /* __IEEE802154_CORE_H */ 47#endif /* __IEEE802154_CORE_H */
diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c
index a051b6993177..c7439f0fbbdf 100644
--- a/net/ieee802154/header_ops.c
+++ b/net/ieee802154/header_ops.c
@@ -83,35 +83,35 @@ ieee802154_hdr_push_sechdr(u8 *buf, const struct ieee802154_sechdr *hdr)
83} 83}
84 84
85int 85int
86ieee802154_hdr_push(struct sk_buff *skb, const struct ieee802154_hdr *hdr) 86ieee802154_hdr_push(struct sk_buff *skb, struct ieee802154_hdr *hdr)
87{ 87{
88 u8 buf[MAC802154_FRAME_HARD_HEADER_LEN]; 88 u8 buf[IEEE802154_MAX_HEADER_LEN];
89 int pos = 2; 89 int pos = 2;
90 int rc; 90 int rc;
91 struct ieee802154_hdr_fc fc = hdr->fc; 91 struct ieee802154_hdr_fc *fc = &hdr->fc;
92 92
93 buf[pos++] = hdr->seq; 93 buf[pos++] = hdr->seq;
94 94
95 fc.dest_addr_mode = hdr->dest.mode; 95 fc->dest_addr_mode = hdr->dest.mode;
96 96
97 rc = ieee802154_hdr_push_addr(buf + pos, &hdr->dest, false); 97 rc = ieee802154_hdr_push_addr(buf + pos, &hdr->dest, false);
98 if (rc < 0) 98 if (rc < 0)
99 return -EINVAL; 99 return -EINVAL;
100 pos += rc; 100 pos += rc;
101 101
102 fc.source_addr_mode = hdr->source.mode; 102 fc->source_addr_mode = hdr->source.mode;
103 103
104 if (hdr->source.pan_id == hdr->dest.pan_id && 104 if (hdr->source.pan_id == hdr->dest.pan_id &&
105 hdr->dest.mode != IEEE802154_ADDR_NONE) 105 hdr->dest.mode != IEEE802154_ADDR_NONE)
106 fc.intra_pan = true; 106 fc->intra_pan = true;
107 107
108 rc = ieee802154_hdr_push_addr(buf + pos, &hdr->source, fc.intra_pan); 108 rc = ieee802154_hdr_push_addr(buf + pos, &hdr->source, fc->intra_pan);
109 if (rc < 0) 109 if (rc < 0)
110 return -EINVAL; 110 return -EINVAL;
111 pos += rc; 111 pos += rc;
112 112
113 if (fc.security_enabled) { 113 if (fc->security_enabled) {
114 fc.version = 1; 114 fc->version = 1;
115 115
116 rc = ieee802154_hdr_push_sechdr(buf + pos, &hdr->sec); 116 rc = ieee802154_hdr_push_sechdr(buf + pos, &hdr->sec);
117 if (rc < 0) 117 if (rc < 0)
@@ -120,7 +120,7 @@ ieee802154_hdr_push(struct sk_buff *skb, const struct ieee802154_hdr *hdr)
120 pos += rc; 120 pos += rc;
121 } 121 }
122 122
123 memcpy(buf, &fc, 2); 123 memcpy(buf, fc, 2);
124 124
125 memcpy(skb_push(skb, pos), buf, pos); 125 memcpy(skb_push(skb, pos), buf, pos);
126 126
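Editor's note: dropping the const from ieee802154_hdr_push() above means the frame-control bits derived while serializing (address modes, intra_pan, security version) are written back through the caller's struct instead of into a private copy. The sketch below illustrates that write-back design with stand-in types; the stated rationale is an inference from the diff, not from a commit message.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in types; the real ones live in net/ieee802154_netdev.h. */
struct hdr_fc { bool intra_pan; };
struct hdr {
	unsigned short source_pan_id, dest_pan_id;
	struct hdr_fc fc;
};

/* With a non-const parameter, bits derived during serialization land
 * in the caller's header rather than a local copy.
 */
static void hdr_push(struct hdr *hdr)
{
	hdr->fc.intra_pan = hdr->source_pan_id == hdr->dest_pan_id;
}

int main(void)
{
	struct hdr h = { .source_pan_id = 0xbeef, .dest_pan_id = 0xbeef };

	hdr_push(&h);
	printf("intra_pan=%d\n", h.fc.intra_pan);	/* intra_pan=1 */
	return 0;
}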
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 3f89c0abdab1..16ef0d9f566e 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -232,8 +232,86 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
232 [NL802154_ATTR_SUPPORTED_COMMANDS] = { .type = NLA_NESTED }, 232 [NL802154_ATTR_SUPPORTED_COMMANDS] = { .type = NLA_NESTED },
233 233
234 [NL802154_ATTR_ACKREQ_DEFAULT] = { .type = NLA_U8 }, 234 [NL802154_ATTR_ACKREQ_DEFAULT] = { .type = NLA_U8 },
235
236#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
237 [NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, },
238 [NL802154_ATTR_SEC_OUT_LEVEL] = { .type = NLA_U32, },
239 [NL802154_ATTR_SEC_OUT_KEY_ID] = { .type = NLA_NESTED, },
240 [NL802154_ATTR_SEC_FRAME_COUNTER] = { .type = NLA_U32 },
241
242 [NL802154_ATTR_SEC_LEVEL] = { .type = NLA_NESTED },
243 [NL802154_ATTR_SEC_DEVICE] = { .type = NLA_NESTED },
244 [NL802154_ATTR_SEC_DEVKEY] = { .type = NLA_NESTED },
245 [NL802154_ATTR_SEC_KEY] = { .type = NLA_NESTED },
246#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
235}; 247};
236 248
249#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
250static int
251nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
252 struct netlink_callback *cb,
253 struct cfg802154_registered_device **rdev,
254 struct wpan_dev **wpan_dev)
255{
256 int err;
257
258 rtnl_lock();
259
260 if (!cb->args[0]) {
261 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
262 nl802154_fam.attrbuf, nl802154_fam.maxattr,
263 nl802154_policy);
264 if (err)
265 goto out_unlock;
266
267 *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk),
268 nl802154_fam.attrbuf);
269 if (IS_ERR(*wpan_dev)) {
270 err = PTR_ERR(*wpan_dev);
271 goto out_unlock;
272 }
273 *rdev = wpan_phy_to_rdev((*wpan_dev)->wpan_phy);
274 /* 0 is the first index - add 1 to parse only once */
275 cb->args[0] = (*rdev)->wpan_phy_idx + 1;
276 cb->args[1] = (*wpan_dev)->identifier;
277 } else {
278 /* subtract the 1 again here */
279 struct wpan_phy *wpan_phy = wpan_phy_idx_to_wpan_phy(cb->args[0] - 1);
280 struct wpan_dev *tmp;
281
282 if (!wpan_phy) {
283 err = -ENODEV;
284 goto out_unlock;
285 }
286 *rdev = wpan_phy_to_rdev(wpan_phy);
287 *wpan_dev = NULL;
288
289 list_for_each_entry(tmp, &(*rdev)->wpan_dev_list, list) {
290 if (tmp->identifier == cb->args[1]) {
291 *wpan_dev = tmp;
292 break;
293 }
294 }
295
296 if (!*wpan_dev) {
297 err = -ENODEV;
298 goto out_unlock;
299 }
300 }
301
302 return 0;
303 out_unlock:
304 rtnl_unlock();
305 return err;
306}
307
308static void
309nl802154_finish_wpan_dev_dump(struct cfg802154_registered_device *rdev)
310{
311 rtnl_unlock();
312}
313#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
314
237/* message building helper */ 315/* message building helper */
238static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq, 316static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
239 int flags, u8 cmd) 317 int flags, u8 cmd)
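Editor's note: nl802154_prepare_wpan_dev_dump() above stores the parsed phy index in cb->args[0] with an offset of one, because netlink dump callbacks are re-entered with zeroed args on the first call and phy index 0 is itself valid. A standalone model of that resume trick; struct callback is a hypothetical stand-in for struct netlink_callback.

#include <stdio.h>

struct callback { long args[2]; };

static void prepare_dump(struct callback *cb, int parsed_phy_idx)
{
	if (!cb->args[0]) {
		/* first call: parse the request, then remember the index
		 * with +1 so a stored 0 still means "nothing parsed yet"
		 */
		cb->args[0] = parsed_phy_idx + 1;
		printf("parsed request, phy %d\n", parsed_phy_idx);
	} else {
		/* later calls: subtract the 1 again */
		printf("resuming dump at phy %ld\n", cb->args[0] - 1);
	}
}

int main(void)
{
	struct callback cb = { { 0, 0 } };

	prepare_dump(&cb, 0);	/* phy index 0 is valid, hence the +1 */
	prepare_dump(&cb, 0);
	return 0;
}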
@@ -612,6 +690,107 @@ static inline u64 wpan_dev_id(struct wpan_dev *wpan_dev)
612 ((u64)wpan_phy_to_rdev(wpan_dev->wpan_phy)->wpan_phy_idx << 32); 690 ((u64)wpan_phy_to_rdev(wpan_dev->wpan_phy)->wpan_phy_idx << 32);
613} 691}
614 692
693#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
694#include <net/ieee802154_netdev.h>
695
696static int
697ieee802154_llsec_send_key_id(struct sk_buff *msg,
698 const struct ieee802154_llsec_key_id *desc)
699{
700 struct nlattr *nl_dev_addr;
701
702 if (nla_put_u32(msg, NL802154_KEY_ID_ATTR_MODE, desc->mode))
703 return -ENOBUFS;
704
705 switch (desc->mode) {
706 case NL802154_KEY_ID_MODE_IMPLICIT:
707 nl_dev_addr = nla_nest_start(msg, NL802154_KEY_ID_ATTR_IMPLICIT);
708 if (!nl_dev_addr)
709 return -ENOBUFS;
710
711 if (nla_put_le16(msg, NL802154_DEV_ADDR_ATTR_PAN_ID,
712 desc->device_addr.pan_id) ||
713 nla_put_u32(msg, NL802154_DEV_ADDR_ATTR_MODE,
714 desc->device_addr.mode))
715 return -ENOBUFS;
716
717 switch (desc->device_addr.mode) {
718 case NL802154_DEV_ADDR_SHORT:
719 if (nla_put_le16(msg, NL802154_DEV_ADDR_ATTR_SHORT,
720 desc->device_addr.short_addr))
721 return -ENOBUFS;
722 break;
723 case NL802154_DEV_ADDR_EXTENDED:
724 if (nla_put_le64(msg, NL802154_DEV_ADDR_ATTR_EXTENDED,
725 desc->device_addr.extended_addr))
726 return -ENOBUFS;
727 break;
728 default:
729 /* userspace should handle unknown */
730 break;
731 }
732
733 nla_nest_end(msg, nl_dev_addr);
734 break;
735 case NL802154_KEY_ID_MODE_INDEX:
736 break;
737 case NL802154_KEY_ID_MODE_INDEX_SHORT:
 738 /* TODO rename short_source? */
739 if (nla_put_le32(msg, NL802154_KEY_ID_ATTR_SOURCE_SHORT,
740 desc->short_source))
741 return -ENOBUFS;
742 break;
743 case NL802154_KEY_ID_MODE_INDEX_EXTENDED:
744 if (nla_put_le64(msg, NL802154_KEY_ID_ATTR_SOURCE_EXTENDED,
745 desc->extended_source))
746 return -ENOBUFS;
747 break;
748 default:
749 /* userspace should handle unknown */
750 break;
751 }
752
753 /* TODO key_id to key_idx ? Check naming */
754 if (desc->mode != NL802154_KEY_ID_MODE_IMPLICIT) {
755 if (nla_put_u8(msg, NL802154_KEY_ID_ATTR_INDEX, desc->id))
756 return -ENOBUFS;
757 }
758
759 return 0;
760}
761
762static int nl802154_get_llsec_params(struct sk_buff *msg,
763 struct cfg802154_registered_device *rdev,
764 struct wpan_dev *wpan_dev)
765{
766 struct nlattr *nl_key_id;
767 struct ieee802154_llsec_params params;
768 int ret;
769
770 ret = rdev_get_llsec_params(rdev, wpan_dev, &params);
771 if (ret < 0)
772 return ret;
773
774 if (nla_put_u8(msg, NL802154_ATTR_SEC_ENABLED, params.enabled) ||
775 nla_put_u32(msg, NL802154_ATTR_SEC_OUT_LEVEL, params.out_level) ||
776 nla_put_be32(msg, NL802154_ATTR_SEC_FRAME_COUNTER,
777 params.frame_counter))
778 return -ENOBUFS;
779
780 nl_key_id = nla_nest_start(msg, NL802154_ATTR_SEC_OUT_KEY_ID);
781 if (!nl_key_id)
782 return -ENOBUFS;
783
784 ret = ieee802154_llsec_send_key_id(msg, &params.out_key);
785 if (ret < 0)
786 return ret;
787
788 nla_nest_end(msg, nl_key_id);
789
790 return 0;
791}
792#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
793
615static int 794static int
616nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, 795nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags,
617 struct cfg802154_registered_device *rdev, 796 struct cfg802154_registered_device *rdev,
@@ -663,6 +842,11 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags,
663 if (nla_put_u8(msg, NL802154_ATTR_ACKREQ_DEFAULT, wpan_dev->ackreq)) 842 if (nla_put_u8(msg, NL802154_ATTR_ACKREQ_DEFAULT, wpan_dev->ackreq))
664 goto nla_put_failure; 843 goto nla_put_failure;
665 844
845#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
846 if (nl802154_get_llsec_params(msg, rdev, wpan_dev) < 0)
847 goto nla_put_failure;
848#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
849
666 genlmsg_end(msg, hdr); 850 genlmsg_end(msg, hdr);
667 return 0; 851 return 0;
668 852
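Editor's note: nl802154_get_llsec_params() above wraps the outgoing key id in nla_nest_start()/nla_nest_end(), which reserves a container attribute whose length is patched only once the nested payload is complete. A simplified user-space model of that pattern follows; it is not the kernel implementation, and real netlink additionally pads attributes to 4-byte alignment, which is omitted here.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct nlattr { uint16_t nla_len, nla_type; };

static unsigned char msg[256];
static size_t msg_len;

static struct nlattr *nest_start(uint16_t type)
{
	struct nlattr *nest = (struct nlattr *)(msg + msg_len);

	nest->nla_type = type;
	msg_len += sizeof(*nest);	/* length patched in nest_end() */
	return nest;
}

static void put_u32(uint16_t type, uint32_t value)
{
	struct nlattr *nla = (struct nlattr *)(msg + msg_len);

	nla->nla_type = type;
	nla->nla_len = sizeof(*nla) + sizeof(value);
	memcpy(nla + 1, &value, sizeof(value));
	msg_len += nla->nla_len;
}

static void nest_end(struct nlattr *nest)
{
	nest->nla_len = msg + msg_len - (unsigned char *)nest;
}

int main(void)
{
	/* attribute ids are placeholders, not the NL802154_* values */
	struct nlattr *key_id = nest_start(1);

	put_u32(2, 1);		/* e.g. a key-id mode attribute */
	nest_end(key_id);
	printf("nest length: %u bytes\n", (unsigned)key_id->nla_len); /* 12 */
	return 0;
}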
@@ -753,10 +937,8 @@ static int nl802154_new_interface(struct sk_buff *skb, struct genl_info *info)
753 return -EINVAL; 937 return -EINVAL;
754 } 938 }
755 939
756 /* TODO add nla_get_le64 to netlink */
757 if (info->attrs[NL802154_ATTR_EXTENDED_ADDR]) 940 if (info->attrs[NL802154_ATTR_EXTENDED_ADDR])
758 extended_addr = (__force __le64)nla_get_u64( 941 extended_addr = nla_get_le64(info->attrs[NL802154_ATTR_EXTENDED_ADDR]);
759 info->attrs[NL802154_ATTR_EXTENDED_ADDR]);
760 942
761 if (!rdev->ops->add_virtual_intf) 943 if (!rdev->ops->add_virtual_intf)
762 return -EOPNOTSUPP; 944 return -EOPNOTSUPP;
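Editor's note: the hunk above resolves the old TODO by switching to nla_get_le64(), which returns the attribute payload as __le64 directly instead of laundering nla_get_u64()'s result through a __force cast. A user-space sketch of such a typed accessor, with a simplified struct nlattr and no alignment handling:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint64_t le64;	/* stand-in for the kernel's __le64 */

struct nlattr { uint16_t nla_len, nla_type; };

/* typed accessor: the little-endian annotation survives the read */
static le64 nla_get_le64(const struct nlattr *nla)
{
	le64 tmp;

	memcpy(&tmp, nla + 1, sizeof(tmp));	/* payload follows header */
	return tmp;
}

int main(void)
{
	unsigned char buf[sizeof(struct nlattr) + sizeof(le64)] = { 0 };
	le64 extended_addr = 0x00124b0001020304ULL;	/* made-up EUI-64 */

	memcpy(buf + sizeof(struct nlattr), &extended_addr,
	       sizeof(extended_addr));
	printf("0x%016llx\n",
	       (unsigned long long)nla_get_le64((struct nlattr *)buf));
	return 0;
}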
@@ -1075,6 +1257,838 @@ nl802154_set_ackreq_default(struct sk_buff *skb, struct genl_info *info)
1075 return rdev_set_ackreq_default(rdev, wpan_dev, ackreq); 1257 return rdev_set_ackreq_default(rdev, wpan_dev, ackreq);
1076} 1258}
1077 1259
1260#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
1261static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = {
1262 [NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 },
1263 [NL802154_DEV_ADDR_ATTR_MODE] = { .type = NLA_U32 },
1264 [NL802154_DEV_ADDR_ATTR_SHORT] = { .type = NLA_U16 },
1265 [NL802154_DEV_ADDR_ATTR_EXTENDED] = { .type = NLA_U64 },
1266};
1267
1268static int
1269ieee802154_llsec_parse_dev_addr(struct nlattr *nla,
1270 struct ieee802154_addr *addr)
1271{
1272 struct nlattr *attrs[NL802154_DEV_ADDR_ATTR_MAX + 1];
1273
1274 if (!nla || nla_parse_nested(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla,
1275 nl802154_dev_addr_policy))
1276 return -EINVAL;
1277
1278 if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] &&
1279 !attrs[NL802154_DEV_ADDR_ATTR_MODE] &&
1280 !(attrs[NL802154_DEV_ADDR_ATTR_SHORT] ||
1281 attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]))
1282 return -EINVAL;
1283
1284 addr->pan_id = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_PAN_ID]);
1285 addr->mode = nla_get_u32(attrs[NL802154_DEV_ADDR_ATTR_MODE]);
1286 switch (addr->mode) {
1287 case NL802154_DEV_ADDR_SHORT:
1288 addr->short_addr = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_SHORT]);
1289 break;
1290 case NL802154_DEV_ADDR_EXTENDED:
1291 addr->extended_addr = nla_get_le64(attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]);
1292 break;
1293 default:
1294 return -EINVAL;
1295 }
1296
1297 return 0;
1298}
1299
1300static const struct nla_policy nl802154_key_id_policy[NL802154_KEY_ID_ATTR_MAX + 1] = {
1301 [NL802154_KEY_ID_ATTR_MODE] = { .type = NLA_U32 },
1302 [NL802154_KEY_ID_ATTR_INDEX] = { .type = NLA_U8 },
1303 [NL802154_KEY_ID_ATTR_IMPLICIT] = { .type = NLA_NESTED },
1304 [NL802154_KEY_ID_ATTR_SOURCE_SHORT] = { .type = NLA_U32 },
1305 [NL802154_KEY_ID_ATTR_SOURCE_EXTENDED] = { .type = NLA_U64 },
1306};
1307
1308static int
1309ieee802154_llsec_parse_key_id(struct nlattr *nla,
1310 struct ieee802154_llsec_key_id *desc)
1311{
1312 struct nlattr *attrs[NL802154_KEY_ID_ATTR_MAX + 1];
1313
1314 if (!nla || nla_parse_nested(attrs, NL802154_KEY_ID_ATTR_MAX, nla,
1315 nl802154_key_id_policy))
1316 return -EINVAL;
1317
1318 if (!attrs[NL802154_KEY_ID_ATTR_MODE])
1319 return -EINVAL;
1320
1321 desc->mode = nla_get_u32(attrs[NL802154_KEY_ID_ATTR_MODE]);
1322 switch (desc->mode) {
1323 case NL802154_KEY_ID_MODE_IMPLICIT:
1324 if (!attrs[NL802154_KEY_ID_ATTR_IMPLICIT])
1325 return -EINVAL;
1326
1327 if (ieee802154_llsec_parse_dev_addr(attrs[NL802154_KEY_ID_ATTR_IMPLICIT],
1328 &desc->device_addr) < 0)
1329 return -EINVAL;
1330 break;
1331 case NL802154_KEY_ID_MODE_INDEX:
1332 break;
1333 case NL802154_KEY_ID_MODE_INDEX_SHORT:
1334 if (!attrs[NL802154_KEY_ID_ATTR_SOURCE_SHORT])
1335 return -EINVAL;
1336
1337 desc->short_source = nla_get_le32(attrs[NL802154_KEY_ID_ATTR_SOURCE_SHORT]);
1338 break;
1339 case NL802154_KEY_ID_MODE_INDEX_EXTENDED:
1340 if (!attrs[NL802154_KEY_ID_ATTR_SOURCE_EXTENDED])
1341 return -EINVAL;
1342
1343 desc->extended_source = nla_get_le64(attrs[NL802154_KEY_ID_ATTR_SOURCE_EXTENDED]);
1344 break;
1345 default:
1346 return -EINVAL;
1347 }
1348
1349 if (desc->mode != NL802154_KEY_ID_MODE_IMPLICIT) {
1350 if (!attrs[NL802154_KEY_ID_ATTR_INDEX])
1351 return -EINVAL;
1352
1353 /* TODO change id to idx */
1354 desc->id = nla_get_u8(attrs[NL802154_KEY_ID_ATTR_INDEX]);
1355 }
1356
1357 return 0;
1358}
1359
1360static int nl802154_set_llsec_params(struct sk_buff *skb,
1361 struct genl_info *info)
1362{
1363 struct cfg802154_registered_device *rdev = info->user_ptr[0];
1364 struct net_device *dev = info->user_ptr[1];
1365 struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
1366 struct ieee802154_llsec_params params;
1367 u32 changed = 0;
1368 int ret;
1369
1370 if (info->attrs[NL802154_ATTR_SEC_ENABLED]) {
1371 u8 enabled;
1372
1373 enabled = nla_get_u8(info->attrs[NL802154_ATTR_SEC_ENABLED]);
1374 if (enabled != 0 && enabled != 1)
1375 return -EINVAL;
1376
1377 params.enabled = enabled;
1378 changed |= IEEE802154_LLSEC_PARAM_ENABLED;
1379 }
1380
1381 if (info->attrs[NL802154_ATTR_SEC_OUT_KEY_ID]) {
1382 ret = ieee802154_llsec_parse_key_id(info->attrs[NL802154_ATTR_SEC_OUT_KEY_ID],
1383 &params.out_key);
1384 if (ret < 0)
1385 return ret;
1386
1387 changed |= IEEE802154_LLSEC_PARAM_OUT_KEY;
1388 }
1389
1390 if (info->attrs[NL802154_ATTR_SEC_OUT_LEVEL]) {
1391 params.out_level = nla_get_u32(info->attrs[NL802154_ATTR_SEC_OUT_LEVEL]);
1392 if (params.out_level > NL802154_SECLEVEL_MAX)
1393 return -EINVAL;
1394
1395 changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL;
1396 }
1397
1398 if (info->attrs[NL802154_ATTR_SEC_FRAME_COUNTER]) {
1399 params.frame_counter = nla_get_be32(info->attrs[NL802154_ATTR_SEC_FRAME_COUNTER]);
1400 changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER;
1401 }
1402
1403 return rdev_set_llsec_params(rdev, wpan_dev, &params, changed);
1404}
1405
1406static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid,
1407 u32 seq, int flags,
1408 struct cfg802154_registered_device *rdev,
1409 struct net_device *dev,
1410 const struct ieee802154_llsec_key_entry *key)
1411{
1412 void *hdr;
1413 u32 commands[NL802154_CMD_FRAME_NR_IDS / 32];
1414 struct nlattr *nl_key, *nl_key_id;
1415
1416 hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
1417 if (!hdr)
1418 return -1;
1419
1420 if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
1421 goto nla_put_failure;
1422
1423 nl_key = nla_nest_start(msg, NL802154_ATTR_SEC_KEY);
1424 if (!nl_key)
1425 goto nla_put_failure;
1426
1427 nl_key_id = nla_nest_start(msg, NL802154_KEY_ATTR_ID);
1428 if (!nl_key_id)
1429 goto nla_put_failure;
1430
1431 if (ieee802154_llsec_send_key_id(msg, &key->id) < 0)
1432 goto nla_put_failure;
1433
1434 nla_nest_end(msg, nl_key_id);
1435
1436 if (nla_put_u8(msg, NL802154_KEY_ATTR_USAGE_FRAMES,
1437 key->key->frame_types))
1438 goto nla_put_failure;
1439
1440 if (key->key->frame_types & BIT(NL802154_FRAME_CMD)) {
1441 /* TODO for each nested */
1442 memset(commands, 0, sizeof(commands));
1443 commands[7] = key->key->cmd_frame_ids;
1444 if (nla_put(msg, NL802154_KEY_ATTR_USAGE_CMDS,
1445 sizeof(commands), commands))
1446 goto nla_put_failure;
1447 }
1448
1449 if (nla_put(msg, NL802154_KEY_ATTR_BYTES, NL802154_KEY_SIZE,
1450 key->key->key))
1451 goto nla_put_failure;
1452
1453 nla_nest_end(msg, nl_key);
1454 genlmsg_end(msg, hdr);
1455
1456 return 0;
1457
1458nla_put_failure:
1459 genlmsg_cancel(msg, hdr);
1460 return -EMSGSIZE;
1461}
1462
1463static int
1464nl802154_dump_llsec_key(struct sk_buff *skb, struct netlink_callback *cb)
1465{
1466 struct cfg802154_registered_device *rdev = NULL;
1467 struct ieee802154_llsec_key_entry *key;
1468 struct ieee802154_llsec_table *table;
1469 struct wpan_dev *wpan_dev;
1470 int err;
1471
1472 err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev);
1473 if (err)
1474 return err;
1475
1476 if (!wpan_dev->netdev) {
1477 err = -EINVAL;
1478 goto out_err;
1479 }
1480
1481 rdev_lock_llsec_table(rdev, wpan_dev);
1482 rdev_get_llsec_table(rdev, wpan_dev, &table);
1483
1484 /* TODO make it like station dump */
1485 if (cb->args[2])
1486 goto out;
1487
1488 list_for_each_entry(key, &table->keys, list) {
1489 if (nl802154_send_key(skb, NL802154_CMD_NEW_SEC_KEY,
1490 NETLINK_CB(cb->skb).portid,
1491 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1492 rdev, wpan_dev->netdev, key) < 0) {
1493 /* TODO */
1494 err = -EIO;
1495 rdev_unlock_llsec_table(rdev, wpan_dev);
1496 goto out_err;
1497 }
1498 }
1499
1500 cb->args[2] = 1;
1501
1502out:
1503 rdev_unlock_llsec_table(rdev, wpan_dev);
1504 err = skb->len;
1505out_err:
1506 nl802154_finish_wpan_dev_dump(rdev);
1507
1508 return err;
1509}
1510
1511static const struct nla_policy nl802154_key_policy[NL802154_KEY_ATTR_MAX + 1] = {
1512 [NL802154_KEY_ATTR_ID] = { NLA_NESTED },
1513 /* TODO handle it as for_each_nested and NLA_FLAG? */
1514 [NL802154_KEY_ATTR_USAGE_FRAMES] = { NLA_U8 },
1515 /* TODO handle it as for_each_nested, not static array? */
1516 [NL802154_KEY_ATTR_USAGE_CMDS] = { .len = NL802154_CMD_FRAME_NR_IDS / 8 },
1517 [NL802154_KEY_ATTR_BYTES] = { .len = NL802154_KEY_SIZE },
1518};
1519
1520static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info)
1521{
1522 struct cfg802154_registered_device *rdev = info->user_ptr[0];
1523 struct net_device *dev = info->user_ptr[1];
1524 struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
1525 struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1];
1526 struct ieee802154_llsec_key key = { };
1527 struct ieee802154_llsec_key_id id = { };
1528 u32 commands[NL802154_CMD_FRAME_NR_IDS / 32] = { };
1529
1530 if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
1531 info->attrs[NL802154_ATTR_SEC_KEY],
1532 nl802154_key_policy))
1533 return -EINVAL;
1534
1535 if (!attrs[NL802154_KEY_ATTR_USAGE_FRAMES] ||
1536 !attrs[NL802154_KEY_ATTR_BYTES])
1537 return -EINVAL;
1538
1539 if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0)
1540 return -ENOBUFS;
1541
1542 key.frame_types = nla_get_u8(attrs[NL802154_KEY_ATTR_USAGE_FRAMES]);
1543 if (key.frame_types > BIT(NL802154_FRAME_MAX) ||
1544 ((key.frame_types & BIT(NL802154_FRAME_CMD)) &&
1545 !attrs[NL802154_KEY_ATTR_USAGE_CMDS]))
1546 return -EINVAL;
1547
1548 if (attrs[NL802154_KEY_ATTR_USAGE_CMDS]) {
1549 /* TODO for each nested */
1550 nla_memcpy(commands, attrs[NL802154_KEY_ATTR_USAGE_CMDS],
1551 NL802154_CMD_FRAME_NR_IDS / 8);
1552
1553 /* TODO understand the -EINVAL logic here? last condition */
1554 if (commands[0] || commands[1] || commands[2] || commands[3] ||
1555 commands[4] || commands[5] || commands[6] ||
1556 commands[7] > BIT(NL802154_CMD_FRAME_MAX))
1557 return -EINVAL;
1558
1559 key.cmd_frame_ids = commands[7];
1560 } else {
1561 key.cmd_frame_ids = 0;
1562 }
1563
1564 nla_memcpy(key.key, attrs[NL802154_KEY_ATTR_BYTES], NL802154_KEY_SIZE);
1565
1566 if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0)
1567 return -ENOBUFS;
1568
1569 return rdev_add_llsec_key(rdev, wpan_dev, &id, &key);
1570}
1571
1572static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info)
1573{
1574 struct cfg802154_registered_device *rdev = info->user_ptr[0];
1575 struct net_device *dev = info->user_ptr[1];
1576 struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
1577 struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1];
1578 struct ieee802154_llsec_key_id id;
1579
1580 if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
1581 info->attrs[NL802154_ATTR_SEC_KEY],
1582 nl802154_key_policy))
1583 return -EINVAL;
1584
1585 if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0)
1586 return -ENOBUFS;
1587
1588 return rdev_del_llsec_key(rdev, wpan_dev, &id);
1589}
1590
1591static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid,
1592 u32 seq, int flags,
1593 struct cfg802154_registered_device *rdev,
1594 struct net_device *dev,
1595 const struct ieee802154_llsec_device *dev_desc)
1596{
1597 void *hdr;
1598 struct nlattr *nl_device;
1599
1600 hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
1601 if (!hdr)
1602 return -1;
1603
1604 if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
1605 goto nla_put_failure;
1606
1607 nl_device = nla_nest_start(msg, NL802154_ATTR_SEC_DEVICE);
1608 if (!nl_device)
1609 goto nla_put_failure;
1610
1611 if (nla_put_u32(msg, NL802154_DEV_ATTR_FRAME_COUNTER,
1612 dev_desc->frame_counter) ||
1613 nla_put_le16(msg, NL802154_DEV_ATTR_PAN_ID, dev_desc->pan_id) ||
1614 nla_put_le16(msg, NL802154_DEV_ATTR_SHORT_ADDR,
1615 dev_desc->short_addr) ||
1616 nla_put_le64(msg, NL802154_DEV_ATTR_EXTENDED_ADDR,
1617 dev_desc->hwaddr) ||
1618 nla_put_u8(msg, NL802154_DEV_ATTR_SECLEVEL_EXEMPT,
1619 dev_desc->seclevel_exempt) ||
1620 nla_put_u32(msg, NL802154_DEV_ATTR_KEY_MODE, dev_desc->key_mode))
1621 goto nla_put_failure;
1622
1623 nla_nest_end(msg, nl_device);
1624 genlmsg_end(msg, hdr);
1625
1626 return 0;
1627
1628nla_put_failure:
1629 genlmsg_cancel(msg, hdr);
1630 return -EMSGSIZE;
1631}
1632
1633static int
1634nl802154_dump_llsec_dev(struct sk_buff *skb, struct netlink_callback *cb)
1635{
1636 struct cfg802154_registered_device *rdev = NULL;
1637 struct ieee802154_llsec_device *dev;
1638 struct ieee802154_llsec_table *table;
1639 struct wpan_dev *wpan_dev;
1640 int err;
1641
1642 err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev);
1643 if (err)
1644 return err;
1645
1646 if (!wpan_dev->netdev) {
1647 err = -EINVAL;
1648 goto out_err;
1649 }
1650
1651 rdev_lock_llsec_table(rdev, wpan_dev);
1652 rdev_get_llsec_table(rdev, wpan_dev, &table);
1653
1654 /* TODO make it like station dump */
1655 if (cb->args[2])
1656 goto out;
1657
1658 list_for_each_entry(dev, &table->devices, list) {
1659 if (nl802154_send_device(skb, NL802154_CMD_NEW_SEC_DEV,
1660 NETLINK_CB(cb->skb).portid,
1661 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1662 rdev, wpan_dev->netdev, dev) < 0) {
1663 /* TODO */
1664 err = -EIO;
1665 rdev_unlock_llsec_table(rdev, wpan_dev);
1666 goto out_err;
1667 }
1668 }
1669
1670 cb->args[2] = 1;
1671
1672out:
1673 rdev_unlock_llsec_table(rdev, wpan_dev);
1674 err = skb->len;
1675out_err:
1676 nl802154_finish_wpan_dev_dump(rdev);
1677
1678 return err;
1679}
1680
1681static const struct nla_policy nl802154_dev_policy[NL802154_DEV_ATTR_MAX + 1] = {
1682 [NL802154_DEV_ATTR_FRAME_COUNTER] = { NLA_U32 },
1683 [NL802154_DEV_ATTR_PAN_ID] = { .type = NLA_U16 },
1684 [NL802154_DEV_ATTR_SHORT_ADDR] = { .type = NLA_U16 },
1685 [NL802154_DEV_ATTR_EXTENDED_ADDR] = { .type = NLA_U64 },
1686 [NL802154_DEV_ATTR_SECLEVEL_EXEMPT] = { NLA_U8 },
1687 [NL802154_DEV_ATTR_KEY_MODE] = { NLA_U32 },
1688};
1689
1690static int
1691ieee802154_llsec_parse_device(struct nlattr *nla,
1692 struct ieee802154_llsec_device *dev)
1693{
1694 struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1];
1695
1696 if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, nla,
1697 nl802154_dev_policy))
1698 return -EINVAL;
1699
1700 memset(dev, 0, sizeof(*dev));
1701
1702 if (!attrs[NL802154_DEV_ATTR_FRAME_COUNTER] ||
1703 !attrs[NL802154_DEV_ATTR_PAN_ID] ||
1704 !attrs[NL802154_DEV_ATTR_SHORT_ADDR] ||
1705 !attrs[NL802154_DEV_ATTR_EXTENDED_ADDR] ||
1706 !attrs[NL802154_DEV_ATTR_SECLEVEL_EXEMPT] ||
1707 !attrs[NL802154_DEV_ATTR_KEY_MODE])
1708 return -EINVAL;
1709
1710 /* TODO be32 */
1711 dev->frame_counter = nla_get_u32(attrs[NL802154_DEV_ATTR_FRAME_COUNTER]);
1712 dev->pan_id = nla_get_le16(attrs[NL802154_DEV_ATTR_PAN_ID]);
1713 dev->short_addr = nla_get_le16(attrs[NL802154_DEV_ATTR_SHORT_ADDR]);
1714 /* TODO rename hwaddr to extended_addr */
1715 dev->hwaddr = nla_get_le64(attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]);
1716 dev->seclevel_exempt = nla_get_u8(attrs[NL802154_DEV_ATTR_SECLEVEL_EXEMPT]);
1717 dev->key_mode = nla_get_u32(attrs[NL802154_DEV_ATTR_KEY_MODE]);
1718
1719 if (dev->key_mode > NL802154_DEVKEY_MAX ||
1720 (dev->seclevel_exempt != 0 && dev->seclevel_exempt != 1))
1721 return -EINVAL;
1722
1723 return 0;
1724}
1725
1726static int nl802154_add_llsec_dev(struct sk_buff *skb, struct genl_info *info)
1727{
1728 struct cfg802154_registered_device *rdev = info->user_ptr[0];
1729 struct net_device *dev = info->user_ptr[1];
1730 struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
1731 struct ieee802154_llsec_device dev_desc;
1732
1733 if (ieee802154_llsec_parse_device(info->attrs[NL802154_ATTR_SEC_DEVICE],
1734 &dev_desc) < 0)
1735 return -EINVAL;
1736
1737 return rdev_add_device(rdev, wpan_dev, &dev_desc);
1738}
1739
1740static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info)
1741{
1742 struct cfg802154_registered_device *rdev = info->user_ptr[0];
1743 struct net_device *dev = info->user_ptr[1];
1744 struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
1745 struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1];
1746 __le64 extended_addr;
1747
1748 if (nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX,
1749 info->attrs[NL802154_ATTR_SEC_DEVICE],
1750 nl802154_dev_policy))
1751 return -EINVAL;
1752
1753 if (!attrs[NL802154_DEV_ATTR_EXTENDED_ADDR])
1754 return -EINVAL;
1755
1756 extended_addr = nla_get_le64(attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]);
1757 return rdev_del_device(rdev, wpan_dev, extended_addr);
1758}
1759
1760static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid,
1761 u32 seq, int flags,
1762 struct cfg802154_registered_device *rdev,
1763 struct net_device *dev, __le64 extended_addr,
1764 const struct ieee802154_llsec_device_key *devkey)
1765{
1766 void *hdr;
1767 struct nlattr *nl_devkey, *nl_key_id;
1768
1769 hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
1770 if (!hdr)
1771 return -1;
1772
1773 if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
1774 goto nla_put_failure;
1775
1776 nl_devkey = nla_nest_start(msg, NL802154_ATTR_SEC_DEVKEY);
1777 if (!nl_devkey)
1778 goto nla_put_failure;
1779
1780 if (nla_put_le64(msg, NL802154_DEVKEY_ATTR_EXTENDED_ADDR,
1781 extended_addr) ||
1782 nla_put_u32(msg, NL802154_DEVKEY_ATTR_FRAME_COUNTER,
1783 devkey->frame_counter))
1784 goto nla_put_failure;
1785
1786 nl_key_id = nla_nest_start(msg, NL802154_DEVKEY_ATTR_ID);
1787 if (!nl_key_id)
1788 goto nla_put_failure;
1789
1790 if (ieee802154_llsec_send_key_id(msg, &devkey->key_id) < 0)
1791 goto nla_put_failure;
1792
1793 nla_nest_end(msg, nl_key_id);
1794 nla_nest_end(msg, nl_devkey);
1795 genlmsg_end(msg, hdr);
1796
1797 return 0;
1798
1799nla_put_failure:
1800 genlmsg_cancel(msg, hdr);
1801 return -EMSGSIZE;
1802}
1803
1804static int
1805nl802154_dump_llsec_devkey(struct sk_buff *skb, struct netlink_callback *cb)
1806{
1807 struct cfg802154_registered_device *rdev = NULL;
1808 struct ieee802154_llsec_device_key *kpos;
1809 struct ieee802154_llsec_device *dpos;
1810 struct ieee802154_llsec_table *table;
1811 struct wpan_dev *wpan_dev;
1812 int err;
1813
1814 err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev);
1815 if (err)
1816 return err;
1817
1818 if (!wpan_dev->netdev) {
1819 err = -EINVAL;
1820 goto out_err;
1821 }
1822
1823 rdev_lock_llsec_table(rdev, wpan_dev);
1824 rdev_get_llsec_table(rdev, wpan_dev, &table);
1825
1826 /* TODO make it like station dump */
1827 if (cb->args[2])
1828 goto out;
1829
1830 /* TODO look if remove devkey and do some nested attribute */
1831 list_for_each_entry(dpos, &table->devices, list) {
1832 list_for_each_entry(kpos, &dpos->keys, list) {
1833 if (nl802154_send_devkey(skb,
1834 NL802154_CMD_NEW_SEC_DEVKEY,
1835 NETLINK_CB(cb->skb).portid,
1836 cb->nlh->nlmsg_seq,
1837 NLM_F_MULTI, rdev,
1838 wpan_dev->netdev,
1839 dpos->hwaddr,
1840 kpos) < 0) {
1841 /* TODO */
1842 err = -EIO;
1843 rdev_unlock_llsec_table(rdev, wpan_dev);
1844 goto out_err;
1845 }
1846 }
1847 }
1848
1849 cb->args[2] = 1;
1850
1851out:
1852 rdev_unlock_llsec_table(rdev, wpan_dev);
1853 err = skb->len;
1854out_err:
1855 nl802154_finish_wpan_dev_dump(rdev);
1856
1857 return err;
1858}
1859
1860static const struct nla_policy nl802154_devkey_policy[NL802154_DEVKEY_ATTR_MAX + 1] = {
1861 [NL802154_DEVKEY_ATTR_FRAME_COUNTER] = { NLA_U32 },
1862 [NL802154_DEVKEY_ATTR_EXTENDED_ADDR] = { NLA_U64 },
1863 [NL802154_DEVKEY_ATTR_ID] = { NLA_NESTED },
1864};
1865
1866static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info)
1867{
1868 struct cfg802154_registered_device *rdev = info->user_ptr[0];
1869 struct net_device *dev = info->user_ptr[1];
1870 struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
1871 struct nlattr *attrs[NL802154_DEVKEY_ATTR_MAX + 1];
1872 struct ieee802154_llsec_device_key key;
1873 __le64 extended_addr;
1874
1875 if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] ||
1876 nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
1877 info->attrs[NL802154_ATTR_SEC_DEVKEY],
1878 nl802154_devkey_policy) < 0)
1879 return -EINVAL;
1880
1881 if (!attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER] ||
1882 !attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR])
1883 return -EINVAL;
1884
1885 /* TODO change key.id ? */
1886 if (ieee802154_llsec_parse_key_id(attrs[NL802154_DEVKEY_ATTR_ID],
1887 &key.key_id) < 0)
1888 return -ENOBUFS;
1889
1890 /* TODO be32 */
1891 key.frame_counter = nla_get_u32(attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER]);
1892 /* TODO change naming hwaddr -> extended_addr
1893 * check unique identifier short+pan OR extended_addr
1894 */
1895 extended_addr = nla_get_le64(attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]);
1896 return rdev_add_devkey(rdev, wpan_dev, extended_addr, &key);
1897}
1898
1899static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info)
1900{
1901 struct cfg802154_registered_device *rdev = info->user_ptr[0];
1902 struct net_device *dev = info->user_ptr[1];
1903 struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
1904 struct nlattr *attrs[NL802154_DEVKEY_ATTR_MAX + 1];
1905 struct ieee802154_llsec_device_key key;
1906 __le64 extended_addr;
1907
1908 if (nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
1909 info->attrs[NL802154_ATTR_SEC_DEVKEY],
1910 nl802154_devkey_policy))
1911 return -EINVAL;
1912
1913 if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR])
1914 return -EINVAL;
1915
1916 /* TODO change key.id ? */
1917 if (ieee802154_llsec_parse_key_id(attrs[NL802154_DEVKEY_ATTR_ID],
1918 &key.key_id) < 0)
1919 return -ENOBUFS;
1920
1921 /* TODO change naming hwaddr -> extended_addr
1922 * check unique identifier short+pan OR extended_addr
1923 */
1924 extended_addr = nla_get_le64(attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]);
1925 return rdev_del_devkey(rdev, wpan_dev, extended_addr, &key);
1926}
1927
1928static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid,
1929 u32 seq, int flags,
1930 struct cfg802154_registered_device *rdev,
1931 struct net_device *dev,
1932 const struct ieee802154_llsec_seclevel *sl)
1933{
1934 void *hdr;
1935 struct nlattr *nl_seclevel;
1936
1937 hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
1938 if (!hdr)
1939 return -1;
1940
1941 if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
1942 goto nla_put_failure;
1943
1944 nl_seclevel = nla_nest_start(msg, NL802154_ATTR_SEC_LEVEL);
1945 if (!nl_seclevel)
1946 goto nla_put_failure;
1947
1948 if (nla_put_u32(msg, NL802154_SECLEVEL_ATTR_FRAME, sl->frame_type) ||
1949 nla_put_u32(msg, NL802154_SECLEVEL_ATTR_LEVELS, sl->sec_levels) ||
1950 nla_put_u8(msg, NL802154_SECLEVEL_ATTR_DEV_OVERRIDE,
1951 sl->device_override))
1952 goto nla_put_failure;
1953
1954 if (sl->frame_type == NL802154_FRAME_CMD) {
1955 if (nla_put_u32(msg, NL802154_SECLEVEL_ATTR_CMD_FRAME,
1956 sl->cmd_frame_id))
1957 goto nla_put_failure;
1958 }
1959
1960 nla_nest_end(msg, nl_seclevel);
1961 genlmsg_end(msg, hdr);
1962
1963 return 0;
1964
1965nla_put_failure:
1966 genlmsg_cancel(msg, hdr);
1967 return -EMSGSIZE;
1968}
1969
1970static int
1971nl802154_dump_llsec_seclevel(struct sk_buff *skb, struct netlink_callback *cb)
1972{
1973 struct cfg802154_registered_device *rdev = NULL;
1974 struct ieee802154_llsec_seclevel *sl;
1975 struct ieee802154_llsec_table *table;
1976 struct wpan_dev *wpan_dev;
1977 int err;
1978
1979 err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev);
1980 if (err)
1981 return err;
1982
1983 if (!wpan_dev->netdev) {
1984 err = -EINVAL;
1985 goto out_err;
1986 }
1987
1988 rdev_lock_llsec_table(rdev, wpan_dev);
1989 rdev_get_llsec_table(rdev, wpan_dev, &table);
1990
1991 /* TODO make it like station dump */
1992 if (cb->args[2])
1993 goto out;
1994
1995 list_for_each_entry(sl, &table->security_levels, list) {
1996 if (nl802154_send_seclevel(skb, NL802154_CMD_NEW_SEC_LEVEL,
1997 NETLINK_CB(cb->skb).portid,
1998 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1999 rdev, wpan_dev->netdev, sl) < 0) {
2000 /* TODO */
2001 err = -EIO;
2002 rdev_unlock_llsec_table(rdev, wpan_dev);
2003 goto out_err;
2004 }
2005 }
2006
2007 cb->args[2] = 1;
2008
2009out:
2010 rdev_unlock_llsec_table(rdev, wpan_dev);
2011 err = skb->len;
2012out_err:
2013 nl802154_finish_wpan_dev_dump(rdev);
2014
2015 return err;
2016}
2017
2018static const struct nla_policy nl802154_seclevel_policy[NL802154_SECLEVEL_ATTR_MAX + 1] = {
2019 [NL802154_SECLEVEL_ATTR_LEVELS] = { .type = NLA_U8 },
2020 [NL802154_SECLEVEL_ATTR_FRAME] = { .type = NLA_U32 },
2021 [NL802154_SECLEVEL_ATTR_CMD_FRAME] = { .type = NLA_U32 },
2022 [NL802154_SECLEVEL_ATTR_DEV_OVERRIDE] = { .type = NLA_U8 },
2023};
2024
2025static int
2026llsec_parse_seclevel(struct nlattr *nla, struct ieee802154_llsec_seclevel *sl)
2027{
2028 struct nlattr *attrs[NL802154_SECLEVEL_ATTR_MAX + 1];
2029
2030 if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX, nla,
2031 nl802154_seclevel_policy))
2032 return -EINVAL;
2033
2034 memset(sl, 0, sizeof(*sl));
2035
2036 if (!attrs[NL802154_SECLEVEL_ATTR_LEVELS] ||
2037 !attrs[NL802154_SECLEVEL_ATTR_FRAME] ||
2038 !attrs[NL802154_SECLEVEL_ATTR_DEV_OVERRIDE])
2039 return -EINVAL;
2040
2041 sl->sec_levels = nla_get_u8(attrs[NL802154_SECLEVEL_ATTR_LEVELS]);
2042 sl->frame_type = nla_get_u32(attrs[NL802154_SECLEVEL_ATTR_FRAME]);
2043 sl->device_override = nla_get_u8(attrs[NL802154_SECLEVEL_ATTR_DEV_OVERRIDE]);
2044 if (sl->frame_type > NL802154_FRAME_MAX ||
2045 (sl->device_override != 0 && sl->device_override != 1))
2046 return -EINVAL;
2047
2048 if (sl->frame_type == NL802154_FRAME_CMD) {
2049 if (!attrs[NL802154_SECLEVEL_ATTR_CMD_FRAME])
2050 return -EINVAL;
2051
2052 sl->cmd_frame_id = nla_get_u32(attrs[NL802154_SECLEVEL_ATTR_CMD_FRAME]);
2053 if (sl->cmd_frame_id > NL802154_CMD_FRAME_MAX)
2054 return -EINVAL;
2055 }
2056
2057 return 0;
2058}
2059
2060static int nl802154_add_llsec_seclevel(struct sk_buff *skb,
2061 struct genl_info *info)
2062{
2063 struct cfg802154_registered_device *rdev = info->user_ptr[0];
2064 struct net_device *dev = info->user_ptr[1];
2065 struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
2066 struct ieee802154_llsec_seclevel sl;
2067
2068 if (llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL],
2069 &sl) < 0)
2070 return -EINVAL;
2071
2072 return rdev_add_seclevel(rdev, wpan_dev, &sl);
2073}
2074
2075static int nl802154_del_llsec_seclevel(struct sk_buff *skb,
2076 struct genl_info *info)
2077{
2078 struct cfg802154_registered_device *rdev = info->user_ptr[0];
2079 struct net_device *dev = info->user_ptr[1];
2080 struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
2081 struct ieee802154_llsec_seclevel sl;
2082
2083 if (!info->attrs[NL802154_ATTR_SEC_LEVEL] ||
2084 llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL],
2085 &sl) < 0)
2086 return -EINVAL;
2087
2088 return rdev_del_seclevel(rdev, wpan_dev, &sl);
2089}
2090#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
2091
1078 2092#define NL802154_FLAG_NEED_WPAN_PHY 0x01
1079 2093#define NL802154_FLAG_NEED_NETDEV 0x02
1080 2094#define NL802154_FLAG_NEED_RTNL 0x04
@@ -1289,6 +2303,119 @@ static const struct genl_ops nl802154_ops[] = {
1289 2303 .internal_flags = NL802154_FLAG_NEED_NETDEV |
1290 2304 NL802154_FLAG_NEED_RTNL,
1291 2305 },
2306#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
2307 {
2308 .cmd = NL802154_CMD_SET_SEC_PARAMS,
2309 .doit = nl802154_set_llsec_params,
2310 .policy = nl802154_policy,
2311 .flags = GENL_ADMIN_PERM,
2312 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2313 NL802154_FLAG_NEED_RTNL,
2314 },
2315 {
2316 .cmd = NL802154_CMD_GET_SEC_KEY,
2317 /* TODO .doit by matching key id? */
2318 .dumpit = nl802154_dump_llsec_key,
2319 .policy = nl802154_policy,
2320 .flags = GENL_ADMIN_PERM,
2321 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2322 NL802154_FLAG_NEED_RTNL,
2323 },
2324 {
2325 .cmd = NL802154_CMD_NEW_SEC_KEY,
2326 .doit = nl802154_add_llsec_key,
2327 .policy = nl802154_policy,
2328 .flags = GENL_ADMIN_PERM,
2329 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2330 NL802154_FLAG_NEED_RTNL,
2331 },
2332 {
2333 .cmd = NL802154_CMD_DEL_SEC_KEY,
2334 .doit = nl802154_del_llsec_key,
2335 .policy = nl802154_policy,
2336 .flags = GENL_ADMIN_PERM,
2337 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2338 NL802154_FLAG_NEED_RTNL,
2339 },
2340 /* TODO unique identifier must short+pan OR extended_addr */
2341 {
2342 .cmd = NL802154_CMD_GET_SEC_DEV,
2343 /* TODO .doit by matching extended_addr? */
2344 .dumpit = nl802154_dump_llsec_dev,
2345 .policy = nl802154_policy,
2346 .flags = GENL_ADMIN_PERM,
2347 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2348 NL802154_FLAG_NEED_RTNL,
2349 },
2350 {
2351 .cmd = NL802154_CMD_NEW_SEC_DEV,
2352 .doit = nl802154_add_llsec_dev,
2353 .policy = nl802154_policy,
2354 .flags = GENL_ADMIN_PERM,
2355 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2356 NL802154_FLAG_NEED_RTNL,
2357 },
2358 {
2359 .cmd = NL802154_CMD_DEL_SEC_DEV,
2360 .doit = nl802154_del_llsec_dev,
2361 .policy = nl802154_policy,
2362 .flags = GENL_ADMIN_PERM,
2363 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2364 NL802154_FLAG_NEED_RTNL,
2365 },
2366 /* TODO remove complete devkey, put it as nested? */
2367 {
2368 .cmd = NL802154_CMD_GET_SEC_DEVKEY,
2369 /* TODO doit by matching ??? */
2370 .dumpit = nl802154_dump_llsec_devkey,
2371 .policy = nl802154_policy,
2372 .flags = GENL_ADMIN_PERM,
2373 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2374 NL802154_FLAG_NEED_RTNL,
2375 },
2376 {
2377 .cmd = NL802154_CMD_NEW_SEC_DEVKEY,
2378 .doit = nl802154_add_llsec_devkey,
2379 .policy = nl802154_policy,
2380 .flags = GENL_ADMIN_PERM,
2381 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2382 NL802154_FLAG_NEED_RTNL,
2383 },
2384 {
2385 .cmd = NL802154_CMD_DEL_SEC_DEVKEY,
2386 .doit = nl802154_del_llsec_devkey,
2387 .policy = nl802154_policy,
2388 .flags = GENL_ADMIN_PERM,
2389 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2390 NL802154_FLAG_NEED_RTNL,
2391 },
2392 {
2393 .cmd = NL802154_CMD_GET_SEC_LEVEL,
2394 /* TODO .doit by matching frame_type? */
2395 .dumpit = nl802154_dump_llsec_seclevel,
2396 .policy = nl802154_policy,
2397 .flags = GENL_ADMIN_PERM,
2398 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2399 NL802154_FLAG_NEED_RTNL,
2400 },
2401 {
2402 .cmd = NL802154_CMD_NEW_SEC_LEVEL,
2403 .doit = nl802154_add_llsec_seclevel,
2404 .policy = nl802154_policy,
2405 .flags = GENL_ADMIN_PERM,
2406 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2407 NL802154_FLAG_NEED_RTNL,
2408 },
2409 {
2410 .cmd = NL802154_CMD_DEL_SEC_LEVEL,
2411 /* TODO match frame_type only? */
2412 .doit = nl802154_del_llsec_seclevel,
2413 .policy = nl802154_policy,
2414 .flags = GENL_ADMIN_PERM,
2415 .internal_flags = NL802154_FLAG_NEED_NETDEV |
2416 NL802154_FLAG_NEED_RTNL,
2417 },
2418#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
1292 2419};
1293 2420
1294 2421/* initialisation/exit functions */
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index 03b357501cc5..4441c63b3ea6 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -208,4 +208,113 @@ rdev_set_ackreq_default(struct cfg802154_registered_device *rdev,
208 208 return ret;
209 209}
210 210
211#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
212/* TODO this is already a nl802154, so move into ieee802154 */
213static inline void
214rdev_get_llsec_table(struct cfg802154_registered_device *rdev,
215 struct wpan_dev *wpan_dev,
216 struct ieee802154_llsec_table **table)
217{
218 rdev->ops->get_llsec_table(&rdev->wpan_phy, wpan_dev, table);
219}
220
221static inline void
222rdev_lock_llsec_table(struct cfg802154_registered_device *rdev,
223 struct wpan_dev *wpan_dev)
224{
225 rdev->ops->lock_llsec_table(&rdev->wpan_phy, wpan_dev);
226}
227
228static inline void
229rdev_unlock_llsec_table(struct cfg802154_registered_device *rdev,
230 struct wpan_dev *wpan_dev)
231{
232 rdev->ops->unlock_llsec_table(&rdev->wpan_phy, wpan_dev);
233}
234
235static inline int
236rdev_get_llsec_params(struct cfg802154_registered_device *rdev,
237 struct wpan_dev *wpan_dev,
238 struct ieee802154_llsec_params *params)
239{
240 return rdev->ops->get_llsec_params(&rdev->wpan_phy, wpan_dev, params);
241}
242
243static inline int
244rdev_set_llsec_params(struct cfg802154_registered_device *rdev,
245 struct wpan_dev *wpan_dev,
246 const struct ieee802154_llsec_params *params,
247 u32 changed)
248{
249 return rdev->ops->set_llsec_params(&rdev->wpan_phy, wpan_dev, params,
250 changed);
251}
252
253static inline int
254rdev_add_llsec_key(struct cfg802154_registered_device *rdev,
255 struct wpan_dev *wpan_dev,
256 const struct ieee802154_llsec_key_id *id,
257 const struct ieee802154_llsec_key *key)
258{
259 return rdev->ops->add_llsec_key(&rdev->wpan_phy, wpan_dev, id, key);
260}
261
262static inline int
263rdev_del_llsec_key(struct cfg802154_registered_device *rdev,
264 struct wpan_dev *wpan_dev,
265 const struct ieee802154_llsec_key_id *id)
266{
267 return rdev->ops->del_llsec_key(&rdev->wpan_phy, wpan_dev, id);
268}
269
270static inline int
271rdev_add_seclevel(struct cfg802154_registered_device *rdev,
272 struct wpan_dev *wpan_dev,
273 const struct ieee802154_llsec_seclevel *sl)
274{
275 return rdev->ops->add_seclevel(&rdev->wpan_phy, wpan_dev, sl);
276}
277
278static inline int
279rdev_del_seclevel(struct cfg802154_registered_device *rdev,
280 struct wpan_dev *wpan_dev,
281 const struct ieee802154_llsec_seclevel *sl)
282{
283 return rdev->ops->del_seclevel(&rdev->wpan_phy, wpan_dev, sl);
284}
285
286static inline int
287rdev_add_device(struct cfg802154_registered_device *rdev,
288 struct wpan_dev *wpan_dev,
289 const struct ieee802154_llsec_device *dev_desc)
290{
291 return rdev->ops->add_device(&rdev->wpan_phy, wpan_dev, dev_desc);
292}
293
294static inline int
295rdev_del_device(struct cfg802154_registered_device *rdev,
296 struct wpan_dev *wpan_dev, __le64 extended_addr)
297{
298 return rdev->ops->del_device(&rdev->wpan_phy, wpan_dev, extended_addr);
299}
300
301static inline int
302rdev_add_devkey(struct cfg802154_registered_device *rdev,
303 struct wpan_dev *wpan_dev, __le64 extended_addr,
304 const struct ieee802154_llsec_device_key *devkey)
305{
306 return rdev->ops->add_devkey(&rdev->wpan_phy, wpan_dev, extended_addr,
307 devkey);
308}
309
310static inline int
311rdev_del_devkey(struct cfg802154_registered_device *rdev,
312 struct wpan_dev *wpan_dev, __le64 extended_addr,
313 const struct ieee802154_llsec_device_key *devkey)
314{
315 return rdev->ops->del_devkey(&rdev->wpan_phy, wpan_dev, extended_addr,
316 devkey);
317}
318#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
319
211 320#endif /* __CFG802154_RDEV_OPS */
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index b6eacf30ee7a..a548be247e15 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -273,7 +273,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
273 273 goto out;
274 274 }
275 275
276 mtu = dev->mtu;
276 mtu = IEEE802154_MTU;
277 277 pr_debug("name = %s, mtu = %u\n", dev->name, mtu);
278 278
279 279 if (size > mtu) {
@@ -637,7 +637,7 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
637 637 err = -ENXIO;
638 638 goto out;
639 639 }
640 mtu = dev->mtu;
640 mtu = IEEE802154_MTU;
641 641 pr_debug("name = %s, mtu = %u\n", dev->name, mtu);
642 642
643 643 if (size > mtu) {
@@ -676,8 +676,8 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
676 676 cb->seclevel = ro->seclevel;
677 677 cb->seclevel_override = ro->seclevel_override;
678 678
679 err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr,
679 err = wpan_dev_hard_header(skb, dev, &dst_addr,
680 680 ro->bound ? &ro->src_addr : NULL, size);
681 681 if (err < 0)
682 682 goto out_skb;
683 683
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 89aacb630a53..c29809f765dc 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,6 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \
8 8 inet_timewait_sock.o inet_connection_sock.o \
9 9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
10 10 tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
11 tcp_recovery.o \
11 12 tcp_offload.o datagram.o raw.o udp.o udplite.o \
12 13 udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
13 14 fib_frontend.o fib_semantics.o fib_trie.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1d0c3adb6f34..11c4ca13ec3b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -119,7 +119,7 @@
119 119#ifdef CONFIG_IP_MROUTE
120 120#include <linux/mroute.h>
121 121#endif
122#include <net/vrf.h>
122#include <net/l3mdev.h>
123 123
124 124
125 125/* The inetsw table contains everything that inet_create needs to
@@ -219,17 +219,13 @@ int inet_listen(struct socket *sock, int backlog)
219 219 * shutdown() (rather than close()).
220 220 */
221 221 if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
222 !inet_csk(sk)->icsk_accept_queue.fastopenq) {
222 !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
223 223 if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
224 err = fastopen_init_queue(sk, backlog);
224 fastopen_queue_tune(sk, backlog);
225 225 else if ((sysctl_tcp_fastopen &
226 226 TFO_SERVER_WO_SOCKOPT2) != 0)
227 err = fastopen_init_queue(sk,
227 fastopen_queue_tune(sk,
228 228 ((uint)sysctl_tcp_fastopen) >> 16);
229 else
230 err = 0;
231 if (err)
232 goto out;
233 229
234 230 tcp_fastopen_init_key_once(true);
235 231 }
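The queue test above changes from a pointer check to max_qlen because, after this series, the fastopen queue is assumed to be embedded in icsk_accept_queue instead of separately allocated, so tuning can no longer fail. A hedged sketch of what fastopen_queue_tune() is then expected to do (the real helper lives in net/core/request_sock.c and may additionally clamp against somaxconn):

/* Sketch only, under the embedded-queue assumption above. */
static inline void fastopen_queue_tune_sketch(struct sock *sk, int backlog)
{
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;

	queue->fastopenq.max_qlen = backlog;
}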
@@ -450,7 +446,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
450 446 goto out;
451 447 }
452 448
453 tb_id = vrf_dev_table_ifindex(net, sk->sk_bound_dev_if) ? : tb_id;
449 tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
454 450 chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
455 451
456 452 /* Not specified by any standard per-se, however it breaks too
@@ -1043,22 +1039,16 @@ void inet_register_protosw(struct inet_protosw *p)
1043 1039 goto out_illegal;
1044 1040
1045 1041 /* If we are trying to override a permanent protocol, bail. */
1046 answer = NULL;
1047 1042 last_perm = &inetsw[p->type];
1048 1043 list_for_each(lh, &inetsw[p->type]) {
1049 1044 answer = list_entry(lh, struct inet_protosw, list);
1050
1051 1045 /* Check only the non-wild match. */
1052 if (INET_PROTOSW_PERMANENT & answer->flags) {
1053 if (protocol == answer->protocol)
1054 break;
1055 last_perm = lh;
1056 }
1057
1058 answer = NULL;
1046 if ((INET_PROTOSW_PERMANENT & answer->flags) == 0)
1047 break;
1048 if (protocol == answer->protocol)
1049 goto out_permanent;
1050 last_perm = lh;
1059 1051 }
1060 if (answer)
1061 goto out_permanent;
1062 1052
1063 1053 /* Add the new entry after the last permanent entry if any, so that
1064 1054 * the new entry does not override a permanent entry when matched with
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f03db8b7abee..59b3e0e8fd51 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -312,7 +312,7 @@ static void arp_send_dst(int type, int ptype, __be32 dest_ip,
312 312 if (!skb)
313 313 return;
314 314
315 skb_dst_set(skb, dst);
315 skb_dst_set(skb, dst_clone(dst));
316 316 arp_xmit(skb);
317 317}
318 318
@@ -384,7 +384,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
384 384 }
385 385
386 386 if (skb && !(dev->priv_flags & IFF_XMIT_DST_RELEASE))
387 dst = dst_clone(skb_dst(skb));
387 dst = skb_dst(skb);
388 388 arp_send_dst(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
389 389 dst_hw, dev->dev_addr, NULL, dst);
390 390}
@@ -624,14 +624,20 @@ out:
624 624}
625 625EXPORT_SYMBOL(arp_create);
626 626
627static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
628{
629 return dev_queue_xmit(skb);
630}
631
627 632/*
628 633 * Send an arp packet.
629 634 */
630 635void arp_xmit(struct sk_buff *skb)
631 636{
632 637 /* Send it off, maybe filter it using firewalling first. */
633 NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, NULL, skb,
634 NULL, skb->dev, dev_queue_xmit_sk);
638 NF_HOOK(NFPROTO_ARP, NF_ARP_OUT,
639 dev_net(skb->dev), NULL, skb, NULL, skb->dev,
640 arp_xmit_finish);
635 641}
636 642EXPORT_SYMBOL(arp_xmit);
637 643
@@ -639,7 +645,7 @@ EXPORT_SYMBOL(arp_xmit);
639 645 * Process an arp request.
640 646 */
641 647
642static int arp_process(struct sock *sk, struct sk_buff *skb)
648static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
643 649{
644 650 struct net_device *dev = skb->dev;
645 651 struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -651,7 +657,6 @@ static int arp_process(struct sock *sk, struct sk_buff *skb)
651 657 u16 dev_type = dev->type;
652 658 int addr_type;
653 659 struct neighbour *n;
654 struct net *net = dev_net(dev);
655 660 struct dst_entry *reply_dst = NULL;
656 661 bool is_garp = false;
657 662
@@ -811,7 +816,7 @@ static int arp_process(struct sock *sk, struct sk_buff *skb)
811 816 } else {
812 817 pneigh_enqueue(&arp_tbl,
813 818 in_dev->arp_parms, skb);
814 return 0;
819 goto out_free_dst;
815 820 }
816 821 goto out;
817 822 }
@@ -865,12 +870,14 @@ static int arp_process(struct sock *sk, struct sk_buff *skb)
865 870
866 871out:
867 872 consume_skb(skb);
873out_free_dst:
874 dst_release(reply_dst);
868 875 return 0;
869 876}
870 877
871 878static void parp_redo(struct sk_buff *skb)
872 879{
873 arp_process(NULL, skb);
880 arp_process(dev_net(skb->dev), NULL, skb);
874 881}
875 882
876 883
@@ -903,8 +910,9 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
903 910
904 911 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
905 912
906 return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, NULL, skb,
907 dev, NULL, arp_process);
913 return NF_HOOK(NFPROTO_ARP, NF_ARP_IN,
914 dev_net(dev), NULL, skb, dev, NULL,
915 arp_process);
908 916
909 917consumeskb:
910 918 consume_skb(skb);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2d9cb1748f81..cebd9d31e65a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1644,7 +1644,8 @@ errout:
1644 1644 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1645 1645}
1646 1646
1647static size_t inet_get_link_af_size(const struct net_device *dev)
1647static size_t inet_get_link_af_size(const struct net_device *dev,
1648 u32 ext_filter_mask)
1648 1649{
1649 1650 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1650 1651
@@ -1654,7 +1655,8 @@ static size_t inet_get_link_af_size(const struct net_device *dev)
1654 1655 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1655 1656}
1656 1657
1657static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1658static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1659 u32 ext_filter_mask)
1658 1660{
1659 1661 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1660 1662 struct nlattr *nla;
@@ -2397,4 +2399,3 @@ void __init devinet_init(void)
2397 2399 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2398 2400 inet_netconf_dump_devconf, NULL);
2399 2401}
2400
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 690bcbc59f26..cc8f3e506cde 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -45,7 +45,7 @@
45 45#include <net/ip_fib.h>
46 46#include <net/rtnetlink.h>
47 47#include <net/xfrm.h>
48#include <net/vrf.h>
48#include <net/l3mdev.h>
49 49#include <trace/events/fib.h>
50 50
51 51#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -255,7 +255,7 @@ EXPORT_SYMBOL(inet_addr_type);
255 255unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
256 256 __be32 addr)
257 257{
258 u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL;
258 u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
259 259
260 260 return __inet_dev_addr_type(net, dev, addr, rt_table);
261 261}
@@ -268,7 +268,7 @@ unsigned int inet_addr_type_dev_table(struct net *net,
268 268 const struct net_device *dev,
269 269 __be32 addr)
270 270{
271 u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL;
271 u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
272 272
273 273 return __inet_dev_addr_type(net, NULL, addr, rt_table);
274 274}
@@ -332,7 +332,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
332 332 bool dev_match;
333 333
334 334 fl4.flowi4_oif = 0;
335 fl4.flowi4_iif = vrf_master_ifindex_rcu(dev);
335 fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
336 336 if (!fl4.flowi4_iif)
337 337 fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
338 338 fl4.daddr = src;
@@ -367,7 +367,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
367 367 if (nh->nh_dev == dev) {
368 368 dev_match = true;
369 369 break;
370 } else if (vrf_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
370 } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
371 371 dev_match = true;
372 372 break;
373 373 }
@@ -804,7 +804,7 @@ out:
804 804static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
805 805{
806 806 struct net *net = dev_net(ifa->ifa_dev->dev);
807 u32 tb_id = vrf_dev_table_rtnl(ifa->ifa_dev->dev);
807 u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
808 808 struct fib_table *tb;
809 809 struct fib_config cfg = {
810 810 .fc_protocol = RTPROT_KERNEL,
@@ -867,9 +867,10 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
867 867
868 868 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
869 869 (prefix != addr || ifa->ifa_prefixlen < 32)) {
870 fib_magic(RTM_NEWROUTE,
871 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
872 prefix, ifa->ifa_prefixlen, prim);
870 if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
871 fib_magic(RTM_NEWROUTE,
872 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
873 prefix, ifa->ifa_prefixlen, prim);
873 874
874 875 /* Add network specific broadcasts, when it takes a sense */
875 876 if (ifa->ifa_prefixlen < 31) {
@@ -914,9 +915,10 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
914 915 }
915 916 } else if (!ipv4_is_zeronet(any) &&
916 917 (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
917 fib_magic(RTM_DELROUTE,
918 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
919 any, ifa->ifa_prefixlen, prim);
918 if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
919 fib_magic(RTM_DELROUTE,
920 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
921 any, ifa->ifa_prefixlen, prim);
920 922 subnet = 1;
921 923 }
922 924
@@ -1110,9 +1112,10 @@ static void nl_fib_lookup_exit(struct net *net)
1110 1112 net->ipv4.fibnl = NULL;
1111 1113}
1112 1114
1113static void fib_disable_ip(struct net_device *dev, unsigned long event)
1115static void fib_disable_ip(struct net_device *dev, unsigned long event,
1116 bool force)
1114 1117{
1115 if (fib_sync_down_dev(dev, event))
1118 if (fib_sync_down_dev(dev, event, force))
1116 1119 fib_flush(dev_net(dev));
1117 1120 rt_cache_flush(dev_net(dev));
1118 1121 arp_ifdown(dev);
@@ -1140,7 +1143,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
1140 1143 /* Last address was deleted from this interface.
1141 1144 * Disable IP.
1142 1145 */
1143 fib_disable_ip(dev, event);
1146 fib_disable_ip(dev, event, true);
1144 1147 } else {
1145 1148 rt_cache_flush(dev_net(dev));
1146 1149 }
@@ -1157,7 +1160,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
1157 1160 unsigned int flags;
1158 1161
1159 1162 if (event == NETDEV_UNREGISTER) {
1160 fib_disable_ip(dev, event);
1163 fib_disable_ip(dev, event, true);
1161 1164 rt_flush_dev(dev);
1162 1165 return NOTIFY_DONE;
1163 1166 }
@@ -1178,14 +1181,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
1178 1181 rt_cache_flush(net);
1179 1182 break;
1180 1183 case NETDEV_DOWN:
1181 fib_disable_ip(dev, event);
1184 fib_disable_ip(dev, event, false);
1182 1185 break;
1183 1186 case NETDEV_CHANGE:
1184 1187 flags = dev_get_flags(dev);
1185 1188 if (flags & (IFF_RUNNING | IFF_LOWER_UP))
1186 1189 fib_sync_up(dev, RTNH_F_LINKDOWN);
1187 1190 else
1188 fib_sync_down_dev(dev, event);
1191 fib_sync_down_dev(dev, event, false);
1189 1192 /* fall through */
1190 1193 case NETDEV_CHANGEMTU:
1191 1194 rt_cache_flush(net);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 064bd3caaa4f..d97268e8ff10 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -57,8 +57,7 @@ static unsigned int fib_info_cnt;
57 57static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
58 58
59 59#ifdef CONFIG_IP_ROUTE_MULTIPATH
60
61static DEFINE_SPINLOCK(fib_multipath_lock);
60u32 fib_multipath_secret __read_mostly;
62 61
63 62#define for_nexthops(fi) { \
64 63 int nhsel; const struct fib_nh *nh; \
@@ -532,7 +531,67 @@ errout:
532 531 return ret;
533 532}
534 533
535#endif
534static void fib_rebalance(struct fib_info *fi)
535{
536 int total;
537 int w;
538 struct in_device *in_dev;
539
540 if (fi->fib_nhs < 2)
541 return;
542
543 total = 0;
544 for_nexthops(fi) {
545 if (nh->nh_flags & RTNH_F_DEAD)
546 continue;
547
548 in_dev = __in_dev_get_rtnl(nh->nh_dev);
549
550 if (in_dev &&
551 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
552 nh->nh_flags & RTNH_F_LINKDOWN)
553 continue;
554
555 total += nh->nh_weight;
556 } endfor_nexthops(fi);
557
558 w = 0;
559 change_nexthops(fi) {
560 int upper_bound;
561
562 in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev);
563
564 if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
565 upper_bound = -1;
566 } else if (in_dev &&
567 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
568 nexthop_nh->nh_flags & RTNH_F_LINKDOWN) {
569 upper_bound = -1;
570 } else {
571 w += nexthop_nh->nh_weight;
572 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
573 total) - 1;
574 }
575
576 atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
577 } endfor_nexthops(fi);
578
579 net_get_random_once(&fib_multipath_secret,
580 sizeof(fib_multipath_secret));
581}
582
583static inline void fib_add_weight(struct fib_info *fi,
584 const struct fib_nh *nh)
585{
586 fi->fib_weight += nh->nh_weight;
587}
588
589#else /* CONFIG_IP_ROUTE_MULTIPATH */
590
591#define fib_rebalance(fi) do { } while (0)
592#define fib_add_weight(fi, nh) do { } while (0)
593
594#endif /* CONFIG_IP_ROUTE_MULTIPATH */
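The rebalancing above replaces the old nh_power draining with precomputed thresholds: if the alive weights are w_1..w_n with sum total, nexthop i is assigned upper_bound = DIV_ROUND_CLOSEST_ULL((w_1 + ... + w_i) << 31, total) - 1, and a 31-bit flow hash is later compared against these bounds. A standalone userspace sketch of the arithmetic (DIV_ROUND_CLOSEST_ULL open-coded; the weights are hypothetical):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t weights[] = { 1, 2 };	/* two nexthops, weight 1 and 2 */
	uint64_t total = 3, w = 0;

	for (int i = 0; i < 2; i++) {
		w += weights[i];
		/* DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1 */
		int64_t upper_bound = (((w << 31) + total / 2) / total) - 1;

		printf("nh%d upper_bound = 0x%llx\n", i,
		       (unsigned long long)upper_bound);
	}
	return 0;
}

For weights 1 and 2 this prints 0x2aaaaaaa and 0x7fffffff, so a uniform hash in [0, 2^31) lands on the second nexthop about twice as often as on the first.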
536 595
537 596static int fib_encap_match(struct net *net, u16 encap_type,
538 597 struct nlattr *encap,
@@ -864,14 +923,21 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
864 923 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
865 924 fib_prefsrc != cfg->fc_dst) {
866 925 u32 tb_id = cfg->fc_table;
926 int rc;
867 927
868 928 if (tb_id == RT_TABLE_MAIN)
869 929 tb_id = RT_TABLE_LOCAL;
870 930
871 if (inet_addr_type_table(cfg->fc_nlinfo.nl_net,
872 fib_prefsrc, tb_id) != RTN_LOCAL) {
873 return false;
931 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
932 fib_prefsrc, tb_id);
933
934 if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
935 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
936 fib_prefsrc, RT_TABLE_LOCAL);
874 937 }
938
939 if (rc != RTN_LOCAL)
940 return false;
875 941 }
876 942 return true;
877 943}
@@ -1094,8 +1160,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
1094 1160
1095 1161 change_nexthops(fi) {
1096 1162 fib_info_update_nh_saddr(net, nexthop_nh);
1163 fib_add_weight(fi, nexthop_nh);
1097 1164 } endfor_nexthops(fi)
1098 1165
1166 fib_rebalance(fi);
1167
1099 1168link_it:
1100 1169 ofi = fib_find_info(fi);
1101 1170 if (ofi) {
@@ -1281,7 +1350,13 @@ int fib_sync_down_addr(struct net *net, __be32 local)
1281 1350 return ret;
1282 1351}
1283 1352
1284int fib_sync_down_dev(struct net_device *dev, unsigned long event)
1353/* Event force Flags Description
1354 * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host
1355 * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host
1356 * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed
1357 * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed
1358 */
1359int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
1285 1360{
1286 1361 int ret = 0;
1287 1362 int scope = RT_SCOPE_NOWHERE;
@@ -1290,8 +1365,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
1290 1365 struct hlist_head *head = &fib_info_devhash[hash];
1291 1366 struct fib_nh *nh;
1292 1367
1293 if (event == NETDEV_UNREGISTER ||
1294 event == NETDEV_DOWN)
1368 if (force)
1295 1369 scope = -1;
1296 1370
1297 1371 hlist_for_each_entry(nh, head, nh_hash) {
@@ -1317,12 +1391,6 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
1317 1391 nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
1318 1392 break;
1319 1393 }
1320#ifdef CONFIG_IP_ROUTE_MULTIPATH
1321 spin_lock_bh(&fib_multipath_lock);
1322 fi->fib_power -= nexthop_nh->nh_power;
1323 nexthop_nh->nh_power = 0;
1324 spin_unlock_bh(&fib_multipath_lock);
1325#endif
1326 1394 dead++;
1327 1395 }
1328 1396#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1345,6 +1413,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
1345 1413 }
1346 1414 ret++;
1347 1415 }
1416
1417 fib_rebalance(fi);
1348 1418 }
1349 1419
1350 1420 return ret;
@@ -1440,6 +1510,13 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
1440 1510 if (!(dev->flags & IFF_UP))
1441 1511 return 0;
1442 1512
1513 if (nh_flags & RTNH_F_DEAD) {
1514 unsigned int flags = dev_get_flags(dev);
1515
1516 if (flags & (IFF_RUNNING | IFF_LOWER_UP))
1517 nh_flags |= RTNH_F_LINKDOWN;
1518 }
1519
1443 1520 prev_fi = NULL;
1444 1521 hash = fib_devindex_hashfn(dev->ifindex);
1445 1522 head = &fib_info_devhash[hash];
@@ -1467,20 +1544,15 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
1467 1544 !__in_dev_get_rtnl(dev))
1468 1545 continue;
1469 1546 alive++;
1470#ifdef CONFIG_IP_ROUTE_MULTIPATH
1471 spin_lock_bh(&fib_multipath_lock);
1472 nexthop_nh->nh_power = 0;
1473 1547 nexthop_nh->nh_flags &= ~nh_flags;
1474 spin_unlock_bh(&fib_multipath_lock);
1475#else
1476 nexthop_nh->nh_flags &= ~nh_flags;
1477#endif
1478 1548 } endfor_nexthops(fi)
1479 1549
1480 1550 if (alive > 0) {
1481 1551 fi->fib_flags &= ~nh_flags;
1482 1552 ret++;
1483 1553 }
1554
1555 fib_rebalance(fi);
1484 1556 }
1485 1557
1486 1558 return ret;
@@ -1488,62 +1560,41 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
1488 1560
1489 1561#ifdef CONFIG_IP_ROUTE_MULTIPATH
1490 1562
1491/*
1492 * The algorithm is suboptimal, but it provides really
1493 * fair weighted route distribution.
1494 */
1495void fib_select_multipath(struct fib_result *res)
1563void fib_select_multipath(struct fib_result *res, int hash)
1496 1564{
1497 1565 struct fib_info *fi = res->fi;
1498 struct in_device *in_dev;
1499 int w;
1500
1501 spin_lock_bh(&fib_multipath_lock);
1502 if (fi->fib_power <= 0) {
1503 int power = 0;
1504 change_nexthops(fi) {
1505 in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev);
1506 if (nexthop_nh->nh_flags & RTNH_F_DEAD)
1507 continue;
1508 if (in_dev &&
1509 IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1510 nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
1511 continue;
1512 power += nexthop_nh->nh_weight;
1513 nexthop_nh->nh_power = nexthop_nh->nh_weight;
1514 } endfor_nexthops(fi);
1515 fi->fib_power = power;
1516 if (power <= 0) {
1517 spin_unlock_bh(&fib_multipath_lock);
1518 /* Race condition: route has just become dead. */
1519 res->nh_sel = 0;
1520 return;
1521 }
1522 }
1523
1524
1525 /* w should be random number [0..fi->fib_power-1],
1526 * it is pretty bad approximation.
1527 */
1528 1566
1529 w = jiffies % fi->fib_power; 1567 for_nexthops(fi) {
1568 if (hash > atomic_read(&nh->nh_upper_bound))
1569 continue;
1530 1570
1531 change_nexthops(fi) { 1571 res->nh_sel = nhsel;
1532 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) && 1572 return;
1533 nexthop_nh->nh_power) {
1534 w -= nexthop_nh->nh_power;
1535 if (w <= 0) {
1536 nexthop_nh->nh_power--;
1537 fi->fib_power--;
1538 res->nh_sel = nhsel;
1539 spin_unlock_bh(&fib_multipath_lock);
1540 return;
1541 }
1542 }
1543 } endfor_nexthops(fi); 1573 } endfor_nexthops(fi);
1544 1574
1545 /* Race condition: route has just become dead. */ 1575 /* Race condition: route has just become dead. */
1546 res->nh_sel = 0; 1576 res->nh_sel = 0;
1547 spin_unlock_bh(&fib_multipath_lock);
1548} 1577}
1549#endif 1578#endif
1579
1580void fib_select_path(struct net *net, struct fib_result *res,
1581 struct flowi4 *fl4, int mp_hash)
1582{
1583#ifdef CONFIG_IP_ROUTE_MULTIPATH
1584 if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
1585 if (mp_hash < 0)
1586 mp_hash = get_hash_from_flowi4(fl4) >> 1;
1587
1588 fib_select_multipath(res, mp_hash);
1589 }
1590 else
1591#endif
1592 if (!res->prefixlen &&
1593 res->table->tb_num_default > 1 &&
1594 res->type == RTN_UNICAST && !fl4->flowi4_oif)
1595 fib_select_default(fl4, res);
1596
1597 if (!fl4->saddr)
1598 fl4->saddr = FIB_RES_PREFSRC(net, *res);
1599}
1600EXPORT_SYMBOL_GPL(fib_select_path);
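
The rewrite above replaces the old weighted round-robin "power" scheme with hash-threshold nexthop selection: fib_rebalance() (called from the sync paths above) precomputes a per-nexthop upper bound, and fib_select_multipath() picks the first nexthop whose bound covers the flow hash. A stand-alone sketch of the idea; the names and the bound arithmetic below are illustrative assumptions, not copied from fib_rebalance():

/* Standalone model of hash-threshold nexthop selection; the
 * upper-bound arithmetic is an assumption modelled on the diff,
 * not the kernel's fib_rebalance().
 */
#include <limits.h>
#include <stdio.h>

struct nh { int weight; int upper_bound; };

/* Give each nexthop a slice of [0, INT_MAX] proportional to its
 * weight; the last slice always ends at INT_MAX.
 */
static void rebalance(struct nh *nhs, int n)
{
	long long total = 0, acc = 0;
	int i;

	for (i = 0; i < n; i++)
		total += nhs[i].weight;
	for (i = 0; i < n; i++) {
		acc += nhs[i].weight;
		nhs[i].upper_bound = (int)((acc * INT_MAX) / total);
	}
}

/* Mirror of the new loop: first nexthop whose bound covers the hash. */
static int select_nh(const struct nh *nhs, int n, int hash)
{
	int i;

	for (i = 0; i < n; i++)
		if (hash <= nhs[i].upper_bound)
			return i;
	return 0; /* route just became dead; fall back like the kernel */
}

int main(void)
{
	struct nh nhs[2] = { { 1, 0 }, { 3, 0 } };

	rebalance(nhs, 2);
	printf("hash 0x10000000 -> nh%d\n", select_nh(nhs, 2, 0x10000000));
	printf("hash 0x60000000 -> nh%d\n", select_nh(nhs, 2, 0x60000000));
	return 0;
}

Because the bounds partition the hash space in proportion to the weights, selection needs no shared mutable state, consistent with the fib_multipath_lock removals above.
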
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 6c2af797f2f9..744e5936c10d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1569,7 +1569,7 @@ static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
 	do {
 		/* record parent and next child index */
 		pn = n;
-		cindex = key ? get_index(key, pn) : 0;
+		cindex = (key > pn->key) ? get_index(key, pn) : 0;
 
 		if (cindex >> pn->bits)
 			break;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 5aa46d4b44ef..5a8ee3282550 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -36,7 +36,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_TCP_ECN |
 				  SKB_GSO_GRE |
 				  SKB_GSO_GRE_CSUM |
-				  SKB_GSO_IPIP)))
+				  SKB_GSO_IPIP |
+				  SKB_GSO_SIT)))
 		goto out;
 
 	if (!skb->encapsulation)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e5eb8ac4089d..36e26977c908 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -96,7 +96,7 @@
 #include <net/xfrm.h>
 #include <net/inet_common.h>
 #include <net/ip_fib.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
 
 /*
  *	Build xmit assembly blocks
@@ -309,7 +309,7 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 
 	rc = false;
 	if (icmp_global_allow()) {
-		int vif = vrf_master_ifindex(dst->dev);
+		int vif = l3mdev_master_ifindex(dst->dev);
 		struct inet_peer *peer;
 
 		peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
@@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	fl4.flowi4_mark = mark;
 	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
 	fl4.flowi4_proto = IPPROTO_ICMP;
-	fl4.flowi4_oif = vrf_master_ifindex(skb->dev);
+	fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
 	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_key(net, &fl4);
 	if (IS_ERR(rt))
@@ -440,6 +440,22 @@ out_unlock:
 	icmp_xmit_unlock(sk);
 }
 
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
+/* Source and destination is swapped. See ip_multipath_icmp_hash */
+static int icmp_multipath_hash_skb(const struct sk_buff *skb)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+
+	return fib_multipath_hash(iph->daddr, iph->saddr);
+}
+
+#else
+
+#define icmp_multipath_hash_skb(skb) (-1)
+
+#endif
+
 static struct rtable *icmp_route_lookup(struct net *net,
 					struct flowi4 *fl4,
 					struct sk_buff *skb_in,
@@ -461,10 +477,11 @@ static struct rtable *icmp_route_lookup(struct net *net,
 	fl4->flowi4_proto = IPPROTO_ICMP;
 	fl4->fl4_icmp_type = type;
 	fl4->fl4_icmp_code = code;
-	fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev);
+	fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
 
 	security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
-	rt = __ip_route_output_key(net, fl4);
+	rt = __ip_route_output_key_hash(net, fl4,
+					icmp_multipath_hash_skb(skb_in));
 	if (IS_ERR(rt))
 		return rt;
 
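
The icmp_multipath_hash_skb() helper added above feeds the route lookup a hash computed over (daddr, saddr), i.e. with the arguments swapped, per its comment. A tiny illustration that argument order matters for an asymmetric hash; the combiner below is a stand-in, not fib_multipath_hash():

#include <stdint.h>
#include <stdio.h>

/* Illustrative asymmetric combiner; the kernel's differs. */
static uint32_t flow_hash(uint32_t a, uint32_t b)
{
	uint32_t h = a * 2654435761u ^ (b + 0x9e3779b9u);

	return h ^ (h >> 16);
}

int main(void)
{
	uint32_t saddr = 0x0a000001, daddr = 0x0a000002;

	/* hash(saddr, daddr) != hash(daddr, saddr) in general, so the
	 * caller must pick the argument order deliberately, as the
	 * diff does with (iph->daddr, iph->saddr).
	 */
	printf("h(s,d)=%08x  h(d,s)=%08x\n",
	       flow_hash(saddr, daddr), flow_hash(daddr, saddr));
	return 0;
}
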
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d38b8b61eaee..6baf36e11808 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -397,7 +397,7 @@ static int igmpv3_sendpack(struct sk_buff *skb)
 
 	pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
 
-	return ip_local_out(skb);
+	return ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
 }
 
 static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
@@ -739,7 +739,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	ih->group = group;
 	ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr));
 
-	return ip_local_out(skb);
+	return ip_local_out(net, skb->sk, skb);
 }
 
 static void igmp_gq_timer_expire(unsigned long data)
@@ -2392,11 +2392,11 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 	struct ip_sf_socklist *psl;
 	struct net *net = sock_net(sk);
 
+	ASSERT_RTNL();
+
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	rtnl_lock();
-
 	imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
 	imr.imr_address.s_addr = msf->imsf_interface;
 	imr.imr_ifindex = 0;
@@ -2417,7 +2417,6 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 		goto done;
 	msf->imsf_fmode = pmc->sfmode;
 	psl = rtnl_dereference(pmc->sflist);
-	rtnl_unlock();
 	if (!psl) {
 		len = 0;
 		count = 0;
@@ -2436,7 +2435,6 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 		return -EFAULT;
 	return 0;
 done:
-	rtnl_unlock();
 	return err;
 }
 
@@ -2450,6 +2448,8 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_sf_socklist *psl;
 
+	ASSERT_RTNL();
+
 	psin = (struct sockaddr_in *)&gsf->gf_group;
 	if (psin->sin_family != AF_INET)
 		return -EINVAL;
@@ -2457,8 +2457,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	rtnl_lock();
-
 	err = -EADDRNOTAVAIL;
 
 	for_each_pmc_rtnl(inet, pmc) {
@@ -2470,7 +2468,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 		goto done;
 	gsf->gf_fmode = pmc->sfmode;
 	psl = rtnl_dereference(pmc->sflist);
-	rtnl_unlock();
 	count = psl ? psl->sl_count : 0;
 	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
 	gsf->gf_numsrc = count;
@@ -2490,7 +2487,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 	}
 	return 0;
 done:
-	rtnl_unlock();
 	return err;
 }
 
@@ -2569,7 +2565,7 @@ void ip_mc_drop_socket(struct sock *sk)
 }
 
 /* called with rcu_read_lock() */
-int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto)
+int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u8 proto)
 {
 	struct ip_mc_list *im;
 	struct ip_mc_list __rcu **mc_hash;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7bb9c39e0a4d..46b9c887bede 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -330,14 +330,12 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 		if (error)
 			goto out_err;
 	}
-	req = reqsk_queue_remove(queue);
+	req = reqsk_queue_remove(queue, sk);
 	newsk = req->sk;
 
-	sk_acceptq_removed(sk);
 	if (sk->sk_protocol == IPPROTO_TCP &&
-	    tcp_rsk(req)->tfo_listener &&
-	    queue->fastopenq) {
-		spin_lock_bh(&queue->fastopenq->lock);
+	    tcp_rsk(req)->tfo_listener) {
+		spin_lock_bh(&queue->fastopenq.lock);
 		if (tcp_rsk(req)->tfo_listener) {
 			/* We are still waiting for the final ACK from 3WHS
 			 * so can't free req now. Instead, we set req->sk to
@@ -348,7 +346,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 			req->sk = NULL;
 			req = NULL;
 		}
-		spin_unlock_bh(&queue->fastopenq->lock);
+		spin_unlock_bh(&queue->fastopenq.lock);
 	}
 out:
 	release_sock(sk);
@@ -408,7 +406,7 @@ void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
 }
 EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
 
-struct dst_entry *inet_csk_route_req(struct sock *sk,
+struct dst_entry *inet_csk_route_req(const struct sock *sk,
 				     struct flowi4 *fl4,
 				     const struct request_sock *req)
 {
@@ -439,7 +437,7 @@ no_route:
 }
 EXPORT_SYMBOL_GPL(inet_csk_route_req);
 
-struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
+struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 					    struct sock *newsk,
 					    const struct request_sock *req)
 {
@@ -478,65 +476,12 @@ no_route:
 }
 EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
 
-static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
-				 const u32 rnd, const u32 synq_hsize)
-{
-	return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
-}
-
 #if IS_ENABLED(CONFIG_IPV6)
 #define AF_INET_FAMILY(fam) ((fam) == AF_INET)
 #else
 #define AF_INET_FAMILY(fam) true
 #endif
 
-/* Note: this is temporary :
- * req sock will no longer be in listener hash table
-*/
-struct request_sock *inet_csk_search_req(struct sock *sk,
-					 const __be16 rport,
-					 const __be32 raddr,
-					 const __be32 laddr)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	struct request_sock *req;
-	u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd,
-				  lopt->nr_table_entries);
-
-	spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
-	for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) {
-		const struct inet_request_sock *ireq = inet_rsk(req);
-
-		if (ireq->ir_rmt_port == rport &&
-		    ireq->ir_rmt_addr == raddr &&
-		    ireq->ir_loc_addr == laddr &&
-		    AF_INET_FAMILY(req->rsk_ops->family)) {
-			atomic_inc(&req->rsk_refcnt);
-			WARN_ON(req->sk);
-			break;
-		}
-	}
-	spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
-
-	return req;
-}
-EXPORT_SYMBOL_GPL(inet_csk_search_req);
-
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
-				   unsigned long timeout)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	const u32 h = inet_synq_hash(inet_rsk(req)->ir_rmt_addr,
-				     inet_rsk(req)->ir_rmt_port,
-				     lopt->hash_rnd, lopt->nr_table_entries);
-
-	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
-	inet_csk_reqsk_queue_added(sk, timeout);
-}
-EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
-
 /* Only thing we need from tcp.h */
 extern int sysctl_tcp_synack_retries;
 
@@ -563,7 +508,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
 		  req->num_timeout >= rskq_defer_accept - 1;
 }
 
-int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
+int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
 {
 	int err = req->rsk_ops->rtx_syn_ack(parent, req);
 
@@ -573,26 +518,20 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
 }
 EXPORT_SYMBOL(inet_rtx_syn_ack);
 
-/* return true if req was found in the syn_table[] */
+/* return true if req was found in the ehash table */
 static bool reqsk_queue_unlink(struct request_sock_queue *queue,
 			       struct request_sock *req)
 {
-	struct listen_sock *lopt = queue->listen_opt;
-	struct request_sock **prev;
+	struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
 	bool found = false;
 
-	spin_lock(&queue->syn_wait_lock);
+	if (sk_hashed(req_to_sk(req))) {
+		spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
 
-	for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL;
-	     prev = &(*prev)->dl_next) {
-		if (*prev == req) {
-			*prev = req->dl_next;
-			found = true;
-			break;
-		}
+		spin_lock(lock);
+		found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
+		spin_unlock(lock);
 	}
-
-	spin_unlock(&queue->syn_wait_lock);
 	if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
 		reqsk_put(req);
 	return found;
@@ -607,21 +546,25 @@ void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
 }
 EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
 
+void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
+{
+	inet_csk_reqsk_queue_drop(sk, req);
+	reqsk_put(req);
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
+
 static void reqsk_timer_handler(unsigned long data)
 {
 	struct request_sock *req = (struct request_sock *)data;
 	struct sock *sk_listener = req->rsk_listener;
 	struct inet_connection_sock *icsk = inet_csk(sk_listener);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
-	struct listen_sock *lopt = queue->listen_opt;
 	int qlen, expire = 0, resend = 0;
 	int max_retries, thresh;
 	u8 defer_accept;
 
-	if (sk_listener->sk_state != TCP_LISTEN || !lopt) {
-		reqsk_put(req);
-		return;
-	}
+	if (sk_state_load(sk_listener) != TCP_LISTEN)
+		goto drop;
 
 	max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
 	thresh = max_retries;
@@ -642,9 +585,9 @@ static void reqsk_timer_handler(unsigned long data)
 	 * embrions; and abort old ones without pity, if old
 	 * ones are about to clog our table.
 	 */
-	qlen = listen_sock_qlen(lopt);
-	if (qlen >> (lopt->max_qlen_log - 1)) {
-		int young = listen_sock_young(lopt) << 1;
+	qlen = reqsk_queue_len(queue);
+	if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
+		int young = reqsk_queue_len_young(queue) << 1;
 
 		while (thresh > 2) {
 			if (qlen < young)
@@ -666,41 +609,40 @@ static void reqsk_timer_handler(unsigned long data)
 		unsigned long timeo;
 
 		if (req->num_timeout++ == 0)
-			atomic_inc(&lopt->young_dec);
+			atomic_dec(&queue->young);
 		timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
 		mod_timer_pinned(&req->rsk_timer, jiffies + timeo);
 		return;
 	}
-	inet_csk_reqsk_queue_drop(sk_listener, req);
-	reqsk_put(req);
+drop:
+	inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
 }
 
-void reqsk_queue_hash_req(struct request_sock_queue *queue,
-			  u32 hash, struct request_sock *req,
-			  unsigned long timeout)
+static void reqsk_queue_hash_req(struct request_sock *req,
+				 unsigned long timeout)
 {
-	struct listen_sock *lopt = queue->listen_opt;
-
 	req->num_retrans = 0;
 	req->num_timeout = 0;
 	req->sk = NULL;
 
 	setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
 	mod_timer_pinned(&req->rsk_timer, jiffies + timeout);
-	req->rsk_hash = hash;
 
+	inet_ehash_insert(req_to_sk(req), NULL);
 	/* before letting lookups find us, make sure all req fields
 	 * are committed to memory and refcnt initialized.
 	 */
 	smp_wmb();
-	atomic_set(&req->rsk_refcnt, 2);
+	atomic_set(&req->rsk_refcnt, 2 + 1);
+}
 
-	spin_lock(&queue->syn_wait_lock);
-	req->dl_next = lopt->syn_table[hash];
-	lopt->syn_table[hash] = req;
-	spin_unlock(&queue->syn_wait_lock);
+void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+				   unsigned long timeout)
+{
+	reqsk_queue_hash_req(req, timeout);
+	inet_csk_reqsk_queue_added(sk);
 }
-EXPORT_SYMBOL(reqsk_queue_hash_req);
+EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
 /**
  *	inet_csk_clone_lock - clone an inet socket, and lock its clone
@@ -791,16 +733,14 @@ void inet_csk_prepare_forced_close(struct sock *sk)
 }
 EXPORT_SYMBOL(inet_csk_prepare_forced_close);
 
-int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
+int inet_csk_listen_start(struct sock *sk, int backlog)
 {
-	struct inet_sock *inet = inet_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
+	struct inet_sock *inet = inet_sk(sk);
 
-	if (rc != 0)
-		return rc;
+	reqsk_queue_alloc(&icsk->icsk_accept_queue);
 
-	sk->sk_max_ack_backlog = 0;
+	sk->sk_max_ack_backlog = backlog;
 	sk->sk_ack_backlog = 0;
 	inet_csk_delack_init(sk);
 
@@ -809,7 +749,7 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 	 * It is OK, because this socket enters to hash table only
 	 * after validation is complete.
 	 */
-	sk->sk_state = TCP_LISTEN;
+	sk_state_store(sk, TCP_LISTEN);
 	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
 		inet->inet_sport = htons(inet->inet_num);
 
@@ -820,11 +760,76 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 	}
 
 	sk->sk_state = TCP_CLOSE;
-	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
 	return -EADDRINUSE;
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_start);
 
+static void inet_child_forget(struct sock *sk, struct request_sock *req,
+			      struct sock *child)
+{
+	sk->sk_prot->disconnect(child, O_NONBLOCK);
+
+	sock_orphan(child);
+
+	percpu_counter_inc(sk->sk_prot->orphan_count);
+
+	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
+		BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+		BUG_ON(sk != req->rsk_listener);
+
+		/* Paranoid, to prevent race condition if
+		 * an inbound pkt destined for child is
+		 * blocked by sock lock in tcp_v4_rcv().
+		 * Also to satisfy an assertion in
+		 * tcp_v4_destroy_sock().
+		 */
+		tcp_sk(child)->fastopen_rsk = NULL;
+	}
+	inet_csk_destroy_sock(child);
+	reqsk_put(req);
+}
+
+void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
+			      struct sock *child)
+{
+	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+
+	spin_lock(&queue->rskq_lock);
+	if (unlikely(sk->sk_state != TCP_LISTEN)) {
+		inet_child_forget(sk, req, child);
+	} else {
+		req->sk = child;
+		req->dl_next = NULL;
+		if (queue->rskq_accept_head == NULL)
+			queue->rskq_accept_head = req;
+		else
+			queue->rskq_accept_tail->dl_next = req;
+		queue->rskq_accept_tail = req;
+		sk_acceptq_added(sk);
+	}
+	spin_unlock(&queue->rskq_lock);
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
+
+struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
+					 struct request_sock *req, bool own_req)
+{
+	if (own_req) {
+		inet_csk_reqsk_queue_drop(sk, req);
+		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
+		inet_csk_reqsk_queue_add(sk, req, child);
+		/* Warning: caller must not call reqsk_put(req);
+		 * child stole last reference on it.
+		 */
+		return child;
+	}
+	/* Too bad, another child took ownership of the request, undo. */
+	bh_unlock_sock(child);
+	sock_put(child);
+	return NULL;
+}
+EXPORT_SYMBOL(inet_csk_complete_hashdance);
+
 /*
  *	This routine closes sockets which have been at least partially
  *	opened, but not yet accepted.
@@ -833,11 +838,7 @@ void inet_csk_listen_stop(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
-	struct request_sock *acc_req;
-	struct request_sock *req;
-
-	/* make all the listen_opt local to us */
-	acc_req = reqsk_queue_yank_acceptq(queue);
+	struct request_sock *next, *req;
 
 	/* Following specs, it would be better either to send FIN
 	 * (and enter FIN-WAIT-1, it is normal close)
@@ -847,57 +848,34 @@ void inet_csk_listen_stop(struct sock *sk)
 	 * To be honest, we are not able to make either
 	 * of the variants now.			--ANK
 	 */
-	reqsk_queue_destroy(queue);
-
-	while ((req = acc_req) != NULL) {
+	while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
 		struct sock *child = req->sk;
 
-		acc_req = req->dl_next;
-
 		local_bh_disable();
 		bh_lock_sock(child);
 		WARN_ON(sock_owned_by_user(child));
 		sock_hold(child);
 
-		sk->sk_prot->disconnect(child, O_NONBLOCK);
-
-		sock_orphan(child);
-
-		percpu_counter_inc(sk->sk_prot->orphan_count);
-
-		if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
-			BUG_ON(tcp_sk(child)->fastopen_rsk != req);
-			BUG_ON(sk != req->rsk_listener);
-
-			/* Paranoid, to prevent race condition if
-			 * an inbound pkt destined for child is
-			 * blocked by sock lock in tcp_v4_rcv().
-			 * Also to satisfy an assertion in
-			 * tcp_v4_destroy_sock().
-			 */
-			tcp_sk(child)->fastopen_rsk = NULL;
-		}
-		inet_csk_destroy_sock(child);
-
+		inet_child_forget(sk, req, child);
 		bh_unlock_sock(child);
 		local_bh_enable();
 		sock_put(child);
 
-		sk_acceptq_removed(sk);
-		reqsk_put(req);
+		cond_resched();
 	}
-	if (queue->fastopenq) {
+	if (queue->fastopenq.rskq_rst_head) {
 		/* Free all the reqs queued in rskq_rst_head. */
-		spin_lock_bh(&queue->fastopenq->lock);
-		acc_req = queue->fastopenq->rskq_rst_head;
-		queue->fastopenq->rskq_rst_head = NULL;
-		spin_unlock_bh(&queue->fastopenq->lock);
-		while ((req = acc_req) != NULL) {
-			acc_req = req->dl_next;
+		spin_lock_bh(&queue->fastopenq.lock);
+		req = queue->fastopenq.rskq_rst_head;
+		queue->fastopenq.rskq_rst_head = NULL;
+		spin_unlock_bh(&queue->fastopenq.lock);
+		while (req != NULL) {
+			next = req->dl_next;
 			reqsk_put(req);
+			req = next;
 		}
 	}
-	WARN_ON(sk->sk_ack_backlog);
+	WARN_ON_ONCE(sk->sk_ack_backlog);
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
 
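
Among the mechanical conversions above, the queue-pressure test in reqsk_timer_handler() changed shape: the old listen_sock power-of-two max_qlen_log shift is gone, and pressure is now defined as twice the queue length exceeding max(8, sk_max_ack_backlog). The test in isolation, under a hypothetical function name:

#include <stdio.h>

/* Returns nonzero when the SYN queue is past half of the effective
 * backlog, the condition under which the diff starts lowering the
 * SYN-ACK retransmit threshold.
 */
static int syn_queue_under_pressure(unsigned int qlen,
				    unsigned int max_ack_backlog)
{
	unsigned int limit = max_ack_backlog > 8 ? max_ack_backlog : 8;

	return (qlen << 1) > limit;
}

int main(void)
{
	printf("qlen 3, backlog 8 -> %d\n", syn_queue_under_pressure(3, 8));
	printf("qlen 5, backlog 8 -> %d\n", syn_queue_under_pressure(5, 8));
	return 0;
}
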
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index c3b1f3a0f4cf..ab9f8a66615d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -730,91 +730,21 @@ static void twsk_build_assert(void)
 #endif
 }
 
-static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
-			       struct netlink_callback *cb,
-			       const struct inet_diag_req_v2 *r,
-			       const struct nlattr *bc)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct inet_sock *inet = inet_sk(sk);
-	struct inet_diag_entry entry;
-	int j, s_j, reqnum, s_reqnum;
-	struct listen_sock *lopt;
-	int err = 0;
-
-	s_j = cb->args[3];
-	s_reqnum = cb->args[4];
-
-	if (s_j > 0)
-		s_j--;
-
-	entry.family = sk->sk_family;
-
-	spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
-
-	lopt = icsk->icsk_accept_queue.listen_opt;
-	if (!lopt || !listen_sock_qlen(lopt))
-		goto out;
-
-	if (bc) {
-		entry.sport = inet->inet_num;
-		entry.userlocks = sk->sk_userlocks;
-	}
-
-	for (j = s_j; j < lopt->nr_table_entries; j++) {
-		struct request_sock *req, *head = lopt->syn_table[j];
-
-		reqnum = 0;
-		for (req = head; req; reqnum++, req = req->dl_next) {
-			struct inet_request_sock *ireq = inet_rsk(req);
-
-			if (reqnum < s_reqnum)
-				continue;
-			if (r->id.idiag_dport != ireq->ir_rmt_port &&
-			    r->id.idiag_dport)
-				continue;
-
-			if (bc) {
-				/* Note: entry.sport and entry.userlocks are already set */
-				entry_fill_addrs(&entry, req_to_sk(req));
-				entry.dport = ntohs(ireq->ir_rmt_port);
-
-				if (!inet_diag_bc_run(bc, &entry))
-					continue;
-			}
-
-			err = inet_req_diag_fill(req_to_sk(req), skb,
-						 NETLINK_CB(cb->skb).portid,
-						 cb->nlh->nlmsg_seq,
-						 NLM_F_MULTI, cb->nlh);
-			if (err < 0) {
-				cb->args[3] = j + 1;
-				cb->args[4] = reqnum;
-				goto out;
-			}
-		}
-
-		s_reqnum = 0;
-	}
-
-out:
-	spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
-
-	return err;
-}
-
 void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 			 struct netlink_callback *cb,
 			 const struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	struct net *net = sock_net(skb->sk);
 	int i, num, s_i, s_num;
+	u32 idiag_states = r->idiag_states;
 
+	if (idiag_states & TCPF_SYN_RECV)
+		idiag_states |= TCPF_NEW_SYN_RECV;
 	s_i = cb->args[1];
 	s_num = num = cb->args[2];
 
 	if (cb->args[0] == 0) {
-		if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV)))
+		if (!(idiag_states & TCPF_LISTEN))
 			goto skip_listen_ht;
 
 		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
@@ -844,21 +774,11 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 				    r->id.idiag_sport)
 					goto next_listen;
 
-				if (!(r->idiag_states & TCPF_LISTEN) ||
-				    r->id.idiag_dport ||
+				if (r->id.idiag_dport ||
 				    cb->args[3] > 0)
-					goto syn_recv;
-
-				if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
-					spin_unlock_bh(&ilb->lock);
-					goto done;
-				}
-
-syn_recv:
-				if (!(r->idiag_states & TCPF_SYN_RECV))
 					goto next_listen;
 
-				if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) {
+				if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
 					spin_unlock_bh(&ilb->lock);
 					goto done;
 				}
@@ -879,7 +799,7 @@ skip_listen_ht:
 		s_i = num = s_num = 0;
 	}
 
-	if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV)))
+	if (!(idiag_states & ~TCPF_LISTEN))
 		goto out;
 
 	for (i = s_i; i <= hashinfo->ehash_mask; i++) {
@@ -906,7 +826,7 @@ skip_listen_ht:
 				goto next_normal;
 			state = (sk->sk_state == TCP_TIME_WAIT) ?
 				inet_twsk(sk)->tw_substate : sk->sk_state;
-			if (!(r->idiag_states & (1 << state)))
+			if (!(idiag_states & (1 << state)))
 				goto next_normal;
 			if (r->sdiag_family != AF_UNSPEC &&
 			    sk->sk_family != r->sdiag_family)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index d0a7c0319e3d..fe144dae7372 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -209,12 +209,6 @@ int inet_frags_init(struct inet_frags *f)
 }
 EXPORT_SYMBOL(inet_frags_init);
 
-void inet_frags_init_net(struct netns_frags *nf)
-{
-	init_frag_mem_limit(nf);
-}
-EXPORT_SYMBOL(inet_frags_init_net);
-
 void inet_frags_fini(struct inet_frags *f)
 {
 	cancel_work_sync(&f->frags_work);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 89120196a949..ccc5980797fc 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -126,7 +126,7 @@ void inet_put_port(struct sock *sk)
 }
 EXPORT_SYMBOL(inet_put_port);
 
-int __inet_inherit_port(struct sock *sk, struct sock *child)
+int __inet_inherit_port(const struct sock *sk, struct sock *child)
 {
 	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
 	unsigned short port = inet_sk(child)->inet_num;
@@ -137,6 +137,10 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
 
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
+	if (unlikely(!tb)) {
+		spin_unlock(&head->lock);
+		return -ENOENT;
+	}
 	if (tb->port != port) {
 		/* NOTE: using tproxy and redirecting skbs to a proxy
 		 * on a different listener port breaks the assumption
@@ -185,6 +189,8 @@ static inline int compute_score(struct sock *sk, struct net *net,
 				return -1;
 			score += 4;
 		}
+		if (sk->sk_incoming_cpu == raw_smp_processor_id())
+			score++;
 	}
 	return score;
 }
@@ -398,14 +404,18 @@ static u32 inet_sk_port_offset(const struct sock *sk)
 					  inet->inet_dport);
 }
 
-void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
+/* insert a socket into ehash, and eventually remove another one
+ * (The another one can be a SYN_RECV or TIMEWAIT
+ */
+bool inet_ehash_insert(struct sock *sk, struct sock *osk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 	struct hlist_nulls_head *list;
 	struct inet_ehash_bucket *head;
 	spinlock_t *lock;
+	bool ret = true;
 
-	WARN_ON(!sk_unhashed(sk));
+	WARN_ON_ONCE(!sk_unhashed(sk));
 
 	sk->sk_hash = sk_ehashfn(sk);
 	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
@@ -413,24 +423,41 @@ void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
 	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
 	spin_lock(lock);
-	__sk_nulls_add_node_rcu(sk, list);
 	if (osk) {
-		WARN_ON(sk->sk_hash != osk->sk_hash);
-		sk_nulls_del_node_init_rcu(osk);
+		WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
+		ret = sk_nulls_del_node_init_rcu(osk);
 	}
+	if (ret)
+		__sk_nulls_add_node_rcu(sk, list);
 	spin_unlock(lock);
-	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	return ret;
+}
+
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
+{
+	bool ok = inet_ehash_insert(sk, osk);
+
+	if (ok) {
+		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	} else {
+		percpu_counter_inc(sk->sk_prot->orphan_count);
+		sk->sk_state = TCP_CLOSE;
+		sock_set_flag(sk, SOCK_DEAD);
+		inet_csk_destroy_sock(sk);
+	}
+	return ok;
 }
-EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
+EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
 
 void __inet_hash(struct sock *sk, struct sock *osk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 	struct inet_listen_hashbucket *ilb;
 
-	if (sk->sk_state != TCP_LISTEN)
-		return __inet_hash_nolisten(sk, osk);
-
+	if (sk->sk_state != TCP_LISTEN) {
+		inet_ehash_nolisten(sk, osk);
+		return;
+	}
 	WARN_ON(!sk_unhashed(sk));
 	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
 
@@ -551,7 +578,7 @@ ok:
 		inet_bind_hash(sk, tb, port);
 		if (sk_unhashed(sk)) {
 			inet_sk(sk)->inet_sport = htons(port);
-			__inet_hash_nolisten(sk, (struct sock *)tw);
+			inet_ehash_nolisten(sk, (struct sock *)tw);
 		}
 		if (tw)
 			inet_twsk_bind_unhash(tw, hinfo);
@@ -568,7 +595,7 @@ ok:
 	tb = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__inet_hash_nolisten(sk, NULL);
+		inet_ehash_nolisten(sk, NULL);
 		spin_unlock_bh(&head->lock);
 		return 0;
 	} else {
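
Note the ordering change inside inet_ehash_insert() above: the old code added the new socket and then unhashed osk; now osk is removed first and the new socket is only added when that removal succeeded, so a bucket never momentarily answers for the same connection twice. A toy single-slot model of that invariant (all names hypothetical):

#include <stdbool.h>
#include <stdio.h>

struct slot { void *owner; };

/* Replace the bucket's owner only if the expected old owner is
 * still present; mirrors "add the new entry only when deleting
 * the old one succeeded".
 */
static bool ehash_insert(struct slot *s, void *newsk, void *osk)
{
	bool ret = true;

	if (osk)
		ret = (s->owner == osk);   /* did the "del_node" succeed? */
	if (ret)
		s->owner = newsk;          /* only then publish newsk */
	return ret;
}

int main(void)
{
	struct slot s = { .owner = (void *)1 };

	printf("evict ok:   %d\n", ehash_insert(&s, (void *)2, (void *)1));
	printf("evict lost: %d\n", ehash_insert(&s, (void *)3, (void *)9));
	return 0;
}
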
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 2d3aa408fbdc..da0d7ce85844 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -61,18 +61,18 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 }
 
 
-static int ip_forward_finish(struct sock *sk, struct sk_buff *skb)
+static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct ip_options *opt	= &(IPCB(skb)->opt);
 
-	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
-	IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len);
 
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
 	skb_sender_cpu_clear(skb);
-	return dst_output_sk(sk, skb);
+	return dst_output(net, sk, skb);
 }
 
 int ip_forward(struct sk_buff *skb)
@@ -81,6 +81,7 @@ int ip_forward(struct sk_buff *skb)
 	struct iphdr *iph;	/* Our header */
 	struct rtable *rt;	/* Route we use */
 	struct ip_options *opt	= &(IPCB(skb)->opt);
+	struct net *net;
 
 	/* that should never happen */
 	if (skb->pkt_type != PACKET_HOST)
@@ -99,6 +100,7 @@ int ip_forward(struct sk_buff *skb)
 		return NET_RX_SUCCESS;
 
 	skb_forward_csum(skb);
+	net = dev_net(skb->dev);
 
 	/*
 	 *	According to the RFC, we must first decrease the TTL field. If
@@ -119,7 +121,7 @@ int ip_forward(struct sk_buff *skb)
 	IPCB(skb)->flags |= IPSKB_FORWARDED;
 	mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
 	if (ip_exceeds_mtu(skb, mtu)) {
-		IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			  htonl(mtu));
 		goto drop;
@@ -143,8 +145,9 @@ int ip_forward(struct sk_buff *skb)
 
 	skb->priority = rt_tos2priority(iph->tos);
 
-	return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb,
-		       skb->dev, rt->dst.dev, ip_forward_finish);
+	return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
+		       net, NULL, skb, skb->dev, rt->dst.dev,
+		       ip_forward_finish);
 
 sr_failed:
 	/*
@@ -155,7 +158,7 @@ sr_failed:
 
 too_many_hops:
 	/* Tell the sender its packet died... */
-	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_INHDRERRORS);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
 	icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
 drop:
 	kfree_skb(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fa7f15305f9a..1fe55ae81781 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -48,7 +48,7 @@
 #include <linux/inet.h>
 #include <linux/netfilter_ipv4.h>
 #include <net/inet_ecn.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
 
 /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
  * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -78,7 +78,7 @@ struct ipq {
 	u8		ecn; /* RFC3168 support */
 	u16		max_df_size; /* largest frag with DF set seen */
 	int             iif;
-	int             vif;   /* VRF device index */
+	int             vif;   /* L3 master device index */
 	unsigned int    rid;
 	struct inet_peer *peer;
 };
@@ -654,11 +654,10 @@ out_fail:
 }
 
 /* Process an incoming IP datagram fragment. */
-int ip_defrag(struct sk_buff *skb, u32 user)
+int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
 {
 	struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
-	int vif = vrf_master_ifindex_rcu(dev);
-	struct net *net = dev_net(dev);
+	int vif = l3mdev_master_ifindex_rcu(dev);
 	struct ipq *qp;
 
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
@@ -683,7 +682,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
 }
 EXPORT_SYMBOL(ip_defrag);
 
-struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
+struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
 {
 	struct iphdr iph;
 	int netoff;
@@ -712,7 +711,7 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
 		if (pskb_trim_rcsum(skb, netoff + len))
 			return skb;
 		memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
-		if (ip_defrag(skb, user))
+		if (ip_defrag(net, skb, user))
 			return NULL;
 		skb_clear_hash(skb);
 	}
@@ -840,6 +839,8 @@ static void __init ip4_frags_ctl_register(void)
 
 static int __net_init ipv4_frags_init_net(struct net *net)
 {
+	int res;
+
 	/* Fragment cache limits.
 	 *
 	 * The fragment memory accounting code, (tries to) account for
@@ -863,9 +864,13 @@ static int __net_init ipv4_frags_init_net(struct net *net)
 	 */
 	net->ipv4.frags.timeout = IP_FRAG_TIME;
 
-	inet_frags_init_net(&net->ipv4.frags);
-
-	return ip4_frags_ns_ctl_register(net);
+	res = inet_frags_init_net(&net->ipv4.frags);
+	if (res)
+		return res;
+	res = ip4_frags_ns_ctl_register(net);
+	if (res)
+		inet_frags_uninit_net(&net->ipv4.frags);
+	return res;
 }
 
 static void __net_exit ipv4_frags_exit_net(struct net *net)
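
ipv4_frags_init_net() above now checks the result of inet_frags_init_net() and unwinds it when sysctl registration fails. The same two-step init/unwind shape, reduced to a runnable stub (all names hypothetical):

#include <stdio.h>

static int step_ok;

static int frags_init(void)    { return 0; }                 /* step one */
static int ctl_register(void)  { return step_ok ? 0 : -1; }  /* step two */
static void frags_uninit(void) { puts("unwound step one"); }

static int demo_init_net(void)
{
	int res = frags_init();

	if (res)
		return res;            /* nothing to unwind yet */
	res = ctl_register();
	if (res)
		frags_uninit();        /* undo step one on failure */
	return res;
}

int main(void)
{
	step_ok = 0;
	printf("failing path -> %d\n", demo_init_net());
	step_ok = 1;
	printf("happy path   -> %d\n", demo_init_net());
	return 0;
}
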
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bd0679d90519..614521437e30 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -498,10 +498,26 @@ static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
 				  csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 }
 
+static struct rtable *gre_get_rt(struct sk_buff *skb,
+				 struct net_device *dev,
+				 struct flowi4 *fl,
+				 const struct ip_tunnel_key *key)
+{
+	struct net *net = dev_net(dev);
+
+	memset(fl, 0, sizeof(*fl));
+	fl->daddr = key->u.ipv4.dst;
+	fl->saddr = key->u.ipv4.src;
+	fl->flowi4_tos = RT_TOS(key->tos);
+	fl->flowi4_mark = skb->mark;
+	fl->flowi4_proto = IPPROTO_GRE;
+
+	return ip_route_output_key(net, fl);
+}
+
 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel_info *tun_info;
-	struct net *net = dev_net(dev);
 	const struct ip_tunnel_key *key;
 	struct flowi4 fl;
 	struct rtable *rt;
@@ -516,14 +532,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto err_free_skb;
 
 	key = &tun_info->key;
-	memset(&fl, 0, sizeof(fl));
-	fl.daddr = key->u.ipv4.dst;
-	fl.saddr = key->u.ipv4.src;
-	fl.flowi4_tos = RT_TOS(key->tos);
-	fl.flowi4_mark = skb->mark;
-	fl.flowi4_proto = IPPROTO_GRE;
-
-	rt = ip_route_output_key(net, &fl);
+	rt = gre_get_rt(skb, dev, &fl, key);
 	if (IS_ERR(rt))
 		goto err_free_skb;
 
@@ -566,6 +575,24 @@ err_free_skb:
 	dev->stats.tx_dropped++;
 }
 
+static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+	struct ip_tunnel_info *info = skb_tunnel_info(skb);
+	struct rtable *rt;
+	struct flowi4 fl4;
+
+	if (ip_tunnel_info_af(info) != AF_INET)
+		return -EINVAL;
+
+	rt = gre_get_rt(skb, dev, &fl4, &info->key);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+
+	ip_rt_put(rt);
+	info->key.u.ipv4.src = fl4.saddr;
+	return 0;
+}
+
 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 			      struct net_device *dev)
 {
@@ -1023,6 +1050,7 @@ static const struct net_device_ops gre_tap_netdev_ops = {
 	.ndo_change_mtu		= ip_tunnel_change_mtu,
 	.ndo_get_stats64	= ip_tunnel_get_stats64,
 	.ndo_get_iflink		= ip_tunnel_get_iflink,
+	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
 };
 
 static void ipgre_tap_setup(struct net_device *dev)
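
The new gre_fill_metadata_dst() above performs a route lookup only to learn which source address a transmit would use, releases the route with ip_rt_put(), and stores fl4.saddr into the tunnel key; the lookup itself is shared with the transmit path via the new gre_get_rt() helper. A minimal sketch of that lookup-to-learn-saddr shape, under hypothetical names:

#include <stdint.h>
#include <stdio.h>

struct route { uint32_t saddr; };

/* Stand-in for the routing lookup: pretend the route chooses a
 * fixed local source address for every destination.
 */
static int route_lookup(uint32_t daddr, struct route *rt)
{
	(void)daddr;
	rt->saddr = 0x0a00002a;	/* 10.0.0.42 */
	return 0;
}

/* Learn the source address the stack would use, then discard the
 * route, as gre_fill_metadata_dst() does with ip_rt_put().
 */
static int fill_metadata_src(uint32_t daddr, uint32_t *src_out)
{
	struct route rt;
	int err = route_lookup(daddr, &rt);

	if (err)
		return err;
	*src_out = rt.saddr;	/* keep only the saddr, not the route */
	return 0;
}

int main(void)
{
	uint32_t src = 0;

	if (!fill_metadata_src(0x0a000001, &src))
		printf("learned saddr %08x\n", src);
	return 0;
}
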
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index f4fc8a77aaa7..b1209b63381f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -157,6 +157,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
157 u8 protocol = ip_hdr(skb)->protocol; 157 u8 protocol = ip_hdr(skb)->protocol;
158 struct sock *last = NULL; 158 struct sock *last = NULL;
159 struct net_device *dev = skb->dev; 159 struct net_device *dev = skb->dev;
160 struct net *net = dev_net(dev);
160 161
161 for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { 162 for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
162 struct sock *sk = ra->sk; 163 struct sock *sk = ra->sk;
@@ -167,9 +168,9 @@ bool ip_call_ra_chain(struct sk_buff *skb)
167 if (sk && inet_sk(sk)->inet_num == protocol && 168 if (sk && inet_sk(sk)->inet_num == protocol &&
168 (!sk->sk_bound_dev_if || 169 (!sk->sk_bound_dev_if ||
169 sk->sk_bound_dev_if == dev->ifindex) && 170 sk->sk_bound_dev_if == dev->ifindex) &&
170 net_eq(sock_net(sk), dev_net(dev))) { 171 net_eq(sock_net(sk), net)) {
171 if (ip_is_fragment(ip_hdr(skb))) { 172 if (ip_is_fragment(ip_hdr(skb))) {
172 if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) 173 if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
173 return true; 174 return true;
174 } 175 }
175 if (last) { 176 if (last) {
@@ -188,10 +189,8 @@ bool ip_call_ra_chain(struct sk_buff *skb)
188 return false; 189 return false;
189} 190}
190 191
191static int ip_local_deliver_finish(struct sock *sk, struct sk_buff *skb) 192static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
192{ 193{
193 struct net *net = dev_net(skb->dev);
194
195 __skb_pull(skb, skb_network_header_len(skb)); 194 __skb_pull(skb, skb_network_header_len(skb));
196 195
197 rcu_read_lock(); 196 rcu_read_lock();
@@ -248,14 +247,15 @@ int ip_local_deliver(struct sk_buff *skb)
248 /* 247 /*
249 * Reassemble IP fragments. 248 * Reassemble IP fragments.
250 */ 249 */
250 struct net *net = dev_net(skb->dev);
251 251
252 if (ip_is_fragment(ip_hdr(skb))) { 252 if (ip_is_fragment(ip_hdr(skb))) {
253 if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) 253 if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
254 return 0; 254 return 0;
255 } 255 }
256 256
257 return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, NULL, skb, 257 return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN,
258 skb->dev, NULL, 258 net, NULL, skb, skb->dev, NULL,
259 ip_local_deliver_finish); 259 ip_local_deliver_finish);
260} 260}
261 261
@@ -311,7 +311,7 @@ drop:
311int sysctl_ip_early_demux __read_mostly = 1; 311int sysctl_ip_early_demux __read_mostly = 1;
312EXPORT_SYMBOL(sysctl_ip_early_demux); 312EXPORT_SYMBOL(sysctl_ip_early_demux);
313 313
314static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb) 314static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
315{ 315{
316 const struct iphdr *iph = ip_hdr(skb); 316 const struct iphdr *iph = ip_hdr(skb);
317 struct rtable *rt; 317 struct rtable *rt;
@@ -337,8 +337,7 @@ static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
337 iph->tos, skb->dev); 337 iph->tos, skb->dev);
338 if (unlikely(err)) { 338 if (unlikely(err)) {
339 if (err == -EXDEV) 339 if (err == -EXDEV)
340 NET_INC_STATS_BH(dev_net(skb->dev), 340 NET_INC_STATS_BH(net, LINUX_MIB_IPRPFILTER);
341 LINUX_MIB_IPRPFILTER);
342 goto drop; 341 goto drop;
343 } 342 }
344 } 343 }
@@ -359,11 +358,9 @@ static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
359 358
360 rt = skb_rtable(skb); 359 rt = skb_rtable(skb);
361 if (rt->rt_type == RTN_MULTICAST) { 360 if (rt->rt_type == RTN_MULTICAST) {
-		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST,
-				   skb->len);
+		IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len);
 	} else if (rt->rt_type == RTN_BROADCAST)
-		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST,
-				   skb->len);
+		IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len);
 
 	return dst_input(skb);
 
@@ -378,6 +375,7 @@ drop:
 int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
 {
 	const struct iphdr *iph;
+	struct net *net;
 	u32 len;
 
 	/* When the interface is in promisc. mode, drop all the crap
@@ -387,11 +385,12 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 		goto drop;
 
 
-	IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);
+	net = dev_net(dev);
+	IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len);
 
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (!skb) {
-		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
+		IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS);
 		goto out;
 	}
 
@@ -417,7 +416,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
 	BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
 	BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
-	IP_ADD_STATS_BH(dev_net(dev),
+	IP_ADD_STATS_BH(net,
 			IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
 			max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
 
@@ -431,7 +430,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 
 	len = ntohs(iph->tot_len);
 	if (skb->len < len) {
-		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
+		IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS);
 		goto drop;
 	} else if (len < (iph->ihl*4))
 		goto inhdr_error;
@@ -441,7 +440,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	 * Note this now means skb->len holds ntohs(iph->tot_len).
 	 */
 	if (pskb_trim_rcsum(skb, len)) {
-		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
+		IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS);
 		goto drop;
 	}
 
@@ -453,14 +452,14 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	/* Must drop socket now because of tproxy. */
 	skb_orphan(skb);
 
-	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb,
-		       dev, NULL,
+	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+		       net, NULL, skb, dev, NULL,
 		       ip_rcv_finish);
 
csum_error:
-	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
-	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
drop:
 	kfree_skb(skb);
out:
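
Note: every hunk above follows one pattern — `net` is resolved once per packet via dev_net(dev) and then reused at each statistics bump, and NF_HOOK() gains an explicit `struct net *` so the okfn continuation runs in the same namespace the hook traversal used. A minimal sketch of the new shape, with deliberately simplified prototypes (the in-tree macro also runs the registered hooks behind a static key, which is elided here):

	/* sketch only -- simplified from this series, not the full kernel API */
	typedef int nf_okfn_t(struct net *net, struct sock *sk, struct sk_buff *skb);

	static inline int NF_HOOK_sketch(u_int8_t pf, unsigned int hook,
					 struct net *net, struct sock *sk,
					 struct sk_buff *skb,
					 struct net_device *in,
					 struct net_device *out,
					 nf_okfn_t *okfn)
	{
		/* hook traversal elided; on NF_ACCEPT the continuation is
		 * invoked with the namespace the caller already resolved */
		return okfn(net, sk, skb);
	}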
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0138fada0951..4233cbe47052 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -83,9 +83,10 @@
 int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
 EXPORT_SYMBOL(sysctl_ip_default_ttl);
 
-static int ip_fragment(struct sock *sk, struct sk_buff *skb,
-		       unsigned int mtu,
-		       int (*output)(struct sock *, struct sk_buff *));
+static int
+ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+	    unsigned int mtu,
+	    int (*output)(struct net *, struct sock *, struct sk_buff *));
 
 /* Generate a checksum for an outgoing IP datagram. */
 void ip_send_check(struct iphdr *iph)
@@ -95,32 +96,28 @@ void ip_send_check(struct iphdr *iph)
 }
 EXPORT_SYMBOL(ip_send_check);
 
-static int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
+int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct iphdr *iph = ip_hdr(skb);
 
 	iph->tot_len = htons(skb->len);
 	ip_send_check(iph);
-	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, NULL,
-		       skb_dst(skb)->dev, dst_output_sk);
+	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
+		       net, sk, skb, NULL, skb_dst(skb)->dev,
+		       dst_output);
 }
 
-int __ip_local_out(struct sk_buff *skb)
-{
-	return __ip_local_out_sk(skb->sk, skb);
-}
-
-int ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
+int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	int err;
 
-	err = __ip_local_out(skb);
+	err = __ip_local_out(net, sk, skb);
 	if (likely(err == 1))
-		err = dst_output_sk(sk, skb);
+		err = dst_output(net, sk, skb);
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(ip_local_out_sk);
+EXPORT_SYMBOL_GPL(ip_local_out);
 
 static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
 {
@@ -135,11 +132,12 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
  * Add an ip header to a skbuff and send it out.
  *
  */
-int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
+int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
 			  __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct rtable *rt = skb_rtable(skb);
+	struct net *net = sock_net(sk);
 	struct iphdr *iph;
 
 	/* Build the IP header. */
@@ -149,15 +147,17 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	iph->version = 4;
 	iph->ihl = 5;
 	iph->tos = inet->tos;
-	if (ip_dont_fragment(sk, &rt->dst))
-		iph->frag_off = htons(IP_DF);
-	else
-		iph->frag_off = 0;
 	iph->ttl = ip_select_ttl(inet, &rt->dst);
 	iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
 	iph->saddr = saddr;
 	iph->protocol = sk->sk_protocol;
-	ip_select_ident(sock_net(sk), skb, sk);
+	if (ip_dont_fragment(sk, &rt->dst)) {
+		iph->frag_off = htons(IP_DF);
+		iph->id = 0;
+	} else {
+		iph->frag_off = 0;
+		__ip_select_ident(net, iph, 1);
+	}
 
 	if (opt && opt->opt.optlen) {
 		iph->ihl += opt->opt.optlen>>2;
@@ -168,11 +168,11 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	skb->mark = sk->sk_mark;
 
 	/* Send it out. */
-	return ip_local_out(skb);
+	return ip_local_out(net, skb->sk, skb);
 }
 EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
 
-static int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
+static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct rtable *rt = (struct rtable *)dst;
@@ -182,9 +182,9 @@ static int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
 	u32 nexthop;
 
 	if (rt->rt_type == RTN_MULTICAST) {
-		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
+		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
 	} else if (rt->rt_type == RTN_BROADCAST)
-		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len);
+		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
 
 	/* Be paranoid, rather than too clever. */
 	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
@@ -220,8 +220,8 @@ static int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
 	return -EINVAL;
 }
 
-static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
-				unsigned int mtu)
+static int ip_finish_output_gso(struct net *net, struct sock *sk,
+				struct sk_buff *skb, unsigned int mtu)
 {
 	netdev_features_t features;
 	struct sk_buff *segs;
@@ -230,7 +230,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
 	/* common case: locally created skb or seglen is <= mtu */
 	if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
 	    skb_gso_network_seglen(skb) <= mtu)
-		return ip_finish_output2(sk, skb);
+		return ip_finish_output2(net, sk, skb);
 
 	/* Slowpath - GSO segment length is exceeding the dst MTU.
 	 *
@@ -253,7 +253,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
 		int err;
 
 		segs->next = NULL;
-		err = ip_fragment(sk, segs, mtu, ip_finish_output2);
+		err = ip_fragment(net, sk, segs, mtu, ip_finish_output2);
 
 		if (err && ret == 0)
 			ret = err;
@@ -263,7 +263,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
 	return ret;
 }
 
-static int ip_finish_output(struct sock *sk, struct sk_buff *skb)
+static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	unsigned int mtu;
 
@@ -271,20 +271,20 @@ static int ip_finish_output(struct sock *sk, struct sk_buff *skb)
 	/* Policy lookup after SNAT yielded a new policy */
 	if (skb_dst(skb)->xfrm) {
 		IPCB(skb)->flags |= IPSKB_REROUTED;
-		return dst_output_sk(sk, skb);
+		return dst_output(net, sk, skb);
 	}
 #endif
 	mtu = ip_skb_dst_mtu(skb);
 	if (skb_is_gso(skb))
-		return ip_finish_output_gso(sk, skb, mtu);
+		return ip_finish_output_gso(net, sk, skb, mtu);
 
 	if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
-		return ip_fragment(sk, skb, mtu, ip_finish_output2);
+		return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
 
-	return ip_finish_output2(sk, skb);
+	return ip_finish_output2(net, sk, skb);
 }
 
-int ip_mc_output(struct sock *sk, struct sk_buff *skb)
+int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
 	struct net_device *dev = rt->dst.dev;
@@ -292,7 +292,7 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb)
 	/*
 	 *	If the indicated interface is up and running, send the packet.
 	 */
-	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
+	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
 
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
@@ -320,7 +320,7 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb)
 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 			if (newskb)
 				NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
-					sk, newskb, NULL, newskb->dev,
+					net, sk, newskb, NULL, newskb->dev,
 					dev_loopback_xmit);
 		}
 
@@ -335,26 +335,28 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb)
 	if (rt->rt_flags&RTCF_BROADCAST) {
 		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 		if (newskb)
-			NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, newskb,
-				NULL, newskb->dev, dev_loopback_xmit);
+			NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+				net, sk, newskb, NULL, newskb->dev,
+				dev_loopback_xmit);
 	}
 
-	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL,
-			    skb->dev, ip_finish_output,
+	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+			    net, sk, skb, NULL, skb->dev,
+			    ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
-int ip_output(struct sock *sk, struct sk_buff *skb)
+int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct net_device *dev = skb_dst(skb)->dev;
 
-	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
+	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
 
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
 
-	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb,
-			    NULL, dev,
+	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+			    net, sk, skb, NULL, dev,
 			    ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
@@ -377,6 +379,7 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
 int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
 {
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 	struct ip_options_rcu *inet_opt;
 	struct flowi4 *fl4;
 	struct rtable *rt;
@@ -407,7 +410,7 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
 		 * keep trying until route appears or the connection times
 		 * itself out.
 		 */
-		rt = ip_route_output_ports(sock_net(sk), fl4, sk,
+		rt = ip_route_output_ports(net, fl4, sk,
 					   daddr, inet->inet_saddr,
 					   inet->inet_dport,
 					   inet->inet_sport,
@@ -444,20 +447,20 @@ packet_routed:
 		ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
 	}
 
-	ip_select_ident_segs(sock_net(sk), skb, sk,
+	ip_select_ident_segs(net, skb, sk,
 			     skb_shinfo(skb)->gso_segs ?: 1);
 
 	/* TODO : should we use skb->sk here instead of sk ? */
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
 
-	res = ip_local_out(skb);
+	res = ip_local_out(net, sk, skb);
 	rcu_read_unlock();
 	return res;
 
no_route:
 	rcu_read_unlock();
-	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
+	IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
 	kfree_skb(skb);
 	return -EHOSTUNREACH;
 }
@@ -486,29 +489,26 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	skb_copy_secmark(to, from);
 }
 
-static int ip_fragment(struct sock *sk, struct sk_buff *skb,
+static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		       unsigned int mtu,
-		       int (*output)(struct sock *, struct sk_buff *))
+		       int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct iphdr *iph = ip_hdr(skb);
 
 	if ((iph->frag_off & htons(IP_DF)) == 0)
-		return ip_do_fragment(sk, skb, output);
+		return ip_do_fragment(net, sk, skb, output);
 
 	if (unlikely(!skb->ignore_df ||
 		     (IPCB(skb)->frag_max_size &&
 		      IPCB(skb)->frag_max_size > mtu))) {
-		struct rtable *rt = skb_rtable(skb);
-		struct net_device *dev = rt->dst.dev;
-
-		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			  htonl(mtu));
 		kfree_skb(skb);
 		return -EMSGSIZE;
 	}
 
-	return ip_do_fragment(sk, skb, output);
+	return ip_do_fragment(net, sk, skb, output);
 }
 
 /*
@@ -518,8 +518,8 @@ static int ip_fragment(struct sock *sk, struct sk_buff *skb,
  *	single device frame, and queue such a frame for sending.
  */
 
-int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
-		   int (*output)(struct sock *, struct sk_buff *))
+int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+		   int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct iphdr *iph;
 	int ptr;
@@ -533,6 +533,11 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
 
 	dev = rt->dst.dev;
 
+	/* for offloaded checksums cleanup checksum before fragmentation */
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    (err = skb_checksum_help(skb)))
+		goto fail;
+
 	/*
 	 *	Point into the IP datagram header.
 	 */
@@ -621,10 +626,10 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
 			ip_send_check(iph);
 		}
 
-		err = output(sk, skb);
+		err = output(net, sk, skb);
 
 		if (!err)
-			IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
+			IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
 		if (err || !frag)
 			break;
 
@@ -634,7 +639,7 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
 	}
 
 	if (err == 0) {
-		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
+		IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
 		return 0;
 	}
 
@@ -643,7 +648,7 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
 		kfree_skb(frag);
 		frag = skb;
 	}
-	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+	IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
 	return err;
 
slow_path_clean:
@@ -657,9 +662,6 @@ slow_path_clean:
 	}
 
slow_path:
-	/* for offloaded checksums cleanup checksum before fragmentation */
-	if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb))
-		goto fail;
 	iph = ip_hdr(skb);
 
 	left = skb->len - hlen;		/* Space per frame */
@@ -761,19 +763,19 @@ slow_path:
 
 		ip_send_check(iph);
 
-		err = output(sk, skb2);
+		err = output(net, sk, skb2);
 		if (err)
 			goto fail;
 
-		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
+		IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
 	}
 	consume_skb(skb);
-	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
+	IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
 	return err;
 
fail:
 	kfree_skb(skb);
-	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+	IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
 	return err;
 }
 EXPORT_SYMBOL(ip_do_fragment);
@@ -911,6 +913,7 @@ static int __ip_append_data(struct sock *sk,
 	if (transhdrlen &&
 	    length + fragheaderlen <= mtu &&
 	    rt->dst.dev->features & NETIF_F_V4_CSUM &&
+	    !(flags & MSG_MORE) &&
 	    !exthdrlen)
 		csummode = CHECKSUM_PARTIAL;
 
@@ -1434,7 +1437,7 @@ int ip_send_skb(struct net *net, struct sk_buff *skb)
 {
 	int err;
 
-	err = ip_local_out(skb);
+	err = ip_local_out(net, skb->sk, skb);
 	if (err) {
 		if (err > 0)
 			err = net_xmit_errno(err);
@@ -1561,7 +1564,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 	}
 
 	oif = arg->bound_dev_if;
-	if (!oif && netif_index_is_vrf(net, skb->skb_iif))
+	if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
 		oif = skb->skb_iif;
 
 	flowi4_init_output(&fl4, oif,
@@ -1596,7 +1599,6 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 			  arg->csumoffset) = csum_fold(csum_add(nskb->csum,
 								arg->csum));
 		nskb->ip_summed = CHECKSUM_NONE;
-		skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
 		ip_push_pending_frames(sk, &fl4);
 	}
out:
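
Note: after this file's conversion the whole local-output path takes (net, sk, skb) uniformly — ip_local_out() -> __ip_local_out() -> dst_output() -> ip_output()/ip_mc_output() -> ip_finish_output() — so a sender resolves the namespace once instead of each layer re-deriving it from the dst device or the socket. A hypothetical caller-side sketch (not from the patch) of the converted entry point:

	#include <net/ip.h>
	#include <net/sock.h>

	static int xmit_sketch(struct sock *sk, struct sk_buff *skb)
	{
		struct net *net = sock_net(sk);	/* resolved once */

		/* was: ip_local_out(skb); the namespace is now explicit */
		return ip_local_out(net, sk, skb);
	}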
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c3c359ad66e3..5f73a7c03e27 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1251,11 +1251,22 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
 *	the _received_ ones. The set sets the _sent_ ones.
 */
 
+static bool getsockopt_needs_rtnl(int optname)
+{
+	switch (optname) {
+	case IP_MSFILTER:
+	case MCAST_MSFILTER:
+		return true;
+	}
+	return false;
+}
+
 static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 			    char __user *optval, int __user *optlen, unsigned int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	int val;
+	bool needs_rtnl = getsockopt_needs_rtnl(optname);
+	int val, err = 0;
 	int len;
 
 	if (level != SOL_IP)
@@ -1269,6 +1280,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	if (len < 0)
 		return -EINVAL;
 
+	if (needs_rtnl)
+		rtnl_lock();
 	lock_sock(sk);
 
 	switch (optname) {
@@ -1386,39 +1399,35 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_MSFILTER:
 	{
 		struct ip_msfilter msf;
-		int err;
 
 		if (len < IP_MSFILTER_SIZE(0)) {
-			release_sock(sk);
-			return -EINVAL;
+			err = -EINVAL;
+			goto out;
 		}
 		if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
-			release_sock(sk);
-			return -EFAULT;
+			err = -EFAULT;
+			goto out;
 		}
 		err = ip_mc_msfget(sk, &msf,
 				   (struct ip_msfilter __user *)optval, optlen);
-		release_sock(sk);
-		return err;
+		goto out;
 	}
 	case MCAST_MSFILTER:
 	{
 		struct group_filter gsf;
-		int err;
 
 		if (len < GROUP_FILTER_SIZE(0)) {
-			release_sock(sk);
-			return -EINVAL;
+			err = -EINVAL;
+			goto out;
 		}
 		if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
-			release_sock(sk);
-			return -EFAULT;
+			err = -EFAULT;
+			goto out;
 		}
 		err = ip_mc_gsfget(sk, &gsf,
 				   (struct group_filter __user *)optval,
 				   optlen);
-		release_sock(sk);
-		return err;
+		goto out;
 	}
 	case IP_MULTICAST_ALL:
 		val = inet->mc_all;
@@ -1485,6 +1494,12 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 			return -EFAULT;
 	}
 	return 0;
+
+out:
+	release_sock(sk);
+	if (needs_rtnl)
+		rtnl_unlock();
+	return err;
 }
 
 int ip_getsockopt(struct sock *sk, int level,
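
Note: the new out: label unwinds both locks, and the ordering is the point — rtnl_lock() has to be taken before lock_sock(sk), since the multicast filter helpers reached through IP_MSFILTER/MCAST_MSFILTER expect the rtnl to be held and taking it after the socket lock would invert the established order. A reduced sketch of the pattern, option handling elided:

	bool needs_rtnl = getsockopt_needs_rtnl(optname);
	int err = 0;

	if (needs_rtnl)
		rtnl_lock();		/* always before lock_sock() */
	lock_sock(sk);
	/* ... cases either return directly or set err and goto out ... */
out:
	release_sock(sk);		/* reverse order on the way out */
	if (needs_rtnl)
		rtnl_unlock();
	return err;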
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 84dce6a92f93..6cb9009c3d96 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -53,6 +53,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 		  __u8 tos, __u8 ttl, __be16 df, bool xnet)
 {
 	int pkt_len = skb->len - skb_inner_network_offset(skb);
+	struct net *net = dev_net(rt->dst.dev);
 	struct iphdr *iph;
 	int err;
 
@@ -76,10 +77,9 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 	iph->daddr = dst;
 	iph->saddr = src;
 	iph->ttl = ttl;
-	__ip_select_ident(dev_net(rt->dst.dev), iph,
-			  skb_shinfo(skb)->gso_segs ?: 1);
+	__ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
 
-	err = ip_local_out_sk(sk, skb);
+	err = ip_local_out(net, sk, skb);
 	if (unlikely(net_xmit_eval(err)))
 		pkt_len = 0;
 	return pkt_len;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 0c152087ca15..4d8f0b698777 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -197,7 +197,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 	skb_dst_set(skb, dst);
 	skb->dev = skb_dst(skb)->dev;
 
-	err = dst_output(skb);
+	err = dst_output(tunnel->net, skb->sk, skb);
 	if (net_xmit_eval(err) == 0)
 		err = skb->len;
 	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index ed4ef09c2136..0bc7412d9e14 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -146,6 +146,10 @@ u8 root_server_path[256] = { 0, }; /* Path to mount as root */
 /* vendor class identifier */
 static char vendor_class_identifier[253] __initdata;
 
+#if defined(CONFIG_IP_PNP_DHCP)
+static char dhcp_client_identifier[253] __initdata;
+#endif
+
 /* Persistent data: */
 
 static int ic_proto_used;	/* Protocol used, if any */
@@ -728,6 +732,16 @@ ic_dhcp_init_options(u8 *options)
 			memcpy(e, vendor_class_identifier, len);
 			e += len;
 		}
+		len = strlen(dhcp_client_identifier + 1);
+		/* the minimum length of identifier is 2, include 1 byte type,
+		 * and can not be larger than the length of options
+		 */
+		if (len >= 1 && len < 312 - (e - options) - 1) {
+			*e++ = 61;
+			*e++ = len + 1;
+			memcpy(e, dhcp_client_identifier, len + 1);
+			e += len + 1;
+		}
 	}
 
 	*e++ = 255;	/* End of the list */
@@ -1557,8 +1571,24 @@ static int __init ic_proto_name(char *name)
 		return 0;
 	}
 #ifdef CONFIG_IP_PNP_DHCP
-	else if (!strcmp(name, "dhcp")) {
+	else if (!strncmp(name, "dhcp", 4)) {
+		char *client_id;
+
 		ic_proto_enabled &= ~IC_RARP;
+		client_id = strstr(name, "dhcp,");
+		if (client_id) {
+			char *v;
+
+			client_id = client_id + 5;
+			v = strchr(client_id, ',');
+			if (!v)
+				return 1;
+			*v = 0;
+			if (kstrtou8(client_id, 0, dhcp_client_identifier))
+				DBG("DHCP: Invalid client identifier type\n");
+			strncpy(dhcp_client_identifier + 1, v + 1, 251);
+			*v = ',';
+		}
 		return 1;
 	}
 #endif
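
Note: what the new ipconfig code emits is the DHCP client-identifier option (option 61, RFC 2132): one type byte followed by the identifier itself. That is why dhcp_client_identifier[0] receives the kstrtou8()-parsed type, the string is copied from offset 1, and the emitted option length is len + 1; the 312 bound keeps the option inside the fixed DHCP options area. Sketch of the resulting wire layout, assuming the "ip=...:dhcp,<type>,<id>" boot syntax parsed above:

	/* on the wire:
	 *
	 *   +----+-----+------+----------------+
	 *   | 61 | len | type | id bytes ...   |
	 *   +----+-----+------+----------------+
	 *
	 * dhcp_client_identifier[0]   = type (parsed by kstrtou8)
	 * dhcp_client_identifier[1..] = id   (copied with strncpy)
	 * len                         = strlen(id) + 1, for the type byte
	 */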
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 866ee89f5254..92dd4b74d513 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1678,17 +1678,18 @@ static void ip_encap(struct net *net, struct sk_buff *skb,
 	nf_reset(skb);
 }
 
-static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
+				      struct sk_buff *skb)
 {
 	struct ip_options *opt = &(IPCB(skb)->opt);
 
-	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
-	IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len);
+	IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
 
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
-	return dst_output_sk(sk, skb);
+	return dst_output(net, sk, skb);
 }
 
 /*
@@ -1745,7 +1746,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 		 *	to blackhole.
 		 */
 
-		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
 		ip_rt_put(rt);
 		goto out_free;
 	}
@@ -1787,8 +1788,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 	 * not mrouter) cannot join to more than one interface - it will
 	 * result in receiving multiple packets.
 	 */
-	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb,
-		skb->dev, dev,
+	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
+		net, NULL, skb, skb->dev, dev,
 		ipmr_forward_finish);
 	return;
 
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 61eafc9b4545..c3776ff6749f 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -17,9 +17,8 @@
 #include <net/netfilter/nf_queue.h>
 
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
+int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type)
 {
-	struct net *net = dev_net(skb_dst(skb)->dev);
 	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
 	struct flowi4 fl4 = {};
@@ -104,7 +103,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb,
 	}
 }
 
-static int nf_ip_reroute(struct sk_buff *skb,
+static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
 			 const struct nf_queue_entry *entry)
 {
 	const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -116,7 +115,7 @@ static int nf_ip_reroute(struct sk_buff *skb,
 		      skb->mark == rt_info->mark &&
 		      iph->daddr == rt_info->daddr &&
 		      iph->saddr == rt_info->saddr))
-			return ip_route_me_harder(skb, RTN_UNSPEC);
+			return ip_route_me_harder(net, skb, RTN_UNSPEC);
 	}
 	return 0;
 }
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 690d27d3f2f9..a35584176535 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -75,6 +75,7 @@ endif # NF_TABLES
 
 config NF_DUP_IPV4
 	tristate "Netfilter IPv4 packet duplication to alternate destination"
+	depends on !NF_CONNTRACK || NF_CONNTRACK
 	help
 	  This option enables the nf_dup_ipv4 core, which duplicates an IPv4
 	  packet to be rerouted to another destination.
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 8f87fc38ccde..11dccba474b7 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -186,7 +186,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 	if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
 		dprintf("VIA in mismatch (%s vs %s).%s\n",
 			indev, arpinfo->iniface,
-			arpinfo->invflags&ARPT_INV_VIA_IN ?" (INV)":"");
+			arpinfo->invflags & ARPT_INV_VIA_IN ? " (INV)" : "");
 		return 0;
 	}
 
@@ -195,7 +195,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 	if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
 		dprintf("VIA out mismatch (%s vs %s).%s\n",
 			outdev, arpinfo->outiface,
-			arpinfo->invflags&ARPT_INV_VIA_OUT ?" (INV)":"");
+			arpinfo->invflags & ARPT_INV_VIA_OUT ? " (INV)" : "");
 		return 0;
 	}
 
@@ -247,10 +247,10 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
 }
 
 unsigned int arpt_do_table(struct sk_buff *skb,
-			   unsigned int hook,
 			   const struct nf_hook_state *state,
 			   struct xt_table *table)
 {
+	unsigned int hook = state->hook;
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	unsigned int verdict = NF_DROP;
 	const struct arphdr *arp;
@@ -285,6 +285,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	 */
 	e = get_entry(table_base, private->hook_entry[hook]);
 
+	acpar.net     = state->net;
 	acpar.in      = state->in;
 	acpar.out     = state->out;
 	acpar.hooknum = hook;
@@ -467,7 +468,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 				pos = newpos;
 			}
 		}
-		next:
+next:
 		duprintf("Finished chain %u\n", hook);
 	}
 	return 1;
@@ -631,7 +632,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
 * newinfo).
 */
 static int translate_table(struct xt_table_info *newinfo, void *entry0,
-                           const struct arpt_replace *repl)
+			   const struct arpt_replace *repl)
 {
 	struct arpt_entry *iter;
 	unsigned int i;
@@ -891,7 +892,7 @@ static int compat_table_info(const struct xt_table_info *info,
 #endif
 
 static int get_info(struct net *net, void __user *user,
-                    const int *len, int compat)
+		    const int *len, int compat)
 {
 	char name[XT_TABLE_MAXNAMELEN];
 	struct xt_table *t;
@@ -1068,7 +1069,7 @@ static int __do_replace(struct net *net, const char *name,
 }
 
 static int do_replace(struct net *net, const void __user *user,
-                      unsigned int len)
+		      unsigned int len)
 {
 	int ret;
 	struct arpt_replace tmp;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 93876d03120c..1897ee160920 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -27,13 +27,10 @@ static const struct xt_table packet_filter = {
 
 /* The work comes in here from netfilter.c */
 static unsigned int
-arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+arptable_filter_hook(void *priv, struct sk_buff *skb,
 		     const struct nf_hook_state *state)
 {
-	const struct net *net = dev_net(state->in ? state->in : state->out);
-
-	return arpt_do_table(skb, ops->hooknum, state,
-			     net->ipv4.arptable_filter);
+	return arpt_do_table(skb, state, state->net->ipv4.arptable_filter);
 }
 
 static struct nf_hook_ops *arpfilter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b0a86e73451c..b99affad6ba1 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -102,7 +102,7 @@ ip_packet_match(const struct iphdr *ip,
 	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
 		dprintf("VIA in mismatch (%s vs %s).%s\n",
 			indev, ipinfo->iniface,
-			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
+			ipinfo->invflags & IPT_INV_VIA_IN ? " (INV)" : "");
 		return false;
 	}
 
@@ -111,7 +111,7 @@ ip_packet_match(const struct iphdr *ip,
 	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
 		dprintf("VIA out mismatch (%s vs %s).%s\n",
 			outdev, ipinfo->outiface,
-			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
+			ipinfo->invflags & IPT_INV_VIA_OUT ? " (INV)" : "");
 		return false;
 	}
 
@@ -120,7 +120,7 @@ ip_packet_match(const struct iphdr *ip,
 	    FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
 		dprintf("Packet protocol %hi does not match %hi.%s\n",
 			ip->protocol, ipinfo->proto,
-			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
+			ipinfo->invflags & IPT_INV_PROTO ? " (INV)" : "");
 		return false;
 	}
 
@@ -246,7 +246,8 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
 	return 0;
 }
 
-static void trace_packet(const struct sk_buff *skb,
+static void trace_packet(struct net *net,
+			 const struct sk_buff *skb,
 			 unsigned int hook,
 			 const struct net_device *in,
 			 const struct net_device *out,
@@ -258,7 +259,6 @@ static void trace_packet(const struct sk_buff *skb,
 	const char *hookname, *chainname, *comment;
 	const struct ipt_entry *iter;
 	unsigned int rulenum = 0;
-	struct net *net = dev_net(in ? in : out);
 
 	root = get_entry(private->entries, private->hook_entry[hook]);
 
@@ -285,10 +285,10 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
 ipt_do_table(struct sk_buff *skb,
-	     unsigned int hook,
 	     const struct nf_hook_state *state,
 	     struct xt_table *table)
 {
+	unsigned int hook = state->hook;
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	const struct iphdr *ip;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
@@ -315,6 +315,7 @@ ipt_do_table(struct sk_buff *skb,
 	acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 	acpar.thoff   = ip_hdrlen(skb);
 	acpar.hotdrop = false;
+	acpar.net     = state->net;
 	acpar.in      = state->in;
 	acpar.out     = state->out;
 	acpar.family  = NFPROTO_IPV4;
@@ -378,8 +379,8 @@ ipt_do_table(struct sk_buff *skb,
 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 		/* The packet is traced: log it */
 		if (unlikely(skb->nf_trace))
-			trace_packet(skb, hook, state->in, state->out,
-				     table->name, private, e);
+			trace_packet(state->net, skb, hook, state->in,
+				     state->out, table->name, private, e);
 #endif
 		/* Standard target? */
 		if (!t->u.kernel.target->target) {
@@ -430,8 +431,8 @@ ipt_do_table(struct sk_buff *skb,
 	} while (!acpar.hotdrop);
 	pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
 
- xt_write_recseq_end(addend);
- local_bh_enable();
+	xt_write_recseq_end(addend);
+	local_bh_enable();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -483,7 +484,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
 			unsigned int oldpos, size;
 
 			if ((strcmp(t->target.u.user.name,
-			            XT_STANDARD_TARGET) == 0) &&
+				    XT_STANDARD_TARGET) == 0) &&
 			    t->verdict < -NF_MAX_VERDICT - 1) {
 				duprintf("mark_source_chains: bad "
 					"negative verdict (%i)\n",
@@ -548,7 +549,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
 				pos = newpos;
 			}
 		}
-		next:
+next:
 		duprintf("Finished chain %u\n", hook);
 	}
 	return 1;
@@ -803,7 +804,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
   newinfo) */
 static int
 translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
-                const struct ipt_replace *repl)
+		const struct ipt_replace *repl)
 {
 	struct ipt_entry *iter;
 	unsigned int i;
@@ -1077,7 +1078,7 @@ static int compat_table_info(const struct xt_table_info *info,
 #endif
 
 static int get_info(struct net *net, void __user *user,
-                    const int *len, int compat)
+		    const int *len, int compat)
 {
 	char name[XT_TABLE_MAXNAMELEN];
 	struct xt_table *t;
@@ -1303,7 +1304,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 
 static int
 do_add_counters(struct net *net, const void __user *user,
-                unsigned int len, int compat)
+		unsigned int len, int compat)
 {
 	unsigned int i;
 	struct xt_counters_info tmp;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 45cb16a6a4a3..4a9e6db9df8d 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -492,14 +492,14 @@ static void arp_print(struct arp_payload *payload)
 {
 #define HBUFFERLEN 30
 	char hbuffer[HBUFFERLEN];
-	int j,k;
+	int j, k;
 
-	for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) {
+	for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < ETH_ALEN; j++) {
 		hbuffer[k++] = hex_asc_hi(payload->src_hw[j]);
 		hbuffer[k++] = hex_asc_lo(payload->src_hw[j]);
-		hbuffer[k++]=':';
+		hbuffer[k++] = ':';
 	}
-	hbuffer[--k]='\0';
+	hbuffer[--k] = '\0';
 
 	pr_debug("src %pI4@%s, dst %pI4\n",
 		 &payload->src_ip, hbuffer, &payload->dst_ip);
@@ -507,14 +507,14 @@ static void arp_print(struct arp_payload *payload)
 #endif
 
 static unsigned int
-arp_mangle(const struct nf_hook_ops *ops,
+arp_mangle(void *priv,
 	   struct sk_buff *skb,
 	   const struct nf_hook_state *state)
 {
 	struct arphdr *arp = arp_hdr(skb);
 	struct arp_payload *payload;
 	struct clusterip_config *c;
-	struct net *net = dev_net(state->in ? state->in : state->out);
+	struct net *net = state->net;
 
 	/* we don't care about non-ethernet and non-ipv4 ARP */
 	if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 87907d4bd259..1d16c0f28df0 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -59,7 +59,7 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		nf_send_unreach(skb, ICMP_PKT_FILTERED, hook);
 		break;
 	case IPT_TCP_RESET:
-		nf_send_reset(skb, hook);
+		nf_send_reset(par->net, skb, hook);
 	case IPT_ICMP_ECHOREPLY:
 		/* Doesn't happen. */
 		break;
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 95ea633e8356..5fdc556514ba 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -39,11 +39,14 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 }
 
 static void
-synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+synproxy_send_tcp(const struct synproxy_net *snet,
+		  const struct sk_buff *skb, struct sk_buff *nskb,
 		  struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
 		  struct iphdr *niph, struct tcphdr *nth,
 		  unsigned int tcp_hdr_size)
 {
+	struct net *net = nf_ct_net(snet->tmpl);
+
 	nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
 	nskb->ip_summed = CHECKSUM_PARTIAL;
 	nskb->csum_start = (unsigned char *)nth - nskb->head;
@@ -51,7 +54,7 @@ synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
 
 	skb_dst_set_noref(nskb, skb_dst(skb));
 	nskb->protocol = htons(ETH_P_IP);
-	if (ip_route_me_harder(nskb, RTN_UNSPEC))
+	if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
 		goto free_nskb;
 
 	if (nfct) {
@@ -60,7 +63,7 @@ synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
 		nf_conntrack_get(nfct);
 	}
 
-	ip_local_out(nskb);
+	ip_local_out(net, nskb->sk, nskb);
 	return;
 
free_nskb:
@@ -68,7 +71,8 @@ free_nskb:
 }
 
 static void
-synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+synproxy_send_client_synack(const struct synproxy_net *snet,
+			    const struct sk_buff *skb, const struct tcphdr *th,
 			    const struct synproxy_options *opts)
 {
 	struct sk_buff *nskb;
@@ -104,7 +108,7 @@ synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
 
 	synproxy_build_options(nth, opts);
 
-	synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+	synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
 			  niph, nth, tcp_hdr_size);
 }
 
@@ -148,7 +152,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet,
 
 	synproxy_build_options(nth, opts);
 
-	synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+	synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
 			  niph, nth, tcp_hdr_size);
 }
 
@@ -188,7 +192,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet,
 
 	synproxy_build_options(nth, opts);
 
-	synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+	synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
 }
 
 static void
@@ -226,8 +230,8 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
 
 	synproxy_build_options(nth, opts);
 
-	synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
-	                  niph, nth, tcp_hdr_size);
+	synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+			  niph, nth, tcp_hdr_size);
 }
 
 static bool
@@ -258,7 +262,7 @@ static unsigned int
 synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_synproxy_info *info = par->targinfo;
-	struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+	struct synproxy_net *snet = synproxy_pernet(par->net);
 	struct synproxy_options opts = {};
 	struct tcphdr *th, _th;
 
@@ -287,7 +291,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 					  XT_SYNPROXY_OPT_SACK_PERM |
 					  XT_SYNPROXY_OPT_ECN);
 
-		synproxy_send_client_synack(skb, th, &opts);
+		synproxy_send_client_synack(snet, skb, th, &opts);
 		return NF_DROP;
 
 	} else if (th->ack && !(th->fin || th->rst || th->syn)) {
@@ -299,11 +303,11 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	return XT_CONTINUE;
 }
 
-static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops,
+static unsigned int ipv4_synproxy_hook(void *priv,
 				       struct sk_buff *skb,
 				       const struct nf_hook_state *nhs)
 {
-	struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out));
+	struct synproxy_net *snet = synproxy_pernet(nhs->net);
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 	struct nf_conn_synproxy *synproxy;
@@ -433,14 +437,12 @@ static struct xt_target synproxy_tg4_reg __read_mostly = {
 static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
 	{
 		.hook		= ipv4_synproxy_hook,
-		.owner		= THIS_MODULE,
 		.pf		= NFPROTO_IPV4,
 		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
 	},
 	{
 		.hook		= ipv4_synproxy_hook,
-		.owner		= THIS_MODULE,
 		.pf		= NFPROTO_IPV4,
 		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM - 1,
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 14a2aa8b8a14..a787d07f6cb7 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -25,7 +25,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	bool r;
 	pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
 		 invert ? '!' : ' ', min, spi, max);
-	r=(spi >= min && spi <= max) ^ invert;
+	r = (spi >= min && spi <= max) ^ invert;
 	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
 	return r;
 }
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 8618fd150c96..78cc64eddfc1 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -32,12 +32,11 @@ static __be32 rpfilter_get_saddr(__be32 addr)
 	return addr;
 }
 
-static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
+static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
 				    const struct net_device *dev, u8 flags)
 {
 	struct fib_result res;
 	bool dev_match;
-	struct net *net = dev_net(dev);
 	int ret __maybe_unused;
 
 	if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
@@ -61,9 +60,7 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
 	if (FIB_RES_DEV(res) == dev)
 		dev_match = true;
 #endif
-	if (dev_match || flags & XT_RPFILTER_LOOSE)
-		return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
-	return dev_match;
+	return dev_match || flags & XT_RPFILTER_LOOSE;
 }
 
 static bool rpfilter_is_local(const struct sk_buff *skb)
@@ -98,7 +95,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	flow.flowi4_tos = RT_TOS(iph->tos);
 	flow.flowi4_scope = RT_SCOPE_UNIVERSE;
 
-	return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert;
+	return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert;
 }
 
 static int rpfilter_check(const struct xt_mtchk_param *par)
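
Note: the rpfilter hunk is a behavioural change, not just plumbing — the reverse-path lookup no longer additionally requires the nexthop scope to be host-or-narrower; a device match, or XT_RPFILTER_LOOSE, is now sufficient on its own. The accept test reduces to the sketch below (names as in the diff):

	/* sketch of the new accept condition */
	static bool rpfilter_accept(bool dev_match, u8 flags)
	{
		return dev_match || (flags & XT_RPFILTER_LOOSE);
	}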
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index a0f3beca52d2..397ef2dd133e 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -33,19 +33,16 @@ static const struct xt_table packet_filter = {
33}; 33};
34 34
35static unsigned int 35static unsigned int
36iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, 36iptable_filter_hook(void *priv, struct sk_buff *skb,
37 const struct nf_hook_state *state) 37 const struct nf_hook_state *state)
38{ 38{
39 const struct net *net; 39 if (state->hook == NF_INET_LOCAL_OUT &&
40
41 if (ops->hooknum == NF_INET_LOCAL_OUT &&
42 (skb->len < sizeof(struct iphdr) || 40 (skb->len < sizeof(struct iphdr) ||
43 ip_hdrlen(skb) < sizeof(struct iphdr))) 41 ip_hdrlen(skb) < sizeof(struct iphdr)))
44 /* root is playing with raw sockets. */ 42 /* root is playing with raw sockets. */
45 return NF_ACCEPT; 43 return NF_ACCEPT;
46 44
47 net = dev_net(state->in ? state->in : state->out); 45 return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
48 return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_filter);
49} 46}
50 47
51static struct nf_hook_ops *filter_ops __read_mostly; 48static struct nf_hook_ops *filter_ops __read_mostly;
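[Note] This is the core pattern of the whole series: the hook prototype changes from const struct nf_hook_ops *ops to an opaque void *priv, and both the hook number and the owning namespace are read from struct nf_hook_state. A minimal sketch under those assumptions (example_hook is a hypothetical name):

static unsigned int
example_hook(void *priv, struct sk_buff *skb,
	     const struct nf_hook_state *state)
{
	/* the raw-socket sanity check is keyed on the hook number */
	if (state->hook == NF_INET_LOCAL_OUT &&
	    (skb->len < sizeof(struct iphdr) ||
	     ip_hdrlen(skb) < sizeof(struct iphdr)))
		return NF_ACCEPT;

	/* namespace comes from the state, not from dev_net(in/out) */
	return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
}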
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 62cbb8c5f4a8..ba5d392a13c4 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -39,7 +39,6 @@ static const struct xt_table packet_mangler = {
39static unsigned int 39static unsigned int
40ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) 40ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
41{ 41{
42 struct net_device *out = state->out;
43 unsigned int ret; 42 unsigned int ret;
44 const struct iphdr *iph; 43 const struct iphdr *iph;
45 u_int8_t tos; 44 u_int8_t tos;
@@ -59,8 +58,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
59 daddr = iph->daddr; 58 daddr = iph->daddr;
60 tos = iph->tos; 59 tos = iph->tos;
61 60
62 ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, state, 61 ret = ipt_do_table(skb, state, state->net->ipv4.iptable_mangle);
63 dev_net(out)->ipv4.iptable_mangle);
64 /* Reroute for ANY change. */ 62 /* Reroute for ANY change. */
65 if (ret != NF_DROP && ret != NF_STOLEN) { 63 if (ret != NF_DROP && ret != NF_STOLEN) {
66 iph = ip_hdr(skb); 64 iph = ip_hdr(skb);
@@ -69,7 +67,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
69 iph->daddr != daddr || 67 iph->daddr != daddr ||
70 skb->mark != mark || 68 skb->mark != mark ||
71 iph->tos != tos) { 69 iph->tos != tos) {
72 err = ip_route_me_harder(skb, RTN_UNSPEC); 70 err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
73 if (err < 0) 71 if (err < 0)
74 ret = NF_DROP_ERR(err); 72 ret = NF_DROP_ERR(err);
75 } 73 }
@@ -80,18 +78,17 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
80 78
81/* The work comes in here from netfilter.c. */ 79/* The work comes in here from netfilter.c. */
82static unsigned int 80static unsigned int
83iptable_mangle_hook(const struct nf_hook_ops *ops, 81iptable_mangle_hook(void *priv,
84 struct sk_buff *skb, 82 struct sk_buff *skb,
85 const struct nf_hook_state *state) 83 const struct nf_hook_state *state)
86{ 84{
87 if (ops->hooknum == NF_INET_LOCAL_OUT) 85 if (state->hook == NF_INET_LOCAL_OUT)
88 return ipt_mangle_out(skb, state); 86 return ipt_mangle_out(skb, state);
89 if (ops->hooknum == NF_INET_POST_ROUTING) 87 if (state->hook == NF_INET_POST_ROUTING)
90 return ipt_do_table(skb, ops->hooknum, state, 88 return ipt_do_table(skb, state,
91 dev_net(state->out)->ipv4.iptable_mangle); 89 state->net->ipv4.iptable_mangle);
92 /* PREROUTING/INPUT/FORWARD: */ 90 /* PREROUTING/INPUT/FORWARD: */
93 return ipt_do_table(skb, ops->hooknum, state, 91 return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle);
94 dev_net(state->in)->ipv4.iptable_mangle);
95} 92}
96 93
97static struct nf_hook_ops *mangle_ops __read_mostly; 94static struct nf_hook_ops *mangle_ops __read_mostly;
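[Note] ip_route_me_harder() grows a struct net argument, which lets the mangle-output path drop the local out device it kept only to recover the namespace. The reroute-on-change idiom as it stands after the hunk (saddr, daddr, mark and tos are assumed to have been captured before the table ran):

iph = ip_hdr(skb);
if (iph->saddr != saddr || iph->daddr != daddr ||
    skb->mark != mark || iph->tos != tos) {
	err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
	if (err < 0)
		ret = NF_DROP_ERR(err);	/* reroute failed, drop */
}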
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 0d4d9cdf98a4..ae2cd2752046 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -28,49 +28,46 @@ static const struct xt_table nf_nat_ipv4_table = {
28 .af = NFPROTO_IPV4, 28 .af = NFPROTO_IPV4,
29}; 29};
30 30
31static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, 31static unsigned int iptable_nat_do_chain(void *priv,
32 struct sk_buff *skb, 32 struct sk_buff *skb,
33 const struct nf_hook_state *state, 33 const struct nf_hook_state *state,
34 struct nf_conn *ct) 34 struct nf_conn *ct)
35{ 35{
36 struct net *net = nf_ct_net(ct); 36 return ipt_do_table(skb, state, state->net->ipv4.nat_table);
37
38 return ipt_do_table(skb, ops->hooknum, state, net->ipv4.nat_table);
39} 37}
40 38
41static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops, 39static unsigned int iptable_nat_ipv4_fn(void *priv,
42 struct sk_buff *skb, 40 struct sk_buff *skb,
43 const struct nf_hook_state *state) 41 const struct nf_hook_state *state)
44{ 42{
45 return nf_nat_ipv4_fn(ops, skb, state, iptable_nat_do_chain); 43 return nf_nat_ipv4_fn(priv, skb, state, iptable_nat_do_chain);
46} 44}
47 45
48static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops, 46static unsigned int iptable_nat_ipv4_in(void *priv,
49 struct sk_buff *skb, 47 struct sk_buff *skb,
50 const struct nf_hook_state *state) 48 const struct nf_hook_state *state)
51{ 49{
52 return nf_nat_ipv4_in(ops, skb, state, iptable_nat_do_chain); 50 return nf_nat_ipv4_in(priv, skb, state, iptable_nat_do_chain);
53} 51}
54 52
55static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops, 53static unsigned int iptable_nat_ipv4_out(void *priv,
56 struct sk_buff *skb, 54 struct sk_buff *skb,
57 const struct nf_hook_state *state) 55 const struct nf_hook_state *state)
58{ 56{
59 return nf_nat_ipv4_out(ops, skb, state, iptable_nat_do_chain); 57 return nf_nat_ipv4_out(priv, skb, state, iptable_nat_do_chain);
60} 58}
61 59
62static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops, 60static unsigned int iptable_nat_ipv4_local_fn(void *priv,
63 struct sk_buff *skb, 61 struct sk_buff *skb,
64 const struct nf_hook_state *state) 62 const struct nf_hook_state *state)
65{ 63{
66 return nf_nat_ipv4_local_fn(ops, skb, state, iptable_nat_do_chain); 64 return nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain);
67} 65}
68 66
69static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { 67static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
70 /* Before packet filtering, change destination */ 68 /* Before packet filtering, change destination */
71 { 69 {
72 .hook = iptable_nat_ipv4_in, 70 .hook = iptable_nat_ipv4_in,
73 .owner = THIS_MODULE,
74 .pf = NFPROTO_IPV4, 71 .pf = NFPROTO_IPV4,
75 .hooknum = NF_INET_PRE_ROUTING, 72 .hooknum = NF_INET_PRE_ROUTING,
76 .priority = NF_IP_PRI_NAT_DST, 73 .priority = NF_IP_PRI_NAT_DST,
@@ -78,7 +75,6 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
78 /* After packet filtering, change source */ 75 /* After packet filtering, change source */
79 { 76 {
80 .hook = iptable_nat_ipv4_out, 77 .hook = iptable_nat_ipv4_out,
81 .owner = THIS_MODULE,
82 .pf = NFPROTO_IPV4, 78 .pf = NFPROTO_IPV4,
83 .hooknum = NF_INET_POST_ROUTING, 79 .hooknum = NF_INET_POST_ROUTING,
84 .priority = NF_IP_PRI_NAT_SRC, 80 .priority = NF_IP_PRI_NAT_SRC,
@@ -86,7 +82,6 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
86 /* Before packet filtering, change destination */ 82 /* Before packet filtering, change destination */
87 { 83 {
88 .hook = iptable_nat_ipv4_local_fn, 84 .hook = iptable_nat_ipv4_local_fn,
89 .owner = THIS_MODULE,
90 .pf = NFPROTO_IPV4, 85 .pf = NFPROTO_IPV4,
91 .hooknum = NF_INET_LOCAL_OUT, 86 .hooknum = NF_INET_LOCAL_OUT,
92 .priority = NF_IP_PRI_NAT_DST, 87 .priority = NF_IP_PRI_NAT_DST,
@@ -94,7 +89,6 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
94 /* After packet filtering, change source */ 89 /* After packet filtering, change source */
95 { 90 {
96 .hook = iptable_nat_ipv4_fn, 91 .hook = iptable_nat_ipv4_fn,
97 .owner = THIS_MODULE,
98 .pf = NFPROTO_IPV4, 92 .pf = NFPROTO_IPV4,
99 .hooknum = NF_INET_LOCAL_IN, 93 .hooknum = NF_INET_LOCAL_IN,
100 .priority = NF_IP_PRI_NAT_SRC, 94 .priority = NF_IP_PRI_NAT_SRC,
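[Note] The four NAT entry points become thin pass-throughs: the opaque priv cookie is forwarded unchanged into the shared nf_nat_ipv4_* helpers, which hand it back to the table through the do_chain() callback. One wrapper shows the shape:

static unsigned int example_nat_in(void *priv, struct sk_buff *skb,
				   const struct nf_hook_state *state)
{
	/* priv travels through untouched; do_chain() receives it back */
	return nf_nat_ipv4_in(priv, skb, state, iptable_nat_do_chain);
}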
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 0356e6da4bb7..1ba02811acb0 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -20,19 +20,16 @@ static const struct xt_table packet_raw = {
20 20
21/* The work comes in here from netfilter.c. */ 21/* The work comes in here from netfilter.c. */
22static unsigned int 22static unsigned int
23iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, 23iptable_raw_hook(void *priv, struct sk_buff *skb,
24 const struct nf_hook_state *state) 24 const struct nf_hook_state *state)
25{ 25{
26 const struct net *net; 26 if (state->hook == NF_INET_LOCAL_OUT &&
27
28 if (ops->hooknum == NF_INET_LOCAL_OUT &&
29 (skb->len < sizeof(struct iphdr) || 27 (skb->len < sizeof(struct iphdr) ||
30 ip_hdrlen(skb) < sizeof(struct iphdr))) 28 ip_hdrlen(skb) < sizeof(struct iphdr)))
31 /* root is playing with raw sockets. */ 29 /* root is playing with raw sockets. */
32 return NF_ACCEPT; 30 return NF_ACCEPT;
33 31
34 net = dev_net(state->in ? state->in : state->out); 32 return ipt_do_table(skb, state, state->net->ipv4.iptable_raw);
35 return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_raw);
36} 33}
37 34
38static struct nf_hook_ops *rawtable_ops __read_mostly; 35static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 4bce3980ccd9..c2e23d5e9cd4 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -37,20 +37,16 @@ static const struct xt_table security_table = {
37}; 37};
38 38
39static unsigned int 39static unsigned int
40iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, 40iptable_security_hook(void *priv, struct sk_buff *skb,
41 const struct nf_hook_state *state) 41 const struct nf_hook_state *state)
42{ 42{
43 const struct net *net; 43 if (state->hook == NF_INET_LOCAL_OUT &&
44
45 if (ops->hooknum == NF_INET_LOCAL_OUT &&
46 (skb->len < sizeof(struct iphdr) || 44 (skb->len < sizeof(struct iphdr) ||
47 ip_hdrlen(skb) < sizeof(struct iphdr))) 45 ip_hdrlen(skb) < sizeof(struct iphdr)))
48 /* Somebody is playing with raw sockets. */ 46 /* Somebody is playing with raw sockets. */
49 return NF_ACCEPT; 47 return NF_ACCEPT;
50 48
51 net = dev_net(state->in ? state->in : state->out); 49 return ipt_do_table(skb, state, state->net->ipv4.iptable_security);
52 return ipt_do_table(skb, ops->hooknum, state,
53 net->ipv4.iptable_security);
54} 50}
55 51
56static struct nf_hook_ops *sectbl_ops __read_mostly; 52static struct nf_hook_ops *sectbl_ops __read_mostly;
@@ -83,7 +79,7 @@ static int __init iptable_security_init(void)
83 int ret; 79 int ret;
84 80
85 ret = register_pernet_subsys(&iptable_security_net_ops); 81 ret = register_pernet_subsys(&iptable_security_net_ops);
86 if (ret < 0) 82 if (ret < 0)
87 return ret; 83 return ret;
88 84
89 sectbl_ops = xt_hook_link(&security_table, iptable_security_hook); 85 sectbl_ops = xt_hook_link(&security_table, iptable_security_hook);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8a2caaf3940b..461ca926fd39 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
92 return NF_ACCEPT; 92 return NF_ACCEPT;
93} 93}
94 94
95static unsigned int ipv4_helper(const struct nf_hook_ops *ops, 95static unsigned int ipv4_helper(void *priv,
96 struct sk_buff *skb, 96 struct sk_buff *skb,
97 const struct nf_hook_state *state) 97 const struct nf_hook_state *state)
98{ 98{
@@ -119,7 +119,7 @@ static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
119 ct, ctinfo); 119 ct, ctinfo);
120} 120}
121 121
122static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, 122static unsigned int ipv4_confirm(void *priv,
123 struct sk_buff *skb, 123 struct sk_buff *skb,
124 const struct nf_hook_state *state) 124 const struct nf_hook_state *state)
125{ 125{
@@ -143,14 +143,14 @@ out:
143 return nf_conntrack_confirm(skb); 143 return nf_conntrack_confirm(skb);
144} 144}
145 145
146static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, 146static unsigned int ipv4_conntrack_in(void *priv,
147 struct sk_buff *skb, 147 struct sk_buff *skb,
148 const struct nf_hook_state *state) 148 const struct nf_hook_state *state)
149{ 149{
150 return nf_conntrack_in(dev_net(state->in), PF_INET, ops->hooknum, skb); 150 return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
151} 151}
152 152
153static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, 153static unsigned int ipv4_conntrack_local(void *priv,
154 struct sk_buff *skb, 154 struct sk_buff *skb,
155 const struct nf_hook_state *state) 155 const struct nf_hook_state *state)
156{ 156{
@@ -158,7 +158,7 @@ static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
158 if (skb->len < sizeof(struct iphdr) || 158 if (skb->len < sizeof(struct iphdr) ||
159 ip_hdrlen(skb) < sizeof(struct iphdr)) 159 ip_hdrlen(skb) < sizeof(struct iphdr))
160 return NF_ACCEPT; 160 return NF_ACCEPT;
161 return nf_conntrack_in(dev_net(state->out), PF_INET, ops->hooknum, skb); 161 return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
162} 162}
163 163
164/* Connection tracking may drop packets, but never alters them, so 164/* Connection tracking may drop packets, but never alters them, so
@@ -166,42 +166,36 @@ static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
166static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { 166static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
167 { 167 {
168 .hook = ipv4_conntrack_in, 168 .hook = ipv4_conntrack_in,
169 .owner = THIS_MODULE,
170 .pf = NFPROTO_IPV4, 169 .pf = NFPROTO_IPV4,
171 .hooknum = NF_INET_PRE_ROUTING, 170 .hooknum = NF_INET_PRE_ROUTING,
172 .priority = NF_IP_PRI_CONNTRACK, 171 .priority = NF_IP_PRI_CONNTRACK,
173 }, 172 },
174 { 173 {
175 .hook = ipv4_conntrack_local, 174 .hook = ipv4_conntrack_local,
176 .owner = THIS_MODULE,
177 .pf = NFPROTO_IPV4, 175 .pf = NFPROTO_IPV4,
178 .hooknum = NF_INET_LOCAL_OUT, 176 .hooknum = NF_INET_LOCAL_OUT,
179 .priority = NF_IP_PRI_CONNTRACK, 177 .priority = NF_IP_PRI_CONNTRACK,
180 }, 178 },
181 { 179 {
182 .hook = ipv4_helper, 180 .hook = ipv4_helper,
183 .owner = THIS_MODULE,
184 .pf = NFPROTO_IPV4, 181 .pf = NFPROTO_IPV4,
185 .hooknum = NF_INET_POST_ROUTING, 182 .hooknum = NF_INET_POST_ROUTING,
186 .priority = NF_IP_PRI_CONNTRACK_HELPER, 183 .priority = NF_IP_PRI_CONNTRACK_HELPER,
187 }, 184 },
188 { 185 {
189 .hook = ipv4_confirm, 186 .hook = ipv4_confirm,
190 .owner = THIS_MODULE,
191 .pf = NFPROTO_IPV4, 187 .pf = NFPROTO_IPV4,
192 .hooknum = NF_INET_POST_ROUTING, 188 .hooknum = NF_INET_POST_ROUTING,
193 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 189 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
194 }, 190 },
195 { 191 {
196 .hook = ipv4_helper, 192 .hook = ipv4_helper,
197 .owner = THIS_MODULE,
198 .pf = NFPROTO_IPV4, 193 .pf = NFPROTO_IPV4,
199 .hooknum = NF_INET_LOCAL_IN, 194 .hooknum = NF_INET_LOCAL_IN,
200 .priority = NF_IP_PRI_CONNTRACK_HELPER, 195 .priority = NF_IP_PRI_CONNTRACK_HELPER,
201 }, 196 },
202 { 197 {
203 .hook = ipv4_confirm, 198 .hook = ipv4_confirm,
204 .owner = THIS_MODULE,
205 .pf = NFPROTO_IPV4, 199 .pf = NFPROTO_IPV4,
206 .hooknum = NF_INET_LOCAL_IN, 200 .hooknum = NF_INET_LOCAL_IN,
207 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 201 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
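[Note] The conntrack entry points stop deriving the namespace from the device (dev_net(state->in) on prerouting versus dev_net(state->out) on local-out) and use state->net in both places, which removes the asymmetry. Sketch:

static unsigned int example_conntrack(void *priv, struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	/* identical on NF_INET_PRE_ROUTING and NF_INET_LOCAL_OUT now */
	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}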
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index cdde3ec496e9..c567e1b5d799 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -30,7 +30,7 @@ static inline struct nf_icmp_net *icmp_pernet(struct net *net)
30} 30}
31 31
32static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 32static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
33 struct nf_conntrack_tuple *tuple) 33 struct net *net, struct nf_conntrack_tuple *tuple)
34{ 34{
35 const struct icmphdr *hp; 35 const struct icmphdr *hp;
36 struct icmphdr _hdr; 36 struct icmphdr _hdr;
@@ -144,7 +144,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
144 if (!nf_ct_get_tuplepr(skb, 144 if (!nf_ct_get_tuplepr(skb,
145 skb_network_offset(skb) + ip_hdrlen(skb) 145 skb_network_offset(skb) + ip_hdrlen(skb)
146 + sizeof(struct icmphdr), 146 + sizeof(struct icmphdr),
147 PF_INET, &origtuple)) { 147 PF_INET, net, &origtuple)) {
148 pr_debug("icmp_error_message: failed to get tuple\n"); 148 pr_debug("icmp_error_message: failed to get tuple\n");
149 return -NF_ACCEPT; 149 return -NF_ACCEPT;
150 } 150 }
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 9306ec4fab41..6fb869f646bf 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -22,14 +22,15 @@
22#endif 22#endif
23#include <net/netfilter/nf_conntrack_zones.h> 23#include <net/netfilter/nf_conntrack_zones.h>
24 24
25static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) 25static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
26 u_int32_t user)
26{ 27{
27 int err; 28 int err;
28 29
29 skb_orphan(skb); 30 skb_orphan(skb);
30 31
31 local_bh_disable(); 32 local_bh_disable();
32 err = ip_defrag(skb, user); 33 err = ip_defrag(net, skb, user);
33 local_bh_enable(); 34 local_bh_enable();
34 35
35 if (!err) { 36 if (!err) {
@@ -61,15 +62,14 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
61 return IP_DEFRAG_CONNTRACK_OUT + zone_id; 62 return IP_DEFRAG_CONNTRACK_OUT + zone_id;
62} 63}
63 64
64static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, 65static unsigned int ipv4_conntrack_defrag(void *priv,
65 struct sk_buff *skb, 66 struct sk_buff *skb,
66 const struct nf_hook_state *state) 67 const struct nf_hook_state *state)
67{ 68{
68 struct sock *sk = skb->sk; 69 struct sock *sk = skb->sk;
69 struct inet_sock *inet = inet_sk(skb->sk);
70 70
71 if (sk && (sk->sk_family == PF_INET) && 71 if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) &&
72 inet->nodefrag) 72 inet_sk(sk)->nodefrag)
73 return NF_ACCEPT; 73 return NF_ACCEPT;
74 74
75#if IS_ENABLED(CONFIG_NF_CONNTRACK) 75#if IS_ENABLED(CONFIG_NF_CONNTRACK)
@@ -83,9 +83,9 @@ static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
83 /* Gather fragments. */ 83 /* Gather fragments. */
84 if (ip_is_fragment(ip_hdr(skb))) { 84 if (ip_is_fragment(ip_hdr(skb))) {
85 enum ip_defrag_users user = 85 enum ip_defrag_users user =
86 nf_ct_defrag_user(ops->hooknum, skb); 86 nf_ct_defrag_user(state->hook, skb);
87 87
88 if (nf_ct_ipv4_gather_frags(skb, user)) 88 if (nf_ct_ipv4_gather_frags(state->net, skb, user))
89 return NF_STOLEN; 89 return NF_STOLEN;
90 } 90 }
91 return NF_ACCEPT; 91 return NF_ACCEPT;
@@ -94,14 +94,12 @@ static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
94static struct nf_hook_ops ipv4_defrag_ops[] = { 94static struct nf_hook_ops ipv4_defrag_ops[] = {
95 { 95 {
96 .hook = ipv4_conntrack_defrag, 96 .hook = ipv4_conntrack_defrag,
97 .owner = THIS_MODULE,
98 .pf = NFPROTO_IPV4, 97 .pf = NFPROTO_IPV4,
99 .hooknum = NF_INET_PRE_ROUTING, 98 .hooknum = NF_INET_PRE_ROUTING,
100 .priority = NF_IP_PRI_CONNTRACK_DEFRAG, 99 .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
101 }, 100 },
102 { 101 {
103 .hook = ipv4_conntrack_defrag, 102 .hook = ipv4_conntrack_defrag,
104 .owner = THIS_MODULE,
105 .pf = NFPROTO_IPV4, 103 .pf = NFPROTO_IPV4,
106 .hooknum = NF_INET_LOCAL_OUT, 104 .hooknum = NF_INET_LOCAL_OUT,
107 .priority = NF_IP_PRI_CONNTRACK_DEFRAG, 105 .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
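[Note] Besides threading the namespace into ip_defrag(), this hunk hardens the nodefrag test: skb->sk can be a request or timewait socket whose inet fields are not valid, so inet_sk(sk)->nodefrag is only dereferenced after sk_fullsock(sk) confirms a full socket. The guarded fast path in isolation:

struct sock *sk = skb->sk;

if (sk && sk_fullsock(sk) && sk->sk_family == PF_INET &&
    inet_sk(sk)->nodefrag)
	return NF_ACCEPT;	/* this socket opted out of defrag */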
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index 2d79e6e8d934..ceb187308120 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -23,25 +23,10 @@
23#include <net/netfilter/nf_conntrack.h> 23#include <net/netfilter/nf_conntrack.h>
24#endif 24#endif
25 25
26static struct net *pick_net(struct sk_buff *skb) 26static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb,
27{ 27 const struct in_addr *gw, int oif)
28#ifdef CONFIG_NET_NS
29 const struct dst_entry *dst;
30
31 if (skb->dev != NULL)
32 return dev_net(skb->dev);
33 dst = skb_dst(skb);
34 if (dst != NULL && dst->dev != NULL)
35 return dev_net(dst->dev);
36#endif
37 return &init_net;
38}
39
40static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw,
41 int oif)
42{ 28{
43 const struct iphdr *iph = ip_hdr(skb); 29 const struct iphdr *iph = ip_hdr(skb);
44 struct net *net = pick_net(skb);
45 struct rtable *rt; 30 struct rtable *rt;
46 struct flowi4 fl4; 31 struct flowi4 fl4;
47 32
@@ -65,7 +50,7 @@ static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw,
65 return true; 50 return true;
66} 51}
67 52
68void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, 53void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
69 const struct in_addr *gw, int oif) 54 const struct in_addr *gw, int oif)
70{ 55{
71 struct iphdr *iph; 56 struct iphdr *iph;
@@ -105,9 +90,9 @@ void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum,
105 --iph->ttl; 90 --iph->ttl;
106 ip_send_check(iph); 91 ip_send_check(iph);
107 92
108 if (nf_dup_ipv4_route(skb, gw, oif)) { 93 if (nf_dup_ipv4_route(net, skb, gw, oif)) {
109 __this_cpu_write(nf_skb_duplicated, true); 94 __this_cpu_write(nf_skb_duplicated, true);
110 ip_local_out(skb); 95 ip_local_out(net, skb->sk, skb);
111 __this_cpu_write(nf_skb_duplicated, false); 96 __this_cpu_write(nf_skb_duplicated, false);
112 } else { 97 } else {
113 kfree_skb(skb); 98 kfree_skb(skb);
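[Note] pick_net() was a guess: namespace from skb->dev, else from the dst device, else fall back to init_net. It disappears because every caller now owns a struct net and passes it down, and the reworked ip_local_out() takes (net, sk, skb) as well. A hypothetical call site, using the hook state as elsewhere in this series:

/* duplicate-and-send from hook context; gw and oif come from the rule */
nf_dup_ipv4(state->net, skb, state->hook, &gw, oif);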
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 22f4579b0c2a..5075b7ecd26d 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -255,9 +255,9 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
255EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); 255EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
256 256
257unsigned int 257unsigned int
258nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, 258nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
259 const struct nf_hook_state *state, 259 const struct nf_hook_state *state,
260 unsigned int (*do_chain)(const struct nf_hook_ops *ops, 260 unsigned int (*do_chain)(void *priv,
261 struct sk_buff *skb, 261 struct sk_buff *skb,
262 const struct nf_hook_state *state, 262 const struct nf_hook_state *state,
263 struct nf_conn *ct)) 263 struct nf_conn *ct))
@@ -266,7 +266,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
266 enum ip_conntrack_info ctinfo; 266 enum ip_conntrack_info ctinfo;
267 struct nf_conn_nat *nat; 267 struct nf_conn_nat *nat;
268 /* maniptype == SRC for postrouting. */ 268 /* maniptype == SRC for postrouting. */
269 enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); 269 enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
270 270
271 /* We never see fragments: conntrack defrags on pre-routing 271 /* We never see fragments: conntrack defrags on pre-routing
272 * and local-out, and nf_nat_out protects post-routing. 272 * and local-out, and nf_nat_out protects post-routing.
@@ -295,7 +295,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
295 case IP_CT_RELATED_REPLY: 295 case IP_CT_RELATED_REPLY:
296 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { 296 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
297 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 297 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
298 ops->hooknum)) 298 state->hook))
299 return NF_DROP; 299 return NF_DROP;
300 else 300 else
301 return NF_ACCEPT; 301 return NF_ACCEPT;
@@ -308,21 +308,21 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
308 if (!nf_nat_initialized(ct, maniptype)) { 308 if (!nf_nat_initialized(ct, maniptype)) {
309 unsigned int ret; 309 unsigned int ret;
310 310
311 ret = do_chain(ops, skb, state, ct); 311 ret = do_chain(priv, skb, state, ct);
312 if (ret != NF_ACCEPT) 312 if (ret != NF_ACCEPT)
313 return ret; 313 return ret;
314 314
315 if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum))) 315 if (nf_nat_initialized(ct, HOOK2MANIP(state->hook)))
316 break; 316 break;
317 317
318 ret = nf_nat_alloc_null_binding(ct, ops->hooknum); 318 ret = nf_nat_alloc_null_binding(ct, state->hook);
319 if (ret != NF_ACCEPT) 319 if (ret != NF_ACCEPT)
320 return ret; 320 return ret;
321 } else { 321 } else {
322 pr_debug("Already setup manip %s for ct %p\n", 322 pr_debug("Already setup manip %s for ct %p\n",
323 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", 323 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
324 ct); 324 ct);
325 if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, 325 if (nf_nat_oif_changed(state->hook, ctinfo, nat,
326 state->out)) 326 state->out))
327 goto oif_changed; 327 goto oif_changed;
328 } 328 }
@@ -332,11 +332,11 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
332 /* ESTABLISHED */ 332 /* ESTABLISHED */
333 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 333 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
334 ctinfo == IP_CT_ESTABLISHED_REPLY); 334 ctinfo == IP_CT_ESTABLISHED_REPLY);
335 if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) 335 if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
336 goto oif_changed; 336 goto oif_changed;
337 } 337 }
338 338
339 return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); 339 return nf_nat_packet(ct, ctinfo, state->hook, skb);
340 340
341oif_changed: 341oif_changed:
342 nf_ct_kill_acct(ct, ctinfo, skb); 342 nf_ct_kill_acct(ct, ctinfo, skb);
@@ -345,9 +345,9 @@ oif_changed:
345EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn); 345EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn);
346 346
347unsigned int 347unsigned int
348nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, 348nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
349 const struct nf_hook_state *state, 349 const struct nf_hook_state *state,
350 unsigned int (*do_chain)(const struct nf_hook_ops *ops, 350 unsigned int (*do_chain)(void *priv,
351 struct sk_buff *skb, 351 struct sk_buff *skb,
352 const struct nf_hook_state *state, 352 const struct nf_hook_state *state,
353 struct nf_conn *ct)) 353 struct nf_conn *ct))
@@ -355,7 +355,7 @@ nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
355 unsigned int ret; 355 unsigned int ret;
356 __be32 daddr = ip_hdr(skb)->daddr; 356 __be32 daddr = ip_hdr(skb)->daddr;
357 357
358 ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); 358 ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
359 if (ret != NF_DROP && ret != NF_STOLEN && 359 if (ret != NF_DROP && ret != NF_STOLEN &&
360 daddr != ip_hdr(skb)->daddr) 360 daddr != ip_hdr(skb)->daddr)
361 skb_dst_drop(skb); 361 skb_dst_drop(skb);
@@ -365,9 +365,9 @@ nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
365EXPORT_SYMBOL_GPL(nf_nat_ipv4_in); 365EXPORT_SYMBOL_GPL(nf_nat_ipv4_in);
366 366
367unsigned int 367unsigned int
368nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, 368nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
369 const struct nf_hook_state *state, 369 const struct nf_hook_state *state,
370 unsigned int (*do_chain)(const struct nf_hook_ops *ops, 370 unsigned int (*do_chain)(void *priv,
371 struct sk_buff *skb, 371 struct sk_buff *skb,
372 const struct nf_hook_state *state, 372 const struct nf_hook_state *state,
373 struct nf_conn *ct)) 373 struct nf_conn *ct))
@@ -384,7 +384,7 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
384 ip_hdrlen(skb) < sizeof(struct iphdr)) 384 ip_hdrlen(skb) < sizeof(struct iphdr))
385 return NF_ACCEPT; 385 return NF_ACCEPT;
386 386
387 ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); 387 ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
388#ifdef CONFIG_XFRM 388#ifdef CONFIG_XFRM
389 if (ret != NF_DROP && ret != NF_STOLEN && 389 if (ret != NF_DROP && ret != NF_STOLEN &&
390 !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 390 !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -396,7 +396,7 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
396 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 396 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
397 ct->tuplehash[dir].tuple.src.u.all != 397 ct->tuplehash[dir].tuple.src.u.all !=
398 ct->tuplehash[!dir].tuple.dst.u.all)) { 398 ct->tuplehash[!dir].tuple.dst.u.all)) {
399 err = nf_xfrm_me_harder(skb, AF_INET); 399 err = nf_xfrm_me_harder(state->net, skb, AF_INET);
400 if (err < 0) 400 if (err < 0)
401 ret = NF_DROP_ERR(err); 401 ret = NF_DROP_ERR(err);
402 } 402 }
@@ -407,9 +407,9 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
407EXPORT_SYMBOL_GPL(nf_nat_ipv4_out); 407EXPORT_SYMBOL_GPL(nf_nat_ipv4_out);
408 408
409unsigned int 409unsigned int
410nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, 410nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
411 const struct nf_hook_state *state, 411 const struct nf_hook_state *state,
412 unsigned int (*do_chain)(const struct nf_hook_ops *ops, 412 unsigned int (*do_chain)(void *priv,
413 struct sk_buff *skb, 413 struct sk_buff *skb,
414 const struct nf_hook_state *state, 414 const struct nf_hook_state *state,
415 struct nf_conn *ct)) 415 struct nf_conn *ct))
@@ -424,14 +424,14 @@ nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
424 ip_hdrlen(skb) < sizeof(struct iphdr)) 424 ip_hdrlen(skb) < sizeof(struct iphdr))
425 return NF_ACCEPT; 425 return NF_ACCEPT;
426 426
427 ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); 427 ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
428 if (ret != NF_DROP && ret != NF_STOLEN && 428 if (ret != NF_DROP && ret != NF_STOLEN &&
429 (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 429 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
430 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 430 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
431 431
432 if (ct->tuplehash[dir].tuple.dst.u3.ip != 432 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
433 ct->tuplehash[!dir].tuple.src.u3.ip) { 433 ct->tuplehash[!dir].tuple.src.u3.ip) {
434 err = ip_route_me_harder(skb, RTN_UNSPEC); 434 err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
435 if (err < 0) 435 if (err < 0)
436 ret = NF_DROP_ERR(err); 436 ret = NF_DROP_ERR(err);
437 } 437 }
@@ -440,7 +440,7 @@ nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
440 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 440 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
441 ct->tuplehash[dir].tuple.dst.u.all != 441 ct->tuplehash[dir].tuple.dst.u.all !=
442 ct->tuplehash[!dir].tuple.src.u.all) { 442 ct->tuplehash[!dir].tuple.src.u.all) {
443 err = nf_xfrm_me_harder(skb, AF_INET); 443 err = nf_xfrm_me_harder(state->net, skb, AF_INET);
444 if (err < 0) 444 if (err < 0)
445 ret = NF_DROP_ERR(err); 445 ret = NF_DROP_ERR(err);
446 } 446 }
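[Note] The do_chain() callback type embedded in nf_nat_ipv4_fn() and friends changes in lockstep with the hook prototype, and every ops->hooknum read becomes state->hook, including the HOOK2MANIP() mapping that selects SRC versus DST manip. A sketch of a conforming callback (the example_ name is hypothetical):

static unsigned int example_do_chain(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state,
				     struct nf_conn *ct)
{
	/* the caller already derived the manip type:
	 * HOOK2MANIP(state->hook) == NF_NAT_MANIP_SRC on postrouting */
	return ipt_do_table(skb, state, state->net->ipv4.nat_table);
}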
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 657d2307f031..b3ca21b2ba9b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -45,7 +45,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
45 struct net *net = nf_ct_net(ct); 45 struct net *net = nf_ct_net(ct);
46 const struct nf_conn *master = ct->master; 46 const struct nf_conn *master = ct->master;
47 struct nf_conntrack_expect *other_exp; 47 struct nf_conntrack_expect *other_exp;
48 struct nf_conntrack_tuple t; 48 struct nf_conntrack_tuple t = {};
49 const struct nf_ct_pptp_master *ct_pptp_info; 49 const struct nf_ct_pptp_master *ct_pptp_info;
50 const struct nf_nat_pptp *nat_pptp_info; 50 const struct nf_nat_pptp *nat_pptp_info;
51 struct nf_nat_range range; 51 struct nf_nat_range range;
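[Note] The expectation tuple in pptp_nat_expected() is now zero-initialized at declaration. Presumably defensive: the code fills in only some fields before the lookup, and = {} guarantees the rest start from zero instead of stack garbage. The explicit equivalent:

struct nf_conntrack_tuple t;

memset(&t, 0, sizeof(t));	/* same effect as the `= {}` initializer */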
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 7c676671329d..ddb894ac1458 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1156,7 +1156,7 @@ static int snmp_parse_mangle(unsigned char *msg,
1156 } 1156 }
1157 1157
1158 if (obj->type == SNMP_IPADDR) 1158 if (obj->type == SNMP_IPADDR)
1159 mangle_address(ctx.begin, ctx.pointer - 4 , map, check); 1159 mangle_address(ctx.begin, ctx.pointer - 4, map, check);
1160 1160
1161 kfree(obj->id); 1161 kfree(obj->id);
1162 kfree(obj); 1162 kfree(obj);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 3262e41ff76f..c747b2d9eb77 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -99,7 +99,7 @@ void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
99EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); 99EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
100 100
101/* Send RST reply */ 101/* Send RST reply */
102void nf_send_reset(struct sk_buff *oldskb, int hook) 102void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
103{ 103{
104 struct sk_buff *nskb; 104 struct sk_buff *nskb;
105 const struct iphdr *oiph; 105 const struct iphdr *oiph;
@@ -129,7 +129,7 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
129 ip4_dst_hoplimit(skb_dst(nskb))); 129 ip4_dst_hoplimit(skb_dst(nskb)));
130 nf_reject_ip_tcphdr_put(nskb, oldskb, oth); 130 nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
131 131
132 if (ip_route_me_harder(nskb, RTN_UNSPEC)) 132 if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
133 goto free_nskb; 133 goto free_nskb;
134 134
135 /* "Never happens" */ 135 /* "Never happens" */
@@ -157,7 +157,7 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
157 dev_queue_xmit(nskb); 157 dev_queue_xmit(nskb);
158 } else 158 } else
159#endif 159#endif
160 ip_local_out(nskb); 160 ip_local_out(net, nskb->sk, nskb);
161 161
162 return; 162 return;
163 163
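[Note] nf_send_reset() now requires the caller's namespace: the generated RST is rerouted with ip_route_me_harder(net, ...) and emitted through the three-argument ip_local_out(net, sk, skb). A hypothetical reject-target call site:

nf_send_reset(state->net, skb, state->hook);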
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 8412268bbad1..9d09d4f59545 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -15,15 +15,15 @@
15#include <net/netfilter/nf_tables.h> 15#include <net/netfilter/nf_tables.h>
16 16
17static unsigned int 17static unsigned int
18nft_do_chain_arp(const struct nf_hook_ops *ops, 18nft_do_chain_arp(void *priv,
19 struct sk_buff *skb, 19 struct sk_buff *skb,
20 const struct nf_hook_state *state) 20 const struct nf_hook_state *state)
21{ 21{
22 struct nft_pktinfo pkt; 22 struct nft_pktinfo pkt;
23 23
24 nft_set_pktinfo(&pkt, ops, skb, state); 24 nft_set_pktinfo(&pkt, skb, state);
25 25
26 return nft_do_chain(&pkt, ops); 26 return nft_do_chain(&pkt, priv);
27} 27}
28 28
29static struct nft_af_info nft_af_arp __read_mostly = { 29static struct nft_af_info nft_af_arp __read_mostly = {
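[Note] The same migration on the nftables side: nft_set_pktinfo() loses its ops argument (hook and namespace are taken from the state), and the chain to execute arrives as the priv cookie. The shape shared by all the nft_do_chain_* entry points that follow:

static unsigned int example_nft_hook(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);	/* copies hook/net/in/out */
	return nft_do_chain(&pkt, priv);	/* priv is the chain */
}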
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index aa180d3a69a5..ca9dc3c46c4f 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -18,18 +18,18 @@
18#include <net/ip.h> 18#include <net/ip.h>
19#include <net/netfilter/nf_tables_ipv4.h> 19#include <net/netfilter/nf_tables_ipv4.h>
20 20
21static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, 21static unsigned int nft_do_chain_ipv4(void *priv,
22 struct sk_buff *skb, 22 struct sk_buff *skb,
23 const struct nf_hook_state *state) 23 const struct nf_hook_state *state)
24{ 24{
25 struct nft_pktinfo pkt; 25 struct nft_pktinfo pkt;
26 26
27 nft_set_pktinfo_ipv4(&pkt, ops, skb, state); 27 nft_set_pktinfo_ipv4(&pkt, skb, state);
28 28
29 return nft_do_chain(&pkt, ops); 29 return nft_do_chain(&pkt, priv);
30} 30}
31 31
32static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, 32static unsigned int nft_ipv4_output(void *priv,
33 struct sk_buff *skb, 33 struct sk_buff *skb,
34 const struct nf_hook_state *state) 34 const struct nf_hook_state *state)
35{ 35{
@@ -41,7 +41,7 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
41 return NF_ACCEPT; 41 return NF_ACCEPT;
42 } 42 }
43 43
44 return nft_do_chain_ipv4(ops, skb, state); 44 return nft_do_chain_ipv4(priv, skb, state);
45} 45}
46 46
47struct nft_af_info nft_af_ipv4 __read_mostly = { 47struct nft_af_info nft_af_ipv4 __read_mostly = {
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index bf5c30ae14e4..f5c66a7a4bf2 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -26,44 +26,44 @@
26#include <net/netfilter/nf_nat_l3proto.h> 26#include <net/netfilter/nf_nat_l3proto.h>
27#include <net/ip.h> 27#include <net/ip.h>
28 28
29static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, 29static unsigned int nft_nat_do_chain(void *priv,
30 struct sk_buff *skb, 30 struct sk_buff *skb,
31 const struct nf_hook_state *state, 31 const struct nf_hook_state *state,
32 struct nf_conn *ct) 32 struct nf_conn *ct)
33{ 33{
34 struct nft_pktinfo pkt; 34 struct nft_pktinfo pkt;
35 35
36 nft_set_pktinfo_ipv4(&pkt, ops, skb, state); 36 nft_set_pktinfo_ipv4(&pkt, skb, state);
37 37
38 return nft_do_chain(&pkt, ops); 38 return nft_do_chain(&pkt, priv);
39} 39}
40 40
41static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops, 41static unsigned int nft_nat_ipv4_fn(void *priv,
42 struct sk_buff *skb, 42 struct sk_buff *skb,
43 const struct nf_hook_state *state) 43 const struct nf_hook_state *state)
44{ 44{
45 return nf_nat_ipv4_fn(ops, skb, state, nft_nat_do_chain); 45 return nf_nat_ipv4_fn(priv, skb, state, nft_nat_do_chain);
46} 46}
47 47
48static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops, 48static unsigned int nft_nat_ipv4_in(void *priv,
49 struct sk_buff *skb, 49 struct sk_buff *skb,
50 const struct nf_hook_state *state) 50 const struct nf_hook_state *state)
51{ 51{
52 return nf_nat_ipv4_in(ops, skb, state, nft_nat_do_chain); 52 return nf_nat_ipv4_in(priv, skb, state, nft_nat_do_chain);
53} 53}
54 54
55static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops, 55static unsigned int nft_nat_ipv4_out(void *priv,
56 struct sk_buff *skb, 56 struct sk_buff *skb,
57 const struct nf_hook_state *state) 57 const struct nf_hook_state *state)
58{ 58{
59 return nf_nat_ipv4_out(ops, skb, state, nft_nat_do_chain); 59 return nf_nat_ipv4_out(priv, skb, state, nft_nat_do_chain);
60} 60}
61 61
62static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops, 62static unsigned int nft_nat_ipv4_local_fn(void *priv,
63 struct sk_buff *skb, 63 struct sk_buff *skb,
64 const struct nf_hook_state *state) 64 const struct nf_hook_state *state)
65{ 65{
66 return nf_nat_ipv4_local_fn(ops, skb, state, nft_nat_do_chain); 66 return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain);
67} 67}
68 68
69static const struct nf_chain_type nft_chain_nat_ipv4 = { 69static const struct nf_chain_type nft_chain_nat_ipv4 = {
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index e335b0afdaf3..2375b0a8be46 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -21,7 +21,7 @@
21#include <net/route.h> 21#include <net/route.h>
22#include <net/ip.h> 22#include <net/ip.h>
23 23
24static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, 24static unsigned int nf_route_table_hook(void *priv,
25 struct sk_buff *skb, 25 struct sk_buff *skb,
26 const struct nf_hook_state *state) 26 const struct nf_hook_state *state)
27{ 27{
@@ -37,7 +37,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
37 ip_hdrlen(skb) < sizeof(struct iphdr)) 37 ip_hdrlen(skb) < sizeof(struct iphdr))
38 return NF_ACCEPT; 38 return NF_ACCEPT;
39 39
40 nft_set_pktinfo_ipv4(&pkt, ops, skb, state); 40 nft_set_pktinfo_ipv4(&pkt, skb, state);
41 41
42 mark = skb->mark; 42 mark = skb->mark;
43 iph = ip_hdr(skb); 43 iph = ip_hdr(skb);
@@ -45,7 +45,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
45 daddr = iph->daddr; 45 daddr = iph->daddr;
46 tos = iph->tos; 46 tos = iph->tos;
47 47
48 ret = nft_do_chain(&pkt, ops); 48 ret = nft_do_chain(&pkt, priv);
49 if (ret != NF_DROP && ret != NF_QUEUE) { 49 if (ret != NF_DROP && ret != NF_QUEUE) {
50 iph = ip_hdr(skb); 50 iph = ip_hdr(skb);
51 51
@@ -53,7 +53,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
53 iph->daddr != daddr || 53 iph->daddr != daddr ||
54 skb->mark != mark || 54 skb->mark != mark ||
55 iph->tos != tos) 55 iph->tos != tos)
56 if (ip_route_me_harder(skb, RTN_UNSPEC)) 56 if (ip_route_me_harder(state->net, skb, RTN_UNSPEC))
57 ret = NF_DROP; 57 ret = NF_DROP;
58 } 58 }
59 return ret; 59 return ret;
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index b45932d43b69..bf855e64fc45 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -30,7 +30,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr,
30 }; 30 };
31 int oif = regs->data[priv->sreg_dev]; 31 int oif = regs->data[priv->sreg_dev];
32 32
33 nf_dup_ipv4(pkt->skb, pkt->ops->hooknum, &gw, oif); 33 nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif);
34} 34}
35 35
36static int nft_dup_ipv4_init(const struct nft_ctx *ctx, 36static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index 40e414c4ca56..b72ffc58e255 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -26,7 +26,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
26 memset(&range, 0, sizeof(range)); 26 memset(&range, 0, sizeof(range));
27 range.flags = priv->flags; 27 range.flags = priv->flags;
28 28
29 regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, 29 regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook,
30 &range, pkt->out); 30 &range, pkt->out);
31} 31}
32 32
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
index d8d795df9c13..c09d4381427e 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -36,7 +36,7 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr,
36 mr.range[0].flags |= priv->flags; 36 mr.range[0].flags |= priv->flags;
37 37
38 regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, 38 regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr,
39 pkt->ops->hooknum); 39 pkt->hook);
40} 40}
41 41
42static struct nft_expr_type nft_redir_ipv4_type; 42static struct nft_expr_type nft_redir_ipv4_type;
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index b07e58b51158..c24f41c816b3 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -27,11 +27,10 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr,
27 27
28 switch (priv->type) { 28 switch (priv->type) {
29 case NFT_REJECT_ICMP_UNREACH: 29 case NFT_REJECT_ICMP_UNREACH:
30 nf_send_unreach(pkt->skb, priv->icmp_code, 30 nf_send_unreach(pkt->skb, priv->icmp_code, pkt->hook);
31 pkt->ops->hooknum);
32 break; 31 break;
33 case NFT_REJECT_TCP_RST: 32 case NFT_REJECT_TCP_RST:
34 nf_send_reset(pkt->skb, pkt->ops->hooknum); 33 nf_send_reset(pkt->net, pkt->skb, pkt->hook);
35 break; 34 break;
36 default: 35 default:
37 break; 36 break;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 561cd4b8fc6e..63e5be0abd86 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -406,13 +406,16 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
406 ip_select_ident(net, skb, NULL); 406 ip_select_ident(net, skb, NULL);
407 407
408 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 408 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
409 skb->transport_header += iphlen;
410 if (iph->protocol == IPPROTO_ICMP &&
411 length >= iphlen + sizeof(struct icmphdr))
412 icmp_out_count(net, ((struct icmphdr *)
413 skb_transport_header(skb))->type);
409 } 414 }
410 if (iph->protocol == IPPROTO_ICMP)
411 icmp_out_count(net, ((struct icmphdr *)
412 skb_transport_header(skb))->type);
413 415
414 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, 416 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
415 NULL, rt->dst.dev, dst_output_sk); 417 net, sk, skb, NULL, rt->dst.dev,
418 dst_output);
416 if (err > 0) 419 if (err > 0)
417 err = net_xmit_errno(err); 420 err = net_xmit_errno(err);
418 if (err) 421 if (err)
@@ -483,6 +486,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd,
483static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) 486static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
484{ 487{
485 struct inet_sock *inet = inet_sk(sk); 488 struct inet_sock *inet = inet_sk(sk);
489 struct net *net = sock_net(sk);
486 struct ipcm_cookie ipc; 490 struct ipcm_cookie ipc;
487 struct rtable *rt = NULL; 491 struct rtable *rt = NULL;
488 struct flowi4 fl4; 492 struct flowi4 fl4;
@@ -542,7 +546,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
542 ipc.oif = sk->sk_bound_dev_if; 546 ipc.oif = sk->sk_bound_dev_if;
543 547
544 if (msg->msg_controllen) { 548 if (msg->msg_controllen) {
545 err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); 549 err = ip_cmsg_send(net, msg, &ipc, false);
546 if (err) 550 if (err)
547 goto out; 551 goto out;
548 if (ipc.opt) 552 if (ipc.opt)
@@ -597,6 +601,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
597 (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), 601 (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
598 daddr, saddr, 0, 0); 602 daddr, saddr, 0, 0);
599 603
604 if (!saddr && ipc.oif)
605 l3mdev_get_saddr(net, ipc.oif, &fl4);
606
600 if (!inet->hdrincl) { 607 if (!inet->hdrincl) {
601 rfv.msg = msg; 608 rfv.msg = msg;
602 rfv.hlen = 0; 609 rfv.hlen = 0;
@@ -607,7 +614,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
607 } 614 }
608 615
609 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); 616 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
610 rt = ip_route_output_flow(sock_net(sk), &fl4, sk); 617 rt = ip_route_output_flow(net, &fl4, sk);
611 if (IS_ERR(rt)) { 618 if (IS_ERR(rt)) {
612 err = PTR_ERR(rt); 619 err = PTR_ERR(rt);
613 rt = NULL; 620 rt = NULL;
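[Note] Two fixes ride along in raw.c besides the namespace plumbing: skb->transport_header is advanced past the user-built IP header so skb_transport_header() really points at the ICMP header, and icmp_out_count() is gated on length >= iphlen + sizeof(struct icmphdr), apparently so a short hdrincl send can no longer make the type read run past the supplied data. The guard in isolation:

skb->transport_header += iphlen;
if (iph->protocol == IPPROTO_ICMP &&
    length >= iphlen + sizeof(struct icmphdr))
	icmp_out_count(net, ((struct icmphdr *)
			     skb_transport_header(skb))->type);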
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c81deb85acb4..85f184e429c6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,7 +112,7 @@
112#endif 112#endif
113#include <net/secure_seq.h> 113#include <net/secure_seq.h>
114#include <net/ip_tunnels.h> 114#include <net/ip_tunnels.h>
115#include <net/vrf.h> 115#include <net/l3mdev.h>
116 116
117#define RT_FL_TOS(oldflp4) \ 117#define RT_FL_TOS(oldflp4) \
118 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) 118 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
@@ -847,7 +847,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
847 return; 847 return;
848 } 848 }
849 log_martians = IN_DEV_LOG_MARTIANS(in_dev); 849 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
850 vif = vrf_master_ifindex_rcu(rt->dst.dev); 850 vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
851 rcu_read_unlock(); 851 rcu_read_unlock();
852 852
853 net = dev_net(rt->dst.dev); 853 net = dev_net(rt->dst.dev);
@@ -941,7 +941,7 @@ static int ip_error(struct sk_buff *skb)
941 } 941 }
942 942
943 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 943 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
944 vrf_master_ifindex(skb->dev), 1); 944 l3mdev_master_ifindex(skb->dev), 1);
945 945
946 send = true; 946 send = true;
947 if (peer) { 947 if (peer) {
@@ -1152,7 +1152,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
1152 dst_set_expires(&rt->dst, 0); 1152 dst_set_expires(&rt->dst, 0);
1153} 1153}
1154 1154
1155static int ip_rt_bug(struct sock *sk, struct sk_buff *skb) 1155static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
1156{ 1156{
1157 pr_debug("%s: %pI4 -> %pI4, %s\n", 1157 pr_debug("%s: %pI4 -> %pI4, %s\n",
1158 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, 1158 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
@@ -1438,12 +1438,34 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
1438} 1438}
1439 1439
1440static struct rtable *rt_dst_alloc(struct net_device *dev, 1440static struct rtable *rt_dst_alloc(struct net_device *dev,
1441 unsigned int flags, u16 type,
1441 bool nopolicy, bool noxfrm, bool will_cache) 1442 bool nopolicy, bool noxfrm, bool will_cache)
1442{ 1443{
1443 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, 1444 struct rtable *rt;
1444 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) | 1445
1445 (nopolicy ? DST_NOPOLICY : 0) | 1446 rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1446 (noxfrm ? DST_NOXFRM : 0)); 1447 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
1448 (nopolicy ? DST_NOPOLICY : 0) |
1449 (noxfrm ? DST_NOXFRM : 0));
1450
1451 if (rt) {
1452 rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1453 rt->rt_flags = flags;
1454 rt->rt_type = type;
1455 rt->rt_is_input = 0;
1456 rt->rt_iif = 0;
1457 rt->rt_pmtu = 0;
1458 rt->rt_gateway = 0;
1459 rt->rt_uses_gateway = 0;
1460 rt->rt_table_id = 0;
1461 INIT_LIST_HEAD(&rt->rt_uncached);
1462
1463 rt->dst.output = ip_output;
1464 if (flags & RTCF_LOCAL)
1465 rt->dst.input = ip_local_deliver;
1466 }
1467
1468 return rt;
1447} 1469}
1448 1470
1449/* called in rcu_read_lock() section */ 1471/* called in rcu_read_lock() section */
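[Note] rt_dst_alloc() absorbs the field initialization every caller used to repeat (rt_genid, flags, type, iif/pmtu/gateway zeroing, the uncached list head) and installs the common defaults: dst.output = ip_output, plus dst.input = ip_local_deliver when RTCF_LOCAL is set. The later hunks in this file shrink accordingly; a caller now passes flags and type and overrides only what differs, e.g. the multicast input path:

rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
		   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
if (!rth)
	goto e_nobufs;
rth->dst.output = ip_rt_bug;	/* input routes must never transmit */
rth->rt_is_input = 1;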
@@ -1452,6 +1474,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1452{ 1474{
1453 struct rtable *rth; 1475 struct rtable *rth;
1454 struct in_device *in_dev = __in_dev_get_rcu(dev); 1476 struct in_device *in_dev = __in_dev_get_rcu(dev);
1477 unsigned int flags = RTCF_MULTICAST;
1455 u32 itag = 0; 1478 u32 itag = 0;
1456 int err; 1479 int err;
1457 1480
@@ -1464,9 +1487,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1464 skb->protocol != htons(ETH_P_IP)) 1487 skb->protocol != htons(ETH_P_IP))
1465 goto e_inval; 1488 goto e_inval;
1466 1489
1467 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) 1490 if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
1468 if (ipv4_is_loopback(saddr)) 1491 goto e_inval;
1469 goto e_inval;
1470 1492
1471 if (ipv4_is_zeronet(saddr)) { 1493 if (ipv4_is_zeronet(saddr)) {
1472 if (!ipv4_is_local_multicast(daddr)) 1494 if (!ipv4_is_local_multicast(daddr))
@@ -1477,7 +1499,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1477 if (err < 0) 1499 if (err < 0)
1478 goto e_err; 1500 goto e_err;
1479 } 1501 }
1480 rth = rt_dst_alloc(dev_net(dev)->loopback_dev, 1502 if (our)
1503 flags |= RTCF_LOCAL;
1504
1505 rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
1481 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); 1506 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
1482 if (!rth) 1507 if (!rth)
1483 goto e_nobufs; 1508 goto e_nobufs;
@@ -1486,20 +1511,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1486 rth->dst.tclassid = itag; 1511 rth->dst.tclassid = itag;
1487#endif 1512#endif
1488 rth->dst.output = ip_rt_bug; 1513 rth->dst.output = ip_rt_bug;
1489
1490 rth->rt_genid = rt_genid_ipv4(dev_net(dev));
1491 rth->rt_flags = RTCF_MULTICAST;
1492 rth->rt_type = RTN_MULTICAST;
1493 rth->rt_is_input= 1; 1514 rth->rt_is_input= 1;
1494 rth->rt_iif = 0;
1495 rth->rt_pmtu = 0;
1496 rth->rt_gateway = 0;
1497 rth->rt_uses_gateway = 0;
1498 INIT_LIST_HEAD(&rth->rt_uncached);
1499 if (our) {
1500 rth->dst.input= ip_local_deliver;
1501 rth->rt_flags |= RTCF_LOCAL;
1502 }
1503 1515
1504#ifdef CONFIG_IP_MROUTE 1516#ifdef CONFIG_IP_MROUTE
1505 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) 1517 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
@@ -1608,7 +1620,7 @@ static int __mkroute_input(struct sk_buff *skb,
1608 } 1620 }
1609 } 1621 }
1610 1622
1611 rth = rt_dst_alloc(out_dev->dev, 1623 rth = rt_dst_alloc(out_dev->dev, 0, res->type,
1612 IN_DEV_CONF_GET(in_dev, NOPOLICY), 1624 IN_DEV_CONF_GET(in_dev, NOPOLICY),
1613 IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache); 1625 IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
1614 if (!rth) { 1626 if (!rth) {
@@ -1616,19 +1628,12 @@ static int __mkroute_input(struct sk_buff *skb,
1616 goto cleanup; 1628 goto cleanup;
1617 } 1629 }
1618 1630
1619 rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
1620 rth->rt_flags = 0;
1621 rth->rt_type = res->type;
1622 rth->rt_is_input = 1; 1631 rth->rt_is_input = 1;
1623 rth->rt_iif = 0; 1632 if (res->table)
1624 rth->rt_pmtu = 0; 1633 rth->rt_table_id = res->table->tb_id;
1625 rth->rt_gateway = 0;
1626 rth->rt_uses_gateway = 0;
1627 INIT_LIST_HEAD(&rth->rt_uncached);
1628 RT_CACHE_STAT_INC(in_slow_tot); 1634 RT_CACHE_STAT_INC(in_slow_tot);
1629 1635
1630 rth->dst.input = ip_forward; 1636 rth->dst.input = ip_forward;
1631 rth->dst.output = ip_output;
1632 1637
1633 rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); 1638 rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
1634 if (lwtunnel_output_redirect(rth->dst.lwtstate)) { 1639 if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
@@ -1646,6 +1651,48 @@ out:
1646 return err; 1651 return err;
1647} 1652}
1648 1653
1654#ifdef CONFIG_IP_ROUTE_MULTIPATH
1655
1656/* To make ICMP packets follow the right flow, the multipath hash is
1657 * calculated from the inner IP addresses in reverse order.
1658 */
1659static int ip_multipath_icmp_hash(struct sk_buff *skb)
1660{
1661 const struct iphdr *outer_iph = ip_hdr(skb);
1662 struct icmphdr _icmph;
1663 const struct icmphdr *icmph;
1664 struct iphdr _inner_iph;
1665 const struct iphdr *inner_iph;
1666
1667 if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
1668 goto standard_hash;
1669
1670 icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
1671 &_icmph);
1672 if (!icmph)
1673 goto standard_hash;
1674
1675 if (icmph->type != ICMP_DEST_UNREACH &&
1676 icmph->type != ICMP_REDIRECT &&
1677 icmph->type != ICMP_TIME_EXCEEDED &&
1678 icmph->type != ICMP_PARAMETERPROB) {
1679 goto standard_hash;
1680 }
1681
1682 inner_iph = skb_header_pointer(skb,
1683 outer_iph->ihl * 4 + sizeof(_icmph),
1684 sizeof(_inner_iph), &_inner_iph);
1685 if (!inner_iph)
1686 goto standard_hash;
1687
1688 return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);
1689
1690standard_hash:
1691 return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
1692}
1693
1694#endif /* CONFIG_IP_ROUTE_MULTIPATH */
1695
1649static int ip_mkroute_input(struct sk_buff *skb, 1696static int ip_mkroute_input(struct sk_buff *skb,
1650 struct fib_result *res, 1697 struct fib_result *res,
1651 const struct flowi4 *fl4, 1698 const struct flowi4 *fl4,
@@ -1653,8 +1700,15 @@ static int ip_mkroute_input(struct sk_buff *skb,
1653 __be32 daddr, __be32 saddr, u32 tos) 1700 __be32 daddr, __be32 saddr, u32 tos)
1654{ 1701{
1655#ifdef CONFIG_IP_ROUTE_MULTIPATH 1702#ifdef CONFIG_IP_ROUTE_MULTIPATH
1656 if (res->fi && res->fi->fib_nhs > 1) 1703 if (res->fi && res->fi->fib_nhs > 1) {
1657 fib_select_multipath(res); 1704 int h;
1705
1706 if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
1707 h = ip_multipath_icmp_hash(skb);
1708 else
1709 h = fib_multipath_hash(saddr, daddr);
1710 fib_select_multipath(res, h);
1711 }
1658#endif 1712#endif
1659 1713
1660 /* create a routing cache entry */ 1714 /* create a routing cache entry */
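[Note] Multipath selection becomes hash-based here: fib_select_multipath() now takes a hash, ordinary traffic hashes on the (saddr, daddr) pair, and ICMP errors hash on the embedded inner header with the addresses reversed, so an error generated mid-path is steered onto the same nexthop as the flow that triggered it. The selection step in isolation:

if (res->fi && res->fi->fib_nhs > 1) {
	int h = (ip_hdr(skb)->protocol == IPPROTO_ICMP) ?
			ip_multipath_icmp_hash(skb) :
			fib_multipath_hash(saddr, daddr);

	fib_select_multipath(res, h);
}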
@@ -1706,6 +1760,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1706 goto martian_source; 1760 goto martian_source;
1707 1761
1708 res.fi = NULL; 1762 res.fi = NULL;
1763 res.table = NULL;
1709 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) 1764 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
1710 goto brd_input; 1765 goto brd_input;
1711 1766
@@ -1733,7 +1788,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1733 * Now we are ready to route packet. 1788 * Now we are ready to route packet.
1734 */ 1789 */
1735 fl4.flowi4_oif = 0; 1790 fl4.flowi4_oif = 0;
1736 fl4.flowi4_iif = vrf_master_ifindex_rcu(dev) ? : dev->ifindex; 1791 fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
1737 fl4.flowi4_mark = skb->mark; 1792 fl4.flowi4_mark = skb->mark;
1738 fl4.flowi4_tos = tos; 1793 fl4.flowi4_tos = tos;
1739 fl4.flowi4_scope = RT_SCOPE_UNIVERSE; 1794 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
@@ -1754,7 +1809,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		err = fib_validate_source(skb, saddr, daddr, tos,
 					  0, dev, in_dev, &itag);
 		if (err < 0)
-			goto martian_source_keep_err;
+			goto martian_source;
 		goto local_input;
 	}
 
@@ -1776,7 +1831,7 @@ brd_input:
 		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
 					  in_dev, &itag);
 		if (err < 0)
-			goto martian_source_keep_err;
+			goto martian_source;
 	}
 	flags |= RTCF_BROADCAST;
 	res.type = RTN_BROADCAST;
@@ -1796,26 +1851,18 @@ local_input:
 		}
 	}
 
-	rth = rt_dst_alloc(net->loopback_dev,
+	rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
 	if (!rth)
 		goto e_nobufs;
 
-	rth->dst.input= ip_local_deliver;
 	rth->dst.output= ip_rt_bug;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
 #endif
-
-	rth->rt_genid = rt_genid_ipv4(net);
-	rth->rt_flags = flags|RTCF_LOCAL;
-	rth->rt_type = res.type;
 	rth->rt_is_input = 1;
-	rth->rt_iif = 0;
-	rth->rt_pmtu = 0;
-	rth->rt_gateway = 0;
-	rth->rt_uses_gateway = 0;
-	INIT_LIST_HEAD(&rth->rt_uncached);
+	if (res.table)
+		rth->rt_table_id = res.table->tb_id;
 
 	RT_CACHE_STAT_INC(in_slow_tot);
 	if (res.type == RTN_UNREACHABLE) {
@@ -1837,6 +1884,7 @@ no_route:
 	RT_CACHE_STAT_INC(in_no_route);
 	res.type = RTN_UNREACHABLE;
 	res.fi = NULL;
+	res.table = NULL;
 	goto local_input;
 
 	/*
@@ -1859,8 +1907,6 @@ e_nobufs:
 	goto out;
 
 martian_source:
-	err = -EINVAL;
-martian_source_keep_err:
 	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
 	goto out;
 }
@@ -1988,28 +2034,19 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	}
 
 add:
-	rth = rt_dst_alloc(dev_out,
+	rth = rt_dst_alloc(dev_out, flags, type,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
 			   IN_DEV_CONF_GET(in_dev, NOXFRM),
 			   do_cache);
 	if (!rth)
 		return ERR_PTR(-ENOBUFS);
 
-	rth->dst.output = ip_output;
-
-	rth->rt_genid = rt_genid_ipv4(dev_net(dev_out));
-	rth->rt_flags = flags;
-	rth->rt_type = type;
-	rth->rt_is_input = 0;
 	rth->rt_iif = orig_oif ? : 0;
-	rth->rt_pmtu = 0;
-	rth->rt_gateway = 0;
-	rth->rt_uses_gateway = 0;
-	INIT_LIST_HEAD(&rth->rt_uncached);
+	if (res->table)
+		rth->rt_table_id = res->table->tb_id;
+
 	RT_CACHE_STAT_INC(out_slow_tot);
 
-	if (flags & RTCF_LOCAL)
-		rth->dst.input = ip_local_deliver;
 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
 		if (flags & RTCF_LOCAL &&
 		    !(dev_out->flags & IFF_LOOPBACK)) {
@@ -2038,7 +2075,8 @@ add:
  * Major route resolver routine.
  */
 
-struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
+struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
+					  int mp_hash)
 {
 	struct net_device *dev_out = NULL;
 	__u8 tos = RT_FL_TOS(fl4);
@@ -2137,11 +2175,10 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
 			fl4->saddr = inet_select_addr(dev_out, 0,
 						      RT_SCOPE_HOST);
 		}
-		if (netif_is_vrf(dev_out) &&
-		    !(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) {
-			rth = vrf_dev_get_rth(dev_out);
-			goto out;
-		}
+
+		rth = l3mdev_get_rtable(dev_out, fl4);
+		if (rth)
+			goto out;
 	}
 
 	if (!fl4->daddr) {
@@ -2159,7 +2196,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
 	if (err) {
 		res.fi = NULL;
 		res.table = NULL;
-		if (fl4->flowi4_oif) {
+		if (fl4->flowi4_oif &&
+		    !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
 			/* Apparently, routing tables are wrong. Assume,
 			   that the destination is on link.
 
@@ -2201,18 +2239,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
 		goto make_route;
 	}
 
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
-		fib_select_multipath(&res);
-	else
-#endif
-	if (!res.prefixlen &&
-	    res.table->tb_num_default > 1 &&
-	    res.type == RTN_UNICAST && !fl4->flowi4_oif)
-		fib_select_default(fl4, &res);
-
-	if (!fl4->saddr)
-		fl4->saddr = FIB_RES_PREFSRC(net, res);
+	fib_select_path(net, &res, fl4, mp_hash);
 
 	dev_out = FIB_RES_DEV(res);
 	fl4->flowi4_oif = dev_out->ifindex;
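fib_select_path() folds the three selection steps deleted above (multipath nexthop choice, default-route selection, preferred source) into one helper outside this diff. A sketch of its presumable shape, reconstructed from the removed code (the mp_hash < 0 convention, meaning "compute the hash here", is an assumption consistent with __ip_route_output_key() passing -1):

void fib_select_path(struct net *net, struct fib_result *res,
		     struct flowi4 *fl4, int mp_hash)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
		if (mp_hash < 0)
			mp_hash = fib_multipath_hash(fl4->saddr, fl4->daddr);
		fib_select_multipath(res, mp_hash);
	} else
#endif
	if (!res->prefixlen &&
	    res->table->tb_num_default > 1 &&
	    res->type == RTN_UNICAST && !fl4->flowi4_oif)
		fib_select_default(fl4, res);

	if (!fl4->saddr)
		fl4->saddr = FIB_RES_PREFSRC(net, *res);
}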
@@ -2225,7 +2252,7 @@ out:
 	rcu_read_unlock();
 	return rth;
 }
-EXPORT_SYMBOL_GPL(__ip_route_output_key);
+EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);
 
 static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
 {
@@ -2277,7 +2304,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 
 		new->__use = 1;
 		new->input = dst_discard;
-		new->output = dst_discard_sk;
+		new->output = dst_discard_out;
 
 		new->dev = ort->dst.dev;
 		if (new->dev)
@@ -2303,7 +2330,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 }
 
 struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
-				    struct sock *sk)
+				    const struct sock *sk)
 {
 	struct rtable *rt = __ip_route_output_key(net, flp4);
 
@@ -2319,7 +2346,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
 }
 EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
-static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
+static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
 			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
 			u32 seq, int event, int nowait, unsigned int flags)
 {
@@ -2339,8 +2366,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
 	r->rtm_tos	= fl4->flowi4_tos;
-	r->rtm_table	= RT_TABLE_MAIN;
-	if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
+	r->rtm_table	= table_id;
+	if (nla_put_u32(skb, RTA_TABLE, table_id))
 		goto nla_put_failure;
 	r->rtm_type	= rt->rt_type;
 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
@@ -2445,6 +2472,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
 	int err;
 	int mark;
 	struct sk_buff *skb;
+	u32 table_id = RT_TABLE_MAIN;
 
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
 	if (err < 0)
@@ -2480,6 +2508,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
 	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
 	fl4.flowi4_mark = mark;
 
+	if (netif_index_is_l3_master(net, fl4.flowi4_oif))
+		fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;
+
 	if (iif) {
 		struct net_device *dev;
 
@@ -2514,7 +2545,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
 		if (rtm->rtm_flags & RTM_F_NOTIFY)
 			rt->rt_flags |= RTCF_NOTIFY;
 
-	err = rt_fill_info(net, dst, src, &fl4, skb,
+	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
+		table_id = rt->rt_table_id;
+
+	err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
 			   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
 			   RTM_NEWROUTE, 0, 0);
 	if (err < 0)
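With RTM_F_LOOKUP_TABLE set in the request, userspace gets back the table that actually resolved the route rather than the hardwired RT_TABLE_MAIN. A minimal request fragment (sketch; socket plumbing, the RTA_DST attribute and error handling omitted):

#include <linux/rtnetlink.h>
#include <string.h>
#include <sys/socket.h>

struct rt_req {
	struct nlmsghdr	nh;
	struct rtmsg	rtm;
	char		attrs[128];	/* room for RTA_DST etc. */
};

static void build_get_route(struct rt_req *req)
{
	memset(req, 0, sizeof(*req));
	req->nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req->nh.nlmsg_type = RTM_GETROUTE;
	req->nh.nlmsg_flags = NLM_F_REQUEST;
	req->rtm.rtm_family = AF_INET;
	req->rtm.rtm_flags = RTM_F_LOOKUP_TABLE;	/* report rt_table_id */
}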
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index d70b1f603692..4cbe9f0a4281 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -192,15 +192,11 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
 }
 EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence);
 
-__u32 cookie_v4_init_sequence(struct sock *sk, const struct sk_buff *skb,
-			      __u16 *mssp)
+__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	const struct tcphdr *th = tcp_hdr(skb);
 
-	tcp_synq_overflow(sk);
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
 	return __cookie_v4_init_sequence(iph, th, mssp);
 }
 
@@ -225,10 +221,13 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sock *child;
+	bool own_req;
 
-	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
+	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
+						 NULL, &own_req);
 	if (child) {
 		atomic_set(&req->rsk_refcnt, 1);
+		sock_rps_save_rxhash(child, skb);
 		inet_csk_reqsk_queue_add(sk, req, child);
 	} else {
 		reqsk_free(req);
@@ -288,6 +287,10 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
 }
 EXPORT_SYMBOL(cookie_ecn_ok);
 
+/* On input, sk is a listener.
+ * Output is listener if incoming packet would not create a child
+ *           NULL if memory could not be allocated.
+ */
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
@@ -326,7 +329,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 		goto out;
 
 	ret = NULL;
-	req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */
+	req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
 	if (!req)
 		goto out;
 
@@ -345,7 +348,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
-	treq->snt_synack	= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
+	treq->snt_synack.v64	= 0;
 	treq->tfo_listener	= false;
 
 	ireq->ir_iif = sk->sk_bound_dev_if;
@@ -381,10 +384,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* Try to redo what tcp_v4_send_synack did. */
-	req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+	req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
 
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
-				  &req->rcv_wnd, &req->window_clamp,
+				  &req->rsk_rcv_wnd, &req->rsk_window_clamp,
 				  ireq->wscale_ok, &rcv_wscale,
 				  dst_metric(&rt->dst, RTAX_INITRWND));
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 894da3a70aff..a0bd7a55193e 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -48,14 +48,14 @@ static void set_local_port_range(struct net *net, int range[2])
 {
 	bool same_parity = !((range[0] ^ range[1]) & 1);
 
-	write_seqlock(&net->ipv4.ip_local_ports.lock);
+	write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
 	if (same_parity && !net->ipv4.ip_local_ports.warned) {
 		net->ipv4.ip_local_ports.warned = true;
 		pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
 	}
 	net->ipv4.ip_local_ports.range[0] = range[0];
 	net->ipv4.ip_local_ports.range[1] = range[1];
-	write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
+	write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
 }
 
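The _bh variants matter because the readers of ip_local_ports.lock run in softirq context (ephemeral-port selection), and a seqlock reader that interrupts the writer on its own CPU spins forever waiting for an even sequence count. A minimal sketch of the pairing, with the assumed contexts noted in comments:

static DEFINE_SEQLOCK(ports_lock);	/* stand-in for ip_local_ports.lock */
static int ports[2];

/* Process context (sysctl write): BH stays off across the write section,
 * so a softirq reader cannot preempt us mid-update on this CPU.
 */
static void ports_write(int lo, int hi)
{
	write_seqlock_bh(&ports_lock);
	ports[0] = lo;
	ports[1] = hi;
	write_sequnlock_bh(&ports_lock);
}

/* Softirq context: lockless retry loop. */
static void ports_read(int *lo, int *hi)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&ports_lock);
		*lo = ports[0];
		*hi = ports[1];
	} while (read_seqretry(&ports_lock, seq));
}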
61/* Validate changes from /proc interface. */ 61/* Validate changes from /proc interface. */
@@ -496,6 +496,13 @@ static struct ctl_table ipv4_table[] = {
496 .proc_handler = proc_dointvec 496 .proc_handler = proc_dointvec
497 }, 497 },
498 { 498 {
499 .procname = "tcp_recovery",
500 .data = &sysctl_tcp_recovery,
501 .maxlen = sizeof(int),
502 .mode = 0644,
503 .proc_handler = proc_dointvec,
504 },
505 {
499 .procname = "tcp_reordering", 506 .procname = "tcp_reordering",
500 .data = &sysctl_tcp_reordering, 507 .data = &sysctl_tcp_reordering,
501 .maxlen = sizeof(int), 508 .maxlen = sizeof(int),
@@ -577,6 +584,13 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
+		.procname	= "tcp_min_rtt_wlen",
+		.data		= &sysctl_tcp_min_rtt_wlen,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
 		.procname	= "tcp_low_latency",
 		.data		= &sysctl_tcp_low_latency,
 		.maxlen		= sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b8b8fa184f75..c1728771cf89 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -388,6 +388,7 @@ void tcp_init_sock(struct sock *sk)
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
 	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
+	tp->rtt_min[0].rtt = ~0U;
 
 	/* So many TCP implementations out there (incorrectly) count the
 	 * initial SYN frame in their delayed-ACK and congestion control
@@ -450,11 +451,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	unsigned int mask;
 	struct sock *sk = sock->sk;
 	const struct tcp_sock *tp = tcp_sk(sk);
+	int state;
 
 	sock_rps_record_flow(sk);
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
-	if (sk->sk_state == TCP_LISTEN)
+
+	state = sk_state_load(sk);
+	if (state == TCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 
 	/* Socket is not locked. We are protected from async events
@@ -491,14 +495,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
 	 * blocking on fresh not-connected or disconnected socket. --ANK
 	 */
-	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
+	if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
 		mask |= POLLHUP;
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLIN | POLLRDNORM | POLLRDHUP;
 
 	/* Connected or passive Fast Open socket? */
-	if (sk->sk_state != TCP_SYN_SENT &&
-	    (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk)) {
+	if (state != TCP_SYN_SENT &&
+	    (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
 		int target = sock_rcvlowat(sk, 0, INT_MAX);
 
 		if (tp->urg_seq == tp->copied_seq &&
@@ -506,9 +510,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		    tp->urg_data)
 			target++;
 
-		/* Potential race condition. If read of tp below will
-		 * escape above sk->sk_state, we can be illegally awaken
-		 * in SYN_* states. */
 		if (tp->rcv_nxt - tp->copied_seq >= target)
 			mask |= POLLIN | POLLRDNORM;
 
@@ -900,7 +901,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 	 */
 	if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
 	    !tcp_passive_fastopen(sk)) {
-		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
+		err = sk_stream_wait_connect(sk, &timeo);
+		if (err != 0)
 			goto out_err;
 	}
 
@@ -967,7 +969,8 @@ new_segment:
 
 		copied += copy;
 		offset += copy;
-		if (!(size -= copy)) {
+		size -= copy;
+		if (!size) {
 			tcp_tx_timestamp(sk, skb);
 			goto out;
 		}
@@ -988,7 +991,8 @@ wait_for_memory:
 		tcp_push(sk, flags & ~MSG_MORE, mss_now,
 			 TCP_NAGLE_PUSH, size_goal);
 
-		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+		err = sk_stream_wait_memory(sk, &timeo);
+		if (err != 0)
 			goto do_error;
 
 		mss_now = tcp_send_mss(sk, &size_goal, flags);
@@ -1111,7 +1115,8 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	 */
 	if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
 	    !tcp_passive_fastopen(sk)) {
-		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
+		err = sk_stream_wait_connect(sk, &timeo);
+		if (err != 0)
 			goto do_error;
 	}
 
@@ -1267,7 +1272,8 @@ wait_for_memory:
 		tcp_push(sk, flags & ~MSG_MORE, mss_now,
 			 TCP_NAGLE_PUSH, size_goal);
 
-		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+		err = sk_stream_wait_memory(sk, &timeo);
+		if (err != 0)
 			goto do_error;
 
 		mss_now = tcp_send_mss(sk, &size_goal, flags);
@@ -1767,7 +1773,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 
 			/* __ Restore normal policy in scheduler __ */
 
-			if ((chunk = len - tp->ucopy.len) != 0) {
+			chunk = len - tp->ucopy.len;
+			if (chunk != 0) {
 				NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
 				len -= chunk;
 				copied += chunk;
@@ -1778,7 +1785,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 do_prequeue:
 				tcp_prequeue_process(sk);
 
-				if ((chunk = len - tp->ucopy.len) != 0) {
+				chunk = len - tp->ucopy.len;
+				if (chunk != 0) {
 					NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
 					len -= chunk;
 					copied += chunk;
@@ -1926,7 +1934,7 @@ void tcp_set_state(struct sock *sk, int state)
 	/* Change state AFTER socket is unhashed to avoid closed
 	 * socket sitting in hash tables.
 	 */
-	sk->sk_state = state;
+	sk_state_store(sk, state);
 
 #ifdef STATE_TRACE
 	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
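sk_state_store() pairs with the sk_state_load() now used by tcp_poll(), tcp_get_info() and tcp_diag, so lockless readers observe sk_state exactly once instead of re-reading a field that can change under them. The helpers live in include/net/sock.h, outside this diff; presumably a store-release/load-acquire pair along these lines:

static inline int sk_state_load(const struct sock *sk)
{
	return smp_load_acquire(&sk->sk_state);
}

static inline void sk_state_store(struct sock *sk, int newstate)
{
	smp_store_release(&sk->sk_state, newstate);
}

Caching the loaded value in a local (the new "state" variable in tcp_poll()) is what makes the deleted "potential race condition" comment obsolete.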
@@ -2230,7 +2238,8 @@ int tcp_disconnect(struct sock *sk, int flags)
 	sk->sk_shutdown = 0;
 	sock_reset_flag(sk, SOCK_DONE);
 	tp->srtt_us = 0;
-	if ((tp->write_seq += tp->max_window + 2) == 0)
+	tp->write_seq += tp->max_window + 2;
+	if (tp->write_seq == 0)
 		tp->write_seq = 1;
 	icsk->icsk_backoff = 0;
 	tp->snd_cwnd = 2;
@@ -2253,13 +2262,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 }
 EXPORT_SYMBOL(tcp_disconnect);
 
-void tcp_sock_destruct(struct sock *sk)
-{
-	inet_sock_destruct(sk);
-
-	kfree(inet_csk(sk)->icsk_accept_queue.fastopenq);
-}
-
 static inline bool tcp_can_repair_sock(const struct sock *sk)
 {
 	return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
@@ -2581,7 +2583,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		      TCPF_LISTEN))) {
 			tcp_fastopen_init_key_once(true);
 
-			err = fastopen_init_queue(sk, val);
+			fastopen_queue_tune(sk, val);
 		} else {
 			err = -EINVAL;
 		}
@@ -2642,7 +2644,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	if (sk->sk_type != SOCK_STREAM)
 		return;
 
-	info->tcpi_state = sk->sk_state;
+	info->tcpi_state = sk_state_load(sk);
+
 	info->tcpi_ca_state = icsk->icsk_ca_state;
 	info->tcpi_retransmits = icsk->icsk_retransmits;
 	info->tcpi_probes = icsk->icsk_probes_out;
@@ -2670,7 +2673,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_snd_mss = tp->mss_cache;
 	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
 
-	if (sk->sk_state == TCP_LISTEN) {
+	if (info->tcpi_state == TCP_LISTEN) {
 		info->tcpi_unacked = sk->sk_ack_backlog;
 		info->tcpi_sacked = sk->sk_max_ack_backlog;
 	} else {
@@ -2849,10 +2852,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_FASTOPEN:
-		if (icsk->icsk_accept_queue.fastopenq)
-			val = icsk->icsk_accept_queue.fastopenq->max_qlen;
-		else
-			val = 0;
+		val = icsk->icsk_accept_queue.fastopenq.max_qlen;
 		break;
 
 	case TCP_TIMESTAMP:
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 93c4dc3ab23f..882caa4e72bc 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -173,6 +173,10 @@ out:
 	 */
 	if (ca->get_info)
 		memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+	if (ca->flags & TCP_CONG_NEEDS_ECN)
+		INET_ECN_xmit(sk);
+	else
+		INET_ECN_dontxmit(sk);
 }
 
 void tcp_init_congestion_control(struct sock *sk)
@@ -181,6 +185,10 @@ void tcp_init_congestion_control(struct sock *sk)
 
 	if (icsk->icsk_ca_ops->init)
 		icsk->icsk_ca_ops->init(sk);
+	if (tcp_ca_needs_ecn(sk))
+		INET_ECN_xmit(sk);
+	else
+		INET_ECN_dontxmit(sk);
 }
 
 static void tcp_reinit_congestion_control(struct sock *sk,
@@ -192,8 +200,8 @@ static void tcp_reinit_congestion_control(struct sock *sk,
 	icsk->icsk_ca_ops = ca;
 	icsk->icsk_ca_setsockopt = 1;
 
-	if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
-		icsk->icsk_ca_ops->init(sk);
+	if (sk->sk_state != TCP_CLOSE)
+		tcp_init_congestion_control(sk);
 }
 
 /* Manage refcounts on socket close. */
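Doing the ECN setup in both assignment and init means a module that needs ECT-marked segments gets INET_ECN_xmit() applied on every path that installs it, including setsockopt(TCP_CONGESTION) on an established socket via tcp_reinit_congestion_control(). How a module is presumed to opt in (sketch; callback bodies elided, names hypothetical):

static struct tcp_congestion_ops dctcp_like __read_mostly = {
	.flags		= TCP_CONG_NEEDS_ECN,	/* force ECT on transmitted segments */
	.init		= dctcp_like_init,
	.ssthresh	= dctcp_like_ssthresh,
	.cong_avoid	= dctcp_like_cong_avoid,
	.name		= "dctcp_like",
};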
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 7092a61c4dc8..7e538f71f5fb 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -209,7 +209,7 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
 
 	/* alpha = (1 - g) * alpha + g * F */
 
-	alpha -= alpha >> dctcp_shift_g;
+	alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
 	if (bytes_ecn) {
 		/* If dctcp_shift_g == 1, a 32bit value would overflow
 		 * after 8 Mbytes.
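A worked example of the stall this removes, assuming the default dctcp_shift_g of 4 (g = 1/16): once alpha falls below 1 << 4, the shifted term is 0 and the old update can never make progress, so alpha stays stuck above zero even on an ECN-free path.

u32 alpha = 15;					/* below 1 << dctcp_shift_g */

alpha -= alpha >> 4;				/* old: 15 - 0 = 15, stuck forever */
alpha -= min_not_zero(alpha, alpha >> 4);	/* new: 15 - 15 = 0, alpha can
						 * finally drift to zero */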
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 479f34946177..b31604086edd 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -21,7 +21,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 {
 	struct tcp_info *info = _info;
 
-	if (sk->sk_state == TCP_LISTEN) {
+	if (sk_state_load(sk) == TCP_LISTEN) {
 		r->idiag_rqueue = sk->sk_ack_backlog;
 		r->idiag_wqueue = sk->sk_max_ack_backlog;
 	} else if (sk->sk_type == SOCK_STREAM) {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f9c0fb84e435..55be6ac70cff 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -124,27 +124,29 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
 	return false;
 }
 
-static bool tcp_fastopen_create_child(struct sock *sk,
-				      struct sk_buff *skb,
-				      struct dst_entry *dst,
-				      struct request_sock *req)
+static struct sock *tcp_fastopen_create_child(struct sock *sk,
+					      struct sk_buff *skb,
+					      struct dst_entry *dst,
+					      struct request_sock *req)
 {
 	struct tcp_sock *tp;
 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
 	struct sock *child;
 	u32 end_seq;
+	bool own_req;
 
 	req->num_retrans = 0;
 	req->num_timeout = 0;
 	req->sk = NULL;
 
-	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
+							 NULL, &own_req);
 	if (!child)
-		return false;
+		return NULL;
 
-	spin_lock(&queue->fastopenq->lock);
-	queue->fastopenq->qlen++;
-	spin_unlock(&queue->fastopenq->lock);
+	spin_lock(&queue->fastopenq.lock);
+	queue->fastopenq.qlen++;
+	spin_unlock(&queue->fastopenq.lock);
 
 	/* Initialize the child socket. Have to fix some values to take
 	 * into account the child is a Fast Open socket and is created
@@ -161,15 +163,13 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
 
 	/* Activate the retrans timer so that SYNACK can be retransmitted.
-	 * The request socket is not added to the SYN table of the parent
+	 * The request socket is not added to the ehash
 	 * because it's been added to the accept queue directly.
 	 */
 	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
 				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
 
-	atomic_set(&req->rsk_refcnt, 1);
-	/* Add the child socket directly into the accept queue */
-	inet_csk_reqsk_queue_add(sk, req, child);
+	atomic_set(&req->rsk_refcnt, 2);
 
 	/* Now finish processing the fastopen child socket. */
 	inet_csk(child)->icsk_af_ops->rebuild_header(child);
@@ -178,12 +178,10 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 	tcp_init_metrics(child);
 	tcp_init_buffer_space(child);
 
-	/* Queue the data carried in the SYN packet. We need to first
-	 * bump skb's refcnt because the caller will attempt to free it.
-	 * Note that IPv6 might also have used skb_get() trick
-	 * in tcp_v6_conn_request() to keep this SYN around (treq->pktopts)
-	 * So we need to eventually get a clone of the packet,
-	 * before inserting it in sk_receive_queue.
+	/* Queue the data carried in the SYN packet.
+	 * We used to play tricky games with skb_get().
+	 * With lockless listener, it is a dead end.
+	 * Do not think about it.
 	 *
 	 * XXX (TFO) - we honor a zero-payload TFO request for now,
 	 * (any reason not to?) but no need to queue the skb since
@@ -191,12 +189,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 	 */
 	end_seq = TCP_SKB_CB(skb)->end_seq;
 	if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
-		struct sk_buff *skb2;
-
-		if (unlikely(skb_shared(skb)))
-			skb2 = skb_clone(skb, GFP_ATOMIC);
-		else
-			skb2 = skb_get(skb);
+		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
 		if (likely(skb2)) {
 			skb_dst_drop(skb2);
@@ -214,11 +207,10 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 		}
 	}
 	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
-	sk->sk_data_ready(sk);
-	bh_unlock_sock(child);
-	sock_put(child);
-	WARN_ON(!req->sk);
-	return true;
+	/* tcp_conn_request() is sending the SYNACK,
+	 * and queues the child into listener accept queue.
+	 */
+	return child;
 }
 
 static bool tcp_fastopen_queue_check(struct sock *sk)
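tcp_fastopen_create_child() now hands the initialized child back instead of queueing it and waking the listener itself; with the lockless listener the caller owns those steps. An abbreviated sketch of the presumed caller side in tcp_conn_request() (details and error paths elided):

	fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
	if (fastopen_sk) {
		af_ops->send_synack(fastopen_sk, dst, &fl, req,
				    skb_get_queue_mapping(skb), &foc);
		/* One of the two references from rsk_refcnt = 2 belongs to
		 * the accept queue; the caller drops the other when done.
		 */
		inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
		sk->sk_data_ready(sk);
		bh_unlock_sock(fastopen_sk);
		sock_put(fastopen_sk);
	}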
@@ -235,8 +227,8 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
 	 * between qlen overflow causing Fast Open to be disabled
 	 * temporarily vs a server not supporting Fast Open at all.
 	 */
-	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
-	if (!fastopenq || fastopenq->max_qlen == 0)
+	fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
+	if (fastopenq->max_qlen == 0)
 		return false;
 
 	if (fastopenq->qlen >= fastopenq->max_qlen) {
@@ -261,13 +253,14 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
  * may be updated and return the client in the SYN-ACK later. E.g., Fast Open
  * cookie request (foc->len == 0).
  */
-bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
-		      struct request_sock *req,
-		      struct tcp_fastopen_cookie *foc,
-		      struct dst_entry *dst)
+struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+			      struct request_sock *req,
+			      struct tcp_fastopen_cookie *foc,
+			      struct dst_entry *dst)
 {
 	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+	struct sock *child;
 
 	if (foc->len == 0) /* Client requests a cookie */
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
@@ -276,7 +269,7 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 	      (syn_data || foc->len >= 0) &&
 	      tcp_fastopen_queue_check(sk))) {
 		foc->len = -1;
-		return false;
+		return NULL;
 	}
 
 	if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
@@ -296,11 +289,12 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 	 * data in SYN_RECV state.
 	 */
 fastopen:
-		if (tcp_fastopen_create_child(sk, skb, dst, req)) {
+		child = tcp_fastopen_create_child(sk, skb, dst, req);
+		if (child) {
 			foc->len = -1;
 			NET_INC_STATS_BH(sock_net(sk),
 					 LINUX_MIB_TCPFASTOPENPASSIVE);
-			return true;
+			return child;
 		}
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
 	} else if (foc->len > 0) /* Client presents an invalid cookie */
@@ -308,6 +302,5 @@ fastopen:
 
 	valid_foc.exp = foc->exp;
 	*foc = valid_foc;
-	return false;
+	return NULL;
 }
-EXPORT_SYMBOL(tcp_try_fastopen);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a8f515bb19c4..fdd88c3803a6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -95,6 +95,7 @@ int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly = 2;
+int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
 
 int sysctl_tcp_thin_dupack __read_mostly;
 
@@ -880,6 +881,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 
 	if (metric > 0)
 		tcp_disable_early_retrans(tp);
+	tp->rack.reord = 1;
 }
 
 /* This must be called before lost_out is incremented */
@@ -905,8 +907,7 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
 	}
 }
 
-static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
-					    struct sk_buff *skb)
+void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	tcp_verify_retransmit_hint(tp, skb);
 
@@ -1047,70 +1048,6 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
 	return !before(start_seq, end_seq - tp->max_window);
 }
 
-/* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
- * Event "B". Later note: FACK people cheated me again 8), we have to account
- * for reordering! Ugly, but should help.
- *
- * Search retransmitted skbs from write_queue that were sent when snd_nxt was
- * less than what is now known to be received by the other end (derived from
- * highest SACK block). Also calculate the lowest snd_nxt among the remaining
- * retransmitted skbs to avoid some costly processing per ACKs.
- */
-static void tcp_mark_lost_retrans(struct sock *sk, int *flag)
-{
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-	int cnt = 0;
-	u32 new_low_seq = tp->snd_nxt;
-	u32 received_upto = tcp_highest_sack_seq(tp);
-
-	if (!tcp_is_fack(tp) || !tp->retrans_out ||
-	    !after(received_upto, tp->lost_retrans_low) ||
-	    icsk->icsk_ca_state != TCP_CA_Recovery)
-		return;
-
-	tcp_for_write_queue(skb, sk) {
-		u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
-
-		if (skb == tcp_send_head(sk))
-			break;
-		if (cnt == tp->retrans_out)
-			break;
-		if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
-			continue;
-
-		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
-			continue;
-
-		/* TODO: We would like to get rid of tcp_is_fack(tp) only
-		 * constraint here (see above) but figuring out that at
-		 * least tp->reordering SACK blocks reside between ack_seq
-		 * and received_upto is not easy task to do cheaply with
-		 * the available datastructures.
-		 *
-		 * Whether FACK should check here for tp->reordering segs
-		 * in-between one could argue for either way (it would be
-		 * rather simple to implement as we could count fack_count
-		 * during the walk and do tp->fackets_out - fack_count).
-		 */
-		if (after(received_upto, ack_seq)) {
-			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-			tp->retrans_out -= tcp_skb_pcount(skb);
-			*flag |= FLAG_LOST_RETRANS;
-			tcp_skb_mark_lost_uncond_verify(tp, skb);
-			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
-		} else {
-			if (before(ack_seq, new_low_seq))
-				new_low_seq = ack_seq;
-			cnt += tcp_skb_pcount(skb);
-		}
-	}
-
-	if (tp->retrans_out)
-		tp->lost_retrans_low = new_low_seq;
-}
-
 static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 			    struct tcp_sack_block_wire *sp, int num_sacks,
 			    u32 prior_snd_una)
@@ -1236,6 +1173,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
 		return sacked;
 
 	if (!(sacked & TCPCB_SACKED_ACKED)) {
+		tcp_rack_advance(tp, xmit_time, sacked);
+
 		if (sacked & TCPCB_SACKED_RETRANS) {
 			/* If the segment is not tagged as lost,
 			 * we do not clear RETRANS, believing
@@ -1837,7 +1776,6 @@ advance_sp:
 	    ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
 		tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
 
-	tcp_mark_lost_retrans(sk, &state->flag);
 	tcp_verify_left_out(tp);
 out:
 
@@ -2314,14 +2252,29 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
+static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
+{
+	return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+	       before(tp->rx_opt.rcv_tsecr, when);
+}
+
+/* skb is spurious retransmitted if the returned timestamp echo
+ * reply is prior to the skb transmission time
+ */
+static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
+				     const struct sk_buff *skb)
+{
+	return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
+	       tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
+}
+
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
 static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
 {
 	return !tp->retrans_stamp ||
-	       (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
-		before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
+	       tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
 }
 
 /* Undo procedures. */
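The comparison relies on the usual serial-number arithmetic, so it stays correct when timestamps wrap. A tiny standalone check of the decision tcp_skb_spurious_retrans() encodes (before() restated in userland):

#include <assert.h>
#include <stdint.h>

/* Serial-number "before", as in the kernel's before() macro. */
static int before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	uint32_t retrans_ts = 1000;	/* timestamp of the retransmission */

	assert(before(990, retrans_ts));	/* echoed TS predates the
						 * retransmit: it was spurious */
	assert(!before(1005, retrans_ts));	/* echo after it: genuine */
	assert(before(0xffffff00u, 0x10u));	/* still sane across wraparound */
	return 0;
}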
@@ -2853,6 +2806,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 		}
 	}
 
+	/* Use RACK to detect loss */
+	if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS &&
+	    tcp_rack_mark_lost(sk))
+		flag |= FLAG_LOST_RETRANS;
+
 	/* E. Process state. */
 	switch (icsk->icsk_ca_state) {
 	case TCP_CA_Recovery:
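tcp_rack_mark_lost() itself lands elsewhere in this series. Conceptually, RACK declares a segment lost once some segment transmitted later has already been delivered and more than a reordering window has elapsed, which is what replaces the FACK-based tcp_mark_lost_retrans() removed above. An illustrative restatement (struct and names are hypothetical, not the kernel's):

struct rack {
	u64	mstamp_us;	/* xmit time of most recently delivered skb */
	u32	reo_wnd_us;	/* allowance for reordering, in usec */
	u8	advanced;	/* a later-sent skb was (s)acked */
};

static bool rack_skb_lost(const struct rack *rk, u64 skb_xmit_us)
{
	return rk->advanced &&
	       rk->mstamp_us > skb_xmit_us &&			/* sent earlier... */
	       rk->mstamp_us - skb_xmit_us > rk->reo_wnd_us;	/* ...by more than
								 * the reo window */
}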
@@ -2915,8 +2873,69 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 		tcp_xmit_retransmit_queue(sk);
 }
 
+/* Kathleen Nichols' algorithm for tracking the minimum value of
+ * a data stream over some fixed time interval. (E.g., the minimum
+ * RTT over the past five minutes.) It uses constant space and constant
+ * time per update yet almost always delivers the same minimum as an
+ * implementation that has to keep all the data in the window.
+ *
+ * The algorithm keeps track of the best, 2nd best & 3rd best min
+ * values, maintaining an invariant that the measurement time of the
+ * n'th best >= n-1'th best. It also makes sure that the three values
+ * are widely separated in the time window since that bounds the worse
+ * case error when that data is monotonically increasing over the window.
+ *
+ * Upon getting a new min, we can forget everything earlier because it
+ * has no value - the new min is <= everything else in the window by
+ * definition and it's the most recent. So we restart fresh on every new min
+ * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd
+ * best.
+ */
+static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
+{
+	const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
+	struct rtt_meas *m = tcp_sk(sk)->rtt_min;
+	struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now };
+	u32 elapsed;
+
+	/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
+	if (unlikely(rttm.rtt <= m[0].rtt))
+		m[0] = m[1] = m[2] = rttm;
+	else if (rttm.rtt <= m[1].rtt)
+		m[1] = m[2] = rttm;
+	else if (rttm.rtt <= m[2].rtt)
+		m[2] = rttm;
+
+	elapsed = now - m[0].ts;
+	if (unlikely(elapsed > wlen)) {
+		/* Passed entire window without a new min so make 2nd choice
+		 * the new min & 3rd choice the new 2nd. So forth and so on.
+		 */
+		m[0] = m[1];
+		m[1] = m[2];
+		m[2] = rttm;
+		if (now - m[0].ts > wlen) {
+			m[0] = m[1];
+			m[1] = rttm;
+			if (now - m[0].ts > wlen)
+				m[0] = rttm;
+		}
+	} else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
+		/* Passed a quarter of the window without a new min so
+		 * take 2nd choice from the 2nd quarter of the window.
+		 */
+		m[2] = m[1] = rttm;
+	} else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
+		/* Passed half the window without a new min so take the 3rd
+		 * choice from the last half of the window.
+		 */
+		m[2] = rttm;
+	}
+}
+
 static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
-				      long seq_rtt_us, long sack_rtt_us)
+				      long seq_rtt_us, long sack_rtt_us,
+				      long ca_rtt_us)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
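The filter is easy to trace outside the kernel. A userland restatement of the same update rule (wlen fixed at 10 ticks here; the kernel uses sysctl_tcp_min_rtt_wlen * HZ and tcp_time_stamp):

#include <stdint.h>
#include <stdio.h>

struct rtt_meas { uint32_t rtt, ts; };

static void rtt_min_update(struct rtt_meas m[3], uint32_t rtt_us, uint32_t now)
{
	const uint32_t wlen = 10;
	struct rtt_meas rttm = { rtt_us ? rtt_us : 1, now };
	uint32_t elapsed;

	if (rttm.rtt <= m[0].rtt)		/* new global min: restart fresh */
		m[0] = m[1] = m[2] = rttm;
	else if (rttm.rtt <= m[1].rtt)
		m[1] = m[2] = rttm;
	else if (rttm.rtt <= m[2].rtt)
		m[2] = rttm;

	elapsed = now - m[0].ts;
	if (elapsed > wlen) {			/* best choice aged out */
		m[0] = m[1];
		m[1] = m[2];
		m[2] = rttm;
		if (now - m[0].ts > wlen) {
			m[0] = m[1];
			m[1] = rttm;
			if (now - m[0].ts > wlen)
				m[0] = rttm;
		}
	} else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
		m[2] = m[1] = rttm;
	} else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
		m[2] = rttm;
	}

	printf("now=%2u rtt=%2u -> min=%u\n", now, rtt_us, m[0].rtt);
}

int main(void)
{
	struct rtt_meas m[3] = { { ~0U, 0 }, { ~0U, 0 }, { ~0U, 0 } };

	rtt_min_update(m, 50, 0);	/* min=50: new min resets all three */
	rtt_min_update(m, 60, 3);	/* min=50: 60 becomes 2nd/3rd choice */
	rtt_min_update(m, 55, 6);	/* min=50: 55 displaces the 60s */
	rtt_min_update(m, 70, 11);	/* min=55: window expired, 50 ages out */
	return 0;
}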
@@ -2925,9 +2944,6 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * Karn's algorithm forbids taking RTT if some retransmitted data
 	 * is acked (RFC6298).
 	 */
-	if (flag & FLAG_RETRANS_DATA_ACKED)
-		seq_rtt_us = -1L;
-
 	if (seq_rtt_us < 0)
 		seq_rtt_us = sack_rtt_us;
 
@@ -2939,11 +2955,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 */
 	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 	    flag & FLAG_ACKED)
-		seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr);
-
+		seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp -
+							  tp->rx_opt.rcv_tsecr);
 	if (seq_rtt_us < 0)
 		return false;
 
+	/* ca_rtt_us >= 0 is counting on the invariant that ca_rtt_us is
+	 * always taken together with ACK, SACK, or TS-opts. Any negative
+	 * values will be skipped with the seq_rtt_us < 0 check above.
+	 */
+	tcp_update_rtt_min(sk, ca_rtt_us);
 	tcp_rtt_estimator(sk, seq_rtt_us);
 	tcp_set_rto(sk);
 
@@ -2953,21 +2974,21 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 }
 
 /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
-static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
+void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	long seq_rtt_us = -1L;
+	long rtt_us = -1L;
 
-	if (synack_stamp && !tp->total_retrans)
-		seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp);
-
-	/* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
-	 * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
-	 */
-	if (!tp->srtt_us)
-		tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
+	if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) {
+		struct skb_mstamp now;
+
+		skb_mstamp_get(&now);
+		rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack);
+	}
+
+	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us);
 }
 
+
 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3131,6 +3152,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 		if (sacked & TCPCB_SACKED_ACKED)
 			tp->sacked_out -= acked_pcount;
+		else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb))
+			tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
 		if (sacked & TCPCB_LOST)
 			tp->lost_out -= acked_pcount;
 
@@ -3169,7 +3192,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		flag |= FLAG_SACK_RENEGING;
 
 	skb_mstamp_get(&now);
-	if (likely(first_ackt.v64)) {
+	if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
 		seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
 		ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
 	}
@@ -3178,7 +3201,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
 	}
 
-	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
+	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
+					ca_rtt_us);
 
 	if (flag & FLAG_ACKED) {
 		tcp_rearm_rto(sk);
@@ -5472,7 +5496,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 }
 
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
-					 const struct tcphdr *th, unsigned int len)
+					 const struct tcphdr *th)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -5698,15 +5722,14 @@ reset_and_undo:
  * address independent.
  */
 
-int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
-			  const struct tcphdr *th, unsigned int len)
+int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct request_sock *req;
 	int queued = 0;
 	bool acceptable;
-	u32 synack_stamp;
 
 	tp->rx_opt.saw_tstamp = 0;
 
@@ -5750,7 +5773,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		goto discard;
 
 	case TCP_SYN_SENT:
-		queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
+		queued = tcp_rcv_synsent_state_process(sk, skb, th);
 		if (queued >= 0)
 			return queued;
 
@@ -5785,15 +5808,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (!acceptable)
 			return 1;
 
+		if (!tp->srtt_us)
+			tcp_synack_rtt_meas(sk, req);
+
 		/* Once we leave TCP_SYN_RECV, we no longer need req
 		 * so release it.
 		 */
 		if (req) {
-			synack_stamp = tcp_rsk(req)->snt_synack;
 			tp->total_retrans = req->num_retrans;
 			reqsk_fastopen_remove(sk, req, false);
 		} else {
-			synack_stamp = tp->lsndtime;
 			/* Make sure socket is routed, for correct metrics. */
 			icsk->icsk_af_ops->rebuild_header(sk);
 			tcp_init_congestion_control(sk);
@@ -5816,7 +5840,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
 		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
-		tcp_synack_rtt_meas(sk, synack_stamp);
 
 		if (tp->rx_opt.tstamp_ok)
 			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -6023,11 +6046,11 @@ static void tcp_openreq_init(struct request_sock *req,
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 
-	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
+	req->rsk_rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
 	req->cookie_ts = 0;
 	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
 	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
-	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	skb_mstamp_get(&tcp_rsk(req)->snt_synack);
 	tcp_rsk(req)->last_oow_ack_time = 0;
 	req->mss = rx_opt->mss_clamp;
 	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
@@ -6043,9 +6066,11 @@ static void tcp_openreq_init(struct request_sock *req,
6043} 6066}
6044 6067
6045struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, 6068struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
6046 struct sock *sk_listener) 6069 struct sock *sk_listener,
6070 bool attach_listener)
6047{ 6071{
6048 struct request_sock *req = reqsk_alloc(ops, sk_listener); 6072 struct request_sock *req = reqsk_alloc(ops, sk_listener,
6073 attach_listener);
6049 6074
6050 if (req) { 6075 if (req) {
6051 struct inet_request_sock *ireq = inet_rsk(req); 6076 struct inet_request_sock *ireq = inet_rsk(req);
@@ -6065,13 +6090,13 @@ EXPORT_SYMBOL(inet_reqsk_alloc);
6065/* 6090/*
6066 * Return true if a syncookie should be sent 6091 * Return true if a syncookie should be sent
6067 */ 6092 */
6068static bool tcp_syn_flood_action(struct sock *sk, 6093static bool tcp_syn_flood_action(const struct sock *sk,
6069 const struct sk_buff *skb, 6094 const struct sk_buff *skb,
6070 const char *proto) 6095 const char *proto)
6071{ 6096{
6097 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
6072 const char *msg = "Dropping request"; 6098 const char *msg = "Dropping request";
6073 bool want_cookie = false; 6099 bool want_cookie = false;
6074 struct listen_sock *lopt;
6075 6100
6076#ifdef CONFIG_SYN_COOKIES 6101#ifdef CONFIG_SYN_COOKIES
6077 if (sysctl_tcp_syncookies) { 6102 if (sysctl_tcp_syncookies) {
@@ -6082,12 +6107,12 @@ static bool tcp_syn_flood_action(struct sock *sk,
6082#endif 6107#endif
6083 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); 6108 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
6084 6109
6085 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; 6110 if (!queue->synflood_warned &&
6086 if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { 6111 sysctl_tcp_syncookies != 2 &&
6087 lopt->synflood_warned = 1; 6112 xchg(&queue->synflood_warned, 1) == 0)
6088 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", 6113 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
6089 proto, ntohs(tcp_hdr(skb)->dest), msg); 6114 proto, ntohs(tcp_hdr(skb)->dest), msg);
6090 } 6115
6091 return want_cookie; 6116 return want_cookie;
6092} 6117}
6093 6118
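The rewritten branch above also closes a race: the old code tested and set lopt->synflood_warned non-atomically, so two CPUs could both print the warning. With xchg(), only the CPU that observes the 0 to 1 transition logs. A small userspace sketch of the same one-shot pattern, with GCC's __atomic builtin standing in for the kernel's xchg() (an assumption, since xchg() itself is kernel-only):

#include <stdio.h>

static int warned;  /* 0 = not yet warned */

/* Print the warning at most once, even with concurrent callers:
 * __atomic_exchange_n atomically stores 1 and returns the previous
 * value, so exactly one caller sees 0.
 */
static void warn_once(const char *msg)
{
    if (__atomic_exchange_n(&warned, 1, __ATOMIC_RELAXED) == 0)
        fprintf(stderr, "%s\n", msg);
}

int main(void)
{
    warn_once("Possible SYN flooding");  /* prints */
    warn_once("Possible SYN flooding");  /* silent */
    return 0;
}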
@@ -6112,16 +6137,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6112 const struct tcp_request_sock_ops *af_ops, 6137 const struct tcp_request_sock_ops *af_ops,
6113 struct sock *sk, struct sk_buff *skb) 6138 struct sock *sk, struct sk_buff *skb)
6114{ 6139{
6140 struct tcp_fastopen_cookie foc = { .len = -1 };
6141 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
6115 struct tcp_options_received tmp_opt; 6142 struct tcp_options_received tmp_opt;
6116 struct request_sock *req;
6117 struct tcp_sock *tp = tcp_sk(sk); 6143 struct tcp_sock *tp = tcp_sk(sk);
6144 struct sock *fastopen_sk = NULL;
6118 struct dst_entry *dst = NULL; 6145 struct dst_entry *dst = NULL;
6119 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; 6146 struct request_sock *req;
6120 bool want_cookie = false, fastopen; 6147 bool want_cookie = false;
6121 struct flowi fl; 6148 struct flowi fl;
6122 struct tcp_fastopen_cookie foc = { .len = -1 };
6123 int err;
6124
6125 6149
6126 /* TW buckets are converted to open requests without 6150 /* TW buckets are converted to open requests without
6127 * limitations, they conserve resources and peer is 6151 * limitations, they conserve resources and peer is
@@ -6145,7 +6169,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6145 goto drop; 6169 goto drop;
6146 } 6170 }
6147 6171
6148 req = inet_reqsk_alloc(rsk_ops, sk); 6172 req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
6149 if (!req) 6173 if (!req)
6150 goto drop; 6174 goto drop;
6151 6175
@@ -6228,20 +6252,30 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6228 } 6252 }
6229 6253
6230 tcp_rsk(req)->snt_isn = isn; 6254 tcp_rsk(req)->snt_isn = isn;
6255 tcp_rsk(req)->txhash = net_tx_rndhash();
6231 tcp_openreq_init_rwin(req, sk, dst); 6256 tcp_openreq_init_rwin(req, sk, dst);
6232 fastopen = !want_cookie && 6257 if (!want_cookie) {
6233 tcp_try_fastopen(sk, skb, req, &foc, dst); 6258 tcp_reqsk_record_syn(sk, req, skb);
6234 err = af_ops->send_synack(sk, dst, &fl, req, 6259 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
6235 skb_get_queue_mapping(skb), &foc); 6260 }
6236 if (!fastopen) { 6261 if (fastopen_sk) {
6237 if (err || want_cookie) 6262 af_ops->send_synack(fastopen_sk, dst, &fl, req,
6238 goto drop_and_free; 6263 &foc, false);
6239 6264 /* Add the child socket directly into the accept queue */
6265 inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
6266 sk->sk_data_ready(sk);
6267 bh_unlock_sock(fastopen_sk);
6268 sock_put(fastopen_sk);
6269 } else {
6240 tcp_rsk(req)->tfo_listener = false; 6270 tcp_rsk(req)->tfo_listener = false;
6241 af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 6271 if (!want_cookie)
6272 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
6273 af_ops->send_synack(sk, dst, &fl, req,
6274 &foc, !want_cookie);
6275 if (want_cookie)
6276 goto drop_and_free;
6242 } 6277 }
6243 tcp_reqsk_record_syn(sk, req, skb); 6278 reqsk_put(req);
6244
6245 return 0; 6279 return 0;
6246 6280
6247drop_and_release: 6281drop_and_release:
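To summarize the restructured tail of tcp_conn_request() above: a successful Fast Open creates the child up front, answers from the child, and queues it for accept() immediately; otherwise the SYN-ACK is sent from the listener, and only non-syncookie requests are inserted, with their retransmit timer, into the hash. A condensed control-flow sketch with stand-in types (everything here is illustrative, not kernel API):

#include <stdbool.h>
#include <stdio.h>

struct req  { bool tfo_listener; };
struct sock { const char *name; };

static void finish_conn_request(struct sock *listener, struct req *r,
                                struct sock *fastopen_child, bool want_cookie)
{
    if (fastopen_child) {
        /* SYN-ACK sent from the child; child is accept()-able now. */
        printf("SYN-ACK from child %s; child queued for accept\n",
               fastopen_child->name);
    } else {
        r->tfo_listener = false;
        if (!want_cookie)
            printf("request hashed with SYN-ACK timer armed\n");
        printf("SYN-ACK from listener %s\n", listener->name);
        if (want_cookie)
            printf("stateless syncookie: request freed\n");
    }
}

int main(void)
{
    struct sock l = { "listener" }, c = { "child" };
    struct req r = { true };

    finish_conn_request(&l, &r, &c, false);   /* Fast Open path */
    finish_conn_request(&l, &r, NULL, true);  /* syncookie path */
    return 0;
}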
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 93898e093d4e..ba09016d1bfd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -324,7 +324,6 @@ void tcp_req_err(struct sock *sk, u32 seq)
324 324
325 if (seq != tcp_rsk(req)->snt_isn) { 325 if (seq != tcp_rsk(req)->snt_isn) {
326 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 326 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
327 reqsk_put(req);
328 } else { 327 } else {
329 /* 328 /*
330 * Still in SYN_RECV, just remove it silently. 329 * Still in SYN_RECV, just remove it silently.
@@ -332,9 +331,10 @@ void tcp_req_err(struct sock *sk, u32 seq)
332 * created socket, and POSIX does not want network 331 * created socket, and POSIX does not want network
333 * errors returned from accept(). 332 * errors returned from accept().
334 */ 333 */
335 NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
336 inet_csk_reqsk_queue_drop(req->rsk_listener, req); 334 inet_csk_reqsk_queue_drop(req->rsk_listener, req);
335 NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
337 } 336 }
337 reqsk_put(req);
338} 338}
339EXPORT_SYMBOL(tcp_req_err); 339EXPORT_SYMBOL(tcp_req_err);
340 340
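The tcp_req_err() hunk just above is a refcount fix as much as a cleanup: previously only the out-of-window branch dropped the request reference, while the rewrite takes the put at the common exit so both branches release the lookup's reference exactly once. A toy model of the resulting shape (names hypothetical):

#include <stdio.h>

struct ref { int count; };

static void ref_put(struct ref *r) { r->count--; }

/* Every path through the handler now pairs the caller's hold with
 * exactly one put at the common exit.
 */
static void handle(struct ref *req, int out_of_window)
{
    if (out_of_window)
        printf("count out-of-window ICMP\n");
    else
        printf("silently drop request from queue\n");
    ref_put(req);  /* single, unconditional release */
}

int main(void)
{
    struct ref req = { .count = 1 };

    handle(&req, 0);
    printf("refcount now %d\n", req.count);  /* 0: balanced */
    return 0;
}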
@@ -576,7 +576,7 @@ EXPORT_SYMBOL(tcp_v4_send_check);
576 * Exception: precedence violation. We do not implement it in any case. 576 * Exception: precedence violation. We do not implement it in any case.
577 */ 577 */
578 578
579static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) 579static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
580{ 580{
581 const struct tcphdr *th = tcp_hdr(skb); 581 const struct tcphdr *th = tcp_hdr(skb);
582 struct { 582 struct {
@@ -795,7 +795,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
795 inet_twsk_put(tw); 795 inet_twsk_put(tw);
796} 796}
797 797
798static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, 798static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
799 struct request_sock *req) 799 struct request_sock *req)
800{ 800{
801 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 801 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
@@ -803,7 +803,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
803 */ 803 */
804 tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? 804 tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
805 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 805 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
806 tcp_rsk(req)->rcv_nxt, req->rcv_wnd, 806 tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
807 tcp_time_stamp, 807 tcp_time_stamp,
808 req->ts_recent, 808 req->ts_recent,
809 0, 809 0,
@@ -818,11 +818,11 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
818 * This still operates on a request_sock only, not on a big 818 * This still operates on a request_sock only, not on a big
819 * socket. 819 * socket.
820 */ 820 */
821static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 821static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
822 struct flowi *fl, 822 struct flowi *fl,
823 struct request_sock *req, 823 struct request_sock *req,
824 u16 queue_mapping, 824 struct tcp_fastopen_cookie *foc,
825 struct tcp_fastopen_cookie *foc) 825 bool attach_req)
826{ 826{
827 const struct inet_request_sock *ireq = inet_rsk(req); 827 const struct inet_request_sock *ireq = inet_rsk(req);
828 struct flowi4 fl4; 828 struct flowi4 fl4;
@@ -833,12 +833,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
833 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 833 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
834 return -1; 834 return -1;
835 835
836 skb = tcp_make_synack(sk, dst, req, foc); 836 skb = tcp_make_synack(sk, dst, req, foc, attach_req);
837 837
838 if (skb) { 838 if (skb) {
839 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); 839 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
840 840
841 skb_set_queue_mapping(skb, queue_mapping);
842 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, 841 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
843 ireq->ir_rmt_addr, 842 ireq->ir_rmt_addr,
844 ireq->opt); 843 ireq->opt);
@@ -865,7 +864,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
865 */ 864 */
866 865
867/* Find the Key structure for an address. */ 866/* Find the Key structure for an address. */
868struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, 867struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
869 const union tcp_md5_addr *addr, 868 const union tcp_md5_addr *addr,
870 int family) 869 int family)
871{ 870{
@@ -877,7 +876,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
877 /* caller either holds rcu_read_lock() or socket lock */ 876 /* caller either holds rcu_read_lock() or socket lock */
878 md5sig = rcu_dereference_check(tp->md5sig_info, 877 md5sig = rcu_dereference_check(tp->md5sig_info,
879 sock_owned_by_user(sk) || 878 sock_owned_by_user(sk) ||
880 lockdep_is_held(&sk->sk_lock.slock)); 879 lockdep_is_held((spinlock_t *)&sk->sk_lock.slock));
881 if (!md5sig) 880 if (!md5sig)
882 return NULL; 881 return NULL;
883#if IS_ENABLED(CONFIG_IPV6) 882#if IS_ENABLED(CONFIG_IPV6)
@@ -894,7 +893,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
894} 893}
895EXPORT_SYMBOL(tcp_md5_do_lookup); 894EXPORT_SYMBOL(tcp_md5_do_lookup);
896 895
897struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, 896struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
898 const struct sock *addr_sk) 897 const struct sock *addr_sk)
899{ 898{
900 const union tcp_md5_addr *addr; 899 const union tcp_md5_addr *addr;
@@ -1112,10 +1111,13 @@ clear_hash_noput:
1112} 1111}
1113EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1112EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1114 1113
1114#endif
1115
1115/* Called with rcu_read_lock() */ 1116/* Called with rcu_read_lock() */
1116static bool tcp_v4_inbound_md5_hash(struct sock *sk, 1117static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
1117 const struct sk_buff *skb) 1118 const struct sk_buff *skb)
1118{ 1119{
1120#ifdef CONFIG_TCP_MD5SIG
1119 /* 1121 /*
1120 * This gets called for each TCP segment that arrives 1122 * This gets called for each TCP segment that arrives
1121 * so we want to be efficient. 1123 * so we want to be efficient.
@@ -1165,10 +1167,12 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk,
1165 return true; 1167 return true;
1166 } 1168 }
1167 return false; 1169 return false;
1168}
1169#endif 1170#endif
1171 return false;
1172}
1170 1173
1171static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener, 1174static void tcp_v4_init_req(struct request_sock *req,
1175 const struct sock *sk_listener,
1172 struct sk_buff *skb) 1176 struct sk_buff *skb)
1173{ 1177{
1174 struct inet_request_sock *ireq = inet_rsk(req); 1178 struct inet_request_sock *ireq = inet_rsk(req);
@@ -1179,7 +1183,8 @@ static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener,
1179 ireq->opt = tcp_v4_save_options(skb); 1183 ireq->opt = tcp_v4_save_options(skb);
1180} 1184}
1181 1185
1182static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, 1186static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1187 struct flowi *fl,
1183 const struct request_sock *req, 1188 const struct request_sock *req,
1184 bool *strict) 1189 bool *strict)
1185{ 1190{
@@ -1218,7 +1223,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1218 .route_req = tcp_v4_route_req, 1223 .route_req = tcp_v4_route_req,
1219 .init_seq = tcp_v4_init_sequence, 1224 .init_seq = tcp_v4_init_sequence,
1220 .send_synack = tcp_v4_send_synack, 1225 .send_synack = tcp_v4_send_synack,
1221 .queue_hash_add = inet_csk_reqsk_queue_hash_add,
1222}; 1226};
1223 1227
1224int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 1228int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
@@ -1241,9 +1245,11 @@ EXPORT_SYMBOL(tcp_v4_conn_request);
1241 * The three way handshake has completed - we got a valid synack - 1245 * The three way handshake has completed - we got a valid synack -
1242 * now create the new socket. 1246 * now create the new socket.
1243 */ 1247 */
1244struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 1248struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1245 struct request_sock *req, 1249 struct request_sock *req,
1246 struct dst_entry *dst) 1250 struct dst_entry *dst,
1251 struct request_sock *req_unhash,
1252 bool *own_req)
1247{ 1253{
1248 struct inet_request_sock *ireq; 1254 struct inet_request_sock *ireq;
1249 struct inet_sock *newinet; 1255 struct inet_sock *newinet;
@@ -1277,7 +1283,6 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1277 newinet->mc_ttl = ip_hdr(skb)->ttl; 1283 newinet->mc_ttl = ip_hdr(skb)->ttl;
1278 newinet->rcv_tos = ip_hdr(skb)->tos; 1284 newinet->rcv_tos = ip_hdr(skb)->tos;
1279 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1285 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1280 sk_set_txhash(newsk);
1281 if (inet_opt) 1286 if (inet_opt)
1282 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 1287 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1283 newinet->inet_id = newtp->write_seq ^ jiffies; 1288 newinet->inet_id = newtp->write_seq ^ jiffies;
@@ -1320,7 +1325,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1320 1325
1321 if (__inet_inherit_port(sk, newsk) < 0) 1326 if (__inet_inherit_port(sk, newsk) < 0)
1322 goto put_and_exit; 1327 goto put_and_exit;
1323 __inet_hash_nolisten(newsk, NULL); 1328 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1329 if (*own_req)
1330 tcp_move_syn(newtp, req);
1324 1331
1325 return newsk; 1332 return newsk;
1326 1333
@@ -1338,34 +1345,11 @@ put_and_exit:
1338} 1345}
1339EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 1346EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1340 1347
1341static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 1348static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
1342{ 1349{
1350#ifdef CONFIG_SYN_COOKIES
1343 const struct tcphdr *th = tcp_hdr(skb); 1351 const struct tcphdr *th = tcp_hdr(skb);
1344 const struct iphdr *iph = ip_hdr(skb);
1345 struct request_sock *req;
1346 struct sock *nsk;
1347
1348 req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
1349 if (req) {
1350 nsk = tcp_check_req(sk, skb, req, false);
1351 if (!nsk || nsk == sk)
1352 reqsk_put(req);
1353 return nsk;
1354 }
1355
1356 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1357 th->source, iph->daddr, th->dest, inet_iif(skb));
1358
1359 if (nsk) {
1360 if (nsk->sk_state != TCP_TIME_WAIT) {
1361 bh_lock_sock(nsk);
1362 return nsk;
1363 }
1364 inet_twsk_put(inet_twsk(nsk));
1365 return NULL;
1366 }
1367 1352
1368#ifdef CONFIG_SYN_COOKIES
1369 if (!th->syn) 1353 if (!th->syn)
1370 sk = cookie_v4_check(sk, skb); 1354 sk = cookie_v4_check(sk, skb);
1371#endif 1355#endif
@@ -1373,7 +1357,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1373} 1357}
1374 1358
1375/* The socket must have its spinlock held when we get 1359
1376 * here. 1360 * here, unless it is a TCP_LISTEN socket.
1377 * 1361 *
1378 * We have a potential double-lock case here, so even when 1362 * We have a potential double-lock case here, so even when
1379 * doing backlog processing we use the BH locking scheme. 1363 * doing backlog processing we use the BH locking scheme.
@@ -1404,13 +1388,13 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1404 goto csum_err; 1388 goto csum_err;
1405 1389
1406 if (sk->sk_state == TCP_LISTEN) { 1390 if (sk->sk_state == TCP_LISTEN) {
1407 struct sock *nsk = tcp_v4_hnd_req(sk, skb); 1391 struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1392
1408 if (!nsk) 1393 if (!nsk)
1409 goto discard; 1394 goto discard;
1410
1411 if (nsk != sk) { 1395 if (nsk != sk) {
1412 sock_rps_save_rxhash(nsk, skb); 1396 sock_rps_save_rxhash(nsk, skb);
1413 sk_mark_napi_id(sk, skb); 1397 sk_mark_napi_id(nsk, skb);
1414 if (tcp_child_process(sk, nsk, skb)) { 1398 if (tcp_child_process(sk, nsk, skb)) {
1415 rsk = nsk; 1399 rsk = nsk;
1416 goto reset; 1400 goto reset;
@@ -1420,7 +1404,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1420 } else 1404 } else
1421 sock_rps_save_rxhash(sk, skb); 1405 sock_rps_save_rxhash(sk, skb);
1422 1406
1423 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { 1407 if (tcp_rcv_state_process(sk, skb)) {
1424 rsk = sk; 1408 rsk = sk;
1425 goto reset; 1409 goto reset;
1426 } 1410 }
@@ -1590,6 +1574,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
1590 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); 1574 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1591 TCP_SKB_CB(skb)->sacked = 0; 1575 TCP_SKB_CB(skb)->sacked = 0;
1592 1576
1577lookup:
1593 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 1578 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1594 if (!sk) 1579 if (!sk)
1595 goto no_tcp_socket; 1580 goto no_tcp_socket;
@@ -1598,6 +1583,33 @@ process:
1598 if (sk->sk_state == TCP_TIME_WAIT) 1583 if (sk->sk_state == TCP_TIME_WAIT)
1599 goto do_time_wait; 1584 goto do_time_wait;
1600 1585
1586 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1587 struct request_sock *req = inet_reqsk(sk);
1588 struct sock *nsk = NULL;
1589
1590 sk = req->rsk_listener;
1591 if (tcp_v4_inbound_md5_hash(sk, skb))
1592 goto discard_and_relse;
1593 if (likely(sk->sk_state == TCP_LISTEN)) {
1594 nsk = tcp_check_req(sk, skb, req, false);
1595 } else {
1596 inet_csk_reqsk_queue_drop_and_put(sk, req);
1597 goto lookup;
1598 }
1599 if (!nsk) {
1600 reqsk_put(req);
1601 goto discard_it;
1602 }
1603 if (nsk == sk) {
1604 sock_hold(sk);
1605 reqsk_put(req);
1606 } else if (tcp_child_process(sk, nsk, skb)) {
1607 tcp_v4_send_reset(nsk, skb);
1608 goto discard_it;
1609 } else {
1610 return 0;
1611 }
1612 }
1601 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 1613 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1602 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1614 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1603 goto discard_and_relse; 1615 goto discard_and_relse;
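The new TCP_NEW_SYN_RECV block is the receive-side half of the lockless-listener work: the socket lookup can now return a request socket directly, and tcp_v4_rcv() resolves it to its listener, runs the MD5 check, and either completes the handshake through tcp_check_req() or, if the listener is no longer in TCP_LISTEN, drops the request and redoes the lookup. A condensed sketch of that dispatch with userspace stand-ins (not kernel types):

#include <stdio.h>

enum state { TCP_LISTEN, TCP_NEW_SYN_RECV, TCP_ESTABLISHED };

struct sk { enum state st; struct sk *listener; };

static const char *dispatch(struct sk *s)
{
    if (s->st != TCP_NEW_SYN_RECV)
        return "normal receive path";

    if (s->listener->st != TCP_LISTEN)
        return "listener changed state: drop request, redo lookup";

    /* tcp_check_req() runs here with three outcomes: drop the
     * segment, keep processing on the listener, or hand the skb
     * to the newly created child.
     */
    return "tcp_check_req(): drop, listener, or child";
}

int main(void)
{
    struct sk listener = { TCP_LISTEN, NULL };
    struct sk req = { TCP_NEW_SYN_RECV, &listener };

    puts(dispatch(&req));
    listener.st = TCP_ESTABLISHED;  /* listener closed or reused */
    puts(dispatch(&req));
    return 0;
}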
@@ -1606,25 +1618,23 @@ process:
1606 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 1618 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1607 goto discard_and_relse; 1619 goto discard_and_relse;
1608 1620
1609#ifdef CONFIG_TCP_MD5SIG
1610 /*
1611 * We really want to reject the packet as early as possible
1612 * if:
1613 * o We're expecting an MD5'd packet and this is no MD5 tcp option
1614 * o There is an MD5 option and we're not expecting one
1615 */
1616 if (tcp_v4_inbound_md5_hash(sk, skb)) 1621 if (tcp_v4_inbound_md5_hash(sk, skb))
1617 goto discard_and_relse; 1622 goto discard_and_relse;
1618#endif
1619 1623
1620 nf_reset(skb); 1624 nf_reset(skb);
1621 1625
1622 if (sk_filter(sk, skb)) 1626 if (sk_filter(sk, skb))
1623 goto discard_and_relse; 1627 goto discard_and_relse;
1624 1628
1625 sk_incoming_cpu_update(sk);
1626 skb->dev = NULL; 1629 skb->dev = NULL;
1627 1630
1631 if (sk->sk_state == TCP_LISTEN) {
1632 ret = tcp_v4_do_rcv(sk, skb);
1633 goto put_and_return;
1634 }
1635
1636 sk_incoming_cpu_update(sk);
1637
1628 bh_lock_sock_nested(sk); 1638 bh_lock_sock_nested(sk);
1629 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); 1639 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
1630 ret = 0; 1640 ret = 0;
@@ -1639,6 +1649,7 @@ process:
1639 } 1649 }
1640 bh_unlock_sock(sk); 1650 bh_unlock_sock(sk);
1641 1651
1652put_and_return:
1642 sock_put(sk); 1653 sock_put(sk);
1643 1654
1644 return ret; 1655 return ret;
@@ -1833,35 +1844,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
1833 ++st->num; 1844 ++st->num;
1834 ++st->offset; 1845 ++st->offset;
1835 1846
1836 if (st->state == TCP_SEQ_STATE_OPENREQ) { 1847 sk = sk_nulls_next(sk);
1837 struct request_sock *req = cur;
1838
1839 icsk = inet_csk(st->syn_wait_sk);
1840 req = req->dl_next;
1841 while (1) {
1842 while (req) {
1843 if (req->rsk_ops->family == st->family) {
1844 cur = req;
1845 goto out;
1846 }
1847 req = req->dl_next;
1848 }
1849 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1850 break;
1851get_req:
1852 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1853 }
1854 sk = sk_nulls_next(st->syn_wait_sk);
1855 st->state = TCP_SEQ_STATE_LISTENING;
1856 spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1857 } else {
1858 icsk = inet_csk(sk);
1859 spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1860 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1861 goto start_req;
1862 spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1863 sk = sk_nulls_next(sk);
1864 }
1865get_sk: 1848get_sk:
1866 sk_nulls_for_each_from(sk, node) { 1849 sk_nulls_for_each_from(sk, node) {
1867 if (!net_eq(sock_net(sk), net)) 1850 if (!net_eq(sock_net(sk), net))
@@ -1871,16 +1854,6 @@ get_sk:
1871 goto out; 1854 goto out;
1872 } 1855 }
1873 icsk = inet_csk(sk); 1856 icsk = inet_csk(sk);
1874 spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1875 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1876start_req:
1877 st->uid = sock_i_uid(sk);
1878 st->syn_wait_sk = sk;
1879 st->state = TCP_SEQ_STATE_OPENREQ;
1880 st->sbucket = 0;
1881 goto get_req;
1882 }
1883 spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1884 } 1857 }
1885 spin_unlock_bh(&ilb->lock); 1858 spin_unlock_bh(&ilb->lock);
1886 st->offset = 0; 1859 st->offset = 0;
@@ -2012,7 +1985,6 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
2012 void *rc = NULL; 1985 void *rc = NULL;
2013 1986
2014 switch (st->state) { 1987 switch (st->state) {
2015 case TCP_SEQ_STATE_OPENREQ:
2016 case TCP_SEQ_STATE_LISTENING: 1988 case TCP_SEQ_STATE_LISTENING:
2017 if (st->bucket >= INET_LHTABLE_SIZE) 1989 if (st->bucket >= INET_LHTABLE_SIZE)
2018 break; 1990 break;
@@ -2071,7 +2043,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2071 } 2043 }
2072 2044
2073 switch (st->state) { 2045 switch (st->state) {
2074 case TCP_SEQ_STATE_OPENREQ:
2075 case TCP_SEQ_STATE_LISTENING: 2046 case TCP_SEQ_STATE_LISTENING:
2076 rc = listening_get_next(seq, v); 2047 rc = listening_get_next(seq, v);
2077 if (!rc) { 2048 if (!rc) {
@@ -2096,11 +2067,6 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2096 struct tcp_iter_state *st = seq->private; 2067 struct tcp_iter_state *st = seq->private;
2097 2068
2098 switch (st->state) { 2069 switch (st->state) {
2099 case TCP_SEQ_STATE_OPENREQ:
2100 if (v) {
2101 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2102 spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2103 }
2104 case TCP_SEQ_STATE_LISTENING: 2070 case TCP_SEQ_STATE_LISTENING:
2105 if (v != SEQ_START_TOKEN) 2071 if (v != SEQ_START_TOKEN)
2106 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); 2072 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
@@ -2154,7 +2120,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2154EXPORT_SYMBOL(tcp_proc_unregister); 2120EXPORT_SYMBOL(tcp_proc_unregister);
2155 2121
2156static void get_openreq4(const struct request_sock *req, 2122static void get_openreq4(const struct request_sock *req,
2157 struct seq_file *f, int i, kuid_t uid) 2123 struct seq_file *f, int i)
2158{ 2124{
2159 const struct inet_request_sock *ireq = inet_rsk(req); 2125 const struct inet_request_sock *ireq = inet_rsk(req);
2160 long delta = req->rsk_timer.expires - jiffies; 2126 long delta = req->rsk_timer.expires - jiffies;
@@ -2171,7 +2137,8 @@ static void get_openreq4(const struct request_sock *req,
2171 1, /* timers active (only the expire timer) */ 2137 1, /* timers active (only the expire timer) */
2172 jiffies_delta_to_clock_t(delta), 2138 jiffies_delta_to_clock_t(delta),
2173 req->num_timeout, 2139 req->num_timeout,
2174 from_kuid_munged(seq_user_ns(f), uid), 2140 from_kuid_munged(seq_user_ns(f),
2141 sock_i_uid(req->rsk_listener)),
2175 0, /* non standard timer */ 2142 0, /* non standard timer */
2176 0, /* open_requests have no inode */ 2143 0, /* open_requests have no inode */
2177 0, 2144 0,
@@ -2185,12 +2152,13 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2185 const struct tcp_sock *tp = tcp_sk(sk); 2152 const struct tcp_sock *tp = tcp_sk(sk);
2186 const struct inet_connection_sock *icsk = inet_csk(sk); 2153 const struct inet_connection_sock *icsk = inet_csk(sk);
2187 const struct inet_sock *inet = inet_sk(sk); 2154 const struct inet_sock *inet = inet_sk(sk);
2188 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; 2155 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2189 __be32 dest = inet->inet_daddr; 2156 __be32 dest = inet->inet_daddr;
2190 __be32 src = inet->inet_rcv_saddr; 2157 __be32 src = inet->inet_rcv_saddr;
2191 __u16 destp = ntohs(inet->inet_dport); 2158 __u16 destp = ntohs(inet->inet_dport);
2192 __u16 srcp = ntohs(inet->inet_sport); 2159 __u16 srcp = ntohs(inet->inet_sport);
2193 int rx_queue; 2160 int rx_queue;
2161 int state;
2194 2162
2195 if (icsk->icsk_pending == ICSK_TIME_RETRANS || 2163 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2196 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || 2164 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
@@ -2208,17 +2176,18 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2208 timer_expires = jiffies; 2176 timer_expires = jiffies;
2209 } 2177 }
2210 2178
2211 if (sk->sk_state == TCP_LISTEN) 2179 state = sk_state_load(sk);
2180 if (state == TCP_LISTEN)
2212 rx_queue = sk->sk_ack_backlog; 2181 rx_queue = sk->sk_ack_backlog;
2213 else 2182 else
2214 /* 2183 /* Because we don't lock the socket,
2215 * because we dont lock socket, we might find a transient negative value 2184 * we might find a transient negative value.
2216 */ 2185 */
2217 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 2186 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2218 2187
2219 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2188 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2220 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", 2189 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2221 i, src, srcp, dest, destp, sk->sk_state, 2190 i, src, srcp, dest, destp, state,
2222 tp->write_seq - tp->snd_una, 2191 tp->write_seq - tp->snd_una,
2223 rx_queue, 2192 rx_queue,
2224 timer_active, 2193 timer_active,
@@ -2232,8 +2201,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2232 jiffies_to_clock_t(icsk->icsk_ack.ato), 2201 jiffies_to_clock_t(icsk->icsk_ack.ato),
2233 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 2202 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2234 tp->snd_cwnd, 2203 tp->snd_cwnd,
2235 sk->sk_state == TCP_LISTEN ? 2204 state == TCP_LISTEN ?
2236 (fastopenq ? fastopenq->max_qlen : 0) : 2205 fastopenq->max_qlen :
2237 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); 2206 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2238} 2207}
2239 2208
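The /proc reporting hunk above reads the socket state once through sk_state_load() and reuses that snapshot for the state column, the rx_queue choice, and the slow-start column, so a socket leaving TCP_LISTEN mid-dump cannot be reported half one way and half the other. A sketch of the snapshot-then-branch pattern, with an acquire load standing in for sk_state_load() (an assumption about its semantics based on the usage here):

#include <stdio.h>

enum { ST_LISTEN = 1, ST_ESTABLISHED = 2 };

struct snap_sock { int state; int ack_backlog; int rcv_nxt, copied_seq; };

static int state_load(const int *state)
{
    return __atomic_load_n(state, __ATOMIC_ACQUIRE);
}

static void report(const struct snap_sock *s)
{
    int state = state_load(&s->state);  /* one read... */
    int rx_queue;

    /* ...and every decision below uses the same snapshot. */
    if (state == ST_LISTEN)
        rx_queue = s->ack_backlog;
    else
        rx_queue = s->rcv_nxt - s->copied_seq > 0 ?
                   s->rcv_nxt - s->copied_seq : 0;
    printf("state=%d rx_queue=%d\n", state, rx_queue);
}

int main(void)
{
    struct snap_sock s = { ST_LISTEN, 3, 0, 0 };

    report(&s);
    return 0;
}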
@@ -2272,18 +2241,12 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
2272 } 2241 }
2273 st = seq->private; 2242 st = seq->private;
2274 2243
2275 switch (st->state) { 2244 if (sk->sk_state == TCP_TIME_WAIT)
2276 case TCP_SEQ_STATE_LISTENING: 2245 get_timewait4_sock(v, seq, st->num);
2277 case TCP_SEQ_STATE_ESTABLISHED: 2246 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2278 if (sk->sk_state == TCP_TIME_WAIT) 2247 get_openreq4(v, seq, st->num);
2279 get_timewait4_sock(v, seq, st->num); 2248 else
2280 else 2249 get_tcp4_sock(v, seq, st->num);
2281 get_tcp4_sock(v, seq, st->num);
2282 break;
2283 case TCP_SEQ_STATE_OPENREQ:
2284 get_openreq4(v, seq, st->num, st->uid);
2285 break;
2286 }
2287out: 2250out:
2288 seq_pad(seq, '\n'); 2251 seq_pad(seq, '\n');
2289 return 0; 2252 return 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index def765911ff8..ac6b1961ffeb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -361,30 +361,38 @@ void tcp_twsk_destructor(struct sock *sk)
361} 361}
362EXPORT_SYMBOL_GPL(tcp_twsk_destructor); 362EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
363 363
364/* Warning: This function is called without sk_listener being locked.
365 * Be sure to read socket fields once, as their values could change under us.
366 */
364void tcp_openreq_init_rwin(struct request_sock *req, 367void tcp_openreq_init_rwin(struct request_sock *req,
365 struct sock *sk, struct dst_entry *dst) 368 const struct sock *sk_listener,
369 const struct dst_entry *dst)
366{ 370{
367 struct inet_request_sock *ireq = inet_rsk(req); 371 struct inet_request_sock *ireq = inet_rsk(req);
368 struct tcp_sock *tp = tcp_sk(sk); 372 const struct tcp_sock *tp = tcp_sk(sk_listener);
369 __u8 rcv_wscale; 373 u16 user_mss = READ_ONCE(tp->rx_opt.user_mss);
374 int full_space = tcp_full_space(sk_listener);
370 int mss = dst_metric_advmss(dst); 375 int mss = dst_metric_advmss(dst);
376 u32 window_clamp;
377 __u8 rcv_wscale;
371 378
372 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) 379 if (user_mss && user_mss < mss)
373 mss = tp->rx_opt.user_mss; 380 mss = user_mss;
374 381
382 window_clamp = READ_ONCE(tp->window_clamp);
375 /* Set this up on the first call only */ 383 /* Set this up on the first call only */
376 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); 384 req->rsk_window_clamp = window_clamp ? : dst_metric(dst, RTAX_WINDOW);
377 385
378 /* limit the window selection if the user enforces a smaller rx buffer */ 386
379 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && 387 if (sk_listener->sk_userlocks & SOCK_RCVBUF_LOCK &&
380 (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) 388 (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
381 req->window_clamp = tcp_full_space(sk); 389 req->rsk_window_clamp = full_space;
382 390
383 /* tcp_full_space because it is guaranteed to be the first packet */ 391 /* tcp_full_space because it is guaranteed to be the first packet */
384 tcp_select_initial_window(tcp_full_space(sk), 392 tcp_select_initial_window(full_space,
385 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), 393 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
386 &req->rcv_wnd, 394 &req->rsk_rcv_wnd,
387 &req->window_clamp, 395 &req->rsk_window_clamp,
388 ireq->wscale_ok, 396 ireq->wscale_ok,
389 &rcv_wscale, 397 &rcv_wscale,
390 dst_metric(dst, RTAX_INITRWND)); 398 dst_metric(dst, RTAX_INITRWND));
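The rwin hunk works because every listener field is read exactly once: user_mss and window_clamp are snapshotted with READ_ONCE() since a concurrent setsockopt() on the unlocked listener could change them between the test and the use. A userspace sketch of that single-read discipline, with a volatile access standing in for READ_ONCE() (an approximation of the kernel macro):

#include <stdio.h>

#define READ_ONCE_INT(x) (*(volatile int *)&(x))

static int shared_user_mss = 1400;  /* may change concurrently */

static void build_request(void)
{
    /* Snapshot once; the comparison and the assignment must see the
     * same value or the clamp could be applied inconsistently.
     */
    int user_mss = READ_ONCE_INT(shared_user_mss);
    int mss = 1460;                 /* from route metrics */

    if (user_mss && user_mss < mss)
        mss = user_mss;
    printf("advertised mss = %d\n", mss);
}

int main(void)
{
    build_request();
    return 0;
}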
@@ -433,7 +441,9 @@ EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
433 * Actually, we could save lots of memory writes here. tp of listening 441
434 * socket contains all necessary default parameters. 442 * socket contains all necessary default parameters.
435 */ 443 */
436struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) 444struct sock *tcp_create_openreq_child(const struct sock *sk,
445 struct request_sock *req,
446 struct sk_buff *skb)
437{ 447{
438 struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); 448 struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
439 449
@@ -460,6 +470,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
460 470
461 newtp->srtt_us = 0; 471 newtp->srtt_us = 0;
462 newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); 472 newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
473 newtp->rtt_min[0].rtt = ~0U;
463 newicsk->icsk_rto = TCP_TIMEOUT_INIT; 474 newicsk->icsk_rto = TCP_TIMEOUT_INIT;
464 475
465 newtp->packets_out = 0; 476 newtp->packets_out = 0;
@@ -469,7 +480,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
469 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 480 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
470 tcp_enable_early_retrans(newtp); 481 tcp_enable_early_retrans(newtp);
471 newtp->tlp_high_seq = 0; 482 newtp->tlp_high_seq = 0;
472 newtp->lsndtime = treq->snt_synack; 483 newtp->lsndtime = treq->snt_synack.stamp_jiffies;
484 newsk->sk_txhash = treq->txhash;
473 newtp->last_oow_ack_time = 0; 485 newtp->last_oow_ack_time = 0;
474 newtp->total_retrans = req->num_retrans; 486 newtp->total_retrans = req->num_retrans;
475 487
@@ -501,9 +513,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
501 if (sysctl_tcp_fack) 513 if (sysctl_tcp_fack)
502 tcp_enable_fack(newtp); 514 tcp_enable_fack(newtp);
503 } 515 }
504 newtp->window_clamp = req->window_clamp; 516 newtp->window_clamp = req->rsk_window_clamp;
505 newtp->rcv_ssthresh = req->rcv_wnd; 517 newtp->rcv_ssthresh = req->rsk_rcv_wnd;
506 newtp->rcv_wnd = req->rcv_wnd; 518 newtp->rcv_wnd = req->rsk_rcv_wnd;
507 newtp->rx_opt.wscale_ok = ireq->wscale_ok; 519 newtp->rx_opt.wscale_ok = ireq->wscale_ok;
508 if (newtp->rx_opt.wscale_ok) { 520 if (newtp->rx_opt.wscale_ok) {
509 newtp->rx_opt.snd_wscale = ireq->snd_wscale; 521 newtp->rx_opt.snd_wscale = ireq->snd_wscale;
@@ -536,9 +548,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
536 tcp_ecn_openreq_child(newtp, req); 548 tcp_ecn_openreq_child(newtp, req);
537 newtp->fastopen_rsk = NULL; 549 newtp->fastopen_rsk = NULL;
538 newtp->syn_data_acked = 0; 550 newtp->syn_data_acked = 0;
539 551 newtp->rack.mstamp.v64 = 0;
540 newtp->saved_syn = req->saved_syn; 552 newtp->rack.advanced = 0;
541 req->saved_syn = NULL;
542 553
543 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); 554 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
544 } 555 }
@@ -566,8 +577,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
566 const struct tcphdr *th = tcp_hdr(skb); 577 const struct tcphdr *th = tcp_hdr(skb);
567 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); 578 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
568 bool paws_reject = false; 579 bool paws_reject = false;
569 580 bool own_req;
570 BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN));
571 581
572 tmp_opt.saw_tstamp = 0; 582 tmp_opt.saw_tstamp = 0;
573 if (th->doff > (sizeof(struct tcphdr)>>2)) { 583 if (th->doff > (sizeof(struct tcphdr)>>2)) {
@@ -698,7 +708,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
698 /* RFC793: "first check sequence number". */ 708 /* RFC793: "first check sequence number". */
699 709
700 if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, 710 if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
701 tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) { 711 tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) {
702 /* Out of window: send ACK and drop. */ 712 /* Out of window: send ACK and drop. */
703 if (!(flg & TCP_FLAG_RST)) 713 if (!(flg & TCP_FLAG_RST))
704 req->rsk_ops->send_ack(sk, skb, req); 714 req->rsk_ops->send_ack(sk, skb, req);
@@ -755,16 +765,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
755 * ESTABLISHED STATE. If it gets dropped after the 765
756 * socket is created, expect trouble. 766
757 */ 767 */
758 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); 768 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
769 req, &own_req);
759 if (!child) 770 if (!child)
760 goto listen_overflow; 771 goto listen_overflow;
761 772
762 inet_csk_reqsk_queue_drop(sk, req); 773 sock_rps_save_rxhash(child, skb);
763 inet_csk_reqsk_queue_add(sk, req, child); 774 tcp_synack_rtt_meas(child, req);
764 /* Warning: caller must not call reqsk_put(req); 775 return inet_csk_complete_hashdance(sk, child, req, own_req);
765 * child stole last reference on it.
766 */
767 return child;
768 776
769listen_overflow: 777listen_overflow:
770 if (!sysctl_tcp_abort_on_overflow) { 778 if (!sysctl_tcp_abort_on_overflow) {
@@ -811,8 +819,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
811 int state = child->sk_state; 819 int state = child->sk_state;
812 820
813 if (!sock_owned_by_user(child)) { 821 if (!sock_owned_by_user(child)) {
814 ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb), 822 ret = tcp_rcv_state_process(child, skb);
815 skb->len);
816 /* Wakeup parent, send SIGIO */ 823 /* Wakeup parent, send SIGIO */
817 if (state == TCP_SYN_RECV && child->sk_state != state) 824 if (state == TCP_SYN_RECV && child->sk_state != state)
818 parent->sk_data_ready(parent); 825 parent->sk_data_ready(parent);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1100ffe4a722..cb7ca569052c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -357,14 +357,10 @@ static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
357} 357}
358 358
359static void 359static void
360tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, 360tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
361 struct sock *sk)
362{ 361{
363 if (inet_rsk(req)->ecn_ok) { 362 if (inet_rsk(req)->ecn_ok)
364 th->ece = 1; 363 th->ece = 1;
365 if (tcp_ca_needs_ecn(sk))
366 INET_ECN_xmit(sk);
367 }
368} 364}
369 365
370/* Set up ECN state for a packet on a ESTABLISHED socket that is about to 366/* Set up ECN state for a packet on a ESTABLISHED socket that is about to
@@ -612,12 +608,11 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
612} 608}
613 609
614/* Set up TCP options for SYN-ACKs. */ 610/* Set up TCP options for SYN-ACKs. */
615static unsigned int tcp_synack_options(struct sock *sk, 611static unsigned int tcp_synack_options(struct request_sock *req,
616 struct request_sock *req, 612 unsigned int mss, struct sk_buff *skb,
617 unsigned int mss, struct sk_buff *skb, 613 struct tcp_out_options *opts,
618 struct tcp_out_options *opts, 614 const struct tcp_md5sig_key *md5,
619 const struct tcp_md5sig_key *md5, 615 struct tcp_fastopen_cookie *foc)
620 struct tcp_fastopen_cookie *foc)
621{ 616{
622 struct inet_request_sock *ireq = inet_rsk(req); 617 struct inet_request_sock *ireq = inet_rsk(req);
623 unsigned int remaining = MAX_TCP_OPTION_SPACE; 618 unsigned int remaining = MAX_TCP_OPTION_SPACE;
@@ -1827,7 +1822,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1827 1822
1828 /* Ok, it looks like it is advisable to defer. */ 1823 /* Ok, it looks like it is advisable to defer. */
1829 1824
1830 if (cong_win < send_win && cong_win < skb->len) 1825 if (cong_win < send_win && cong_win <= skb->len)
1831 *is_cwnd_limited = true; 1826 *is_cwnd_limited = true;
1832 1827
1833 return true; 1828 return true;
@@ -2060,7 +2055,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2060 2055
2061 cwnd_quota = tcp_cwnd_test(tp, skb); 2056 cwnd_quota = tcp_cwnd_test(tp, skb);
2062 if (!cwnd_quota) { 2057 if (!cwnd_quota) {
2063 is_cwnd_limited = true;
2064 if (push_one == 2) 2058 if (push_one == 2)
2065 /* Force out a loss probe pkt. */ 2059 /* Force out a loss probe pkt. */
2066 cwnd_quota = 1; 2060 cwnd_quota = 1;
@@ -2142,6 +2136,7 @@ repair:
2142 /* Send one loss probe per tail loss episode. */ 2136 /* Send one loss probe per tail loss episode. */
2143 if (push_one != 2) 2137 if (push_one != 2)
2144 tcp_schedule_loss_probe(sk); 2138 tcp_schedule_loss_probe(sk);
2139 is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
2145 tcp_cwnd_validate(sk, is_cwnd_limited); 2140 tcp_cwnd_validate(sk, is_cwnd_limited);
2146 return false; 2141 return false;
2147 } 2142 }
@@ -2165,7 +2160,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
2165 /* Don't do any loss probe on a Fast Open connection before 3WHS 2160 /* Don't do any loss probe on a Fast Open connection before 3WHS
2166 * finishes. 2161 * finishes.
2167 */ 2162 */
2168 if (sk->sk_state == TCP_SYN_RECV) 2163 if (tp->fastopen_rsk)
2169 return false; 2164 return false;
2170 2165
2171 /* TLP is only scheduled when next timer event is RTO. */ 2166 /* TLP is only scheduled when next timer event is RTO. */
@@ -2175,7 +2170,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
2175 /* Schedule a loss probe in 2*RTT for SACK-capable connections 2170
2176 * in Open state that are either limited by cwnd or application. 2171
2177 */ 2172 */
2178 if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out || 2173 if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
2179 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) 2174 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
2180 return false; 2175 return false;
2181 2176
@@ -2184,9 +2179,10 @@ bool tcp_schedule_loss_probe(struct sock *sk)
2184 return false; 2179 return false;
2185 2180
2186 /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account 2181 /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
2187 * for delayed ack when there's one outstanding packet. 2182 * for delayed ack when there's one outstanding packet. If no RTT
2183 * sample is available then probe after TCP_TIMEOUT_INIT.
2188 */ 2184 */
2189 timeout = rtt << 1; 2185 timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
2190 if (tp->packets_out == 1) 2186 if (tp->packets_out == 1)
2191 timeout = max_t(u32, timeout, 2187 timeout = max_t(u32, timeout,
2192 (rtt + (rtt >> 1) + TCP_DELACK_MAX)); 2188 (rtt + (rtt >> 1) + TCP_DELACK_MAX));
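A note on the timeout line above: `rtt << 1 ? : TCP_TIMEOUT_INIT` uses the GNU C "elvis" conditional, so the probe fires after 2*RTT when a sample exists and after the initial timeout when rtt is zero, which is now possible since the !tp->srtt_us early return was dropped. A compilable sketch of the resulting formula, with illustrative millisecond constants rather than the kernel's jiffies values:

#include <stdio.h>

#define TCP_TIMEOUT_INIT 1000  /* illustrative: 1 s in ms */
#define TCP_DELACK_MAX    200  /* illustrative: max delayed ACK in ms */

static unsigned int max_u32(unsigned int a, unsigned int b)
{
    return a > b ? a : b;
}

static unsigned int tlp_timeout(unsigned int rtt, int packets_out)
{
    /* 2*RTT, or the initial timeout when no RTT sample exists. */
    unsigned int timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;

    /* With one packet in flight, also wait out a delayed ACK. */
    if (packets_out == 1)
        timeout = max_u32(timeout, rtt + (rtt >> 1) + TCP_DELACK_MAX);
    return timeout;
}

int main(void)
{
    printf("rtt=100, 3 pkts   -> %u\n", tlp_timeout(100, 3)); /* 200 */
    printf("rtt=100, 1 pkt    -> %u\n", tlp_timeout(100, 1)); /* 350 */
    printf("rtt=0 (no sample) -> %u\n", tlp_timeout(0, 3));   /* 1000 */
    return 0;
}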
@@ -2659,8 +2655,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2659 net_dbg_ratelimited("retrans_out leaked\n"); 2655 net_dbg_ratelimited("retrans_out leaked\n");
2660 } 2656 }
2661#endif 2657#endif
2662 if (!tp->retrans_out)
2663 tp->lost_retrans_low = tp->snd_nxt;
2664 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; 2658 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2665 tp->retrans_out += tcp_skb_pcount(skb); 2659 tp->retrans_out += tcp_skb_pcount(skb);
2666 2660
@@ -2668,10 +2662,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2668 if (!tp->retrans_stamp) 2662 if (!tp->retrans_stamp)
2669 tp->retrans_stamp = tcp_skb_timestamp(skb); 2663 tp->retrans_stamp = tcp_skb_timestamp(skb);
2670 2664
2671 /* snd_nxt is stored to detect loss of retransmitted segment,
2672 * see tcp_input.c tcp_sacktag_write_queue().
2673 */
2674 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
2675 } else if (err != -EBUSY) { 2665 } else if (err != -EBUSY) {
2676 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); 2666 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2677 } 2667 }
@@ -2949,20 +2939,22 @@ int tcp_send_synack(struct sock *sk)
2949 * Allocate one skb and build a SYNACK packet. 2939 * Allocate one skb and build a SYNACK packet.
2950 * @dst is consumed : Caller should not use it again. 2940 * @dst is consumed : Caller should not use it again.
2951 */ 2941 */
2952struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, 2942struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
2953 struct request_sock *req, 2943 struct request_sock *req,
2954 struct tcp_fastopen_cookie *foc) 2944 struct tcp_fastopen_cookie *foc,
2945 bool attach_req)
2955{ 2946{
2956 struct tcp_out_options opts;
2957 struct inet_request_sock *ireq = inet_rsk(req); 2947 struct inet_request_sock *ireq = inet_rsk(req);
2958 struct tcp_sock *tp = tcp_sk(sk); 2948 const struct tcp_sock *tp = tcp_sk(sk);
2959 struct tcphdr *th;
2960 struct sk_buff *skb;
2961 struct tcp_md5sig_key *md5 = NULL; 2949 struct tcp_md5sig_key *md5 = NULL;
2950 struct tcp_out_options opts;
2951 struct sk_buff *skb;
2962 int tcp_header_size; 2952 int tcp_header_size;
2953 struct tcphdr *th;
2954 u16 user_mss;
2963 int mss; 2955 int mss;
2964 2956
2965 skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC); 2957 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
2966 if (unlikely(!skb)) { 2958 if (unlikely(!skb)) {
2967 dst_release(dst); 2959 dst_release(dst);
2968 return NULL; 2960 return NULL;
@@ -2970,11 +2962,21 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2970 /* Reserve space for headers. */ 2962 /* Reserve space for headers. */
2971 skb_reserve(skb, MAX_TCP_HEADER); 2963 skb_reserve(skb, MAX_TCP_HEADER);
2972 2964
2965 if (attach_req) {
2966 skb_set_owner_w(skb, req_to_sk(req));
2967 } else {
2968 /* sk is a const pointer because multiple CPUs might call us
2969 * concurrently; sk->sk_wmem_alloc is an atomic, so we can
2970 * safely promote it to rw.
2971 */
2972 skb_set_owner_w(skb, (struct sock *)sk);
2973 }
2973 skb_dst_set(skb, dst); 2974 skb_dst_set(skb, dst);
2974 2975
2975 mss = dst_metric_advmss(dst); 2976 mss = dst_metric_advmss(dst);
2976 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) 2977 user_mss = READ_ONCE(tp->rx_opt.user_mss);
2977 mss = tp->rx_opt.user_mss; 2978 if (user_mss && user_mss < mss)
2979 mss = user_mss;
2978 2980
2979 memset(&opts, 0, sizeof(opts)); 2981 memset(&opts, 0, sizeof(opts));
2980#ifdef CONFIG_SYN_COOKIES 2982#ifdef CONFIG_SYN_COOKIES
@@ -2988,8 +2990,9 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2988 rcu_read_lock(); 2990 rcu_read_lock();
2989 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); 2991 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
2990#endif 2992#endif
2991 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, 2993 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
2992 foc) + sizeof(*th); 2994 tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
2995 sizeof(*th);
2993 2996
2994 skb_push(skb, tcp_header_size); 2997 skb_push(skb, tcp_header_size);
2995 skb_reset_transport_header(skb); 2998 skb_reset_transport_header(skb);
@@ -2998,7 +3001,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2998 memset(th, 0, sizeof(struct tcphdr)); 3001 memset(th, 0, sizeof(struct tcphdr));
2999 th->syn = 1; 3002 th->syn = 1;
3000 th->ack = 1; 3003 th->ack = 1;
3001 tcp_ecn_make_synack(req, th, sk); 3004 tcp_ecn_make_synack(req, th);
3002 th->source = htons(ireq->ir_num); 3005 th->source = htons(ireq->ir_num);
3003 th->dest = ireq->ir_rmt_port; 3006 th->dest = ireq->ir_rmt_port;
3004 /* Setting of flags is superfluous here for callers (and ECE is 3007
@@ -3012,8 +3015,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
3012 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); 3015 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
3013 3016
3014 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ 3017 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
3015 th->window = htons(min(req->rcv_wnd, 65535U)); 3018 th->window = htons(min(req->rsk_rcv_wnd, 65535U));
3016 tcp_options_write((__be32 *)(th + 1), tp, &opts); 3019 tcp_options_write((__be32 *)(th + 1), NULL, &opts);
3017 th->doff = (tcp_header_size >> 2); 3020 th->doff = (tcp_header_size >> 2);
3018 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS); 3021 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
3019 3022
@@ -3405,7 +3408,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
3405 */ 3408 */
3406 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); 3409 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
3407 skb_mstamp_get(&skb->skb_mstamp); 3410 skb_mstamp_get(&skb->skb_mstamp);
3408 NET_INC_STATS_BH(sock_net(sk), mib); 3411 NET_INC_STATS(sock_net(sk), mib);
3409 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 3412 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
3410} 3413}
3411 3414
@@ -3500,13 +3503,14 @@ void tcp_send_probe0(struct sock *sk)
3500 TCP_RTO_MAX); 3503 TCP_RTO_MAX);
3501} 3504}
3502 3505
3503int tcp_rtx_synack(struct sock *sk, struct request_sock *req) 3506int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
3504{ 3507{
3505 const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; 3508 const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
3506 struct flowi fl; 3509 struct flowi fl;
3507 int res; 3510 int res;
3508 3511
3509 res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); 3512 tcp_rsk(req)->txhash = net_tx_rndhash();
3513 res = af_ops->send_synack(sk, NULL, &fl, req, NULL, true);
3510 if (!res) { 3514 if (!res) {
3511 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 3515 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
3512 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); 3516 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
new file mode 100644
index 000000000000..5353085fd0b2
--- /dev/null
+++ b/net/ipv4/tcp_recovery.c
@@ -0,0 +1,109 @@
1#include <linux/tcp.h>
2#include <net/tcp.h>
3
4int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOST_RETRANS;
5
6/* Marks a packet lost if some packet sent later has been (s)acked.
7 * The underlying idea is similar to the traditional dupthresh and FACK
8 * but they look at different metrics:
9 *
10 * dupthresh: 3 OOO packets delivered (packet count)
11 * FACK: sequence delta to highest sacked sequence (sequence space)
12 * RACK: sent time delta to the latest delivered packet (time domain)
13 *
14 * The advantage of RACK is it applies to both original and retransmitted
15 * packet and therefore is robust against tail losses. Another advantage
16 * is being more resilient to reordering by simply allowing some
17 * "settling delay", instead of tweaking the dupthresh.
18 *
19 * The current version is only used after recovery starts but can be
20 * easily extended to detect the first loss.
21 */
22int tcp_rack_mark_lost(struct sock *sk)
23{
24 struct tcp_sock *tp = tcp_sk(sk);
25 struct sk_buff *skb;
26 u32 reo_wnd, prior_retrans = tp->retrans_out;
27
28 if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced)
29 return 0;
30
31 /* Reset the advanced flag to avoid unnecessary queue scanning */
32 tp->rack.advanced = 0;
33
34 /* To be more reordering resilient, allow min_rtt/4 settling delay
35 * (lower-bounded to 1000us). We use min_rtt instead of the smoothed
36 * RTT because reordering is often a path property and less related
37 * to queuing or delayed ACKs.
38 *
39 * TODO: measure and adapt to the observed reordering delay, and
40 * use a timer to retransmit like the delayed early retransmit.
41 */
42 reo_wnd = 1000;
43 if (tp->rack.reord && tcp_min_rtt(tp) != ~0U)
44 reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
45
46 tcp_for_write_queue(skb, sk) {
47 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
48
49 if (skb == tcp_send_head(sk))
50 break;
51
52 /* Skip ones already (s)acked */
53 if (!after(scb->end_seq, tp->snd_una) ||
54 scb->sacked & TCPCB_SACKED_ACKED)
55 continue;
56
57 if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) {
58
59 if (skb_mstamp_us_delta(&tp->rack.mstamp,
60 &skb->skb_mstamp) <= reo_wnd)
61 continue;
62
63 /* skb is lost if packet sent later is sacked */
64 tcp_skb_mark_lost_uncond_verify(tp, skb);
65 if (scb->sacked & TCPCB_SACKED_RETRANS) {
66 scb->sacked &= ~TCPCB_SACKED_RETRANS;
67 tp->retrans_out -= tcp_skb_pcount(skb);
68 NET_INC_STATS_BH(sock_net(sk),
69 LINUX_MIB_TCPLOSTRETRANSMIT);
70 }
71 } else if (!(scb->sacked & TCPCB_RETRANS)) {
72 /* Original data are sent sequentially so stop early
73 * b/c the rest are all sent after rack_sent
74 */
75 break;
76 }
77 }
78 return prior_retrans - tp->retrans_out;
79}
80
81/* Record the most recently (re)sent time among the (s)acked packets */
82void tcp_rack_advance(struct tcp_sock *tp,
83 const struct skb_mstamp *xmit_time, u8 sacked)
84{
85 if (tp->rack.mstamp.v64 &&
86 !skb_mstamp_after(xmit_time, &tp->rack.mstamp))
87 return;
88
89 if (sacked & TCPCB_RETRANS) {
90 struct skb_mstamp now;
91
92 /* If the sacked packet was retransmitted, it's ambiguous
93 * whether the retransmission or the original (or the prior
94 * retransmission) was sacked.
95 *
96 * If the original is lost, there is no ambiguity. Otherwise
97 * we assume the original can be delayed up to aRTT + min_rtt.
98 * The aRTT term is bounded by the fast recovery or timeout,
99 * so it's at least one RTT (i.e., retransmission is at least
100 * an RTT later).
101 */
102 skb_mstamp_get(&now);
103 if (skb_mstamp_us_delta(&now, xmit_time) < tcp_min_rtt(tp))
104 return;
105 }
106
107 tp->rack.mstamp = *xmit_time;
108 tp->rack.advanced = 1;
109}
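The whole of tcp_rack_mark_lost() above reduces to one time-domain rule: a still-unacked packet is lost if the most recently delivered packet was sent after it by more than a reordering window of roughly max(min_rtt/4, 1ms). A self-contained sketch of that test over a toy send queue (microsecond timestamps; simplified in that the kernel widens the window only once reordering has actually been observed, and stops scanning early on original transmissions):

#include <stdio.h>

#define REO_WND_FLOOR_US 1000

struct pkt { unsigned long sent_us; int sacked; int lost; };

/* Mark packets lost per RACK: a later-sent packet was delivered and
 * the send-time gap exceeds the settling window.
 */
static int rack_mark_lost(struct pkt *q, int n,
                          unsigned long rack_mstamp_us,
                          unsigned long min_rtt_us)
{
    unsigned long reo_wnd = min_rtt_us / 4;
    int marked = 0;

    if (reo_wnd < REO_WND_FLOOR_US)
        reo_wnd = REO_WND_FLOOR_US;

    for (int i = 0; i < n; i++) {
        if (q[i].sacked)  /* already delivered */
            continue;
        if (rack_mstamp_us > q[i].sent_us &&
            rack_mstamp_us - q[i].sent_us > reo_wnd) {
            q[i].lost = 1;
            marked++;
        }
    }
    return marked;
}

int main(void)
{
    /* Packet 0 never sacked; packet 1 (sent later) was sacked. */
    struct pkt q[] = { { 1000, 0, 0 }, { 8000, 1, 0 } };

    /* Latest delivered packet was sent at t=8000us; min_rtt 4 ms. */
    int n = rack_mark_lost(q, 2, 8000, 4000);

    printf("marked %d lost (pkt0 lost=%d)\n", n, q[0].lost);
    return 0;
}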
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7149ebc820c7..c9c716a483e4 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -83,7 +83,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
83} 83}
84 84
85/* Calculate maximal number of retries on an orphaned socket. */ 85
86static int tcp_orphan_retries(struct sock *sk, int alive) 86static int tcp_orphan_retries(struct sock *sk, bool alive)
87{ 87{
88 int retries = sysctl_tcp_orphan_retries; /* May be zero. */ 88 int retries = sysctl_tcp_orphan_retries; /* May be zero. */
89 89
@@ -184,7 +184,7 @@ static int tcp_write_timeout(struct sock *sk)
184 184
185 retry_until = sysctl_tcp_retries2; 185 retry_until = sysctl_tcp_retries2;
186 if (sock_flag(sk, SOCK_DEAD)) { 186 if (sock_flag(sk, SOCK_DEAD)) {
187 const int alive = icsk->icsk_rto < TCP_RTO_MAX; 187 const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
188 188
189 retry_until = tcp_orphan_retries(sk, alive); 189 retry_until = tcp_orphan_retries(sk, alive);
190 do_reset = alive || 190 do_reset = alive ||
@@ -298,7 +298,7 @@ static void tcp_probe_timer(struct sock *sk)
298 298
299 max_probes = sysctl_tcp_retries2; 299 max_probes = sysctl_tcp_retries2;
300 if (sock_flag(sk, SOCK_DEAD)) { 300 if (sock_flag(sk, SOCK_DEAD)) {
301 const int alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; 301 const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
302 302
303 max_probes = tcp_orphan_retries(sk, alive); 303 max_probes = tcp_orphan_retries(sk, alive);
304 if (!alive && icsk->icsk_backoff >= max_probes) 304 if (!alive && icsk->icsk_backoff >= max_probes)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f7d1d5e19e95..24ec14f9825c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -375,7 +375,8 @@ static inline int compute_score(struct sock *sk, struct net *net,
375 return -1; 375 return -1;
376 score += 4; 376 score += 4;
377 } 377 }
378 378 if (sk->sk_incoming_cpu == raw_smp_processor_id())
379 score++;
379 return score; 380 return score;
380} 381}
381 382
@@ -419,6 +420,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
419 score += 4; 420 score += 4;
420 } 421 }
421 422
423 if (sk->sk_incoming_cpu == raw_smp_processor_id())
424 score++;
425
422 return score; 426 return score;
423} 427}
424 428
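Both UDP scoring hunks add the same tie-break: one extra point when the socket's recorded incoming CPU matches the CPU handling this packet, steering reuseport demux toward the cache-warm socket. A toy version of the idea (the 4-point interface weight mirrors the surrounding code; the rest is invented for illustration):

#include <stdio.h>

struct usock { int bound_if; int incoming_cpu; };

/* An exact interface bind outweighs CPU affinity; the CPU point only
 * breaks ties between otherwise-equal candidates.
 */
static int score(const struct usock *s, int pkt_if, int this_cpu)
{
    int sc = 0;

    if (s->bound_if) {
        if (s->bound_if != pkt_if)
            return -1;  /* bound elsewhere: no match */
        sc += 4;
    }
    if (s->incoming_cpu == this_cpu)
        sc++;
    return sc;
}

int main(void)
{
    struct usock a = { 2, 0 }, b = { 2, 3 };

    printf("a=%d b=%d (packet on if 2, cpu 3)\n",
           score(&a, 2, 3), score(&b, 2, 3));  /* 4 vs 5 */
    return 0;
}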
@@ -1017,30 +1021,14 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
1017 1021
1018 fl4 = &fl4_stack; 1022 fl4 = &fl4_stack;
1019 1023
1020 /* unconnected socket. If output device is enslaved to a VRF
1021 * device lookup source address from VRF table. This mimics
1022 * behavior of ip_route_connect{_init}.
1023 */
1024 if (netif_index_is_vrf(net, ipc.oif)) {
1025 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
1026 RT_SCOPE_UNIVERSE, sk->sk_protocol,
1027 (flow_flags | FLOWI_FLAG_VRFSRC |
1028 FLOWI_FLAG_SKIP_NH_OIF),
1029 faddr, saddr, dport,
1030 inet->inet_sport);
1031
1032 rt = ip_route_output_flow(net, fl4, sk);
1033 if (!IS_ERR(rt)) {
1034 saddr = fl4->saddr;
1035 ip_rt_put(rt);
1036 }
1037 }
1038
1039 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, 1024 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
1040 RT_SCOPE_UNIVERSE, sk->sk_protocol, 1025 RT_SCOPE_UNIVERSE, sk->sk_protocol,
1041 flow_flags, 1026 flow_flags,
1042 faddr, saddr, dport, inet->inet_sport); 1027 faddr, saddr, dport, inet->inet_sport);
1043 1028
1029 if (!saddr && ipc.oif)
1030 l3mdev_get_saddr(net, ipc.oif, fl4);
1031
1044 security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); 1032 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
1045 rt = ip_route_output_flow(net, fl4, sk); 1033 rt = ip_route_output_flow(net, fl4, sk);
1046 if (IS_ERR(rt)) { 1034 if (IS_ERR(rt)) {
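Note on the udp.c hunks: both compute_score() variants now add a one-point bonus when sk->sk_incoming_cpu matches the CPU currently processing the packet, so among otherwise equally good sockets (for example a SO_REUSEPORT group) the cache-hot one wins, and the open-coded VRF source-address lookup is folded into the generic l3mdev_get_saddr() helper. A toy model of that additive scoring, with invented names:

	struct fake_sock {
		int bound_if;		/* 0 means wildcard bind */
		int incoming_cpu;	/* CPU that last touched this socket */
	};

	static int score_candidate(const struct fake_sock *sk,
				   int in_if, int this_cpu)
	{
		int score = 0;

		if (sk->bound_if) {
			if (sk->bound_if != in_if)
				return -1;	/* bound elsewhere: ineligible */
			score += 4;		/* specific bind beats wildcard */
		}
		if (sk->incoming_cpu == this_cpu)
			score++;		/* tie-break: prefer cache-hot */
		return score;
	}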
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 60b032f58ccc..62e1e72db461 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -22,7 +22,8 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
 	return xfrm4_extract_header(skb);
 }
 
-static inline int xfrm4_rcv_encap_finish(struct sock *sk, struct sk_buff *skb)
+static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
+					 struct sk_buff *skb)
 {
 	if (!skb_dst(skb)) {
 		const struct iphdr *iph = ip_hdr(skb);
@@ -52,8 +53,8 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
 	iph->tot_len = htons(skb->len);
 	ip_send_check(iph);
 
-	NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb,
-		skb->dev, NULL,
+	NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+		dev_net(skb->dev), NULL, skb, skb->dev, NULL,
 		xfrm4_rcv_encap_finish);
 	return 0;
 }
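Note: this hunk shows the signature convention that recurs through the rest of this patch: NF_HOOK() and NF_HOOK_COND() take the struct net explicitly, and every okfn/finish callback grows a matching struct net argument instead of re-deriving it from skb->dev or the dst (dst_output_sk() likewise becomes dst_output(net, sk, skb) in later hunks). Shape of a post-conversion finish callback, sketched under that assumption:

	/* old: int finish(struct sock *sk, struct sk_buff *skb);
	 * new: int finish(struct net *net, struct sock *sk, struct sk_buff *skb);
	 */
	static int example_finish(struct net *net, struct sock *sk,
				  struct sk_buff *skb)
	{
		return dst_output(net, sk, skb);  /* no dev_net(skb->dev) lookup */
	}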
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 2878dbfffeb7..7ee6518afa86 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -30,6 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
 
 	mtu = dst_mtu(skb_dst(skb));
 	if (skb->len > mtu) {
+		skb->protocol = htons(ETH_P_IP);
+
 		if (skb->sk)
 			xfrm_local_error(skb, mtu);
 		else
@@ -80,24 +82,25 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
 	return xfrm_output(sk, skb);
 }
 
-static int __xfrm4_output(struct sock *sk, struct sk_buff *skb)
+static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct xfrm_state *x = skb_dst(skb)->xfrm;
 
 #ifdef CONFIG_NETFILTER
 	if (!x) {
 		IPCB(skb)->flags |= IPSKB_REROUTED;
-		return dst_output_sk(sk, skb);
+		return dst_output(net, sk, skb);
 	}
 #endif
 
 	return x->outer_mode->afinfo->output_finish(sk, skb);
 }
 
-int xfrm4_output(struct sock *sk, struct sk_buff *skb)
+int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb,
-			    NULL, skb_dst(skb)->dev, __xfrm4_output,
+	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+			    net, sk, skb, NULL, skb_dst(skb)->dev,
+			    __xfrm4_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c10a9ee68433..1e0c3c835a63 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -15,7 +15,7 @@
 #include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
 
 static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
 
@@ -97,6 +97,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt.rt_gateway = rt->rt_gateway;
 	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
 	xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+	xdst->u.rt.rt_table_id = rt->rt_table_id;
 	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
 
 	return 0;
@@ -110,10 +111,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 	struct flowi4 *fl4 = &fl->u.ip4;
 	int oif = 0;
 
-	if (skb_dst(skb)) {
-		oif = vrf_master_ifindex(skb_dst(skb)->dev) ? :
-			skb_dst(skb)->dev->ifindex;
-	}
+	if (skb_dst(skb))
+		oif = l3mdev_fib_oif(skb_dst(skb)->dev);
 
 	memset(fl4, 0, sizeof(struct flowi4));
 	fl4->flowi4_mark = skb->mark;
@@ -128,7 +127,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 		case IPPROTO_DCCP:
 			if (xprth + 4 < skb->data ||
 			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be16 *ports = (__be16 *)xprth;
+				__be16 *ports;
+
+				xprth = skb_network_header(skb) + iph->ihl * 4;
+				ports = (__be16 *)xprth;
 
 				fl4->fl4_sport = ports[!!reverse];
 				fl4->fl4_dport = ports[!reverse];
@@ -136,8 +138,12 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 			break;
 
 		case IPPROTO_ICMP:
-			if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
-				u8 *icmp = xprth;
+			if (xprth + 2 < skb->data ||
+			    pskb_may_pull(skb, xprth + 2 - skb->data)) {
+				u8 *icmp;
+
+				xprth = skb_network_header(skb) + iph->ihl * 4;
+				icmp = xprth;
 
 				fl4->fl4_icmp_type = icmp[0];
 				fl4->fl4_icmp_code = icmp[1];
@@ -145,33 +151,50 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 			break;
 
 		case IPPROTO_ESP:
-			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be32 *ehdr = (__be32 *)xprth;
+			if (xprth + 4 < skb->data ||
+			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
+				__be32 *ehdr;
+
+				xprth = skb_network_header(skb) + iph->ihl * 4;
+				ehdr = (__be32 *)xprth;
 
 				fl4->fl4_ipsec_spi = ehdr[0];
 			}
 			break;
 
 		case IPPROTO_AH:
-			if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
-				__be32 *ah_hdr = (__be32 *)xprth;
+			if (xprth + 8 < skb->data ||
+			    pskb_may_pull(skb, xprth + 8 - skb->data)) {
+				__be32 *ah_hdr;
+
+				xprth = skb_network_header(skb) + iph->ihl * 4;
+				ah_hdr = (__be32 *)xprth;
 
 				fl4->fl4_ipsec_spi = ah_hdr[1];
 			}
 			break;
 
 		case IPPROTO_COMP:
-			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be16 *ipcomp_hdr = (__be16 *)xprth;
+			if (xprth + 4 < skb->data ||
+			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
+				__be16 *ipcomp_hdr;
+
+				xprth = skb_network_header(skb) + iph->ihl * 4;
+				ipcomp_hdr = (__be16 *)xprth;
 
 				fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
 			}
 			break;
 
 		case IPPROTO_GRE:
-			if (pskb_may_pull(skb, xprth + 12 - skb->data)) {
-				__be16 *greflags = (__be16 *)xprth;
-				__be32 *gre_hdr = (__be32 *)xprth;
+			if (xprth + 12 < skb->data ||
+			    pskb_may_pull(skb, xprth + 12 - skb->data)) {
+				__be16 *greflags;
+				__be32 *gre_hdr;
+
+				xprth = skb_network_header(skb) + iph->ihl * 4;
+				greflags = (__be16 *)xprth;
+				gre_hdr = (__be32 *)xprth;
 
 				if (greflags[0] & GRE_KEY) {
 					if (greflags[0] & GRE_CSUM)
@@ -245,7 +268,7 @@ static struct dst_ops xfrm4_dst_ops = {
 	.destroy =		xfrm4_dst_destroy,
 	.ifdown =		xfrm4_dst_ifdown,
 	.local_out =		__ip_local_out,
-	.gc_thresh =		32768,
+	.gc_thresh =		INT_MAX,
 };
 
 static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
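Note on the _decode_session4() hunks: pskb_may_pull() may reallocate skb->head when it has to pull more data into the linear area, so a pointer such as xprth that was computed before the call can dangle afterwards. Every case above therefore recomputes the transport-header pointer from skb_network_header() once the pull succeeds, and only then dereferences it. The pattern in isolation (kernel context assumed, taken directly from the diff):

	if (xprth + 4 < skb->data ||
	    pskb_may_pull(skb, xprth + 4 - skb->data)) {
		__be16 *ports;

		/* recompute: a successful pull may have moved skb->head */
		xprth = skb_network_header(skb) + iph->ihl * 4;
		ports = (__be16 *)xprth;

		fl4->fl4_sport = ports[!!reverse];
		fl4->fl4_dport = ports[!reverse];
	}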
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 900113376d4e..d84742f003a9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
 #include <net/ip.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/l3mdev.h>
 #include <linux/if_tunnel.h>
 #include <linux/rtnetlink.h>
 #include <linux/netconf.h>
@@ -417,6 +418,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	if (err) {
 		ipv6_mc_destroy_dev(ndev);
 		del_timer(&ndev->regen_timer);
+		snmp6_unregister_dev(ndev);
 		goto err_release;
 	}
 	/* protected by rtnl_lock */
@@ -2146,7 +2148,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 			  unsigned long expires, u32 flags)
 {
 	struct fib6_config cfg = {
-		.fc_table = RT6_TABLE_PREFIX,
+		.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
 		.fc_metric = IP6_RT_PRIO_ADDRCONF,
 		.fc_ifindex = dev->ifindex,
 		.fc_expires = expires,
@@ -2179,8 +2181,9 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 	struct fib6_node *fn;
 	struct rt6_info *rt = NULL;
 	struct fib6_table *table;
+	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
 
-	table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX);
+	table = fib6_get_table(dev_net(dev), tb_id);
 	if (!table)
 		return NULL;
 
@@ -2211,7 +2214,7 @@ out:
 static void addrconf_add_mroute(struct net_device *dev)
 {
 	struct fib6_config cfg = {
-		.fc_table = RT6_TABLE_LOCAL,
+		.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_LOCAL,
 		.fc_metric = IP6_RT_PRIO_ADDRCONF,
 		.fc_ifindex = dev->ifindex,
 		.fc_dst_len = 8,
@@ -3029,6 +3032,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 {
 	struct in6_addr addr;
 
+	/* no link local addresses on L3 master devices */
+	if (netif_is_l3_master(idev->dev))
+		return;
+
 	ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
 
 	if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) {
@@ -3119,6 +3126,8 @@ static void addrconf_gre_config(struct net_device *dev)
 	}
 
 	addrconf_addr_gen(idev, true);
+	if (dev->flags & IFF_POINTOPOINT)
+		addrconf_add_mroute(dev);
 }
 #endif
 
@@ -3139,6 +3148,32 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		}
 		break;
 
+	case NETDEV_CHANGEMTU:
+		/* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */
+		if (dev->mtu < IPV6_MIN_MTU) {
+			addrconf_ifdown(dev, 1);
+			break;
+		}
+
+		if (idev) {
+			rt6_mtu_change(dev, dev->mtu);
+			idev->cnf.mtu6 = dev->mtu;
+			break;
+		}
+
+		/* allocate new idev */
+		idev = ipv6_add_dev(dev);
+		if (IS_ERR(idev))
+			break;
+
+		/* device is still not ready */
+		if (!(idev->if_flags & IF_READY))
+			break;
+
+		run_pending = 1;
+
+		/* fall through */
+
 	case NETDEV_UP:
 	case NETDEV_CHANGE:
 		if (dev->flags & IFF_SLAVE)
@@ -3162,7 +3197,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 				idev->if_flags |= IF_READY;
 				run_pending = 1;
 			}
-		} else {
+		} else if (event == NETDEV_CHANGE) {
 			if (!addrconf_qdisc_ok(dev)) {
 				/* device is still not ready. */
 				break;
@@ -3227,24 +3262,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		}
 		break;
 
-	case NETDEV_CHANGEMTU:
-		if (idev && dev->mtu >= IPV6_MIN_MTU) {
-			rt6_mtu_change(dev, dev->mtu);
-			idev->cnf.mtu6 = dev->mtu;
-			break;
-		}
-
-		if (!idev && dev->mtu >= IPV6_MIN_MTU) {
-			idev = ipv6_add_dev(dev);
-			if (!IS_ERR(idev))
-				break;
-		}
-
-		/*
-		 * if MTU under IPV6_MIN_MTU.
-		 * Stop IPv6 on this interface.
-		 */
-
 	case NETDEV_DOWN:
 	case NETDEV_UNREGISTER:
 		/*
@@ -3625,7 +3642,7 @@ static void addrconf_dad_work(struct work_struct *w)
 
 	/* send a neighbour solicitation for our addr */
 	addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
-	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any, NULL);
+	ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, NULL);
 out:
 	in6_ifa_put(ifp);
 	rtnl_unlock();
@@ -4729,7 +4746,8 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
 	}
 }
 
-static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
+static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
+				  u32 ext_filter_mask)
 {
 	struct nlattr *nla;
 	struct ifla_cacheinfo ci;
@@ -4749,6 +4767,9 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
 
 	/* XXX - MC not implemented */
 
+	if (ext_filter_mask & RTEXT_FILTER_SKIP_STATS)
+		return 0;
+
 	nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
 	if (!nla)
 		goto nla_put_failure;
@@ -4776,7 +4797,8 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static size_t inet6_get_link_af_size(const struct net_device *dev)
+static size_t inet6_get_link_af_size(const struct net_device *dev,
+				     u32 ext_filter_mask)
 {
 	if (!__in6_dev_get(dev))
 		return 0;
@@ -4784,14 +4806,15 @@ static size_t inet6_get_link_af_size(const struct net_device *dev)
 	return inet6_ifla6_size();
 }
 
-static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
+			      u32 ext_filter_mask)
 {
 	struct inet6_dev *idev = __in6_dev_get(dev);
 
 	if (!idev)
 		return -ENODATA;
 
-	if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+	if (inet6_fill_ifla6_attrs(skb, idev, ext_filter_mask) < 0)
 		return -EMSGSIZE;
 
 	return 0;
@@ -4946,7 +4969,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 	if (!protoinfo)
 		goto nla_put_failure;
 
-	if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+	if (inet6_fill_ifla6_attrs(skb, idev, 0) < 0)
 		goto nla_put_failure;
 
 	nla_nest_end(skb, protoinfo);
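Note on the recurring "l3mdev_fib_table(dev) ? : RT6_TABLE_..." initializers in the addrconf hunks: "a ? : b" is the GNU C conditional with an omitted middle operand; it evaluates a exactly once and yields it when non-zero, otherwise b. Assuming, as these hunks imply, that l3mdev_fib_table() returns 0 for a device with no L3 master, the construct spelled out looks like:

	static u32 pick_table(struct net_device *dev)
	{
		u32 tb_id = l3mdev_fib_table(dev);	/* 0: no L3 master */

		return tb_id ? tb_id : RT6_TABLE_PREFIX;
	}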
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9aadd57808a5..d70b0238f468 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -263,7 +263,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 
 void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
 {
-	struct ipv6_pinfo *np = inet6_sk(sk);
+	const struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sock_exterr_skb *serr;
 	struct ipv6hdr *iph;
 	struct sk_buff *skb;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 9f777ec59a59..ed33abf57abd 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -32,6 +32,7 @@ struct fib6_rule {
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags, pol_lookup_t lookup)
 {
+	struct rt6_info *rt;
 	struct fib_lookup_arg arg = {
 		.lookup_ptr = lookup,
 		.flags = FIB_LOOKUP_NOREF,
@@ -40,11 +41,21 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 	fib_rules_lookup(net->ipv6.fib6_rules_ops,
 			 flowi6_to_flowi(fl6), flags, &arg);
 
-	if (arg.result)
-		return arg.result;
+	rt = arg.result;
 
-	dst_hold(&net->ipv6.ip6_null_entry->dst);
-	return &net->ipv6.ip6_null_entry->dst;
+	if (!rt) {
+		dst_hold(&net->ipv6.ip6_null_entry->dst);
+		return &net->ipv6.ip6_null_entry->dst;
+	}
+
+	if (rt->rt6i_flags & RTF_REJECT &&
+	    rt->dst.error == -EAGAIN) {
+		ip6_rt_put(rt);
+		rt = net->ipv6.ip6_null_entry;
+		dst_hold(&rt->dst);
+	}
+
+	return &rt->dst;
 }
 
 static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
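Note: a reject route whose dst.error is -EAGAIN is fib6's internal "this table had no match" sentinel. Rather than leak that sentinel to callers, the lookup now swaps it for the per-netns null entry, dropping the table's reference and taking one on the replacement; the same transform is applied to the no-rules path in net/ipv6/ip6_fib.c further down. The shared shape, as a sketch rather than the exact kernel code:

	static struct dst_entry *result_or_null(struct net *net,
						struct rt6_info *rt)
	{
		if (!rt) {
			dst_hold(&net->ipv6.ip6_null_entry->dst);
			return &net->ipv6.ip6_null_entry->dst;
		}
		if (rt->rt6i_flags & RTF_REJECT && rt->dst.error == -EAGAIN) {
			ip6_rt_put(rt);			/* drop table's ref */
			rt = net->ipv6.ip6_null_entry;
			dst_hold(&rt->dst);		/* take ours */
		}
		return &rt->dst;
	}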
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6c2b2132c8d3..36c5a98b0472 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -68,6 +68,7 @@
 #include <net/xfrm.h>
 #include <net/inet_common.h>
 #include <net/dsfield.h>
+#include <net/l3mdev.h>
 
 #include <asm/uaccess.h>
 
@@ -452,7 +453,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	 *	and anycast addresses will be checked later.
 	 */
 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
-		net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n");
+		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
+				    &hdr->saddr, &hdr->daddr);
 		return;
 	}
 
@@ -460,7 +462,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	 *	Never answer to a ICMP packet.
 	 */
 	if (is_ineligible(skb)) {
-		net_dbg_ratelimited("icmp6_send: no reply to icmp error\n");
+		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
+				    &hdr->saddr, &hdr->daddr);
 		return;
 	}
 
@@ -496,6 +499,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	else if (!fl6.flowi6_oif)
 		fl6.flowi6_oif = np->ucast_oif;
 
+	if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev);
+
 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
 	if (IS_ERR(dst))
 		goto out;
@@ -509,7 +515,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	len = skb->len - msg.offset;
 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
 	if (len < 0) {
-		net_dbg_ratelimited("icmp: len problem\n");
+		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
+				    &hdr->saddr, &hdr->daddr);
 		goto out_dst_release;
 	}
 
@@ -575,7 +582,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	fl6.daddr = ipv6_hdr(skb)->saddr;
 	if (saddr)
 		fl6.saddr = *saddr;
-	fl6.flowi6_oif = skb->dev->ifindex;
+	fl6.flowi6_oif = l3mdev_fib_oif(skb->dev);
 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 	fl6.flowi6_mark = mark;
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -781,7 +788,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		if (type & ICMPV6_INFOMSG_MASK)
 			break;
 
-		net_dbg_ratelimited("icmpv6: msg of unknown type\n");
+		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
+				    saddr, daddr);
 
 		/*
 		 * error of unknown type.
diff --git a/net/ipv6/ila.c b/net/ipv6/ila.c
index 678d2df4b8d9..1a6852e1ac69 100644
--- a/net/ipv6/ila.c
+++ b/net/ipv6/ila.c
@@ -91,7 +91,7 @@ static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
 	*(__be64 *)&ip6h->daddr = p->locator;
 }
 
-static int ila_output(struct sock *sk, struct sk_buff *skb)
+static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 
@@ -100,7 +100,7 @@ static int ila_output(struct sock *sk, struct sk_buff *skb)
 
 	update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
 
-	return dst->lwtstate->orig_output(sk, skb);
+	return dst->lwtstate->orig_output(net, sk, skb);
 
 drop:
 	kfree_skb(skb);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 6927f3fb5597..5d1c7cee2cb2 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -65,17 +65,18 @@ int inet6_csk_bind_conflict(const struct sock *sk,
 }
 EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
 
-struct dst_entry *inet6_csk_route_req(struct sock *sk,
+struct dst_entry *inet6_csk_route_req(const struct sock *sk,
 				      struct flowi6 *fl6,
-				      const struct request_sock *req)
+				      const struct request_sock *req,
+				      u8 proto)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
-	struct ipv6_pinfo *np = inet6_sk(sk);
+	const struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_addr *final_p, final;
 	struct dst_entry *dst;
 
 	memset(fl6, 0, sizeof(*fl6));
-	fl6->flowi6_proto = IPPROTO_TCP;
+	fl6->flowi6_proto = proto;
 	fl6->daddr = ireq->ir_v6_rmt_addr;
 	final_p = fl6_update_dst(fl6, np->opt, &final);
 	fl6->saddr = ireq->ir_v6_loc_addr;
@@ -91,73 +92,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
 
 	return dst;
 }
-
-/*
- * request_sock (formerly open request) hash tables.
- */
-static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
-			   const u32 rnd, const u32 synq_hsize)
-{
-	u32 c;
-
-	c = jhash_3words((__force u32)raddr->s6_addr32[0],
-			 (__force u32)raddr->s6_addr32[1],
-			 (__force u32)raddr->s6_addr32[2],
-			 rnd);
-
-	c = jhash_2words((__force u32)raddr->s6_addr32[3],
-			 (__force u32)rport,
-			 c);
-
-	return c & (synq_hsize - 1);
-}
-
-struct request_sock *inet6_csk_search_req(struct sock *sk,
-					  const __be16 rport,
-					  const struct in6_addr *raddr,
-					  const struct in6_addr *laddr,
-					  const int iif)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	struct request_sock *req;
-	u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd,
-				   lopt->nr_table_entries);
-
-	spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
-	for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) {
-		const struct inet_request_sock *ireq = inet_rsk(req);
-
-		if (ireq->ir_rmt_port == rport &&
-		    req->rsk_ops->family == AF_INET6 &&
-		    ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) &&
-		    ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
-		    (!ireq->ir_iif || ireq->ir_iif == iif)) {
-			atomic_inc(&req->rsk_refcnt);
-			WARN_ON(req->sk != NULL);
-			break;
-		}
-	}
-	spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
-
-	return req;
-}
-EXPORT_SYMBOL_GPL(inet6_csk_search_req);
-
-void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
-				    struct request_sock *req,
-				    const unsigned long timeout)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
-				      inet_rsk(req)->ir_rmt_port,
-				      lopt->hash_rnd, lopt->nr_table_entries);
-
-	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
-	inet_csk_reqsk_queue_added(sk, timeout);
-}
-EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
+EXPORT_SYMBOL(inet6_csk_route_req);
 
 void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
 {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 6ac8dad0138a..21ace5a2bf7c 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -114,6 +114,8 @@ static inline int compute_score(struct sock *sk, struct net *net,
 				return -1;
 			score++;
 		}
+		if (sk->sk_incoming_cpu == raw_smp_processor_id())
+			score++;
 	}
 	return score;
 }
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 7d2e0023c72d..0c7e276c230e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -264,6 +264,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(fib6_get_table);
 
 static void __net_init fib6_tables_init(struct net *net)
 {
@@ -285,7 +286,17 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags, pol_lookup_t lookup)
 {
-	return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+	struct rt6_info *rt;
+
+	rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+	if (rt->rt6i_flags & RTF_REJECT &&
+	    rt->dst.error == -EAGAIN) {
+		ip6_rt_put(rt);
+		rt = net->ipv6.ip6_null_entry;
+		dst_hold(&rt->dst);
+	}
+
+	return &rt->dst;
 }
 
 static void __net_init fib6_tables_init(struct net *net)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index adba03ac7ce9..9075acf081dd 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -47,7 +47,7 @@
 #include <net/inet_ecn.h>
 #include <net/dst_metadata.h>
 
-int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb)
+int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
 		const struct inet6_protocol *ipprot;
@@ -109,7 +109,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	if (hdr->version != 6)
 		goto err;
 
-	IP6_ADD_STATS_BH(dev_net(dev), idev,
+	IP6_ADD_STATS_BH(net, idev,
 			 IPSTATS_MIB_NOECTPKTS +
 				(ipv6_get_dsfield(hdr) & INET_ECN_MASK),
 			 max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
@@ -183,8 +183,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	/* Must drop socket now because of tproxy. */
 	skb_orphan(skb);
 
-	return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb,
-		       dev, NULL,
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+		       net, NULL, skb, dev, NULL,
 		       ip6_rcv_finish);
 err:
 	IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
@@ -199,9 +199,8 @@ drop:
  */
 
 
-static int ip6_input_finish(struct sock *sk, struct sk_buff *skb)
+static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct net *net = dev_net(skb_dst(skb)->dev);
 	const struct inet6_protocol *ipprot;
 	struct inet6_dev *idev;
 	unsigned int nhoff;
@@ -278,8 +277,8 @@ discard:
 
 int ip6_input(struct sk_buff *skb)
 {
-	return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, NULL, skb,
-		       skb->dev, NULL,
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
+		       dev_net(skb->dev), NULL, skb, skb->dev, NULL,
 		       ip6_input_finish);
 }
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 08b62047c67f..eeca943f12dc 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -264,6 +264,9 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 	struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
 	int err = -ENOSYS;
 
+	if (skb->encapsulation)
+		skb_set_inner_network_header(skb, nhoff);
+
 	iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
 
 	rcu_read_lock();
@@ -280,6 +283,13 @@ out_unlock:
 	return err;
 }
 
+static int sit_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	skb->encapsulation = 1;
+	skb_shinfo(skb)->gso_type |= SKB_GSO_SIT;
+	return ipv6_gro_complete(skb, nhoff);
+}
+
 static struct packet_offload ipv6_packet_offload __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IPV6),
 	.callbacks = {
@@ -292,6 +302,8 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
 static const struct net_offload sit_offload = {
 	.callbacks = {
 		.gso_segment	= ipv6_gso_segment,
+		.gro_receive    = ipv6_gro_receive,
+		.gro_complete   = sit_gro_complete,
 	},
 };
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 92b1aa38f121..e6a7bd15b9b7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -55,8 +55,9 @@
 #include <net/xfrm.h>
 #include <net/checksum.h>
 #include <linux/mroute6.h>
+#include <net/l3mdev.h>
 
-static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
+static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *dev = dst->dev;
@@ -71,7 +72,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
-		    ((mroute6_socket(dev_net(dev), skb) &&
+		    ((mroute6_socket(net, skb) &&
 		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
 					 &ipv6_hdr(skb)->saddr))) {
@@ -82,19 +83,18 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
 			 */
 			if (newskb)
 				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
-					sk, newskb, NULL, newskb->dev,
+					net, sk, newskb, NULL, newskb->dev,
 					dev_loopback_xmit);
 
 			if (ipv6_hdr(skb)->hop_limit == 0) {
-				IP6_INC_STATS(dev_net(dev), idev,
+				IP6_INC_STATS(net, idev,
 					      IPSTATS_MIB_OUTDISCARDS);
 				kfree_skb(skb);
 				return 0;
 			}
 		}
 
-		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
-				 skb->len);
+		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
 
 		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
 		    IPV6_ADDR_SCOPE_NODELOCAL &&
@@ -116,48 +116,49 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
 	}
 	rcu_read_unlock_bh();
 
-	IP6_INC_STATS(dev_net(dst->dev),
-		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 	kfree_skb(skb);
 	return -EINVAL;
 }
 
-static int ip6_finish_output(struct sock *sk, struct sk_buff *skb)
+static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 	    dst_allfrag(skb_dst(skb)) ||
 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
-		return ip6_fragment(sk, skb, ip6_finish_output2);
+		return ip6_fragment(net, sk, skb, ip6_finish_output2);
 	else
-		return ip6_finish_output2(sk, skb);
+		return ip6_finish_output2(net, sk, skb);
 }
 
-int ip6_output(struct sock *sk, struct sk_buff *skb)
+int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct net_device *dev = skb_dst(skb)->dev;
 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+
 	if (unlikely(idev->cnf.disable_ipv6)) {
-		IP6_INC_STATS(dev_net(dev), idev,
-			      IPSTATS_MIB_OUTDISCARDS);
+		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 		kfree_skb(skb);
 		return 0;
 	}
 
-	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
-			    NULL, dev,
+	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+			    net, sk, skb, NULL, dev,
 			    ip6_finish_output,
 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
 /*
  *	xmit an sk_buff (used by TCP, SCTP and DCCP)
+ * Note : socket lock is not held for SYNACK packets, but might be modified
+ * by calls to skb_set_owner_w() and ipv6_local_error(),
+ * which are using proper atomic operations or spinlocks.
  */
-
-int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
+int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	     struct ipv6_txoptions *opt, int tclass)
 {
 	struct net *net = sock_net(sk);
-	struct ipv6_pinfo *np = inet6_sk(sk);
+	const struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_addr *first_hop = &fl6->daddr;
 	struct dst_entry *dst = skb_dst(skb);
 	struct ipv6hdr *hdr;
@@ -186,7 +187,10 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 		}
 		consume_skb(skb);
 		skb = skb2;
-		skb_set_owner_w(skb, sk);
+		/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
+		 * it is safe to call in our context (socket lock not held)
+		 */
+		skb_set_owner_w(skb, (struct sock *)sk);
 	}
 	if (opt->opt_flen)
 		ipv6_push_frag_opts(skb, opt, &proto);
@@ -224,12 +228,20 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			      IPSTATS_MIB_OUT, skb->len);
-		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
-			       NULL, dst->dev, dst_output_sk);
+		/* hooks should never assume socket lock is held.
+		 * we promote our socket to non const
+		 */
+		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+			       net, (struct sock *)sk, skb, NULL, dst->dev,
+			       dst_output);
 	}
 
 	skb->dev = dst->dev;
-	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
+	/* ipv6_local_error() does not require socket lock,
+	 * we promote our socket to non const
+	 */
+	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
+
 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 	kfree_skb(skb);
 	return -EMSGSIZE;
@@ -317,10 +329,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 	return 0;
 }
 
-static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ip6_forward_finish(struct net *net, struct sock *sk,
+				     struct sk_buff *skb)
 {
 	skb_sender_cpu_clear(skb);
-	return dst_output_sk(sk, skb);
+	return dst_output(net, sk, skb);
 }
 
 static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
@@ -376,6 +389,9 @@ int ip6_forward(struct sk_buff *skb)
 	if (skb->pkt_type != PACKET_HOST)
 		goto drop;
 
+	if (unlikely(skb->sk))
+		goto drop;
+
 	if (skb_warn_if_lro(skb))
 		goto drop;
 
@@ -512,8 +528,8 @@ int ip6_forward(struct sk_buff *skb)
 
 	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
-	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
-		       skb->dev, dst->dev,
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
+		       net, NULL, skb, skb->dev, dst->dev,
 		       ip6_forward_finish);
 
 error:
@@ -540,8 +556,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	skb_copy_secmark(to, from);
 }
 
-int ip6_fragment(struct sock *sk, struct sk_buff *skb,
-		 int (*output)(struct sock *, struct sk_buff *))
+int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+		 int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct sk_buff *frag;
 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -554,7 +570,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
 	__be32 frag_id;
 	int ptr, offset = 0, err = 0;
 	u8 *prevhdr, nexthdr = 0;
-	struct net *net = dev_net(skb_dst(skb)->dev);
 
 	hlen = ip6_find_1stfragopt(skb, &prevhdr);
 	nexthdr = *prevhdr;
@@ -581,11 +596,17 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
 		if (np->frag_size)
 			mtu = np->frag_size;
 	}
+	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
+		goto fail_toobig;
 	mtu -= hlen + sizeof(struct frag_hdr);
 
 	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
 				    &ipv6_hdr(skb)->saddr);
 
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    (err = skb_checksum_help(skb)))
+		goto fail;
+
 	hroom = LL_RESERVED_SPACE(rt->dst.dev);
 	if (skb_has_frag_list(skb)) {
 		int first_len = skb_pagelen(skb);
@@ -674,7 +695,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
 			ip6_copy_metadata(frag, skb);
 		}
 
-		err = output(sk, skb);
+		err = output(net, sk, skb);
 		if (!err)
 			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 				      IPSTATS_MIB_FRAGCREATES);
@@ -714,10 +735,6 @@ slow_path_clean:
 	}
 
 slow_path:
-	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
-	    skb_checksum_help(skb))
-		goto fail;
-
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = hlen;			/* Where to start from */
 
@@ -802,7 +819,7 @@ slow_path:
 		/*
 		 *	Put this fragment into the sending queue.
 		 */
-		err = output(sk, frag);
+		err = output(net, sk, frag);
 		if (err)
 			goto fail;
 
@@ -874,7 +891,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 #ifdef CONFIG_IPV6_SUBTREES
 	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 #endif
-	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
+	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
+	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
 		dst_release(dst);
 		dst = NULL;
 	}
@@ -883,7 +901,7 @@ out:
 	return dst;
 }
 
-static int ip6_dst_lookup_tail(struct net *net, struct sock *sk,
+static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 			       struct dst_entry **dst, struct flowi6 *fl6)
 {
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@ -1014,7 +1032,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
  *	It returns a valid dst pointer on success, or a pointer encoded
  *	error code.
  */
-struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
 				      const struct in6_addr *final_dst)
 {
 	struct dst_entry *dst = NULL;
@@ -1026,7 +1044,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 	if (final_dst)
 		fl6->daddr = *final_dst;
 	if (!fl6->flowi6_oif)
-		fl6->flowi6_oif = dst->dev->ifindex;
+		fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
 
 	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
 }
@@ -1252,6 +1270,7 @@ static int __ip6_append_data(struct sock *sk,
 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
 	struct ipv6_txoptions *opt = v6_cork->opt;
 	int csummode = CHECKSUM_NONE;
+	unsigned int maxnonfragsize, headersize;
 
 	skb = skb_peek_tail(queue);
 	if (!skb) {
@@ -1269,38 +1288,43 @@ static int __ip6_append_data(struct sock *sk,
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
 		     sizeof(struct frag_hdr);
 
-	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
-		unsigned int maxnonfragsize, headersize;
-
-		headersize = sizeof(struct ipv6hdr) +
-			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
-			     (dst_allfrag(&rt->dst) ?
-			      sizeof(struct frag_hdr) : 0) +
-			     rt->rt6i_nfheader_len;
-
-		if (ip6_sk_ignore_df(sk))
-			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
-		else
-			maxnonfragsize = mtu;
-
-		/* dontfrag active */
-		if ((cork->length + length > mtu - headersize) && dontfrag &&
-		    (sk->sk_protocol == IPPROTO_UDP ||
-		     sk->sk_protocol == IPPROTO_RAW)) {
-			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
-					  sizeof(struct ipv6hdr));
-			goto emsgsize;
-		}
-
-		if (cork->length + length > maxnonfragsize - headersize) {
-emsgsize:
-			ipv6_local_error(sk, EMSGSIZE, fl6,
-					 mtu - headersize +
-					 sizeof(struct ipv6hdr));
-			return -EMSGSIZE;
-		}
-	}
+	headersize = sizeof(struct ipv6hdr) +
+		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
+		     (dst_allfrag(&rt->dst) ?
+		      sizeof(struct frag_hdr) : 0) +
+		     rt->rt6i_nfheader_len;
+
+	if (cork->length + length > mtu - headersize && dontfrag &&
+	    (sk->sk_protocol == IPPROTO_UDP ||
+	     sk->sk_protocol == IPPROTO_RAW)) {
+		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
+				  sizeof(struct ipv6hdr));
+		goto emsgsize;
+	}
+
+	if (ip6_sk_ignore_df(sk))
+		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+	else
+		maxnonfragsize = mtu;
+
+	if (cork->length + length > maxnonfragsize - headersize) {
+emsgsize:
+		ipv6_local_error(sk, EMSGSIZE, fl6,
+				 mtu - headersize +
+				 sizeof(struct ipv6hdr));
+		return -EMSGSIZE;
+	}
 
+	/* CHECKSUM_PARTIAL only with no extension headers and when
+	 * we are not going to fragment
+	 */
+	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
+	    headersize == sizeof(struct ipv6hdr) &&
+	    length < mtu - headersize &&
+	    !(flags & MSG_MORE) &&
+	    rt->dst.dev->features & NETIF_F_V6_CSUM)
+		csummode = CHECKSUM_PARTIAL;
+
 	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
 		sock_tx_timestamp(sk, &tx_flags);
 		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
@@ -1308,16 +1332,6 @@ emsgsize:
 			tskey = sk->sk_tskey++;
 	}
 
-	/* If this is the first and only packet and device
-	 * supports checksum offloading, let's use it.
-	 * Use transhdrlen, same as IPv4, because partial
-	 * sums only work when transhdrlen is set.
-	 */
-	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
-	    length + fragheaderlen < mtu &&
-	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
-	    !exthdrlen)
-		csummode = CHECKSUM_PARTIAL;
 	/*
 	 * Let's try using as much space as possible.
 	 * Use MTU if total length of the message fits into the MTU.
@@ -1680,7 +1694,7 @@ int ip6_send_skb(struct sk_buff *skb)
 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 	int err;
 
-	err = ip6_local_out(skb);
+	err = ip6_local_out(net, skb->sk, skb);
 	if (err) {
 		if (err > 0)
 			err = net_xmit_errno(err);
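Note on the const-ness dance in ip6_xmit(): the function now takes a const struct sock * to advertise that it may run without the socket lock (the SYNACK path), and the two places that do write through the pointer, skb_set_owner_w() and ipv6_local_error(), both of which use atomics or their own locking per the added comments, promote it with an explicit cast. A self-contained userspace sketch of the same idiom, names invented:

	#include <stdatomic.h>

	struct demo_sock {
		atomic_int wmem;	/* stands in for sk->sk_wmem_alloc */
	};

	/* takes a writable pointer but only performs atomic updates */
	static void demo_charge(struct demo_sock *sk, int bytes)
	{
		atomic_fetch_add(&sk->wmem, bytes);
	}

	/* the const view documents "no locked state is touched"; the cast
	 * is confined to calls known to be atomic-safe, as in ip6_xmit()
	 */
	static void demo_xmit(const struct demo_sock *sk, int bytes)
	{
		demo_charge((struct demo_sock *)sk, bytes);
	}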
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 0224c032dca5..0a8610b33d79 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -482,7 +482,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 		return -EMSGSIZE;
 	}
 
-	err = dst_output(skb);
+	err = dst_output(t->net, skb->sk, skb);
 	if (net_xmit_eval(err) == 0) {
 		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0e004cc42a22..ad19136086dd 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1985,13 +1985,13 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 }
 #endif
 
-static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
-	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+	IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 			 IPSTATS_MIB_OUTOCTETS, skb->len);
-	return dst_output_sk(sk, skb);
+	return dst_output(net, sk, skb);
 }
 
 /*
@@ -2063,8 +2063,8 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
 
 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
 
-	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
-		       skb->dev, dev,
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
+		       net, NULL, skb, skb->dev, dev,
 		       ip6mr_forward2_finish);
 
 out_free:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 083b2927fc67..5ee56d0a8699 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1645,13 +1645,12 @@ static void mld_sendpack(struct sk_buff *skb)
 	payload_len = skb->len;
 
 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
-		      net->ipv6.igmp_sk, skb, NULL, skb->dev,
-		      dst_output_sk);
+		      net, net->ipv6.igmp_sk, skb, NULL, skb->dev,
+		      dst_output);
 out:
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
-		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
 	} else {
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 	}
@@ -2008,13 +2007,13 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	}
 
 	skb_dst_set(skb, dst);
-	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
-		      NULL, skb->dev, dst_output_sk);
+	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+		      net, sk, skb, NULL, skb->dev,
+		      dst_output);
 out:
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, type);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
-		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
 	} else
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index b9779d441b12..60c79a08e14a 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -118,7 +118,7 @@ static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
118 118
119struct mip6_report_rate_limiter { 119struct mip6_report_rate_limiter {
120 spinlock_t lock; 120 spinlock_t lock;
121 struct timeval stamp; 121 ktime_t stamp;
122 int iif; 122 int iif;
123 struct in6_addr src; 123 struct in6_addr src;
124 struct in6_addr dst; 124 struct in6_addr dst;
@@ -184,20 +184,18 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
184 return 0; 184 return 0;
185} 185}
186 186
187static inline int mip6_report_rl_allow(struct timeval *stamp, 187static inline int mip6_report_rl_allow(ktime_t stamp,
188 const struct in6_addr *dst, 188 const struct in6_addr *dst,
189 const struct in6_addr *src, int iif) 189 const struct in6_addr *src, int iif)
190{ 190{
191 int allow = 0; 191 int allow = 0;
192 192
193 spin_lock_bh(&mip6_report_rl.lock); 193 spin_lock_bh(&mip6_report_rl.lock);
194 if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec || 194 if (!ktime_equal(mip6_report_rl.stamp, stamp) ||
195 mip6_report_rl.stamp.tv_usec != stamp->tv_usec ||
196 mip6_report_rl.iif != iif || 195 mip6_report_rl.iif != iif ||
197 !ipv6_addr_equal(&mip6_report_rl.src, src) || 196 !ipv6_addr_equal(&mip6_report_rl.src, src) ||
198 !ipv6_addr_equal(&mip6_report_rl.dst, dst)) { 197 !ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
199 mip6_report_rl.stamp.tv_sec = stamp->tv_sec; 198 mip6_report_rl.stamp = stamp;
200 mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
201 mip6_report_rl.iif = iif; 199 mip6_report_rl.iif = iif;
202 mip6_report_rl.src = *src; 200 mip6_report_rl.src = *src;
203 mip6_report_rl.dst = *dst; 201 mip6_report_rl.dst = *dst;
@@ -216,7 +214,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
216 struct ipv6_destopt_hao *hao = NULL; 214 struct ipv6_destopt_hao *hao = NULL;
217 struct xfrm_selector sel; 215 struct xfrm_selector sel;
218 int offset; 216 int offset;
219 struct timeval stamp; 217 ktime_t stamp;
220 int err = 0; 218 int err = 0;
221 219
222 if (unlikely(fl6->flowi6_proto == IPPROTO_MH && 220 if (unlikely(fl6->flowi6_proto == IPPROTO_MH &&
@@ -230,9 +228,9 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
230 (skb_network_header(skb) + offset); 228 (skb_network_header(skb) + offset);
231 } 229 }
232 230
233 skb_get_timestamp(skb, &stamp); 231 stamp = skb_get_ktime(skb);
234 232
235 if (!mip6_report_rl_allow(&stamp, &ipv6_hdr(skb)->daddr, 233 if (!mip6_report_rl_allow(stamp, &ipv6_hdr(skb)->daddr,
236 hao ? &hao->addr : &ipv6_hdr(skb)->saddr, 234 hao ? &hao->addr : &ipv6_hdr(skb)->saddr,
237 opt->iif)) 235 opt->iif))
238 goto out; 236 goto out;
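
The mip6 rate limiter now keys on a single ktime_t instead of a struct timeval pair, so the stamp comparison and the update each collapse to one operation. A self-contained sketch of the pattern (names hypothetical):

#include <linux/ktime.h>
#include <linux/spinlock.h>

struct example_rl {
        spinlock_t      lock;
        ktime_t         stamp;
};

/* One ktime_equal() test replaces the old tv_sec/tv_usec checks,
 * and the stamp is refreshed with a plain assignment. */
static bool example_rl_allow(struct example_rl *rl, ktime_t now)
{
        bool allow = false;

        spin_lock_bh(&rl->lock);
        if (!ktime_equal(rl->stamp, now)) {
                rl->stamp = now;
                allow = true;
        }
        spin_unlock_bh(&rl->lock);
        return allow;
}
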
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 64a71354b069..3e0f855e1bea 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -67,6 +67,7 @@
67#include <net/flow.h> 67#include <net/flow.h>
68#include <net/ip6_checksum.h> 68#include <net/ip6_checksum.h>
69#include <net/inet_common.h> 69#include <net/inet_common.h>
70#include <net/l3mdev.h>
70#include <linux/proc_fs.h> 71#include <linux/proc_fs.h>
71 72
72#include <linux/netfilter.h> 73#include <linux/netfilter.h>
@@ -147,6 +148,7 @@ struct neigh_table nd_tbl = {
147 .gc_thresh2 = 512, 148 .gc_thresh2 = 512,
148 .gc_thresh3 = 1024, 149 .gc_thresh3 = 1024,
149}; 150};
151EXPORT_SYMBOL_GPL(nd_tbl);
150 152
151static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) 153static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
152{ 154{
@@ -441,8 +443,11 @@ static void ndisc_send_skb(struct sk_buff *skb,
441 443
442 if (!dst) { 444 if (!dst) {
443 struct flowi6 fl6; 445 struct flowi6 fl6;
446 int oif = l3mdev_fib_oif(skb->dev);
444 447
445 icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); 448 icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
449 if (oif != skb->dev->ifindex)
450 fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
446 dst = icmp6_dst_alloc(skb->dev, &fl6); 451 dst = icmp6_dst_alloc(skb->dev, &fl6);
447 if (IS_ERR(dst)) { 452 if (IS_ERR(dst)) {
448 kfree_skb(skb); 453 kfree_skb(skb);
@@ -463,9 +468,9 @@ static void ndisc_send_skb(struct sk_buff *skb,
463 idev = __in6_dev_get(dst->dev); 468 idev = __in6_dev_get(dst->dev);
464 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 469 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
465 470
466 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, 471 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
467 NULL, dst->dev, 472 net, sk, skb, NULL, dst->dev,
468 dst_output_sk); 473 dst_output);
469 if (!err) { 474 if (!err) {
470 ICMP6MSGOUT_INC_STATS(net, idev, type); 475 ICMP6MSGOUT_INC_STATS(net, idev, type);
471 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 476 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
@@ -474,8 +479,7 @@ static void ndisc_send_skb(struct sk_buff *skb,
474 rcu_read_unlock(); 479 rcu_read_unlock();
475} 480}
476 481
477void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, 482void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
478 const struct in6_addr *daddr,
479 const struct in6_addr *solicited_addr, 483 const struct in6_addr *solicited_addr,
480 bool router, bool solicited, bool override, bool inc_opt) 484 bool router, bool solicited, bool override, bool inc_opt)
481{ 485{
@@ -541,7 +545,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
541 545
542 read_lock_bh(&idev->lock); 546 read_lock_bh(&idev->lock);
543 list_for_each_entry(ifa, &idev->addr_list, if_list) { 547 list_for_each_entry(ifa, &idev->addr_list, if_list) {
544 ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &ifa->addr, 548 ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
545 /*router=*/ !!idev->cnf.forwarding, 549 /*router=*/ !!idev->cnf.forwarding,
546 /*solicited=*/ false, /*override=*/ true, 550 /*solicited=*/ false, /*override=*/ true,
547 /*inc_opt=*/ true); 551 /*inc_opt=*/ true);
@@ -551,8 +555,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
551 in6_dev_put(idev); 555 in6_dev_put(idev);
552} 556}
553 557
554void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, 558void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
555 const struct in6_addr *solicit,
556 const struct in6_addr *daddr, const struct in6_addr *saddr, 559 const struct in6_addr *daddr, const struct in6_addr *saddr,
557 struct sk_buff *oskb) 560 struct sk_buff *oskb)
558{ 561{
@@ -679,12 +682,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
679 "%s: trying to ucast probe in NUD_INVALID: %pI6\n", 682 "%s: trying to ucast probe in NUD_INVALID: %pI6\n",
680 __func__, target); 683 __func__, target);
681 } 684 }
682 ndisc_send_ns(dev, neigh, target, target, saddr, skb); 685 ndisc_send_ns(dev, target, target, saddr, skb);
683 } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { 686 } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
684 neigh_app_ns(neigh); 687 neigh_app_ns(neigh);
685 } else { 688 } else {
686 addrconf_addr_solict_mult(target, &mcaddr); 689 addrconf_addr_solict_mult(target, &mcaddr);
687 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr, skb); 690 ndisc_send_ns(dev, target, &mcaddr, saddr, skb);
688 } 691 }
689} 692}
690 693
@@ -768,7 +771,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
768 771
769 ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); 772 ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
770 if (ifp) { 773 if (ifp) {
771 774have_ifp:
772 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { 775 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
773 if (dad) { 776 if (dad) {
774 /* 777 /*
@@ -794,6 +797,18 @@ static void ndisc_recv_ns(struct sk_buff *skb)
794 } else { 797 } else {
795 struct net *net = dev_net(dev); 798 struct net *net = dev_net(dev);
796 799
800 /* perhaps an address on the master device */
801 if (netif_is_l3_slave(dev)) {
802 struct net_device *mdev;
803
804 mdev = netdev_master_upper_dev_get_rcu(dev);
805 if (mdev) {
806 ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1);
807 if (ifp)
808 goto have_ifp;
809 }
810 }
811
797 idev = in6_dev_get(dev); 812 idev = in6_dev_get(dev);
798 if (!idev) { 813 if (!idev) {
799 /* XXX: count this drop? */ 814 /* XXX: count this drop? */
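
The have_ifp label lets the NS receive path retry the target lookup on the VRF master when the packet arrived on an L3 slave. The same fallback as a standalone sketch (the example_ name is hypothetical; like the caller, this assumes RCU read-side context):

#include <linux/netdevice.h>
#include <net/addrconf.h>

static struct inet6_ifaddr *example_ns_target_lookup(struct net *net,
                                                     const struct in6_addr *target,
                                                     struct net_device *dev)
{
        struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, target, dev, 1);

        /* perhaps an address on the master device */
        if (!ifp && netif_is_l3_slave(dev)) {
                struct net_device *mdev = netdev_master_upper_dev_get_rcu(dev);

                if (mdev)
                        ifp = ipv6_get_ifaddr(net, target, mdev, 1);
        }
        return ifp;
}
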
@@ -828,7 +843,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
828 is_router = idev->cnf.forwarding; 843 is_router = idev->cnf.forwarding;
829 844
830 if (dad) { 845 if (dad) {
831 ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target, 846 ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
832 !!is_router, false, (ifp != NULL), true); 847 !!is_router, false, (ifp != NULL), true);
833 goto out; 848 goto out;
834 } 849 }
@@ -849,8 +864,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
849 NEIGH_UPDATE_F_WEAK_OVERRIDE| 864 NEIGH_UPDATE_F_WEAK_OVERRIDE|
850 NEIGH_UPDATE_F_OVERRIDE); 865 NEIGH_UPDATE_F_OVERRIDE);
851 if (neigh || !dev->header_ops) { 866 if (neigh || !dev->header_ops) {
852 ndisc_send_na(dev, neigh, saddr, &msg->target, 867 ndisc_send_na(dev, saddr, &msg->target, !!is_router,
853 !!is_router,
854 true, (ifp != NULL && inc), inc); 868 true, (ifp != NULL && inc), inc);
855 if (neigh) 869 if (neigh)
856 neigh_release(neigh); 870 neigh_release(neigh);
@@ -1486,6 +1500,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1486 struct flowi6 fl6; 1500 struct flowi6 fl6;
1487 int rd_len; 1501 int rd_len;
1488 u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; 1502 u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
1503 int oif = l3mdev_fib_oif(dev);
1489 bool ret; 1504 bool ret;
1490 1505
1491 if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { 1506 if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
@@ -1502,7 +1517,10 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1502 } 1517 }
1503 1518
1504 icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, 1519 icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
1505 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); 1520 &saddr_buf, &ipv6_hdr(skb)->saddr, oif);
1521
1522 if (oif != skb->dev->ifindex)
1523 fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
1506 1524
1507 dst = ip6_route_output(net, NULL, &fl6); 1525 dst = ip6_route_output(net, NULL, &fl6);
1508 if (dst->error) { 1526 if (dst->error) {
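
Both ndisc senders now resolve the flow oif through l3mdev_fib_oif() and tag the flow when the device is enslaved to an L3 master. A minimal sketch of that selection (hypothetical helper; icmpv6_flow_init() performs the oif assignment internally in the real code):

#include <net/flow.h>
#include <net/l3mdev.h>

static void example_flow_oif(struct flowi6 *fl6, struct net_device *dev)
{
        /* master ifindex when dev is an L3 slave, else dev's own */
        int oif = l3mdev_fib_oif(dev);

        fl6->flowi6_oif = oif;
        if (oif != dev->ifindex)
                fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
}
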
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index b4de08a83e0b..d11c46833d61 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -18,9 +18,8 @@
18#include <net/ip6_checksum.h> 18#include <net/ip6_checksum.h>
19#include <net/netfilter/nf_queue.h> 19#include <net/netfilter/nf_queue.h>
20 20
21int ip6_route_me_harder(struct sk_buff *skb) 21int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
22{ 22{
23 struct net *net = dev_net(skb_dst(skb)->dev);
24 const struct ipv6hdr *iph = ipv6_hdr(skb); 23 const struct ipv6hdr *iph = ipv6_hdr(skb);
25 unsigned int hh_len; 24 unsigned int hh_len;
26 struct dst_entry *dst; 25 struct dst_entry *dst;
@@ -93,7 +92,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb,
93 } 92 }
94} 93}
95 94
96static int nf_ip6_reroute(struct sk_buff *skb, 95static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
97 const struct nf_queue_entry *entry) 96 const struct nf_queue_entry *entry)
98{ 97{
99 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); 98 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -103,7 +102,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
103 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || 102 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
104 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || 103 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
105 skb->mark != rt_info->mark) 104 skb->mark != rt_info->mark)
106 return ip6_route_me_harder(skb); 105 return ip6_route_me_harder(net, skb);
107 } 106 }
108 return 0; 107 return 0;
109} 108}
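
ip6_route_me_harder() now takes the namespace from its caller, so the old dev_net(skb_dst(skb)->dev) derivation inside the helper goes away. Sketch of a caller in the converted style (hypothetical; the mangle and NAT paths pass state->net):

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

static unsigned int example_reroute(struct net *net, struct sk_buff *skb)
{
        int err = ip6_route_me_harder(net, skb);

        return err < 0 ? NF_DROP : NF_ACCEPT;
}
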
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 96833e4b3193..f6a024e141e5 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -58,6 +58,7 @@ endif # NF_TABLES
58 58
59config NF_DUP_IPV6 59config NF_DUP_IPV6
60 tristate "Netfilter IPv6 packet duplication to alternate destination" 60 tristate "Netfilter IPv6 packet duplication to alternate destination"
61 depends on !NF_CONNTRACK || NF_CONNTRACK
61 help 62 help
62 This option enables the nf_dup_ipv6 core, which duplicates an IPv6 63 This option enables the nf_dup_ipv6 core, which duplicates an IPv6
63 packet to be rerouted to another destination. 64 packet to be rerouted to another destination.
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 0771991ed812..99425cf2819b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -117,7 +117,7 @@ ip6_packet_match(const struct sk_buff *skb,
117 if (FWINV(ret != 0, IP6T_INV_VIA_IN)) { 117 if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
118 dprintf("VIA in mismatch (%s vs %s).%s\n", 118 dprintf("VIA in mismatch (%s vs %s).%s\n",
119 indev, ip6info->iniface, 119 indev, ip6info->iniface,
120 ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":""); 120 ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : "");
121 return false; 121 return false;
122 } 122 }
123 123
@@ -126,14 +126,14 @@ ip6_packet_match(const struct sk_buff *skb,
126 if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) { 126 if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
127 dprintf("VIA out mismatch (%s vs %s).%s\n", 127 dprintf("VIA out mismatch (%s vs %s).%s\n",
128 outdev, ip6info->outiface, 128 outdev, ip6info->outiface,
129 ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":""); 129 ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : "");
130 return false; 130 return false;
131 } 131 }
132 132
133/* ... might want to do something with class and flowlabel here ... */ 133/* ... might want to do something with class and flowlabel here ... */
134 134
135 /* look for the desired protocol header */ 135 /* look for the desired protocol header */
136 if((ip6info->flags & IP6T_F_PROTO)) { 136 if (ip6info->flags & IP6T_F_PROTO) {
137 int protohdr; 137 int protohdr;
138 unsigned short _frag_off; 138 unsigned short _frag_off;
139 139
@@ -151,9 +151,9 @@ ip6_packet_match(const struct sk_buff *skb,
151 ip6info->proto); 151 ip6info->proto);
152 152
153 if (ip6info->proto == protohdr) { 153 if (ip6info->proto == protohdr) {
154 if(ip6info->invflags & IP6T_INV_PROTO) { 154 if (ip6info->invflags & IP6T_INV_PROTO)
155 return false; 155 return false;
156 } 156
157 return true; 157 return true;
158 } 158 }
159 159
@@ -275,7 +275,8 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
275 return 0; 275 return 0;
276} 276}
277 277
278static void trace_packet(const struct sk_buff *skb, 278static void trace_packet(struct net *net,
279 const struct sk_buff *skb,
279 unsigned int hook, 280 unsigned int hook,
280 const struct net_device *in, 281 const struct net_device *in,
281 const struct net_device *out, 282 const struct net_device *out,
@@ -287,7 +288,6 @@ static void trace_packet(const struct sk_buff *skb,
287 const char *hookname, *chainname, *comment; 288 const char *hookname, *chainname, *comment;
288 const struct ip6t_entry *iter; 289 const struct ip6t_entry *iter;
289 unsigned int rulenum = 0; 290 unsigned int rulenum = 0;
290 struct net *net = dev_net(in ? in : out);
291 291
292 root = get_entry(private->entries, private->hook_entry[hook]); 292 root = get_entry(private->entries, private->hook_entry[hook]);
293 293
@@ -314,10 +314,10 @@ ip6t_next_entry(const struct ip6t_entry *entry)
314/* Returns one of the generic firewall policies, like NF_ACCEPT. */ 314/* Returns one of the generic firewall policies, like NF_ACCEPT. */
315unsigned int 315unsigned int
316ip6t_do_table(struct sk_buff *skb, 316ip6t_do_table(struct sk_buff *skb,
317 unsigned int hook,
318 const struct nf_hook_state *state, 317 const struct nf_hook_state *state,
319 struct xt_table *table) 318 struct xt_table *table)
320{ 319{
320 unsigned int hook = state->hook;
321 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 321 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
322 /* Initializing verdict to NF_DROP keeps gcc happy. */ 322 /* Initializing verdict to NF_DROP keeps gcc happy. */
323 unsigned int verdict = NF_DROP; 323 unsigned int verdict = NF_DROP;
@@ -340,6 +340,7 @@ ip6t_do_table(struct sk_buff *skb,
340 * rule is also a fragment-specific rule, non-fragments won't 340 * rule is also a fragment-specific rule, non-fragments won't
341 * match it. */ 341 * match it. */
342 acpar.hotdrop = false; 342 acpar.hotdrop = false;
343 acpar.net = state->net;
343 acpar.in = state->in; 344 acpar.in = state->in;
344 acpar.out = state->out; 345 acpar.out = state->out;
345 acpar.family = NFPROTO_IPV6; 346 acpar.family = NFPROTO_IPV6;
@@ -401,8 +402,8 @@ ip6t_do_table(struct sk_buff *skb,
401#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) 402#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
402 /* The packet is traced: log it */ 403 /* The packet is traced: log it */
403 if (unlikely(skb->nf_trace)) 404 if (unlikely(skb->nf_trace))
404 trace_packet(skb, hook, state->in, state->out, 405 trace_packet(state->net, skb, hook, state->in,
405 table->name, private, e); 406 state->out, table->name, private, e);
406#endif 407#endif
407 /* Standard target? */ 408 /* Standard target? */
408 if (!t->u.kernel.target->target) { 409 if (!t->u.kernel.target->target) {
@@ -442,8 +443,8 @@ ip6t_do_table(struct sk_buff *skb,
442 break; 443 break;
443 } while (!acpar.hotdrop); 444 } while (!acpar.hotdrop);
444 445
445 xt_write_recseq_end(addend); 446 xt_write_recseq_end(addend);
446 local_bh_enable(); 447 local_bh_enable();
447 448
448#ifdef DEBUG_ALLOW_ALL 449#ifdef DEBUG_ALLOW_ALL
449 return NF_ACCEPT; 450 return NF_ACCEPT;
@@ -560,7 +561,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
560 pos = newpos; 561 pos = newpos;
561 } 562 }
562 } 563 }
563 next: 564next:
564 duprintf("Finished chain %u\n", hook); 565 duprintf("Finished chain %u\n", hook);
565 } 566 }
566 return 1; 567 return 1;
@@ -815,7 +816,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
815 newinfo) */ 816 newinfo) */
816static int 817static int
817translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, 818translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
818 const struct ip6t_replace *repl) 819 const struct ip6t_replace *repl)
819{ 820{
820 struct ip6t_entry *iter; 821 struct ip6t_entry *iter;
821 unsigned int i; 822 unsigned int i;
@@ -1089,7 +1090,7 @@ static int compat_table_info(const struct xt_table_info *info,
1089#endif 1090#endif
1090 1091
1091static int get_info(struct net *net, void __user *user, 1092static int get_info(struct net *net, void __user *user,
1092 const int *len, int compat) 1093 const int *len, int compat)
1093{ 1094{
1094 char name[XT_TABLE_MAXNAMELEN]; 1095 char name[XT_TABLE_MAXNAMELEN];
1095 struct xt_table *t; 1096 struct xt_table *t;
@@ -1151,7 +1152,7 @@ static int get_info(struct net *net, void __user *user,
1151 1152
1152static int 1153static int
1153get_entries(struct net *net, struct ip6t_get_entries __user *uptr, 1154get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
1154 const int *len) 1155 const int *len)
1155{ 1156{
1156 int ret; 1157 int ret;
1157 struct ip6t_get_entries get; 1158 struct ip6t_get_entries get;
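
With the hook number and namespace both carried in nf_hook_state, an ip6_tables table hook shrinks to a one-liner; ip6t_do_table() reads state->hook itself and sets acpar.net from state->net. Sketch in the converted style (the filter table is used here purely as an example):

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6/ip6_tables.h>

static unsigned int
example_table_hook(void *priv, struct sk_buff *skb,
                   const struct nf_hook_state *state)
{
        /* no ops pointer, no dev_net(state->in ?: state->out) dance */
        return ip6t_do_table(skb, state, state->net->ipv6.ip6table_filter);
}
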
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 0ed841a3fa33..db29bbf41b59 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -39,7 +39,7 @@ static unsigned int
39reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) 39reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
40{ 40{
41 const struct ip6t_reject_info *reject = par->targinfo; 41 const struct ip6t_reject_info *reject = par->targinfo;
42 struct net *net = dev_net((par->in != NULL) ? par->in : par->out); 42 struct net *net = par->net;
43 43
44 switch (reject->with) { 44 switch (reject->with) {
45 case IP6T_ICMP6_NO_ROUTE: 45 case IP6T_ICMP6_NO_ROUTE:
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 1e4bf99ed16e..3deed5860a42 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -76,7 +76,7 @@ synproxy_send_tcp(const struct synproxy_net *snet,
76 nf_conntrack_get(nfct); 76 nf_conntrack_get(nfct);
77 } 77 }
78 78
79 ip6_local_out(nskb); 79 ip6_local_out(net, nskb->sk, nskb);
80 return; 80 return;
81 81
82free_nskb: 82free_nskb:
@@ -244,7 +244,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
244 synproxy_build_options(nth, opts); 244 synproxy_build_options(nth, opts);
245 245
246 synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, 246 synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
247 niph, nth, tcp_hdr_size); 247 niph, nth, tcp_hdr_size);
248} 248}
249 249
250static bool 250static bool
@@ -275,7 +275,7 @@ static unsigned int
275synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) 275synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
276{ 276{
277 const struct xt_synproxy_info *info = par->targinfo; 277 const struct xt_synproxy_info *info = par->targinfo;
278 struct synproxy_net *snet = synproxy_pernet(dev_net(par->in)); 278 struct synproxy_net *snet = synproxy_pernet(par->net);
279 struct synproxy_options opts = {}; 279 struct synproxy_options opts = {};
280 struct tcphdr *th, _th; 280 struct tcphdr *th, _th;
281 281
@@ -316,11 +316,11 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
316 return XT_CONTINUE; 316 return XT_CONTINUE;
317} 317}
318 318
319static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, 319static unsigned int ipv6_synproxy_hook(void *priv,
320 struct sk_buff *skb, 320 struct sk_buff *skb,
321 const struct nf_hook_state *nhs) 321 const struct nf_hook_state *nhs)
322{ 322{
323 struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out)); 323 struct synproxy_net *snet = synproxy_pernet(nhs->net);
324 enum ip_conntrack_info ctinfo; 324 enum ip_conntrack_info ctinfo;
325 struct nf_conn *ct; 325 struct nf_conn *ct;
326 struct nf_conn_synproxy *synproxy; 326 struct nf_conn_synproxy *synproxy;
@@ -458,14 +458,12 @@ static struct xt_target synproxy_tg6_reg __read_mostly = {
458static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = { 458static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
459 { 459 {
460 .hook = ipv6_synproxy_hook, 460 .hook = ipv6_synproxy_hook,
461 .owner = THIS_MODULE,
462 .pf = NFPROTO_IPV6, 461 .pf = NFPROTO_IPV6,
463 .hooknum = NF_INET_LOCAL_IN, 462 .hooknum = NF_INET_LOCAL_IN,
464 .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, 463 .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
465 }, 464 },
466 { 465 {
467 .hook = ipv6_synproxy_hook, 466 .hook = ipv6_synproxy_hook,
468 .owner = THIS_MODULE,
469 .pf = NFPROTO_IPV6, 467 .pf = NFPROTO_IPV6,
470 .hooknum = NF_INET_POST_ROUTING, 468 .hooknum = NF_INET_POST_ROUTING,
471 .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, 469 .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
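
The synproxy hooks illustrate two series-wide changes to nf_hook_ops: the hook function receives an opaque priv pointer instead of the ops, and the .owner field is dropped, the core no longer needing a per-hook module reference here. A registration sketch (names hypothetical):

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

static unsigned int example_hook(void *priv, struct sk_buff *skb,
                                 const struct nf_hook_state *state)
{
        return NF_ACCEPT;
}

static struct nf_hook_ops example_ops[] __read_mostly = {
        {
                .hook           = example_hook,
                .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP6_PRI_LAST,
        },
};
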
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 790e0c6b19e1..1ee1b25df096 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -26,7 +26,7 @@ static bool rpfilter_addr_unicast(const struct in6_addr *addr)
26 return addr_type & IPV6_ADDR_UNICAST; 26 return addr_type & IPV6_ADDR_UNICAST;
27} 27}
28 28
29static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, 29static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
30 const struct net_device *dev, u8 flags) 30 const struct net_device *dev, u8 flags)
31{ 31{
32 struct rt6_info *rt; 32 struct rt6_info *rt;
@@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
53 lookup_flags |= RT6_LOOKUP_F_IFACE; 53 lookup_flags |= RT6_LOOKUP_F_IFACE;
54 } 54 }
55 55
56 rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags); 56 rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
57 if (rt->dst.error) 57 if (rt->dst.error)
58 goto out; 58 goto out;
59 59
@@ -93,7 +93,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
93 if (unlikely(saddrtype == IPV6_ADDR_ANY)) 93 if (unlikely(saddrtype == IPV6_ADDR_ANY))
94 return true ^ invert; /* not routable: forward path will drop it */ 94 return true ^ invert; /* not routable: forward path will drop it */
95 95
96 return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert; 96 return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert;
97} 97}
98 98
99static int rpfilter_check(const struct xt_mtchk_param *par) 99static int rpfilter_check(const struct xt_mtchk_param *par)
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 5c33d8abc077..8b277b983ca5 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -32,12 +32,10 @@ static const struct xt_table packet_filter = {
32 32
33/* The work comes in here from netfilter.c. */ 33/* The work comes in here from netfilter.c. */
34static unsigned int 34static unsigned int
35ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, 35ip6table_filter_hook(void *priv, struct sk_buff *skb,
36 const struct nf_hook_state *state) 36 const struct nf_hook_state *state)
37{ 37{
38 const struct net *net = dev_net(state->in ? state->in : state->out); 38 return ip6t_do_table(skb, state, state->net->ipv6.ip6table_filter);
39
40 return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_filter);
41} 39}
42 40
43static struct nf_hook_ops *filter_ops __read_mostly; 41static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index b551f5b79fe2..abe278b07932 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -57,8 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
57 /* flowlabel and prio (includes version, which shouldn't change either */ 57 /* flowlabel and prio (includes version, which shouldn't change either */
58 flowlabel = *((u_int32_t *)ipv6_hdr(skb)); 58 flowlabel = *((u_int32_t *)ipv6_hdr(skb));
59 59
60 ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, state, 60 ret = ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
61 dev_net(state->out)->ipv6.ip6table_mangle);
62 61
63 if (ret != NF_DROP && ret != NF_STOLEN && 62 if (ret != NF_DROP && ret != NF_STOLEN &&
64 (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || 63 (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
@@ -66,7 +65,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
66 skb->mark != mark || 65 skb->mark != mark ||
67 ipv6_hdr(skb)->hop_limit != hop_limit || 66 ipv6_hdr(skb)->hop_limit != hop_limit ||
68 flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { 67 flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
69 err = ip6_route_me_harder(skb); 68 err = ip6_route_me_harder(state->net, skb);
70 if (err < 0) 69 if (err < 0)
71 ret = NF_DROP_ERR(err); 70 ret = NF_DROP_ERR(err);
72 } 71 }
@@ -76,17 +75,16 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
76 75
77/* The work comes in here from netfilter.c. */ 76/* The work comes in here from netfilter.c. */
78static unsigned int 77static unsigned int
79ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, 78ip6table_mangle_hook(void *priv, struct sk_buff *skb,
80 const struct nf_hook_state *state) 79 const struct nf_hook_state *state)
81{ 80{
82 if (ops->hooknum == NF_INET_LOCAL_OUT) 81 if (state->hook == NF_INET_LOCAL_OUT)
83 return ip6t_mangle_out(skb, state); 82 return ip6t_mangle_out(skb, state);
84 if (ops->hooknum == NF_INET_POST_ROUTING) 83 if (state->hook == NF_INET_POST_ROUTING)
85 return ip6t_do_table(skb, ops->hooknum, state, 84 return ip6t_do_table(skb, state,
86 dev_net(state->out)->ipv6.ip6table_mangle); 85 state->net->ipv6.ip6table_mangle);
87 /* INPUT/FORWARD */ 86 /* INPUT/FORWARD */
88 return ip6t_do_table(skb, ops->hooknum, state, 87 return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
89 dev_net(state->in)->ipv6.ip6table_mangle);
90} 88}
91 89
92static struct nf_hook_ops *mangle_ops __read_mostly; 90static struct nf_hook_ops *mangle_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index c3a7f7af0ed4..de2a10a565f5 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -30,49 +30,46 @@ static const struct xt_table nf_nat_ipv6_table = {
30 .af = NFPROTO_IPV6, 30 .af = NFPROTO_IPV6,
31}; 31};
32 32
33static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, 33static unsigned int ip6table_nat_do_chain(void *priv,
34 struct sk_buff *skb, 34 struct sk_buff *skb,
35 const struct nf_hook_state *state, 35 const struct nf_hook_state *state,
36 struct nf_conn *ct) 36 struct nf_conn *ct)
37{ 37{
38 struct net *net = nf_ct_net(ct); 38 return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat);
39
40 return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_nat);
41} 39}
42 40
43static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops, 41static unsigned int ip6table_nat_fn(void *priv,
44 struct sk_buff *skb, 42 struct sk_buff *skb,
45 const struct nf_hook_state *state) 43 const struct nf_hook_state *state)
46{ 44{
47 return nf_nat_ipv6_fn(ops, skb, state, ip6table_nat_do_chain); 45 return nf_nat_ipv6_fn(priv, skb, state, ip6table_nat_do_chain);
48} 46}
49 47
50static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops, 48static unsigned int ip6table_nat_in(void *priv,
51 struct sk_buff *skb, 49 struct sk_buff *skb,
52 const struct nf_hook_state *state) 50 const struct nf_hook_state *state)
53{ 51{
54 return nf_nat_ipv6_in(ops, skb, state, ip6table_nat_do_chain); 52 return nf_nat_ipv6_in(priv, skb, state, ip6table_nat_do_chain);
55} 53}
56 54
57static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops, 55static unsigned int ip6table_nat_out(void *priv,
58 struct sk_buff *skb, 56 struct sk_buff *skb,
59 const struct nf_hook_state *state) 57 const struct nf_hook_state *state)
60{ 58{
61 return nf_nat_ipv6_out(ops, skb, state, ip6table_nat_do_chain); 59 return nf_nat_ipv6_out(priv, skb, state, ip6table_nat_do_chain);
62} 60}
63 61
64static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops, 62static unsigned int ip6table_nat_local_fn(void *priv,
65 struct sk_buff *skb, 63 struct sk_buff *skb,
66 const struct nf_hook_state *state) 64 const struct nf_hook_state *state)
67{ 65{
68 return nf_nat_ipv6_local_fn(ops, skb, state, ip6table_nat_do_chain); 66 return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain);
69} 67}
70 68
71static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { 69static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
72 /* Before packet filtering, change destination */ 70 /* Before packet filtering, change destination */
73 { 71 {
74 .hook = ip6table_nat_in, 72 .hook = ip6table_nat_in,
75 .owner = THIS_MODULE,
76 .pf = NFPROTO_IPV6, 73 .pf = NFPROTO_IPV6,
77 .hooknum = NF_INET_PRE_ROUTING, 74 .hooknum = NF_INET_PRE_ROUTING,
78 .priority = NF_IP6_PRI_NAT_DST, 75 .priority = NF_IP6_PRI_NAT_DST,
@@ -80,7 +77,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
80 /* After packet filtering, change source */ 77 /* After packet filtering, change source */
81 { 78 {
82 .hook = ip6table_nat_out, 79 .hook = ip6table_nat_out,
83 .owner = THIS_MODULE,
84 .pf = NFPROTO_IPV6, 80 .pf = NFPROTO_IPV6,
85 .hooknum = NF_INET_POST_ROUTING, 81 .hooknum = NF_INET_POST_ROUTING,
86 .priority = NF_IP6_PRI_NAT_SRC, 82 .priority = NF_IP6_PRI_NAT_SRC,
@@ -88,7 +84,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
88 /* Before packet filtering, change destination */ 84 /* Before packet filtering, change destination */
89 { 85 {
90 .hook = ip6table_nat_local_fn, 86 .hook = ip6table_nat_local_fn,
91 .owner = THIS_MODULE,
92 .pf = NFPROTO_IPV6, 87 .pf = NFPROTO_IPV6,
93 .hooknum = NF_INET_LOCAL_OUT, 88 .hooknum = NF_INET_LOCAL_OUT,
94 .priority = NF_IP6_PRI_NAT_DST, 89 .priority = NF_IP6_PRI_NAT_DST,
@@ -96,7 +91,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
96 /* After packet filtering, change source */ 91 /* After packet filtering, change source */
97 { 92 {
98 .hook = ip6table_nat_fn, 93 .hook = ip6table_nat_fn,
99 .owner = THIS_MODULE,
100 .pf = NFPROTO_IPV6, 94 .pf = NFPROTO_IPV6,
101 .hooknum = NF_INET_LOCAL_IN, 95 .hooknum = NF_INET_LOCAL_IN,
102 .priority = NF_IP6_PRI_NAT_SRC, 96 .priority = NF_IP6_PRI_NAT_SRC,
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 0b33caad2b69..9021963565c3 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -19,12 +19,10 @@ static const struct xt_table packet_raw = {
19 19
20/* The work comes in here from netfilter.c. */ 20/* The work comes in here from netfilter.c. */
21static unsigned int 21static unsigned int
22ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, 22ip6table_raw_hook(void *priv, struct sk_buff *skb,
23 const struct nf_hook_state *state) 23 const struct nf_hook_state *state)
24{ 24{
25 const struct net *net = dev_net(state->in ? state->in : state->out); 25 return ip6t_do_table(skb, state, state->net->ipv6.ip6table_raw);
26
27 return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_raw);
28} 26}
29 27
30static struct nf_hook_ops *rawtable_ops __read_mostly; 28static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index fcef83c25f7b..0d856fedfeb0 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -36,13 +36,10 @@ static const struct xt_table security_table = {
36}; 36};
37 37
38static unsigned int 38static unsigned int
39ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, 39ip6table_security_hook(void *priv, struct sk_buff *skb,
40 const struct nf_hook_state *state) 40 const struct nf_hook_state *state)
41{ 41{
42 const struct net *net = dev_net(state->in ? state->in : state->out); 42 return ip6t_do_table(skb, state, state->net->ipv6.ip6table_security);
43
44 return ip6t_do_table(skb, ops->hooknum, state,
45 net->ipv6.ip6table_security);
46} 43}
47 44
48static struct nf_hook_ops *sectbl_ops __read_mostly; 45static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 7302900c321a..1aa5848764a7 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
95 return NF_ACCEPT; 95 return NF_ACCEPT;
96} 96}
97 97
98static unsigned int ipv6_helper(const struct nf_hook_ops *ops, 98static unsigned int ipv6_helper(void *priv,
99 struct sk_buff *skb, 99 struct sk_buff *skb,
100 const struct nf_hook_state *state) 100 const struct nf_hook_state *state)
101{ 101{
@@ -131,7 +131,7 @@ static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
131 return helper->help(skb, protoff, ct, ctinfo); 131 return helper->help(skb, protoff, ct, ctinfo);
132} 132}
133 133
134static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, 134static unsigned int ipv6_confirm(void *priv,
135 struct sk_buff *skb, 135 struct sk_buff *skb,
136 const struct nf_hook_state *state) 136 const struct nf_hook_state *state)
137{ 137{
@@ -165,14 +165,14 @@ out:
165 return nf_conntrack_confirm(skb); 165 return nf_conntrack_confirm(skb);
166} 166}
167 167
168static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, 168static unsigned int ipv6_conntrack_in(void *priv,
169 struct sk_buff *skb, 169 struct sk_buff *skb,
170 const struct nf_hook_state *state) 170 const struct nf_hook_state *state)
171{ 171{
172 return nf_conntrack_in(dev_net(state->in), PF_INET6, ops->hooknum, skb); 172 return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
173} 173}
174 174
175static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, 175static unsigned int ipv6_conntrack_local(void *priv,
176 struct sk_buff *skb, 176 struct sk_buff *skb,
177 const struct nf_hook_state *state) 177 const struct nf_hook_state *state)
178{ 178{
@@ -181,48 +181,42 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
181 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); 181 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
182 return NF_ACCEPT; 182 return NF_ACCEPT;
183 } 183 }
184 return nf_conntrack_in(dev_net(state->out), PF_INET6, ops->hooknum, skb); 184 return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
185} 185}
186 186
187static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { 187static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
188 { 188 {
189 .hook = ipv6_conntrack_in, 189 .hook = ipv6_conntrack_in,
190 .owner = THIS_MODULE,
191 .pf = NFPROTO_IPV6, 190 .pf = NFPROTO_IPV6,
192 .hooknum = NF_INET_PRE_ROUTING, 191 .hooknum = NF_INET_PRE_ROUTING,
193 .priority = NF_IP6_PRI_CONNTRACK, 192 .priority = NF_IP6_PRI_CONNTRACK,
194 }, 193 },
195 { 194 {
196 .hook = ipv6_conntrack_local, 195 .hook = ipv6_conntrack_local,
197 .owner = THIS_MODULE,
198 .pf = NFPROTO_IPV6, 196 .pf = NFPROTO_IPV6,
199 .hooknum = NF_INET_LOCAL_OUT, 197 .hooknum = NF_INET_LOCAL_OUT,
200 .priority = NF_IP6_PRI_CONNTRACK, 198 .priority = NF_IP6_PRI_CONNTRACK,
201 }, 199 },
202 { 200 {
203 .hook = ipv6_helper, 201 .hook = ipv6_helper,
204 .owner = THIS_MODULE,
205 .pf = NFPROTO_IPV6, 202 .pf = NFPROTO_IPV6,
206 .hooknum = NF_INET_POST_ROUTING, 203 .hooknum = NF_INET_POST_ROUTING,
207 .priority = NF_IP6_PRI_CONNTRACK_HELPER, 204 .priority = NF_IP6_PRI_CONNTRACK_HELPER,
208 }, 205 },
209 { 206 {
210 .hook = ipv6_confirm, 207 .hook = ipv6_confirm,
211 .owner = THIS_MODULE,
212 .pf = NFPROTO_IPV6, 208 .pf = NFPROTO_IPV6,
213 .hooknum = NF_INET_POST_ROUTING, 209 .hooknum = NF_INET_POST_ROUTING,
214 .priority = NF_IP6_PRI_LAST, 210 .priority = NF_IP6_PRI_LAST,
215 }, 211 },
216 { 212 {
217 .hook = ipv6_helper, 213 .hook = ipv6_helper,
218 .owner = THIS_MODULE,
219 .pf = NFPROTO_IPV6, 214 .pf = NFPROTO_IPV6,
220 .hooknum = NF_INET_LOCAL_IN, 215 .hooknum = NF_INET_LOCAL_IN,
221 .priority = NF_IP6_PRI_CONNTRACK_HELPER, 216 .priority = NF_IP6_PRI_CONNTRACK_HELPER,
222 }, 217 },
223 { 218 {
224 .hook = ipv6_confirm, 219 .hook = ipv6_confirm,
225 .owner = THIS_MODULE,
226 .pf = NFPROTO_IPV6, 220 .pf = NFPROTO_IPV6,
227 .hooknum = NF_INET_LOCAL_IN, 221 .hooknum = NF_INET_LOCAL_IN,
228 .priority = NF_IP6_PRI_LAST-1, 222 .priority = NF_IP6_PRI_LAST-1,
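
The conntrack entry hooks now read both namespace and hook number from the state, which also lets the LOCAL_OUT variant use state->net instead of dev_net(state->out). Sketch of the converted entry point:

#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack_core.h>

static unsigned int example_conntrack_in(void *priv, struct sk_buff *skb,
                                         const struct nf_hook_state *state)
{
        return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
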
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 0e6fae103d33..660bc10c7a9c 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -36,6 +36,7 @@ static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
36 36
37static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, 37static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
38 unsigned int dataoff, 38 unsigned int dataoff,
39 struct net *net,
39 struct nf_conntrack_tuple *tuple) 40 struct nf_conntrack_tuple *tuple)
40{ 41{
41 const struct icmp6hdr *hp; 42 const struct icmp6hdr *hp;
@@ -56,12 +57,12 @@ static const u_int8_t invmap[] = {
56 [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, 57 [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
57 [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, 58 [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
58 [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1, 59 [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1,
59 [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY +1 60 [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1
60}; 61};
61 62
62static const u_int8_t noct_valid_new[] = { 63static const u_int8_t noct_valid_new[] = {
63 [ICMPV6_MGM_QUERY - 130] = 1, 64 [ICMPV6_MGM_QUERY - 130] = 1,
64 [ICMPV6_MGM_REPORT -130] = 1, 65 [ICMPV6_MGM_REPORT - 130] = 1,
65 [ICMPV6_MGM_REDUCTION - 130] = 1, 66 [ICMPV6_MGM_REDUCTION - 130] = 1,
66 [NDISC_ROUTER_SOLICITATION - 130] = 1, 67 [NDISC_ROUTER_SOLICITATION - 130] = 1,
67 [NDISC_ROUTER_ADVERTISEMENT - 130] = 1, 68 [NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
@@ -159,7 +160,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
159 skb_network_offset(skb) 160 skb_network_offset(skb)
160 + sizeof(struct ipv6hdr) 161 + sizeof(struct ipv6hdr)
161 + sizeof(struct icmp6hdr), 162 + sizeof(struct icmp6hdr),
162 PF_INET6, &origtuple)) { 163 PF_INET6, net, &origtuple)) {
163 pr_debug("icmpv6_error: Can't get tuple\n"); 164 pr_debug("icmpv6_error: Can't get tuple\n");
164 return -NF_ACCEPT; 165 return -NF_ACCEPT;
165 } 166 }
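
->pkt_to_tuple() grows a struct net argument so protocol helpers can reach per-namespace state without inferring it from the skb; icmpv6_error_message() simply passes its net through. A sketch of the updated callback shape (illustrative only; a real helper parses the headers at dataoff):

#include <linux/skbuff.h>
#include <net/netfilter/nf_conntrack_tuple.h>

static bool example_pkt_to_tuple(const struct sk_buff *skb,
                                 unsigned int dataoff, struct net *net,
                                 struct nf_conntrack_tuple *tuple)
{
        /* fill tuple from the headers at dataoff; net is available
         * for per-netns sysctls if the protocol needs them */
        return true;
}
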
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 701cd2bae0a9..d5efeb87350e 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -59,7 +59,7 @@ struct nf_ct_frag6_skb_cb
59 struct sk_buff *orig; 59 struct sk_buff *orig;
60}; 60};
61 61
62#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) 62#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
63 63
64static struct inet_frags nf_frags; 64static struct inet_frags nf_frags;
65 65
@@ -445,7 +445,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
445 skb_reset_transport_header(head); 445 skb_reset_transport_header(head);
446 skb_push(head, head->data - skb_network_header(head)); 446 skb_push(head, head->data - skb_network_header(head));
447 447
448 for (fp=head->next; fp; fp = fp->next) { 448 for (fp = head->next; fp; fp = fp->next) {
449 head->data_len += fp->len; 449 head->data_len += fp->len;
450 head->len += fp->len; 450 head->len += fp->len;
451 if (head->ip_summed != fp->ip_summed) 451 if (head->ip_summed != fp->ip_summed)
@@ -563,12 +563,10 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
563 return 0; 563 return 0;
564} 564}
565 565
566struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) 566struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
567{ 567{
568 struct sk_buff *clone; 568 struct sk_buff *clone;
569 struct net_device *dev = skb->dev; 569 struct net_device *dev = skb->dev;
570 struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev)
571 : dev_net(skb->dev);
572 struct frag_hdr *fhdr; 570 struct frag_hdr *fhdr;
573 struct frag_queue *fq; 571 struct frag_queue *fq;
574 struct ipv6hdr *hdr; 572 struct ipv6hdr *hdr;
@@ -646,15 +644,22 @@ void nf_ct_frag6_consume_orig(struct sk_buff *skb)
646 s = s2; 644 s = s2;
647 } 645 }
648} 646}
647EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig);
649 648
650static int nf_ct_net_init(struct net *net) 649static int nf_ct_net_init(struct net *net)
651{ 650{
651 int res;
652
652 net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; 653 net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
653 net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; 654 net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
654 net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; 655 net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
655 inet_frags_init_net(&net->nf_frag.frags); 656 res = inet_frags_init_net(&net->nf_frag.frags);
656 657 if (res)
657 return nf_ct_frag6_sysctl_register(net); 658 return res;
659 res = nf_ct_frag6_sysctl_register(net);
660 if (res)
661 inet_frags_uninit_net(&net->nf_frag.frags);
662 return res;
658} 663}
659 664
660static void nf_ct_net_exit(struct net *net) 665static void nf_ct_net_exit(struct net *net)
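
inet_frags_init_net() can now fail, so the pernet init must check its result and unwind with inet_frags_uninit_net() if the later sysctl registration fails. The same ordering as a condensed sketch (mirrors the code above; example_sysctl_register stands in for the file-local helper):

#include <net/net_namespace.h>
#include <net/inet_frag.h>

static int example_sysctl_register(struct net *net);

static int example_net_init(struct net *net)
{
        int res;

        res = inet_frags_init_net(&net->nf_frag.frags);
        if (res)
                return res;
        res = example_sysctl_register(net);
        if (res)
                inet_frags_uninit_net(&net->nf_frag.frags);
        return res;
}
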
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 6d9c0b3d5b8c..4fdbed5ebfb6 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -51,7 +51,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
51 return IP6_DEFRAG_CONNTRACK_OUT + zone_id; 51 return IP6_DEFRAG_CONNTRACK_OUT + zone_id;
52} 52}
53 53
54static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, 54static unsigned int ipv6_defrag(void *priv,
55 struct sk_buff *skb, 55 struct sk_buff *skb,
56 const struct nf_hook_state *state) 56 const struct nf_hook_state *state)
57{ 57{
@@ -63,7 +63,8 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
63 return NF_ACCEPT; 63 return NF_ACCEPT;
64#endif 64#endif
65 65
66 reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb)); 66 reasm = nf_ct_frag6_gather(state->net, skb,
67 nf_ct6_defrag_user(state->hook, skb));
67 /* queued */ 68 /* queued */
68 if (reasm == NULL) 69 if (reasm == NULL)
69 return NF_STOLEN; 70 return NF_STOLEN;
@@ -74,7 +75,7 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
74 75
75 nf_ct_frag6_consume_orig(reasm); 76 nf_ct_frag6_consume_orig(reasm);
76 77
77 NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->sk, reasm, 78 NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm,
78 state->in, state->out, 79 state->in, state->out,
79 state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); 80 state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
80 81
@@ -84,14 +85,12 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
84static struct nf_hook_ops ipv6_defrag_ops[] = { 85static struct nf_hook_ops ipv6_defrag_ops[] = {
85 { 86 {
86 .hook = ipv6_defrag, 87 .hook = ipv6_defrag,
87 .owner = THIS_MODULE,
88 .pf = NFPROTO_IPV6, 88 .pf = NFPROTO_IPV6,
89 .hooknum = NF_INET_PRE_ROUTING, 89 .hooknum = NF_INET_PRE_ROUTING,
90 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, 90 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
91 }, 91 },
92 { 92 {
93 .hook = ipv6_defrag, 93 .hook = ipv6_defrag,
94 .owner = THIS_MODULE,
95 .pf = NFPROTO_IPV6, 94 .pf = NFPROTO_IPV6,
96 .hooknum = NF_INET_LOCAL_OUT, 95 .hooknum = NF_INET_LOCAL_OUT,
97 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, 96 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
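
After reassembly, the defrag hook reinjects the new skb with NF_HOOK_THRESH, resuming traversal just past the defrag priority; everything it needs now lives in the hook state. Sketch of that reinjection step (hypothetical wrapper):

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

static void example_reinject(struct sk_buff *reasm,
                             const struct nf_hook_state *state)
{
        NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk,
                       reasm, state->in, state->out, state->okfn,
                       NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
}
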
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index c8ab626556a0..6989c70ae29f 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -19,25 +19,10 @@
19#include <net/netfilter/nf_conntrack.h> 19#include <net/netfilter/nf_conntrack.h>
20#endif 20#endif
21 21
22static struct net *pick_net(struct sk_buff *skb) 22static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb,
23{ 23 const struct in6_addr *gw, int oif)
24#ifdef CONFIG_NET_NS
25 const struct dst_entry *dst;
26
27 if (skb->dev != NULL)
28 return dev_net(skb->dev);
29 dst = skb_dst(skb);
30 if (dst != NULL && dst->dev != NULL)
31 return dev_net(dst->dev);
32#endif
33 return &init_net;
34}
35
36static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw,
37 int oif)
38{ 24{
39 const struct ipv6hdr *iph = ipv6_hdr(skb); 25 const struct ipv6hdr *iph = ipv6_hdr(skb);
40 struct net *net = pick_net(skb);
41 struct dst_entry *dst; 26 struct dst_entry *dst;
42 struct flowi6 fl6; 27 struct flowi6 fl6;
43 28
@@ -61,7 +46,7 @@ static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw,
61 return true; 46 return true;
62} 47}
63 48
64void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum, 49void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
65 const struct in6_addr *gw, int oif) 50 const struct in6_addr *gw, int oif)
66{ 51{
67 if (this_cpu_read(nf_skb_duplicated)) 52 if (this_cpu_read(nf_skb_duplicated))
@@ -81,9 +66,9 @@ void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum,
81 struct ipv6hdr *iph = ipv6_hdr(skb); 66 struct ipv6hdr *iph = ipv6_hdr(skb);
82 --iph->hop_limit; 67 --iph->hop_limit;
83 } 68 }
84 if (nf_dup_ipv6_route(skb, gw, oif)) { 69 if (nf_dup_ipv6_route(net, skb, gw, oif)) {
85 __this_cpu_write(nf_skb_duplicated, true); 70 __this_cpu_write(nf_skb_duplicated, true);
86 ip6_local_out(skb); 71 ip6_local_out(net, skb->sk, skb);
87 __this_cpu_write(nf_skb_duplicated, false); 72 __this_cpu_write(nf_skb_duplicated, false);
88 } else { 73 } else {
89 kfree_skb(skb); 74 kfree_skb(skb);
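
Passing net into nf_dup_ipv6() removes the pick_net() heuristic (skb->dev, then the dst device, then init_net) entirely; the caller's hook state already knows the namespace. A hypothetical call site in the converted style:

#include <linux/netfilter.h>
#include <net/netfilter/ipv6/nf_dup_ipv6.h>

static void example_dup(const struct nf_hook_state *state,
                        struct sk_buff *skb,
                        const struct in6_addr *gw, int oif)
{
        nf_dup_ipv6(state->net, skb, state->hook, gw, oif);
}
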
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 70fbaed49edb..238e70c3f7b7 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -262,9 +262,9 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
262EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); 262EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
263 263
264unsigned int 264unsigned int
265nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, 265nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
266 const struct nf_hook_state *state, 266 const struct nf_hook_state *state,
267 unsigned int (*do_chain)(const struct nf_hook_ops *ops, 267 unsigned int (*do_chain)(void *priv,
268 struct sk_buff *skb, 268 struct sk_buff *skb,
269 const struct nf_hook_state *state, 269 const struct nf_hook_state *state,
270 struct nf_conn *ct)) 270 struct nf_conn *ct))
@@ -272,7 +272,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
272 struct nf_conn *ct; 272 struct nf_conn *ct;
273 enum ip_conntrack_info ctinfo; 273 enum ip_conntrack_info ctinfo;
274 struct nf_conn_nat *nat; 274 struct nf_conn_nat *nat;
275 enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); 275 enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
276 __be16 frag_off; 276 __be16 frag_off;
277 int hdrlen; 277 int hdrlen;
278 u8 nexthdr; 278 u8 nexthdr;
@@ -303,7 +303,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
303 303
304 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { 304 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
305 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, 305 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
306 ops->hooknum, 306 state->hook,
307 hdrlen)) 307 hdrlen))
308 return NF_DROP; 308 return NF_DROP;
309 else 309 else
@@ -317,21 +317,21 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
317 if (!nf_nat_initialized(ct, maniptype)) { 317 if (!nf_nat_initialized(ct, maniptype)) {
318 unsigned int ret; 318 unsigned int ret;
319 319
320 ret = do_chain(ops, skb, state, ct); 320 ret = do_chain(priv, skb, state, ct);
321 if (ret != NF_ACCEPT) 321 if (ret != NF_ACCEPT)
322 return ret; 322 return ret;
323 323
324 if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum))) 324 if (nf_nat_initialized(ct, HOOK2MANIP(state->hook)))
325 break; 325 break;
326 326
327 ret = nf_nat_alloc_null_binding(ct, ops->hooknum); 327 ret = nf_nat_alloc_null_binding(ct, state->hook);
328 if (ret != NF_ACCEPT) 328 if (ret != NF_ACCEPT)
329 return ret; 329 return ret;
330 } else { 330 } else {
331 pr_debug("Already setup manip %s for ct %p\n", 331 pr_debug("Already setup manip %s for ct %p\n",
332 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", 332 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
333 ct); 333 ct);
334 if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) 334 if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
335 goto oif_changed; 335 goto oif_changed;
336 } 336 }
337 break; 337 break;
@@ -340,11 +340,11 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
340 /* ESTABLISHED */ 340 /* ESTABLISHED */
341 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 341 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
342 ctinfo == IP_CT_ESTABLISHED_REPLY); 342 ctinfo == IP_CT_ESTABLISHED_REPLY);
343 if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) 343 if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
344 goto oif_changed; 344 goto oif_changed;
345 } 345 }
346 346
347 return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); 347 return nf_nat_packet(ct, ctinfo, state->hook, skb);
348 348
349oif_changed: 349oif_changed:
350 nf_ct_kill_acct(ct, ctinfo, skb); 350 nf_ct_kill_acct(ct, ctinfo, skb);
@@ -353,9 +353,9 @@ oif_changed:
353EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn); 353EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn);
354 354
355unsigned int 355unsigned int
356nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, 356nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
357 const struct nf_hook_state *state, 357 const struct nf_hook_state *state,
358 unsigned int (*do_chain)(const struct nf_hook_ops *ops, 358 unsigned int (*do_chain)(void *priv,
359 struct sk_buff *skb, 359 struct sk_buff *skb,
360 const struct nf_hook_state *state, 360 const struct nf_hook_state *state,
361 struct nf_conn *ct)) 361 struct nf_conn *ct))
@@ -363,7 +363,7 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
363 unsigned int ret; 363 unsigned int ret;
364 struct in6_addr daddr = ipv6_hdr(skb)->daddr; 364 struct in6_addr daddr = ipv6_hdr(skb)->daddr;
365 365
366 ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); 366 ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
367 if (ret != NF_DROP && ret != NF_STOLEN && 367 if (ret != NF_DROP && ret != NF_STOLEN &&
368 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) 368 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
369 skb_dst_drop(skb); 369 skb_dst_drop(skb);
@@ -373,9 +373,9 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
373EXPORT_SYMBOL_GPL(nf_nat_ipv6_in); 373EXPORT_SYMBOL_GPL(nf_nat_ipv6_in);
374 374
375unsigned int 375unsigned int
376nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, 376nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
377 const struct nf_hook_state *state, 377 const struct nf_hook_state *state,
378 unsigned int (*do_chain)(const struct nf_hook_ops *ops, 378 unsigned int (*do_chain)(void *priv,
379 struct sk_buff *skb, 379 struct sk_buff *skb,
380 const struct nf_hook_state *state, 380 const struct nf_hook_state *state,
381 struct nf_conn *ct)) 381 struct nf_conn *ct))
@@ -391,7 +391,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
391 if (skb->len < sizeof(struct ipv6hdr)) 391 if (skb->len < sizeof(struct ipv6hdr))
392 return NF_ACCEPT; 392 return NF_ACCEPT;
393 393
394 ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); 394 ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
395#ifdef CONFIG_XFRM 395#ifdef CONFIG_XFRM
396 if (ret != NF_DROP && ret != NF_STOLEN && 396 if (ret != NF_DROP && ret != NF_STOLEN &&
397 !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && 397 !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -403,7 +403,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
403 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && 403 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
404 ct->tuplehash[dir].tuple.src.u.all != 404 ct->tuplehash[dir].tuple.src.u.all !=
405 ct->tuplehash[!dir].tuple.dst.u.all)) { 405 ct->tuplehash[!dir].tuple.dst.u.all)) {
406 err = nf_xfrm_me_harder(skb, AF_INET6); 406 err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
407 if (err < 0) 407 if (err < 0)
408 ret = NF_DROP_ERR(err); 408 ret = NF_DROP_ERR(err);
409 } 409 }
@@ -414,9 +414,9 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
414EXPORT_SYMBOL_GPL(nf_nat_ipv6_out); 414EXPORT_SYMBOL_GPL(nf_nat_ipv6_out);
415 415
416unsigned int 416unsigned int
417nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, 417nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
418 const struct nf_hook_state *state, 418 const struct nf_hook_state *state,
419 unsigned int (*do_chain)(const struct nf_hook_ops *ops, 419 unsigned int (*do_chain)(void *priv,
420 struct sk_buff *skb, 420 struct sk_buff *skb,
421 const struct nf_hook_state *state, 421 const struct nf_hook_state *state,
422 struct nf_conn *ct)) 422 struct nf_conn *ct))
@@ -430,14 +430,14 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
430 if (skb->len < sizeof(struct ipv6hdr)) 430 if (skb->len < sizeof(struct ipv6hdr))
431 return NF_ACCEPT; 431 return NF_ACCEPT;
432 432
433 ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); 433 ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
434 if (ret != NF_DROP && ret != NF_STOLEN && 434 if (ret != NF_DROP && ret != NF_STOLEN &&
435 (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 435 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
436 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 436 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
437 437
438 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, 438 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
439 &ct->tuplehash[!dir].tuple.src.u3)) { 439 &ct->tuplehash[!dir].tuple.src.u3)) {
440 err = ip6_route_me_harder(skb); 440 err = ip6_route_me_harder(state->net, skb);
441 if (err < 0) 441 if (err < 0)
442 ret = NF_DROP_ERR(err); 442 ret = NF_DROP_ERR(err);
443 } 443 }
@@ -446,7 +446,7 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
446 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && 446 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
447 ct->tuplehash[dir].tuple.dst.u.all != 447 ct->tuplehash[dir].tuple.dst.u.all !=
448 ct->tuplehash[!dir].tuple.src.u.all) { 448 ct->tuplehash[!dir].tuple.src.u.all) {
449 err = nf_xfrm_me_harder(skb, AF_INET6); 449 err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
450 if (err < 0) 450 if (err < 0)
451 ret = NF_DROP_ERR(err); 451 ret = NF_DROP_ERR(err);
452 } 452 }
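
The hunks above show the hook-callback migration this series performs throughout: entry points take an opaque void *priv instead of const struct nf_hook_ops *ops, and the network namespace is read from state->net rather than re-derived from a device. A minimal user-space sketch of that calling convention follows; struct net, struct sk_buff, and the hook state here are simplified stand-ins, not the kernel's definitions.

#include <stdio.h>

struct net { int id; };                    /* stand-in for struct net */
struct sk_buff { int len; };               /* stand-in for struct sk_buff */
struct nf_hook_state { struct net *net; }; /* the state carries the netns */

typedef unsigned int (*hook_fn)(void *priv, struct sk_buff *skb,
                                const struct nf_hook_state *state);

static unsigned int my_hook(void *priv, struct sk_buff *skb,
                            const struct nf_hook_state *state)
{
    /* the namespace comes from state->net, never from the skb's device */
    printf("hook priv=%s net=%d len=%d\n",
           (const char *)priv, state->net->id, skb->len);
    return 1; /* stand-in for NF_ACCEPT */
}

int main(void)
{
    struct net init_net = { .id = 0 };
    struct nf_hook_state state = { .net = &init_net };
    struct sk_buff skb = { .len = 40 };
    hook_fn fn = my_hook;

    return fn((void *)"chain-priv", &skb, &state) == 1 ? 0 : 1;
}
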
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 7745609665cd..31ba7ca19757 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -34,7 +34,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
34 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || 34 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
35 ctinfo == IP_CT_RELATED_REPLY)); 35 ctinfo == IP_CT_RELATED_REPLY));
36 36
37 if (ipv6_dev_get_saddr(dev_net(out), out, 37 if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
38 &ipv6_hdr(skb)->daddr, 0, &src) < 0) 38 &ipv6_hdr(skb)->daddr, 0, &src) < 0)
39 return NF_DROP; 39 return NF_DROP;
40 40
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 94b4c6dfb400..e0f922b777e3 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -26,7 +26,7 @@ const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
26 int tcphoff; 26 int tcphoff;
27 27
28 proto = oip6h->nexthdr; 28 proto = oip6h->nexthdr;
29 tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), 29 tcphoff = ipv6_skip_exthdr(oldskb, ((u8 *)(oip6h + 1) - oldskb->data),
30 &proto, &frag_off); 30 &proto, &frag_off);
31 31
32 if ((tcphoff < 0) || (tcphoff > oldskb->len)) { 32 if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
@@ -206,7 +206,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
206 dev_queue_xmit(nskb); 206 dev_queue_xmit(nskb);
207 } else 207 } else
208#endif 208#endif
209 ip6_local_out(nskb); 209 ip6_local_out(net, nskb->sk, nskb);
210} 210}
211EXPORT_SYMBOL_GPL(nf_send_reset6); 211EXPORT_SYMBOL_GPL(nf_send_reset6);
212 212
@@ -224,7 +224,7 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
224 return true; 224 return true;
225 225
226 proto = ip6h->nexthdr; 226 proto = ip6h->nexthdr;
227 thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); 227 thoff = ipv6_skip_exthdr(skb, ((u8 *)(ip6h + 1) - skb->data), &proto, &fo);
228 228
229 if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) 229 if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
230 return false; 230 return false;
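
The (u8 *)(ip6h + 1) - data expression whose spacing the hunks above clean up is the usual idiom for the byte offset of whatever follows the fixed IPv6 header, which ipv6_skip_exthdr then walks. A standalone illustration of the pointer arithmetic, using a stub header type rather than the kernel's struct ipv6hdr:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct ipv6hdr_stub { uint8_t bytes[40]; }; /* fixed IPv6 header is 40 bytes */

int main(void)
{
    uint8_t pkt[128] = {0};
    struct ipv6hdr_stub *ip6h = (struct ipv6hdr_stub *)pkt;

    /* one-past-the-header pointer, turned back into a byte offset */
    ptrdiff_t off = (uint8_t *)(ip6h + 1) - pkt;

    printf("extension headers start at offset %td\n", off); /* prints 40 */
    return off == 40 ? 0 : 1;
}
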
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index c8148ba76d1a..120ea9131be0 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -16,20 +16,20 @@
16#include <net/netfilter/nf_tables.h> 16#include <net/netfilter/nf_tables.h>
17#include <net/netfilter/nf_tables_ipv6.h> 17#include <net/netfilter/nf_tables_ipv6.h>
18 18
19static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, 19static unsigned int nft_do_chain_ipv6(void *priv,
20 struct sk_buff *skb, 20 struct sk_buff *skb,
21 const struct nf_hook_state *state) 21 const struct nf_hook_state *state)
22{ 22{
23 struct nft_pktinfo pkt; 23 struct nft_pktinfo pkt;
24 24
25 /* malformed packet, drop it */ 25 /* malformed packet, drop it */
26 if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0) 26 if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
27 return NF_DROP; 27 return NF_DROP;
28 28
29 return nft_do_chain(&pkt, ops); 29 return nft_do_chain(&pkt, priv);
30} 30}
31 31
32static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, 32static unsigned int nft_ipv6_output(void *priv,
33 struct sk_buff *skb, 33 struct sk_buff *skb,
34 const struct nf_hook_state *state) 34 const struct nf_hook_state *state)
35{ 35{
@@ -40,7 +40,7 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
40 return NF_ACCEPT; 40 return NF_ACCEPT;
41 } 41 }
42 42
43 return nft_do_chain_ipv6(ops, skb, state); 43 return nft_do_chain_ipv6(priv, skb, state);
44} 44}
45 45
46struct nft_af_info nft_af_ipv6 __read_mostly = { 46struct nft_af_info nft_af_ipv6 __read_mostly = {
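
The nft_do_chain_ipv6 shape above is worth noting: build per-packet info, return an NF_DROP verdict for malformed input, otherwise evaluate the chain that was registered as the hook's private data. A compilable sketch of that flow, with stand-in types and a 40-byte minimum standing in for the real pktinfo validation:

#include <stdio.h>

#define NF_DROP   0u
#define NF_ACCEPT 1u

struct sk_buff { int len; };                 /* stand-in */
struct nft_pktinfo { struct sk_buff *skb; }; /* stand-in */

static int set_pktinfo(struct nft_pktinfo *pkt, struct sk_buff *skb)
{
    if (skb->len < 40)  /* shorter than a fixed IPv6 header: malformed */
        return -1;
    pkt->skb = skb;
    return 0;
}

static unsigned int do_chain(const struct nft_pktinfo *pkt, void *chain)
{
    (void)chain;        /* the registered chain, handed in as hook priv */
    return pkt->skb->len ? NF_ACCEPT : NF_DROP;
}

static unsigned int chain_hook(void *priv, struct sk_buff *skb)
{
    struct nft_pktinfo pkt;

    if (set_pktinfo(&pkt, skb) < 0)
        return NF_DROP; /* malformed packet, drop it */
    return do_chain(&pkt, priv);
}

int main(void)
{
    struct sk_buff good = { .len = 60 }, bad = { .len = 8 };

    printf("good=%u bad=%u\n", chain_hook(NULL, &good),
           chain_hook(NULL, &bad));
    return 0;
}
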
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 951bb458b7bd..443cd306c0b0 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -24,44 +24,44 @@
24#include <net/netfilter/nf_nat_l3proto.h> 24#include <net/netfilter/nf_nat_l3proto.h>
25#include <net/ipv6.h> 25#include <net/ipv6.h>
26 26
27static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, 27static unsigned int nft_nat_do_chain(void *priv,
28 struct sk_buff *skb, 28 struct sk_buff *skb,
29 const struct nf_hook_state *state, 29 const struct nf_hook_state *state,
30 struct nf_conn *ct) 30 struct nf_conn *ct)
31{ 31{
32 struct nft_pktinfo pkt; 32 struct nft_pktinfo pkt;
33 33
34 nft_set_pktinfo_ipv6(&pkt, ops, skb, state); 34 nft_set_pktinfo_ipv6(&pkt, skb, state);
35 35
36 return nft_do_chain(&pkt, ops); 36 return nft_do_chain(&pkt, priv);
37} 37}
38 38
39static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops, 39static unsigned int nft_nat_ipv6_fn(void *priv,
40 struct sk_buff *skb, 40 struct sk_buff *skb,
41 const struct nf_hook_state *state) 41 const struct nf_hook_state *state)
42{ 42{
43 return nf_nat_ipv6_fn(ops, skb, state, nft_nat_do_chain); 43 return nf_nat_ipv6_fn(priv, skb, state, nft_nat_do_chain);
44} 44}
45 45
46static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops, 46static unsigned int nft_nat_ipv6_in(void *priv,
47 struct sk_buff *skb, 47 struct sk_buff *skb,
48 const struct nf_hook_state *state) 48 const struct nf_hook_state *state)
49{ 49{
50 return nf_nat_ipv6_in(ops, skb, state, nft_nat_do_chain); 50 return nf_nat_ipv6_in(priv, skb, state, nft_nat_do_chain);
51} 51}
52 52
53static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops, 53static unsigned int nft_nat_ipv6_out(void *priv,
54 struct sk_buff *skb, 54 struct sk_buff *skb,
55 const struct nf_hook_state *state) 55 const struct nf_hook_state *state)
56{ 56{
57 return nf_nat_ipv6_out(ops, skb, state, nft_nat_do_chain); 57 return nf_nat_ipv6_out(priv, skb, state, nft_nat_do_chain);
58} 58}
59 59
60static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops, 60static unsigned int nft_nat_ipv6_local_fn(void *priv,
61 struct sk_buff *skb, 61 struct sk_buff *skb,
62 const struct nf_hook_state *state) 62 const struct nf_hook_state *state)
63{ 63{
64 return nf_nat_ipv6_local_fn(ops, skb, state, nft_nat_do_chain); 64 return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain);
65} 65}
66 66
67static const struct nf_chain_type nft_chain_nat_ipv6 = { 67static const struct nf_chain_type nft_chain_nat_ipv6 = {
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 0dafdaac5e17..71d995ff3108 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -22,7 +22,7 @@
22#include <net/netfilter/nf_tables_ipv6.h> 22#include <net/netfilter/nf_tables_ipv6.h>
23#include <net/route.h> 23#include <net/route.h>
24 24
25static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, 25static unsigned int nf_route_table_hook(void *priv,
26 struct sk_buff *skb, 26 struct sk_buff *skb,
27 const struct nf_hook_state *state) 27 const struct nf_hook_state *state)
28{ 28{
@@ -33,7 +33,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
33 u32 mark, flowlabel; 33 u32 mark, flowlabel;
34 34
35 /* malformed packet, drop it */ 35 /* malformed packet, drop it */
36 if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0) 36 if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
37 return NF_DROP; 37 return NF_DROP;
38 38
39 /* save source/dest address, mark, hoplimit, flowlabel, priority */ 39 /* save source/dest address, mark, hoplimit, flowlabel, priority */
@@ -45,14 +45,14 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
45 /* flowlabel and prio (includes version, which shouldn't change either */ 45 /* flowlabel and prio (includes version, which shouldn't change either */
46 flowlabel = *((u32 *)ipv6_hdr(skb)); 46 flowlabel = *((u32 *)ipv6_hdr(skb));
47 47
48 ret = nft_do_chain(&pkt, ops); 48 ret = nft_do_chain(&pkt, priv);
49 if (ret != NF_DROP && ret != NF_QUEUE && 49 if (ret != NF_DROP && ret != NF_QUEUE &&
50 (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || 50 (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
51 memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || 51 memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
52 skb->mark != mark || 52 skb->mark != mark ||
53 ipv6_hdr(skb)->hop_limit != hop_limit || 53 ipv6_hdr(skb)->hop_limit != hop_limit ||
54 flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) 54 flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
55 return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; 55 return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP;
56 56
57 return ret; 57 return ret;
58} 58}
@@ -61,11 +61,11 @@ static const struct nf_chain_type nft_chain_route_ipv6 = {
61 .name = "route", 61 .name = "route",
62 .type = NFT_CHAIN_T_ROUTE, 62 .type = NFT_CHAIN_T_ROUTE,
63 .family = NFPROTO_IPV6, 63 .family = NFPROTO_IPV6,
64 .owner = THIS_MODULE, 64 .owner = THIS_MODULE,
65 .hook_mask = (1 << NF_INET_LOCAL_OUT), 65 .hook_mask = (1 << NF_INET_LOCAL_OUT),
66 .hooks = { 66 .hooks = {
67 [NF_INET_LOCAL_OUT] = nf_route_table_hook, 67 [NF_INET_LOCAL_OUT] = nf_route_table_hook,
68 }, 68 },
69}; 69};
70 70
71static int __init nft_chain_route_init(void) 71static int __init nft_chain_route_init(void)
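
nf_route_table_hook above snapshots the routing-relevant header fields before running the chain and re-routes the packet only if one of them was rewritten. A small sketch of that compare step; the field set and names here are stand-ins for the saddr/daddr/mark/hop_limit/flowlabel comparison in the real hook:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct hdr_snap {
    unsigned char saddr[16];
    unsigned char daddr[16];
    unsigned int  mark;
    unsigned char hop_limit;
};

static bool needs_reroute(const struct hdr_snap *a, const struct hdr_snap *b)
{
    return memcmp(a->saddr, b->saddr, sizeof(a->saddr)) ||
           memcmp(a->daddr, b->daddr, sizeof(a->daddr)) ||
           a->mark != b->mark || a->hop_limit != b->hop_limit;
}

int main(void)
{
    struct hdr_snap before = { .mark = 1, .hop_limit = 64 };
    struct hdr_snap after  = before;

    after.mark = 2;  /* pretend the chain rewrote the mark */
    if (needs_reroute(&before, &after))
        puts("re-route: the chain rewrote a routing-relevant field");
    return 0;
}
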
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 0eaa4f65fdea..8bfd470cbe72 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -28,7 +28,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr,
28 struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr]; 28 struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
29 int oif = regs->data[priv->sreg_dev]; 29 int oif = regs->data[priv->sreg_dev];
30 30
31 nf_dup_ipv6(pkt->skb, pkt->ops->hooknum, gw, oif); 31 nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif);
32} 32}
33 33
34static int nft_dup_ipv6_init(const struct nft_ctx *ctx, 34static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
index effd393bd517..aca44e89a881 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -35,8 +35,7 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
35 35
36 range.flags |= priv->flags; 36 range.flags |= priv->flags;
37 37
38 regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, 38 regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->hook);
39 pkt->ops->hooknum);
40} 39}
41 40
42static struct nft_expr_type nft_redir_ipv6_type; 41static struct nft_expr_type nft_redir_ipv6_type;
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index d0d1540ecf87..533cd5719c59 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -24,15 +24,14 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr,
24 const struct nft_pktinfo *pkt) 24 const struct nft_pktinfo *pkt)
25{ 25{
26 struct nft_reject *priv = nft_expr_priv(expr); 26 struct nft_reject *priv = nft_expr_priv(expr);
27 struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
28 27
29 switch (priv->type) { 28 switch (priv->type) {
30 case NFT_REJECT_ICMP_UNREACH: 29 case NFT_REJECT_ICMP_UNREACH:
31 nf_send_unreach6(net, pkt->skb, priv->icmp_code, 30 nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code,
32 pkt->ops->hooknum); 31 pkt->hook);
33 break; 32 break;
34 case NFT_REJECT_TCP_RST: 33 case NFT_REJECT_TCP_RST:
35 nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); 34 nf_send_reset6(pkt->net, pkt->skb, pkt->hook);
36 break; 35 break;
37 default: 36 default:
38 break; 37 break;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 928a0fb0b744..462f2a76b5c2 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -138,7 +138,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
138EXPORT_SYMBOL(ip6_dst_hoplimit); 138EXPORT_SYMBOL(ip6_dst_hoplimit);
139#endif 139#endif
140 140
141static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) 141int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
142{ 142{
143 int len; 143 int len;
144 144
@@ -148,30 +148,20 @@ static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb)
148 ipv6_hdr(skb)->payload_len = htons(len); 148 ipv6_hdr(skb)->payload_len = htons(len);
149 IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); 149 IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
150 150
151 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, 151 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
152 NULL, skb_dst(skb)->dev, dst_output_sk); 152 net, sk, skb, NULL, skb_dst(skb)->dev,
153} 153 dst_output);
154
155int __ip6_local_out(struct sk_buff *skb)
156{
157 return __ip6_local_out_sk(skb->sk, skb);
158} 154}
159EXPORT_SYMBOL_GPL(__ip6_local_out); 155EXPORT_SYMBOL_GPL(__ip6_local_out);
160 156
161int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) 157int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
162{ 158{
163 int err; 159 int err;
164 160
165 err = __ip6_local_out_sk(sk, skb); 161 err = __ip6_local_out(net, sk, skb);
166 if (likely(err == 1)) 162 if (likely(err == 1))
167 err = dst_output_sk(sk, skb); 163 err = dst_output(net, sk, skb);
168 164
169 return err; 165 return err;
170} 166}
171EXPORT_SYMBOL_GPL(ip6_local_out_sk);
172
173int ip6_local_out(struct sk_buff *skb)
174{
175 return ip6_local_out_sk(skb->sk, skb);
176}
177EXPORT_SYMBOL_GPL(ip6_local_out); 167EXPORT_SYMBOL_GPL(ip6_local_out);
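
The output_core.c hunk collapses the skb-only and sock-taking variants into a single pair of net-aware functions, keeping the kernel's convention that the inner helper returning 1 means "hook accepted, continue with the output step". A user-space rendition of that structure, with all types as stand-ins:

#include <stdio.h>

struct net { int id; };
struct sock { int fd; };
struct sk_buff { int len; };

static int __local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
    (void)net; (void)sk;
    skb->len += 40;     /* e.g. fill in payload_len from the skb length */
    return 1;           /* 1 == accepted: caller should transmit */
}

static int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
    (void)sk;
    printf("xmit %d bytes in net %d\n", skb->len, net->id);
    return 0;
}

static int local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
    int err = __local_out(net, sk, skb);

    if (err == 1)       /* likely path: continue with the output step */
        err = dst_output(net, sk, skb);
    return err;
}

int main(void)
{
    struct net n = { .id = 0 };
    struct sock s = { .fd = 3 };
    struct sk_buff skb = { .len = 0 };

    return local_out(&n, &s, &skb);
}
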
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index fdbada1569a3..dc65ec198f7c 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -614,6 +614,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
614 unsigned int flags) 614 unsigned int flags)
615{ 615{
616 struct ipv6_pinfo *np = inet6_sk(sk); 616 struct ipv6_pinfo *np = inet6_sk(sk);
617 struct net *net = sock_net(sk);
617 struct ipv6hdr *iph; 618 struct ipv6hdr *iph;
618 struct sk_buff *skb; 619 struct sk_buff *skb;
619 int err; 620 int err;
@@ -652,9 +653,9 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
652 if (err) 653 if (err)
653 goto error_fault; 654 goto error_fault;
654 655
655 IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 656 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
656 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, 657 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
657 NULL, rt->dst.dev, dst_output_sk); 658 NULL, rt->dst.dev, dst_output);
658 if (err > 0) 659 if (err > 0)
659 err = net_xmit_errno(err); 660 err = net_xmit_errno(err);
660 if (err) 661 if (err)
@@ -666,7 +667,7 @@ error_fault:
666 err = -EFAULT; 667 err = -EFAULT;
667 kfree_skb(skb); 668 kfree_skb(skb);
668error: 669error:
669 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 670 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
670 if (err == -ENOBUFS && !np->recverr) 671 if (err == -ENOBUFS && !np->recverr)
671 err = 0; 672 err = 0;
672 return err; 673 return err;
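
The rawv6_send_hdrinc change above is the common hoisting idiom: derive sock_net(sk) once into a local and reuse it at every statistics and hook call site. The same move in miniature, with stand-in types:

#include <stdio.h>

struct net { int id; };
struct sock { struct net *net; };

static struct net *sock_net(const struct sock *sk) { return sk->net; }

static void send_one(struct sock *sk)
{
    struct net *net = sock_net(sk);         /* derive once */

    printf("stats in net %d\n", net->id);   /* first user */
    printf("hook  in net %d\n", net->id);   /* later users, no re-derivation */
}

int main(void)
{
    struct net n = { .id = 7 };
    struct sock s = { .net = &n };

    send_one(&s);
    return 0;
}
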
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index f1159bb76e0a..44e21a03cfc3 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -706,13 +706,19 @@ static void ip6_frags_sysctl_unregister(void)
706 706
707static int __net_init ipv6_frags_init_net(struct net *net) 707static int __net_init ipv6_frags_init_net(struct net *net)
708{ 708{
709 int res;
710
709 net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; 711 net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
710 net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; 712 net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
711 net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; 713 net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
712 714
713 inet_frags_init_net(&net->ipv6.frags); 715 res = inet_frags_init_net(&net->ipv6.frags);
714 716 if (res)
715 return ip6_frags_ns_sysctl_register(net); 717 return res;
718 res = ip6_frags_ns_sysctl_register(net);
719 if (res)
720 inet_frags_uninit_net(&net->ipv6.frags);
721 return res;
716} 722}
717 723
718static void __net_exit ipv6_frags_exit_net(struct net *net) 724static void __net_exit ipv6_frags_exit_net(struct net *net)
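
The ipv6_frags_init_net hunk adds the standard unwind pattern for pernet init: each fallible step rolls back the previous one on failure instead of leaving half-initialized state behind. A minimal sketch of that ordering; the step names are stand-ins and the second step is forced to fail for demonstration:

#include <stdio.h>

static int  frags_init(void)      { puts("frags up");   return 0; }
static void frags_uninit(void)    { puts("frags down"); }
static int  sysctl_register(void) { return -1; /* simulate failure */ }

static int pernet_init(void)
{
    int res = frags_init();

    if (res)
        return res;
    res = sysctl_register();
    if (res)
        frags_uninit();  /* unwind the earlier step on failure */
    return res;
}

int main(void)
{
    printf("init %s\n", pernet_init() ? "failed, rolled back" : "ok");
    return 0;
}
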
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cb32ce250db0..6f01fe122abd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -61,6 +61,7 @@
61#include <net/nexthop.h> 61#include <net/nexthop.h>
62#include <net/lwtunnel.h> 62#include <net/lwtunnel.h>
63#include <net/ip_tunnels.h> 63#include <net/ip_tunnels.h>
64#include <net/l3mdev.h>
64 65
65#include <asm/uaccess.h> 66#include <asm/uaccess.h>
66 67
@@ -86,9 +87,9 @@ static void ip6_dst_ifdown(struct dst_entry *,
86static int ip6_dst_gc(struct dst_ops *ops); 87static int ip6_dst_gc(struct dst_ops *ops);
87 88
88static int ip6_pkt_discard(struct sk_buff *skb); 89static int ip6_pkt_discard(struct sk_buff *skb);
89static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb); 90static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
90static int ip6_pkt_prohibit(struct sk_buff *skb); 91static int ip6_pkt_prohibit(struct sk_buff *skb);
91static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb); 92static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92static void ip6_link_failure(struct sk_buff *skb); 93static void ip6_link_failure(struct sk_buff *skb);
93static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 94static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb, u32 mtu); 95 struct sk_buff *skb, u32 mtu);
@@ -142,6 +143,9 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
142 struct net_device *loopback_dev = net->loopback_dev; 143 struct net_device *loopback_dev = net->loopback_dev;
143 int cpu; 144 int cpu;
144 145
146 if (dev == loopback_dev)
147 return;
148
145 for_each_possible_cpu(cpu) { 149 for_each_possible_cpu(cpu) {
146 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); 150 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
147 struct rt6_info *rt; 151 struct rt6_info *rt;
@@ -151,14 +155,12 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
151 struct inet6_dev *rt_idev = rt->rt6i_idev; 155 struct inet6_dev *rt_idev = rt->rt6i_idev;
152 struct net_device *rt_dev = rt->dst.dev; 156 struct net_device *rt_dev = rt->dst.dev;
153 157
154 if (rt_idev && (rt_idev->dev == dev || !dev) && 158 if (rt_idev->dev == dev) {
155 rt_idev->dev != loopback_dev) {
156 rt->rt6i_idev = in6_dev_get(loopback_dev); 159 rt->rt6i_idev = in6_dev_get(loopback_dev);
157 in6_dev_put(rt_idev); 160 in6_dev_put(rt_idev);
158 } 161 }
159 162
160 if (rt_dev && (rt_dev == dev || !dev) && 163 if (rt_dev == dev) {
161 rt_dev != loopback_dev) {
162 rt->dst.dev = loopback_dev; 164 rt->dst.dev = loopback_dev;
163 dev_hold(rt->dst.dev); 165 dev_hold(rt->dst.dev);
164 dev_put(rt_dev); 166 dev_put(rt_dev);
@@ -247,12 +249,6 @@ static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
247{ 249{
248} 250}
249 251
250static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
251 unsigned long old)
252{
253 return NULL;
254}
255
256static struct dst_ops ip6_dst_blackhole_ops = { 252static struct dst_ops ip6_dst_blackhole_ops = {
257 .family = AF_INET6, 253 .family = AF_INET6,
258 .destroy = ip6_dst_destroy, 254 .destroy = ip6_dst_destroy,
@@ -261,7 +257,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
261 .default_advmss = ip6_default_advmss, 257 .default_advmss = ip6_default_advmss,
262 .update_pmtu = ip6_rt_blackhole_update_pmtu, 258 .update_pmtu = ip6_rt_blackhole_update_pmtu,
263 .redirect = ip6_rt_blackhole_redirect, 259 .redirect = ip6_rt_blackhole_redirect,
264 .cow_metrics = ip6_rt_blackhole_cow_metrics, 260 .cow_metrics = dst_cow_metrics_generic,
265 .neigh_lookup = ip6_neigh_lookup, 261 .neigh_lookup = ip6_neigh_lookup,
266}; 262};
267 263
@@ -308,7 +304,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
308 .obsolete = DST_OBSOLETE_FORCE_CHK, 304 .obsolete = DST_OBSOLETE_FORCE_CHK,
309 .error = -EINVAL, 305 .error = -EINVAL,
310 .input = dst_discard, 306 .input = dst_discard,
311 .output = dst_discard_sk, 307 .output = dst_discard_out,
312 }, 308 },
313 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 309 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
314 .rt6i_protocol = RTPROT_KERNEL, 310 .rt6i_protocol = RTPROT_KERNEL,
@@ -318,6 +314,15 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
318 314
319#endif 315#endif
320 316
317static void rt6_info_init(struct rt6_info *rt)
318{
319 struct dst_entry *dst = &rt->dst;
320
321 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
322 INIT_LIST_HEAD(&rt->rt6i_siblings);
323 INIT_LIST_HEAD(&rt->rt6i_uncached);
324}
325
321/* allocate dst with ip6_dst_ops */ 326/* allocate dst with ip6_dst_ops */
322static struct rt6_info *__ip6_dst_alloc(struct net *net, 327static struct rt6_info *__ip6_dst_alloc(struct net *net,
323 struct net_device *dev, 328 struct net_device *dev,
@@ -326,13 +331,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
326 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 331 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
327 0, DST_OBSOLETE_FORCE_CHK, flags); 332 0, DST_OBSOLETE_FORCE_CHK, flags);
328 333
329 if (rt) { 334 if (rt)
330 struct dst_entry *dst = &rt->dst; 335 rt6_info_init(rt);
331 336
332 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
333 INIT_LIST_HEAD(&rt->rt6i_siblings);
334 INIT_LIST_HEAD(&rt->rt6i_uncached);
335 }
336 return rt; 337 return rt;
337} 338}
338 339
@@ -403,6 +404,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
403 } 404 }
404} 405}
405 406
407static bool __rt6_check_expired(const struct rt6_info *rt)
408{
409 if (rt->rt6i_flags & RTF_EXPIRES)
410 return time_after(jiffies, rt->dst.expires);
411 else
412 return false;
413}
414
406static bool rt6_check_expired(const struct rt6_info *rt) 415static bool rt6_check_expired(const struct rt6_info *rt)
407{ 416{
408 if (rt->rt6i_flags & RTF_EXPIRES) { 417 if (rt->rt6i_flags & RTF_EXPIRES) {
@@ -421,31 +430,7 @@ static bool rt6_check_expired(const struct rt6_info *rt)
421static int rt6_info_hash_nhsfn(unsigned int candidate_count, 430static int rt6_info_hash_nhsfn(unsigned int candidate_count,
422 const struct flowi6 *fl6) 431 const struct flowi6 *fl6)
423{ 432{
424 unsigned int val = fl6->flowi6_proto; 433 return get_hash_from_flowi6(fl6) % candidate_count;
425
426 val ^= ipv6_addr_hash(&fl6->daddr);
427 val ^= ipv6_addr_hash(&fl6->saddr);
428
429 /* Work only if this not encapsulated */
430 switch (fl6->flowi6_proto) {
431 case IPPROTO_UDP:
432 case IPPROTO_TCP:
433 case IPPROTO_SCTP:
434 val ^= (__force u16)fl6->fl6_sport;
435 val ^= (__force u16)fl6->fl6_dport;
436 break;
437
438 case IPPROTO_ICMPV6:
439 val ^= (__force u16)fl6->fl6_icmp_type;
440 val ^= (__force u16)fl6->fl6_icmp_code;
441 break;
442 }
443 /* RFC6438 recommands to use flowlabel */
444 val ^= (__force u32)fl6->flowlabel;
445
446 /* Perhaps, we need to tune, this function? */
447 val = val ^ (val >> 7) ^ (val >> 12);
448 return val % candidate_count;
449} 434}
450 435
451static struct rt6_info *rt6_multipath_select(struct rt6_info *match, 436static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
@@ -498,10 +483,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
498 if (dev->flags & IFF_LOOPBACK) { 483 if (dev->flags & IFF_LOOPBACK) {
499 if (!sprt->rt6i_idev || 484 if (!sprt->rt6i_idev ||
500 sprt->rt6i_idev->dev->ifindex != oif) { 485 sprt->rt6i_idev->dev->ifindex != oif) {
501 if (flags & RT6_LOOKUP_F_IFACE && oif) 486 if (flags & RT6_LOOKUP_F_IFACE)
502 continue; 487 continue;
503 if (local && (!oif || 488 if (local &&
504 local->rt6i_idev->dev->ifindex == oif)) 489 local->rt6i_idev->dev->ifindex == oif)
505 continue; 490 continue;
506 } 491 }
507 local = sprt; 492 local = sprt;
@@ -538,7 +523,7 @@ static void rt6_probe_deferred(struct work_struct *w)
538 container_of(w, struct __rt6_probe_work, work); 523 container_of(w, struct __rt6_probe_work, work);
539 524
540 addrconf_addr_solict_mult(&work->target, &mcaddr); 525 addrconf_addr_solict_mult(&work->target, &mcaddr);
541 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL); 526 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
542 dev_put(work->dev); 527 dev_put(work->dev);
543 kfree(work); 528 kfree(work);
544} 529}
@@ -1068,6 +1053,9 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1068 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 1053 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1069 saved_fn = fn; 1054 saved_fn = fn;
1070 1055
1056 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1057 oif = 0;
1058
1071redo_rt6_select: 1059redo_rt6_select:
1072 rt = rt6_select(fn, oif, strict); 1060 rt = rt6_select(fn, oif, strict);
1073 if (rt->rt6i_nsiblings) 1061 if (rt->rt6i_nsiblings)
@@ -1165,7 +1153,7 @@ void ip6_route_input(struct sk_buff *skb)
1165 int flags = RT6_LOOKUP_F_HAS_SADDR; 1153 int flags = RT6_LOOKUP_F_HAS_SADDR;
1166 struct ip_tunnel_info *tun_info; 1154 struct ip_tunnel_info *tun_info;
1167 struct flowi6 fl6 = { 1155 struct flowi6 fl6 = {
1168 .flowi6_iif = skb->dev->ifindex, 1156 .flowi6_iif = l3mdev_fib_oif(skb->dev),
1169 .daddr = iph->daddr, 1157 .daddr = iph->daddr,
1170 .saddr = iph->saddr, 1158 .saddr = iph->saddr,
1171 .flowlabel = ip6_flowinfo(iph), 1159 .flowlabel = ip6_flowinfo(iph),
@@ -1189,15 +1177,22 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
1189struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, 1177struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1190 struct flowi6 *fl6) 1178 struct flowi6 *fl6)
1191{ 1179{
1180 struct dst_entry *dst;
1192 int flags = 0; 1181 int flags = 0;
1182 bool any_src;
1183
1184 dst = l3mdev_rt6_dst_by_oif(net, fl6);
1185 if (dst)
1186 return dst;
1193 1187
1194 fl6->flowi6_iif = LOOPBACK_IFINDEX; 1188 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1195 1189
1190 any_src = ipv6_addr_any(&fl6->saddr);
1196 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || 1191 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1197 fl6->flowi6_oif) 1192 (fl6->flowi6_oif && any_src))
1198 flags |= RT6_LOOKUP_F_IFACE; 1193 flags |= RT6_LOOKUP_F_IFACE;
1199 1194
1200 if (!ipv6_addr_any(&fl6->saddr)) 1195 if (!any_src)
1201 flags |= RT6_LOOKUP_F_HAS_SADDR; 1196 flags |= RT6_LOOKUP_F_HAS_SADDR;
1202 else if (sk) 1197 else if (sk)
1203 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 1198 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
@@ -1213,24 +1208,20 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
1213 1208
1214 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0); 1209 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1215 if (rt) { 1210 if (rt) {
1216 new = &rt->dst; 1211 rt6_info_init(rt);
1217
1218 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1219 1212
1213 new = &rt->dst;
1220 new->__use = 1; 1214 new->__use = 1;
1221 new->input = dst_discard; 1215 new->input = dst_discard;
1222 new->output = dst_discard_sk; 1216 new->output = dst_discard_out;
1223 1217
1224 if (dst_metrics_read_only(&ort->dst)) 1218 dst_copy_metrics(new, &ort->dst);
1225 new->_metrics = ort->dst._metrics;
1226 else
1227 dst_copy_metrics(new, &ort->dst);
1228 rt->rt6i_idev = ort->rt6i_idev; 1219 rt->rt6i_idev = ort->rt6i_idev;
1229 if (rt->rt6i_idev) 1220 if (rt->rt6i_idev)
1230 in6_dev_hold(rt->rt6i_idev); 1221 in6_dev_hold(rt->rt6i_idev);
1231 1222
1232 rt->rt6i_gateway = ort->rt6i_gateway; 1223 rt->rt6i_gateway = ort->rt6i_gateway;
1233 rt->rt6i_flags = ort->rt6i_flags; 1224 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1234 rt->rt6i_metric = 0; 1225 rt->rt6i_metric = 0;
1235 1226
1236 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 1227 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
@@ -1269,7 +1260,8 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1269 1260
1270static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) 1261static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1271{ 1262{
1272 if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && 1263 if (!__rt6_check_expired(rt) &&
1264 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1273 rt6_check((struct rt6_info *)(rt->dst.from), cookie)) 1265 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1274 return &rt->dst; 1266 return &rt->dst;
1275 else 1267 else
@@ -1289,7 +1281,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1289 1281
1290 rt6_dst_from_metrics_check(rt); 1282 rt6_dst_from_metrics_check(rt);
1291 1283
1292 if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE)) 1284 if (rt->rt6i_flags & RTF_PCPU ||
1285 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1293 return rt6_dst_from_check(rt, cookie); 1286 return rt6_dst_from_check(rt, cookie);
1294 else 1287 else
1295 return rt6_check(rt, cookie); 1288 return rt6_check(rt, cookie);
@@ -1339,6 +1332,12 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1339 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); 1332 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1340} 1333}
1341 1334
1335static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1336{
1337 return !(rt->rt6i_flags & RTF_CACHE) &&
1338 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1339}
1340
1342static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, 1341static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1343 const struct ipv6hdr *iph, u32 mtu) 1342 const struct ipv6hdr *iph, u32 mtu)
1344{ 1343{
@@ -1352,7 +1351,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1352 if (mtu >= dst_mtu(dst)) 1351 if (mtu >= dst_mtu(dst))
1353 return; 1352 return;
1354 1353
1355 if (rt6->rt6i_flags & RTF_CACHE) { 1354 if (!rt6_cache_allowed_for_pmtu(rt6)) {
1356 rt6_do_update_pmtu(rt6, mtu); 1355 rt6_do_update_pmtu(rt6, mtu);
1357 } else { 1356 } else {
1358 const struct in6_addr *daddr, *saddr; 1357 const struct in6_addr *daddr, *saddr;
@@ -1748,21 +1747,21 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
1748 return -EINVAL; 1747 return -EINVAL;
1749} 1748}
1750 1749
1751int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret) 1750static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1752{ 1751{
1753 int err;
1754 struct net *net = cfg->fc_nlinfo.nl_net; 1752 struct net *net = cfg->fc_nlinfo.nl_net;
1755 struct rt6_info *rt = NULL; 1753 struct rt6_info *rt = NULL;
1756 struct net_device *dev = NULL; 1754 struct net_device *dev = NULL;
1757 struct inet6_dev *idev = NULL; 1755 struct inet6_dev *idev = NULL;
1758 struct fib6_table *table; 1756 struct fib6_table *table;
1759 int addr_type; 1757 int addr_type;
1758 int err = -EINVAL;
1760 1759
1761 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1760 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1762 return -EINVAL; 1761 goto out;
1763#ifndef CONFIG_IPV6_SUBTREES 1762#ifndef CONFIG_IPV6_SUBTREES
1764 if (cfg->fc_src_len) 1763 if (cfg->fc_src_len)
1765 return -EINVAL; 1764 goto out;
1766#endif 1765#endif
1767 if (cfg->fc_ifindex) { 1766 if (cfg->fc_ifindex) {
1768 err = -ENODEV; 1767 err = -ENODEV;
@@ -1877,7 +1876,7 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
1877 switch (cfg->fc_type) { 1876 switch (cfg->fc_type) {
1878 case RTN_BLACKHOLE: 1877 case RTN_BLACKHOLE:
1879 rt->dst.error = -EINVAL; 1878 rt->dst.error = -EINVAL;
1880 rt->dst.output = dst_discard_sk; 1879 rt->dst.output = dst_discard_out;
1881 rt->dst.input = dst_discard; 1880 rt->dst.input = dst_discard;
1882 break; 1881 break;
1883 case RTN_PROHIBIT: 1882 case RTN_PROHIBIT:
@@ -1982,9 +1981,7 @@ install_route:
1982 1981
1983 cfg->fc_nlinfo.nl_net = dev_net(dev); 1982 cfg->fc_nlinfo.nl_net = dev_net(dev);
1984 1983
1985 *rt_ret = rt; 1984 return rt;
1986
1987 return 0;
1988out: 1985out:
1989 if (dev) 1986 if (dev)
1990 dev_put(dev); 1987 dev_put(dev);
@@ -1993,20 +1990,21 @@ out:
1993 if (rt) 1990 if (rt)
1994 dst_free(&rt->dst); 1991 dst_free(&rt->dst);
1995 1992
1996 *rt_ret = NULL; 1993 return ERR_PTR(err);
1997
1998 return err;
1999} 1994}
2000 1995
2001int ip6_route_add(struct fib6_config *cfg) 1996int ip6_route_add(struct fib6_config *cfg)
2002{ 1997{
2003 struct mx6_config mxc = { .mx = NULL, }; 1998 struct mx6_config mxc = { .mx = NULL, };
2004 struct rt6_info *rt = NULL; 1999 struct rt6_info *rt;
2005 int err; 2000 int err;
2006 2001
2007 err = ip6_route_info_create(cfg, &rt); 2002 rt = ip6_route_info_create(cfg);
2008 if (err) 2003 if (IS_ERR(rt)) {
2004 err = PTR_ERR(rt);
2005 rt = NULL;
2009 goto out; 2006 goto out;
2007 }
2010 2008
2011 err = ip6_convert_metrics(&mxc, cfg); 2009 err = ip6_convert_metrics(&mxc, cfg);
2012 if (err) 2010 if (err)
@@ -2098,7 +2096,6 @@ static int ip6_route_del(struct fib6_config *cfg)
2098 2096
2099static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) 2097static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2100{ 2098{
2101 struct net *net = dev_net(skb->dev);
2102 struct netevent_redirect netevent; 2099 struct netevent_redirect netevent;
2103 struct rt6_info *rt, *nrt = NULL; 2100 struct rt6_info *rt, *nrt = NULL;
2104 struct ndisc_options ndopts; 2101 struct ndisc_options ndopts;
@@ -2159,7 +2156,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
2159 } 2156 }
2160 2157
2161 rt = (struct rt6_info *) dst; 2158 rt = (struct rt6_info *) dst;
2162 if (rt == net->ipv6.ip6_null_entry) { 2159 if (rt->rt6i_flags & RTF_REJECT) {
2163 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); 2160 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2164 return; 2161 return;
2165 } 2162 }
@@ -2288,7 +2285,6 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
2288 unsigned int pref) 2285 unsigned int pref)
2289{ 2286{
2290 struct fib6_config cfg = { 2287 struct fib6_config cfg = {
2291 .fc_table = RT6_TABLE_INFO,
2292 .fc_metric = IP6_RT_PRIO_USER, 2288 .fc_metric = IP6_RT_PRIO_USER,
2293 .fc_ifindex = ifindex, 2289 .fc_ifindex = ifindex,
2294 .fc_dst_len = prefixlen, 2290 .fc_dst_len = prefixlen,
@@ -2299,6 +2295,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
2299 .fc_nlinfo.nl_net = net, 2295 .fc_nlinfo.nl_net = net,
2300 }; 2296 };
2301 2297
2298 cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2302 cfg.fc_dst = *prefix; 2299 cfg.fc_dst = *prefix;
2303 cfg.fc_gateway = *gwaddr; 2300 cfg.fc_gateway = *gwaddr;
2304 2301
@@ -2339,7 +2336,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2339 unsigned int pref) 2336 unsigned int pref)
2340{ 2337{
2341 struct fib6_config cfg = { 2338 struct fib6_config cfg = {
2342 .fc_table = RT6_TABLE_DFLT, 2339 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2343 .fc_metric = IP6_RT_PRIO_USER, 2340 .fc_metric = IP6_RT_PRIO_USER,
2344 .fc_ifindex = dev->ifindex, 2341 .fc_ifindex = dev->ifindex,
2345 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 2342 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
@@ -2386,7 +2383,8 @@ static void rtmsg_to_fib6_config(struct net *net,
2386{ 2383{
2387 memset(cfg, 0, sizeof(*cfg)); 2384 memset(cfg, 0, sizeof(*cfg));
2388 2385
2389 cfg->fc_table = RT6_TABLE_MAIN; 2386 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2387 : RT6_TABLE_MAIN;
2390 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 2388 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2391 cfg->fc_metric = rtmsg->rtmsg_metric; 2389 cfg->fc_metric = rtmsg->rtmsg_metric;
2392 cfg->fc_expires = rtmsg->rtmsg_info; 2390 cfg->fc_expires = rtmsg->rtmsg_info;
@@ -2470,7 +2468,7 @@ static int ip6_pkt_discard(struct sk_buff *skb)
2470 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 2468 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2471} 2469}
2472 2470
2473static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) 2471static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2474{ 2472{
2475 skb->dev = skb_dst(skb)->dev; 2473 skb->dev = skb_dst(skb)->dev;
2476 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 2474 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
@@ -2481,7 +2479,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb)
2481 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 2479 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2482} 2480}
2483 2481
2484static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) 2482static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2485{ 2483{
2486 skb->dev = skb_dst(skb)->dev; 2484 skb->dev = skb_dst(skb)->dev;
2487 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 2485 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
@@ -2495,6 +2493,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2495 const struct in6_addr *addr, 2493 const struct in6_addr *addr,
2496 bool anycast) 2494 bool anycast)
2497{ 2495{
2496 u32 tb_id;
2498 struct net *net = dev_net(idev->dev); 2497 struct net *net = dev_net(idev->dev);
2499 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 2498 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2500 DST_NOCOUNT); 2499 DST_NOCOUNT);
@@ -2517,7 +2516,8 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2517 rt->rt6i_gateway = *addr; 2516 rt->rt6i_gateway = *addr;
2518 rt->rt6i_dst.addr = *addr; 2517 rt->rt6i_dst.addr = *addr;
2519 rt->rt6i_dst.plen = 128; 2518 rt->rt6i_dst.plen = 128;
2520 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 2519 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2520 rt->rt6i_table = fib6_get_table(net, tb_id);
2521 rt->dst.flags |= DST_NOCACHE; 2521 rt->dst.flags |= DST_NOCACHE;
2522 2522
2523 atomic_set(&rt->dst.__refcnt, 1); 2523 atomic_set(&rt->dst.__refcnt, 1);
@@ -2622,7 +2622,8 @@ void rt6_ifdown(struct net *net, struct net_device *dev)
2622 2622
2623 fib6_clean_all(net, fib6_ifdown, &adn); 2623 fib6_clean_all(net, fib6_ifdown, &adn);
2624 icmp6_clean_all(fib6_ifdown, &adn); 2624 icmp6_clean_all(fib6_ifdown, &adn);
2625 rt6_uncached_list_flush_dev(net, dev); 2625 if (dev)
2626 rt6_uncached_list_flush_dev(net, dev);
2626} 2627}
2627 2628
2628struct rt6_mtu_change_arg { 2629struct rt6_mtu_change_arg {
@@ -2895,9 +2896,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
2895 r_cfg.fc_encap_type = nla_get_u16(nla); 2896 r_cfg.fc_encap_type = nla_get_u16(nla);
2896 } 2897 }
2897 2898
2898 err = ip6_route_info_create(&r_cfg, &rt); 2899 rt = ip6_route_info_create(&r_cfg);
2899 if (err) 2900 if (IS_ERR(rt)) {
2901 err = PTR_ERR(rt);
2902 rt = NULL;
2900 goto cleanup; 2903 goto cleanup;
2904 }
2901 2905
2902 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); 2906 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2903 if (err) { 2907 if (err) {
@@ -3276,6 +3280,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3276 } else { 3280 } else {
3277 fl6.flowi6_oif = oif; 3281 fl6.flowi6_oif = oif;
3278 3282
3283 if (netif_index_is_l3_master(net, oif)) {
3284 fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3285 FLOWI_FLAG_SKIP_NH_OIF;
3286 }
3287
3279 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); 3288 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3280 } 3289 }
3281 3290
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 94428fd85b2f..dcccae86190f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1394,34 +1394,20 @@ static int ipip6_tunnel_init(struct net_device *dev)
1394 return 0; 1394 return 0;
1395} 1395}
1396 1396
1397static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) 1397static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1398{ 1398{
1399 struct ip_tunnel *tunnel = netdev_priv(dev); 1399 struct ip_tunnel *tunnel = netdev_priv(dev);
1400 struct iphdr *iph = &tunnel->parms.iph; 1400 struct iphdr *iph = &tunnel->parms.iph;
1401 struct net *net = dev_net(dev); 1401 struct net *net = dev_net(dev);
1402 struct sit_net *sitn = net_generic(net, sit_net_id); 1402 struct sit_net *sitn = net_generic(net, sit_net_id);
1403 1403
1404 tunnel->dev = dev;
1405 tunnel->net = dev_net(dev);
1406
1407 iph->version = 4; 1404 iph->version = 4;
1408 iph->protocol = IPPROTO_IPV6; 1405 iph->protocol = IPPROTO_IPV6;
1409 iph->ihl = 5; 1406 iph->ihl = 5;
1410 iph->ttl = 64; 1407 iph->ttl = 64;
1411 1408
1412 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1413 if (!dev->tstats)
1414 return -ENOMEM;
1415
1416 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1417 if (!tunnel->dst_cache) {
1418 free_percpu(dev->tstats);
1419 return -ENOMEM;
1420 }
1421
1422 dev_hold(dev); 1409 dev_hold(dev);
1423 rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); 1410 rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
1424 return 0;
1425} 1411}
1426 1412
1427static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) 1413static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -1831,23 +1817,19 @@ static int __net_init sit_init_net(struct net *net)
1831 */ 1817 */
1832 sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; 1818 sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1833 1819
1834 err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
1835 if (err)
1836 goto err_dev_free;
1837
1838 ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
1839 err = register_netdev(sitn->fb_tunnel_dev); 1820 err = register_netdev(sitn->fb_tunnel_dev);
1840 if (err) 1821 if (err)
1841 goto err_reg_dev; 1822 goto err_reg_dev;
1842 1823
1824 ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
1825 ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
1826
1843 t = netdev_priv(sitn->fb_tunnel_dev); 1827 t = netdev_priv(sitn->fb_tunnel_dev);
1844 1828
1845 strcpy(t->parms.name, sitn->fb_tunnel_dev->name); 1829 strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
1846 return 0; 1830 return 0;
1847 1831
1848err_reg_dev: 1832err_reg_dev:
1849 dev_put(sitn->fb_tunnel_dev);
1850err_dev_free:
1851 ipip6_dev_free(sitn->fb_tunnel_dev); 1833 ipip6_dev_free(sitn->fb_tunnel_dev);
1852err_alloc_dev: 1834err_alloc_dev:
1853 return err; 1835 return err;
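
The sit.c hunks reorder sit_init_net so that register_netdev() is the last step that can fail; the remaining fallback-tunnel setup becomes pure field assignment, so ipip6_fb_tunnel_init can return void and the extra unwind label disappears. A sketch of that fail-early ordering, with stand-in names:

#include <stdio.h>

struct dev { int registered, configured; };

static int register_dev(struct dev *d)
{
    d->registered = 1;
    return 0; /* the only step left that can fail */
}

static void fb_init(struct dev *d)
{
    d->configured = 1; /* pure field setup: cannot fail, returns void */
}

static int net_init(struct dev *d)
{
    int err = register_dev(d);

    if (err)
        return err; /* nothing to unwind yet */
    fb_init(d);     /* infallible, so no error path after registration */
    return 0;
}

int main(void)
{
    struct dev d = { 0, 0 };

    return net_init(&d);
}
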
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 0909f4e0d53c..bb8f2fa1c7fb 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -114,14 +114,11 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
114} 114}
115EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence); 115EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
116 116
117__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) 117__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp)
118{ 118{
119 const struct ipv6hdr *iph = ipv6_hdr(skb); 119 const struct ipv6hdr *iph = ipv6_hdr(skb);
120 const struct tcphdr *th = tcp_hdr(skb); 120 const struct tcphdr *th = tcp_hdr(skb);
121 121
122 tcp_synq_overflow(sk);
123 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
124
125 return __cookie_v6_init_sequence(iph, th, mssp); 122 return __cookie_v6_init_sequence(iph, th, mssp);
126} 123}
127 124
@@ -173,7 +170,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
173 goto out; 170 goto out;
174 171
175 ret = NULL; 172 ret = NULL;
176 req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk); 173 req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
177 if (!req) 174 if (!req)
178 goto out; 175 goto out;
179 176
@@ -210,7 +207,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
210 ireq->wscale_ok = tcp_opt.wscale_ok; 207 ireq->wscale_ok = tcp_opt.wscale_ok;
211 ireq->tstamp_ok = tcp_opt.saw_tstamp; 208 ireq->tstamp_ok = tcp_opt.saw_tstamp;
212 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; 209 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
213 treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; 210 treq->snt_synack.v64 = 0;
214 treq->rcv_isn = ntohl(th->seq) - 1; 211 treq->rcv_isn = ntohl(th->seq) - 1;
215 treq->snt_isn = cookie; 212 treq->snt_isn = cookie;
216 213
@@ -238,9 +235,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
238 goto out_free; 235 goto out_free;
239 } 236 }
240 237
241 req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); 238 req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
242 tcp_select_initial_window(tcp_full_space(sk), req->mss, 239 tcp_select_initial_window(tcp_full_space(sk), req->mss,
243 &req->rcv_wnd, &req->window_clamp, 240 &req->rsk_rcv_wnd, &req->rsk_window_clamp,
244 ireq->wscale_ok, &rcv_wscale, 241 ireq->wscale_ok, &rcv_wscale,
245 dst_metric(dst, RTAX_INITRWND)); 242 dst_metric(dst, RTAX_INITRWND));
246 243
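
The syncookies.c hunk makes cookie_v6_init_sequence a pure function of the packet: the side effects (tcp_synq_overflow and the SYNCOOKIESSENT counter) move to the caller, and the function no longer needs the socket at all. A sketch of that separation; the packet layout and the MSS choice below are stand-ins, not the kernel's cookie algorithm:

#include <stdint.h>
#include <stdio.h>

struct pkt { uint32_t saddr, daddr; uint16_t sport, dport; };

/* pure: depends only on the packet, writes the chosen MSS, no counters */
static uint32_t cookie_init_sequence(const struct pkt *p, uint16_t *mssp)
{
    *mssp = 1440; /* stand-in MSS choice, not the kernel's table lookup */
    return p->saddr ^ p->daddr ^ (((uint32_t)p->sport << 16) | p->dport);
}

int main(void)
{
    struct pkt p = { 0x0a000001, 0x0a000002, 12345, 443 };
    uint16_t mss;
    unsigned long cookies_sent = 0;

    uint32_t isn = cookie_init_sequence(&p, &mss);
    cookies_sent++; /* overflow marking and stats stay with the caller */
    printf("isn=%08x mss=%u sent=%lu\n",
           (unsigned)isn, (unsigned)mss, cookies_sent);
    return 0;
}
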
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 97d9314ea361..c5429a636f1a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -70,8 +70,8 @@
70#include <linux/crypto.h> 70#include <linux/crypto.h>
71#include <linux/scatterlist.h> 71#include <linux/scatterlist.h>
72 72
73static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); 73static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
74static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, 74static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
75 struct request_sock *req); 75 struct request_sock *req);
76 76
77static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 77static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
@@ -82,7 +82,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific;
82static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; 82static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
83static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; 83static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
84#else 84#else
85static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, 85static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
86 const struct in6_addr *addr) 86 const struct in6_addr *addr)
87{ 87{
88 return NULL; 88 return NULL;
@@ -434,11 +434,11 @@ out:
434} 434}
435 435
436 436
437static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, 437static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
438 struct flowi *fl, 438 struct flowi *fl,
439 struct request_sock *req, 439 struct request_sock *req,
440 u16 queue_mapping, 440 struct tcp_fastopen_cookie *foc,
441 struct tcp_fastopen_cookie *foc) 441 bool attach_req)
442{ 442{
443 struct inet_request_sock *ireq = inet_rsk(req); 443 struct inet_request_sock *ireq = inet_rsk(req);
444 struct ipv6_pinfo *np = inet6_sk(sk); 444 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -447,10 +447,11 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
447 int err = -ENOMEM; 447 int err = -ENOMEM;
448 448
449 /* First, grab a route. */ 449 /* First, grab a route. */
450 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) 450 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
451 IPPROTO_TCP)) == NULL)
451 goto done; 452 goto done;
452 453
453 skb = tcp_make_synack(sk, dst, req, foc); 454 skb = tcp_make_synack(sk, dst, req, foc, attach_req);
454 455
455 if (skb) { 456 if (skb) {
456 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, 457 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
@@ -460,7 +461,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
460 if (np->repflow && ireq->pktopts) 461 if (np->repflow && ireq->pktopts)
461 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); 462 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
462 463
463 skb_set_queue_mapping(skb, queue_mapping);
464 err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); 464 err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
465 err = net_xmit_eval(err); 465 err = net_xmit_eval(err);
466 } 466 }
@@ -476,13 +476,13 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req)
476} 476}
477 477
478#ifdef CONFIG_TCP_MD5SIG 478#ifdef CONFIG_TCP_MD5SIG
479static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, 479static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
480 const struct in6_addr *addr) 480 const struct in6_addr *addr)
481{ 481{
482 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6); 482 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
483} 483}
484 484
485static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, 485static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
486 const struct sock *addr_sk) 486 const struct sock *addr_sk)
487{ 487{
488 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); 488 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
@@ -621,8 +621,12 @@ clear_hash_noput:
621 return 1; 621 return 1;
622} 622}
623 623
624static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) 624#endif
625
626static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
627 const struct sk_buff *skb)
625{ 628{
629#ifdef CONFIG_TCP_MD5SIG
626 const __u8 *hash_location = NULL; 630 const __u8 *hash_location = NULL;
627 struct tcp_md5sig_key *hash_expected; 631 struct tcp_md5sig_key *hash_expected;
628 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 632 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -659,26 +663,27 @@ static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
659 &ip6h->daddr, ntohs(th->dest)); 663 &ip6h->daddr, ntohs(th->dest));
660 return true; 664 return true;
661 } 665 }
666#endif
662 return false; 667 return false;
663} 668}
664#endif
665 669
666static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, 670static void tcp_v6_init_req(struct request_sock *req,
671 const struct sock *sk_listener,
667 struct sk_buff *skb) 672 struct sk_buff *skb)
668{ 673{
669 struct inet_request_sock *ireq = inet_rsk(req); 674 struct inet_request_sock *ireq = inet_rsk(req);
670 struct ipv6_pinfo *np = inet6_sk(sk); 675 const struct ipv6_pinfo *np = inet6_sk(sk_listener);
671 676
672 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 677 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
673 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 678 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
674 679
675 /* So that link locals have meaning */ 680 /* So that link locals have meaning */
676 if (!sk->sk_bound_dev_if && 681 if (!sk_listener->sk_bound_dev_if &&
677 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 682 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
678 ireq->ir_iif = tcp_v6_iif(skb); 683 ireq->ir_iif = tcp_v6_iif(skb);
679 684
680 if (!TCP_SKB_CB(skb)->tcp_tw_isn && 685 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
681 (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || 686 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
682 np->rxopt.bits.rxinfo || 687 np->rxopt.bits.rxinfo ||
683 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 688 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
684 np->rxopt.bits.rxohlim || np->repflow)) { 689 np->rxopt.bits.rxohlim || np->repflow)) {
@@ -687,13 +692,14 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
687 } 692 }
688} 693}
689 694
690static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, 695static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
696 struct flowi *fl,
691 const struct request_sock *req, 697 const struct request_sock *req,
692 bool *strict) 698 bool *strict)
693{ 699{
694 if (strict) 700 if (strict)
695 *strict = true; 701 *strict = true;
696 return inet6_csk_route_req(sk, &fl->u.ip6, req); 702 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
697} 703}
698 704
699struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 705struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
@@ -720,10 +726,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
720 .route_req = tcp_v6_route_req, 726 .route_req = tcp_v6_route_req,
721 .init_seq = tcp_v6_init_sequence, 727 .init_seq = tcp_v6_init_sequence,
722 .send_synack = tcp_v6_send_synack, 728 .send_synack = tcp_v6_send_synack,
723 .queue_hash_add = inet6_csk_reqsk_queue_hash_add,
724}; 729};
725 730
726static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, 731static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
727 u32 ack, u32 win, u32 tsval, u32 tsecr, 732 u32 ack, u32 win, u32 tsval, u32 tsecr,
728 int oif, struct tcp_md5sig_key *key, int rst, 733 int oif, struct tcp_md5sig_key *key, int rst,
729 u8 tclass, u32 label) 734 u8 tclass, u32 label)
@@ -822,7 +827,7 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq,
822 kfree_skb(buff); 827 kfree_skb(buff);
823} 828}
824 829
825static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) 830static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
826{ 831{
827 const struct tcphdr *th = tcp_hdr(skb); 832 const struct tcphdr *th = tcp_hdr(skb);
828 u32 seq = 0, ack_seq = 0; 833 u32 seq = 0, ack_seq = 0;
@@ -893,7 +898,7 @@ release_sk1:
893#endif 898#endif
894} 899}
895 900
896static void tcp_v6_send_ack(struct sock *sk, struct sk_buff *skb, u32 seq, 901static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
897 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 902 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
898 struct tcp_md5sig_key *key, u8 tclass, 903 struct tcp_md5sig_key *key, u8 tclass,
899 u32 label) 904 u32 label)
@@ -916,7 +921,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
916 inet_twsk_put(tw); 921 inet_twsk_put(tw);
917} 922}
918 923
919static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, 924static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
920 struct request_sock *req) 925 struct request_sock *req)
921{ 926{
922 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 927 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
@@ -924,44 +929,18 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
924 */ 929 */
925 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 930 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
926 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 931 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
927 tcp_rsk(req)->rcv_nxt, req->rcv_wnd, 932 tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
928 tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, 933 tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
929 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 934 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
930 0, 0); 935 0, 0);
931} 936}
932 937
933 938
934static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) 939static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
935{ 940{
941#ifdef CONFIG_SYN_COOKIES
936 const struct tcphdr *th = tcp_hdr(skb); 942 const struct tcphdr *th = tcp_hdr(skb);
937 struct request_sock *req;
938 struct sock *nsk;
939
940 /* Find possible connection requests. */
941 req = inet6_csk_search_req(sk, th->source,
942 &ipv6_hdr(skb)->saddr,
943 &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
944 if (req) {
945 nsk = tcp_check_req(sk, skb, req, false);
946 if (!nsk || nsk == sk)
947 reqsk_put(req);
948 return nsk;
949 }
950 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
951 &ipv6_hdr(skb)->saddr, th->source,
952 &ipv6_hdr(skb)->daddr, ntohs(th->dest),
953 tcp_v6_iif(skb));
954
955 if (nsk) {
956 if (nsk->sk_state != TCP_TIME_WAIT) {
957 bh_lock_sock(nsk);
958 return nsk;
959 }
960 inet_twsk_put(inet_twsk(nsk));
961 return NULL;
962 }
963 943
964#ifdef CONFIG_SYN_COOKIES
965 if (!th->syn) 944 if (!th->syn)
966 sk = cookie_v6_check(sk, skb); 945 sk = cookie_v6_check(sk, skb);
967#endif 946#endif
@@ -984,12 +963,15 @@ drop:
984 return 0; /* don't send reset */ 963 return 0; /* don't send reset */
985} 964}
986 965
987static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 966static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
988 struct request_sock *req, 967 struct request_sock *req,
989 struct dst_entry *dst) 968 struct dst_entry *dst,
969 struct request_sock *req_unhash,
970 bool *own_req)
990{ 971{
991 struct inet_request_sock *ireq; 972 struct inet_request_sock *ireq;
992 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 973 struct ipv6_pinfo *newnp;
974 const struct ipv6_pinfo *np = inet6_sk(sk);
993 struct tcp6_sock *newtcp6sk; 975 struct tcp6_sock *newtcp6sk;
994 struct inet_sock *newinet; 976 struct inet_sock *newinet;
995 struct tcp_sock *newtp; 977 struct tcp_sock *newtp;
@@ -1004,7 +986,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1004 * v6 mapped 986 * v6 mapped
1005 */ 987 */
1006 988
1007 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst); 989 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
990 req_unhash, own_req);
1008 991
1009 if (!newsk) 992 if (!newsk)
1010 return NULL; 993 return NULL;
@@ -1057,7 +1040,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1057 goto out_overflow; 1040 goto out_overflow;
1058 1041
1059 if (!dst) { 1042 if (!dst) {
1060 dst = inet6_csk_route_req(sk, &fl6, req); 1043 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1061 if (!dst) 1044 if (!dst)
1062 goto out; 1045 goto out;
1063 } 1046 }
@@ -1090,8 +1073,6 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1090 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; 1073 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1091 newsk->sk_bound_dev_if = ireq->ir_iif; 1074 newsk->sk_bound_dev_if = ireq->ir_iif;
1092 1075
1093 sk_set_txhash(newsk);
1094
1095 /* Now IPv6 options... 1076 /* Now IPv6 options...
1096 1077
1097 First: no IPv4 options. 1078 First: no IPv4 options.
@@ -1103,16 +1084,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1103 /* Clone RX bits */ 1084 /* Clone RX bits */
1104 newnp->rxopt.all = np->rxopt.all; 1085 newnp->rxopt.all = np->rxopt.all;
1105 1086
1106 /* Clone pktoptions received with SYN */
1107 newnp->pktoptions = NULL; 1087 newnp->pktoptions = NULL;
1108 if (ireq->pktopts) {
1109 newnp->pktoptions = skb_clone(ireq->pktopts,
1110 sk_gfp_atomic(sk, GFP_ATOMIC));
1111 consume_skb(ireq->pktopts);
1112 ireq->pktopts = NULL;
1113 if (newnp->pktoptions)
1114 skb_set_owner_r(newnp->pktoptions, newsk);
1115 }
1116 newnp->opt = NULL; 1088 newnp->opt = NULL;
1117 newnp->mcast_oif = tcp_v6_iif(skb); 1089 newnp->mcast_oif = tcp_v6_iif(skb);
1118 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1090 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
@@ -1167,7 +1139,20 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1167 tcp_done(newsk); 1139 tcp_done(newsk);
1168 goto out; 1140 goto out;
1169 } 1141 }
1170 __inet_hash(newsk, NULL); 1142 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1143 if (*own_req) {
1144 tcp_move_syn(newtp, req);
1145
1146 /* Clone pktoptions received with SYN, if we own the req */
1147 if (ireq->pktopts) {
1148 newnp->pktoptions = skb_clone(ireq->pktopts,
1149 sk_gfp_atomic(sk, GFP_ATOMIC));
1150 consume_skb(ireq->pktopts);
1151 ireq->pktopts = NULL;
1152 if (newnp->pktoptions)
1153 skb_set_owner_r(newnp->pktoptions, newsk);
1154 }
1155 }
1171 1156
1172 return newsk; 1157 return newsk;
1173 1158
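
Note on the hashing step above: inet_ehash_nolisten() is where ownership of the request is decided, swapping the child into the established hash in place of the request socket. Only the caller that wins that swap sees *own_req set, which is why the saved SYN (tcp_move_syn()) and the cloned pktoptions are consumed only under that branch. A minimal user-space model of the idea, assuming nothing beyond C11 atomics (all names illustrative):

#include <stdatomic.h>
#include <stdio.h>

/* Toy model only: two paths race to replace a request with a child
 * socket; the single successful compare-and-swap "owns" the request,
 * mirroring what *own_req conveys in the hunk above. */
enum { SLOT_REQ = 1, SLOT_CHILD = 2 };

static _Atomic int ehash_slot = SLOT_REQ;

static int install_child(void)
{
    int expected = SLOT_REQ;

    /* true for exactly one caller, like *own_req */
    return atomic_compare_exchange_strong(&ehash_slot, &expected,
                                          SLOT_CHILD);
}

int main(void)
{
    printf("first caller owns req: %d\n", install_child());
    printf("second caller owns req: %d\n", install_child());
    return 0;
}
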
@@ -1181,7 +1166,7 @@ out:
1181} 1166}
1182 1167
1183/* The socket must have its spinlock held when we get 1168/* The socket must have its spinlock held when we get
1184 * here. 1169 * here, unless it is a TCP_LISTEN socket.
1185 * 1170 *
1186 * We have a potential double-lock case here, so even when 1171 * We have a potential double-lock case here, so even when
1187 * doing backlog processing we use the BH locking scheme. 1172 * doing backlog processing we use the BH locking scheme.
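
A hedged kernel-context sketch of the BH locking scheme this comment refers to, as the receive path further down applies it: softirq context takes the BH spinlock, processes the segment directly when no process owns the socket, and otherwise defers the skb to the backlog for the owner to drain under lock_sock(). Statistics and error paths are elided.

static int example_bh_locked_rcv(struct sock *sk, struct sk_buff *skb)
{
    int ret = 0;

    bh_lock_sock_nested(sk);
    if (!sock_owned_by_user(sk))
        ret = tcp_v6_do_rcv(sk, skb);   /* process in softirq context */
    else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf + sk->sk_sndbuf))
        ret = -ENOMEM;                  /* backlog full: caller drops */
    bh_unlock_sock(sk);

    return ret;
}
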
@@ -1252,18 +1237,14 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1252 goto csum_err; 1237 goto csum_err;
1253 1238
1254 if (sk->sk_state == TCP_LISTEN) { 1239 if (sk->sk_state == TCP_LISTEN) {
1255 struct sock *nsk = tcp_v6_hnd_req(sk, skb); 1240 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1241
1256 if (!nsk) 1242 if (!nsk)
1257 goto discard; 1243 goto discard;
1258 1244
1259 /*
1260 * Queue it on the new socket if the new socket is active,
1261 * otherwise we just shortcircuit this and continue with
1262 * the new socket..
1263 */
1264 if (nsk != sk) { 1245 if (nsk != sk) {
1265 sock_rps_save_rxhash(nsk, skb); 1246 sock_rps_save_rxhash(nsk, skb);
1266 sk_mark_napi_id(sk, skb); 1247 sk_mark_napi_id(nsk, skb);
1267 if (tcp_child_process(sk, nsk, skb)) 1248 if (tcp_child_process(sk, nsk, skb))
1268 goto reset; 1249 goto reset;
1269 if (opt_skb) 1250 if (opt_skb)
@@ -1273,7 +1254,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1273 } else 1254 } else
1274 sock_rps_save_rxhash(sk, skb); 1255 sock_rps_save_rxhash(sk, skb);
1275 1256
1276 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) 1257 if (tcp_rcv_state_process(sk, skb))
1277 goto reset; 1258 goto reset;
1278 if (opt_skb) 1259 if (opt_skb)
1279 goto ipv6_pktoptions; 1260 goto ipv6_pktoptions;
@@ -1387,6 +1368,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
1387 th = tcp_hdr(skb); 1368 th = tcp_hdr(skb);
1388 hdr = ipv6_hdr(skb); 1369 hdr = ipv6_hdr(skb);
1389 1370
1371lookup:
1390 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest, 1372 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
1391 inet6_iif(skb)); 1373 inet6_iif(skb));
1392 if (!sk) 1374 if (!sk)
@@ -1396,6 +1378,37 @@ process:
1396 if (sk->sk_state == TCP_TIME_WAIT) 1378 if (sk->sk_state == TCP_TIME_WAIT)
1397 goto do_time_wait; 1379 goto do_time_wait;
1398 1380
1381 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1382 struct request_sock *req = inet_reqsk(sk);
1383 struct sock *nsk = NULL;
1384
1385 sk = req->rsk_listener;
1386 tcp_v6_fill_cb(skb, hdr, th);
1387 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1388 reqsk_put(req);
1389 goto discard_it;
1390 }
1391 if (likely(sk->sk_state == TCP_LISTEN)) {
1392 nsk = tcp_check_req(sk, skb, req, false);
1393 } else {
1394 inet_csk_reqsk_queue_drop_and_put(sk, req);
1395 goto lookup;
1396 }
1397 if (!nsk) {
1398 reqsk_put(req);
1399 goto discard_it;
1400 }
1401 if (nsk == sk) {
1402 sock_hold(sk);
1403 reqsk_put(req);
1404 tcp_v6_restore_cb(skb);
1405 } else if (tcp_child_process(sk, nsk, skb)) {
1406 tcp_v6_send_reset(nsk, skb);
1407 goto discard_it;
1408 } else {
1409 return 0;
1410 }
1411 }
1399 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { 1412 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1400 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1413 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1401 goto discard_and_relse; 1414 goto discard_and_relse;
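
The new TCP_NEW_SYN_RECV branch exists because request sockets are now installed directly in the established hash, so the lookup can return one before any listener lock is taken (hedged inference from the hunk; this matches the lockless-listener rework in this cycle). Once tcp_check_req() runs, three outcomes are possible, condensed into a kernel-context sketch using only calls visible above (return codes here are illustrative):

static int example_handle_new_syn_recv(struct sock *sk,
                                       struct request_sock *req,
                                       struct sk_buff *skb)
{
    struct sock *nsk = tcp_check_req(sk, skb, req, false);

    if (!nsk) {                 /* bad ACK: request already dropped */
        reqsk_put(req);
        return -1;              /* caller discards the skb */
    }
    if (nsk == sk) {            /* e.g. retransmitted SYN: no child yet */
        sock_hold(sk);
        reqsk_put(req);
        return 1;               /* caller restores the cb and falls
                                 * through on the listener */
    }
    /* a child was created; hand it the skb, send a reset on failure */
    return tcp_child_process(sk, nsk, skb) ? -2 : 0;
}
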
@@ -1406,17 +1419,21 @@ process:
1406 1419
1407 tcp_v6_fill_cb(skb, hdr, th); 1420 tcp_v6_fill_cb(skb, hdr, th);
1408 1421
1409#ifdef CONFIG_TCP_MD5SIG
1410 if (tcp_v6_inbound_md5_hash(sk, skb)) 1422 if (tcp_v6_inbound_md5_hash(sk, skb))
1411 goto discard_and_relse; 1423 goto discard_and_relse;
1412#endif
1413 1424
1414 if (sk_filter(sk, skb)) 1425 if (sk_filter(sk, skb))
1415 goto discard_and_relse; 1426 goto discard_and_relse;
1416 1427
1417 sk_incoming_cpu_update(sk);
1418 skb->dev = NULL; 1428 skb->dev = NULL;
1419 1429
1430 if (sk->sk_state == TCP_LISTEN) {
1431 ret = tcp_v6_do_rcv(sk, skb);
1432 goto put_and_return;
1433 }
1434
1435 sk_incoming_cpu_update(sk);
1436
1420 bh_lock_sock_nested(sk); 1437 bh_lock_sock_nested(sk);
1421 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); 1438 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
1422 ret = 0; 1439 ret = 0;
@@ -1431,6 +1448,7 @@ process:
1431 } 1448 }
1432 bh_unlock_sock(sk); 1449 bh_unlock_sock(sk);
1433 1450
1451put_and_return:
1434 sock_put(sk); 1452 sock_put(sk);
1435 return ret ? -1 : 0; 1453 return ret ? -1 : 0;
1436 1454
@@ -1631,7 +1649,7 @@ static void tcp_v6_destroy_sock(struct sock *sk)
1631#ifdef CONFIG_PROC_FS 1649#ifdef CONFIG_PROC_FS
1632/* Proc filesystem TCPv6 sock list dumping. */ 1650/* Proc filesystem TCPv6 sock list dumping. */
1633static void get_openreq6(struct seq_file *seq, 1651static void get_openreq6(struct seq_file *seq,
1634 struct request_sock *req, int i, kuid_t uid) 1652 const struct request_sock *req, int i)
1635{ 1653{
1636 long ttd = req->rsk_timer.expires - jiffies; 1654 long ttd = req->rsk_timer.expires - jiffies;
1637 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 1655 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
@@ -1655,7 +1673,8 @@ static void get_openreq6(struct seq_file *seq,
1655 1, /* timers active (only the expire timer) */ 1673 1, /* timers active (only the expire timer) */
1656 jiffies_to_clock_t(ttd), 1674 jiffies_to_clock_t(ttd),
1657 req->num_timeout, 1675 req->num_timeout,
1658 from_kuid_munged(seq_user_ns(seq), uid), 1676 from_kuid_munged(seq_user_ns(seq),
1677 sock_i_uid(req->rsk_listener)),
1659 0, /* non standard timer */ 1678 0, /* non standard timer */
1660 0, /* open_requests have no inode */ 1679 0, /* open_requests have no inode */
1661 0, req); 1680 0, req);
@@ -1670,7 +1689,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1670 const struct inet_sock *inet = inet_sk(sp); 1689 const struct inet_sock *inet = inet_sk(sp);
1671 const struct tcp_sock *tp = tcp_sk(sp); 1690 const struct tcp_sock *tp = tcp_sk(sp);
1672 const struct inet_connection_sock *icsk = inet_csk(sp); 1691 const struct inet_connection_sock *icsk = inet_csk(sp);
1673 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; 1692 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1693 int rx_queue;
1694 int state;
1674 1695
1675 dest = &sp->sk_v6_daddr; 1696 dest = &sp->sk_v6_daddr;
1676 src = &sp->sk_v6_rcv_saddr; 1697 src = &sp->sk_v6_rcv_saddr;
@@ -1691,6 +1712,15 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1691 timer_expires = jiffies; 1712 timer_expires = jiffies;
1692 } 1713 }
1693 1714
1715 state = sk_state_load(sp);
1716 if (state == TCP_LISTEN)
1717 rx_queue = sp->sk_ack_backlog;
1718 else
1719 /* Because we don't lock the socket,
1720 * we might find a transient negative value.
1721 */
1722 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1723
1694 seq_printf(seq, 1724 seq_printf(seq,
1695 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1725 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1696 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", 1726 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
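
The rx_queue computation samples tp->rcv_nxt and tp->copied_seq without the socket lock (sk_state_load() above is the matching acquire-side read of sk_state, presumably paired with a release on the write side), so the difference can be transiently negative and is clamped exactly as max_t(int, ..., 0) does. A standalone illustration:

#include <stdio.h>

/* rcv_nxt and copied_seq are read lockless, so copied_seq can briefly
 * run ahead; the signed difference is clamped to zero as in the hunk. */
static int rx_queue_estimate(unsigned int rcv_nxt, unsigned int copied_seq)
{
    int d = (int)(rcv_nxt - copied_seq);

    return d > 0 ? d : 0;
}

int main(void)
{
    printf("%d\n", rx_queue_estimate(1000, 900)); /* 100 queued bytes */
    printf("%d\n", rx_queue_estimate(900, 1000)); /* transient race: 0 */
    return 0;
}
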
@@ -1699,9 +1729,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1699 src->s6_addr32[2], src->s6_addr32[3], srcp, 1729 src->s6_addr32[2], src->s6_addr32[3], srcp,
1700 dest->s6_addr32[0], dest->s6_addr32[1], 1730 dest->s6_addr32[0], dest->s6_addr32[1],
1701 dest->s6_addr32[2], dest->s6_addr32[3], destp, 1731 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1702 sp->sk_state, 1732 state,
1703 tp->write_seq-tp->snd_una, 1733 tp->write_seq - tp->snd_una,
1704 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), 1734 rx_queue,
1705 timer_active, 1735 timer_active,
1706 jiffies_delta_to_clock_t(timer_expires - jiffies), 1736 jiffies_delta_to_clock_t(timer_expires - jiffies),
1707 icsk->icsk_retransmits, 1737 icsk->icsk_retransmits,
@@ -1713,8 +1743,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1713 jiffies_to_clock_t(icsk->icsk_ack.ato), 1743 jiffies_to_clock_t(icsk->icsk_ack.ato),
1714 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 1744 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1715 tp->snd_cwnd, 1745 tp->snd_cwnd,
1716 sp->sk_state == TCP_LISTEN ? 1746 state == TCP_LISTEN ?
1717 (fastopenq ? fastopenq->max_qlen : 0) : 1747 fastopenq->max_qlen :
1718 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) 1748 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1719 ); 1749 );
1720} 1750}
@@ -1760,18 +1790,12 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)
1760 } 1790 }
1761 st = seq->private; 1791 st = seq->private;
1762 1792
1763 switch (st->state) { 1793 if (sk->sk_state == TCP_TIME_WAIT)
1764 case TCP_SEQ_STATE_LISTENING: 1794 get_timewait6_sock(seq, v, st->num);
1765 case TCP_SEQ_STATE_ESTABLISHED: 1795 else if (sk->sk_state == TCP_NEW_SYN_RECV)
1766 if (sk->sk_state == TCP_TIME_WAIT) 1796 get_openreq6(seq, v, st->num);
1767 get_timewait6_sock(seq, v, st->num); 1797 else
1768 else 1798 get_tcp6_sock(seq, v, st->num);
1769 get_tcp6_sock(seq, v, st->num);
1770 break;
1771 case TCP_SEQ_STATE_OPENREQ:
1772 get_openreq6(seq, v, st->num, st->uid);
1773 break;
1774 }
1775out: 1799out:
1776 return 0; 1800 return 0;
1777} 1801}
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 3c758007b327..dae25cad05cd 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -144,6 +144,16 @@ static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
144 break; 144 break;
145} 145}
146 146
147static void tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
148 u8 type, u8 code, int offset, __be32 info)
149{
150 struct xfrm6_tunnel *handler;
151
152 for_each_tunnel_rcu(tunnel46_handlers, handler)
153 if (!handler->err_handler(skb, opt, type, code, offset, info))
154 break;
155}
156
147static const struct inet6_protocol tunnel6_protocol = { 157static const struct inet6_protocol tunnel6_protocol = {
148 .handler = tunnel6_rcv, 158 .handler = tunnel6_rcv,
149 .err_handler = tunnel6_err, 159 .err_handler = tunnel6_err,
@@ -152,7 +162,7 @@ static const struct inet6_protocol tunnel6_protocol = {
152 162
153static const struct inet6_protocol tunnel46_protocol = { 163static const struct inet6_protocol tunnel46_protocol = {
154 .handler = tunnel46_rcv, 164 .handler = tunnel46_rcv,
155 .err_handler = tunnel6_err, 165 .err_handler = tunnel46_err,
156 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, 166 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
157}; 167};
158 168
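
The tunnel6.c fix gives the IPv4-in-IPv6 protocol its own error dispatcher: errors for tunnel46_rcv traffic previously walked tunnel6_handlers, the IPv6-in-IPv6 list. Both dispatchers share the same first-acceptor-wins walk, modeled standalone below (handler names invented):

#include <stdio.h>

/* Walk a handler list until one accepts (returns 0), as
 * for_each_tunnel_rcu() does in tunnel6_err/tunnel46_err. */
typedef int (*err_handler_t)(unsigned char type, unsigned char code);

static int declines(unsigned char type, unsigned char code)
{
    (void)type; (void)code;
    return -1;                  /* not ours, keep walking */
}

static int accepts(unsigned char type, unsigned char code)
{
    printf("handled type=%u code=%u\n", type, code);
    return 0;                   /* consumed: stop the walk */
}

int main(void)
{
    err_handler_t handlers[] = { declines, accepts };
    unsigned int i;

    for (i = 0; i < sizeof(handlers) / sizeof(handlers[0]); i++)
        if (!handlers[i](3, 0))
            break;
    return 0;
}
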
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0aba654f5b91..01bcb49619ee 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -182,10 +182,12 @@ static inline int compute_score(struct sock *sk, struct net *net,
182 score++; 182 score++;
183 } 183 }
184 184
185 if (sk->sk_incoming_cpu == raw_smp_processor_id())
186 score++;
187
185 return score; 188 return score;
186} 189}
187 190
188#define SCORE2_MAX (1 + 1 + 1)
189static inline int compute_score2(struct sock *sk, struct net *net, 191static inline int compute_score2(struct sock *sk, struct net *net,
190 const struct in6_addr *saddr, __be16 sport, 192 const struct in6_addr *saddr, __be16 sport,
191 const struct in6_addr *daddr, 193 const struct in6_addr *daddr,
@@ -223,6 +225,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
223 score++; 225 score++;
224 } 226 }
225 227
228 if (sk->sk_incoming_cpu == raw_smp_processor_id())
229 score++;
230
226 return score; 231 return score;
227} 232}
228 233
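
Both UDPv6 scoring functions now award one extra point when the socket was last processed on the current CPU, preferring cache-warm sockets among otherwise equal candidates; this is presumably also why the fixed SCORE2_MAX early exit below had to go, since the maximum is no longer the old constant. A standalone sketch of the tie-break (values invented):

#include <stdio.h>

/* one extra point when sk_incoming_cpu matches the handling CPU */
static int score_with_cpu_bonus(int base, int sk_incoming_cpu, int this_cpu)
{
    return base + (sk_incoming_cpu == this_cpu);
}

int main(void)
{
    /* two sockets with equal base score; CPU locality decides */
    printf("sock A: %d\n", score_with_cpu_bonus(3, 2, 2)); /* 4 */
    printf("sock B: %d\n", score_with_cpu_bonus(3, 5, 2)); /* 3 */
    return 0;
}
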
@@ -251,8 +256,7 @@ begin:
251 hash = udp6_ehashfn(net, daddr, hnum, 256 hash = udp6_ehashfn(net, daddr, hnum,
252 saddr, sport); 257 saddr, sport);
253 matches = 1; 258 matches = 1;
254 } else if (score == SCORE2_MAX) 259 }
255 goto exact_match;
256 } else if (score == badness && reuseport) { 260 } else if (score == badness && reuseport) {
257 matches++; 261 matches++;
258 if (reciprocal_scale(hash, matches) == 0) 262 if (reciprocal_scale(hash, matches) == 0)
@@ -269,7 +273,6 @@ begin:
269 goto begin; 273 goto begin;
270 274
271 if (result) { 275 if (result) {
272exact_match:
273 if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) 276 if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
274 result = NULL; 277 result = NULL;
275 else if (unlikely(compute_score2(result, net, saddr, sport, 278 else if (unlikely(compute_score2(result, net, saddr, sport,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 74bd17882a2f..0eaab1fa6be5 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -42,8 +42,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
42 ipv6_hdr(skb)->payload_len = htons(skb->len); 42 ipv6_hdr(skb)->payload_len = htons(skb->len);
43 __skb_push(skb, skb->data - skb_network_header(skb)); 43 __skb_push(skb, skb->data - skb_network_header(skb));
44 44
45 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, 45 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
46 skb->dev, NULL, 46 dev_net(skb->dev), NULL, skb, skb->dev, NULL,
47 ip6_rcv_finish); 47 ip6_rcv_finish);
48 return -1; 48 return -1;
49} 49}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 09c76a7b474d..4d09ce6fa90e 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -79,6 +79,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
79 79
80 if (!skb->ignore_df && skb->len > mtu) { 80 if (!skb->ignore_df && skb->len > mtu) {
81 skb->dev = dst->dev; 81 skb->dev = dst->dev;
82 skb->protocol = htons(ETH_P_IPV6);
82 83
83 if (xfrm6_local_dontfrag(skb)) 84 if (xfrm6_local_dontfrag(skb))
84 xfrm6_local_rxpmtu(skb, mtu); 85 xfrm6_local_rxpmtu(skb, mtu);
@@ -131,44 +132,57 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
131 return xfrm_output(sk, skb); 132 return xfrm_output(sk, skb);
132} 133}
133 134
134static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) 135static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
136{
137 struct xfrm_state *x = skb_dst(skb)->xfrm;
138
139 return x->outer_mode->afinfo->output_finish(sk, skb);
140}
141
142static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
135{ 143{
136 struct dst_entry *dst = skb_dst(skb); 144 struct dst_entry *dst = skb_dst(skb);
137 struct xfrm_state *x = dst->xfrm; 145 struct xfrm_state *x = dst->xfrm;
138 int mtu; 146 int mtu;
147 bool toobig;
139 148
140#ifdef CONFIG_NETFILTER 149#ifdef CONFIG_NETFILTER
141 if (!x) { 150 if (!x) {
142 IP6CB(skb)->flags |= IP6SKB_REROUTED; 151 IP6CB(skb)->flags |= IP6SKB_REROUTED;
143 return dst_output_sk(sk, skb); 152 return dst_output(net, sk, skb);
144 } 153 }
145#endif 154#endif
146 155
156 if (x->props.mode != XFRM_MODE_TUNNEL)
157 goto skip_frag;
158
147 if (skb->protocol == htons(ETH_P_IPV6)) 159 if (skb->protocol == htons(ETH_P_IPV6))
148 mtu = ip6_skb_dst_mtu(skb); 160 mtu = ip6_skb_dst_mtu(skb);
149 else 161 else
150 mtu = dst_mtu(skb_dst(skb)); 162 mtu = dst_mtu(skb_dst(skb));
151 163
152 if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { 164 toobig = skb->len > mtu && !skb_is_gso(skb);
165
166 if (toobig && xfrm6_local_dontfrag(skb)) {
153 xfrm6_local_rxpmtu(skb, mtu); 167 xfrm6_local_rxpmtu(skb, mtu);
154 return -EMSGSIZE; 168 return -EMSGSIZE;
155 } else if (!skb->ignore_df && skb->len > mtu && skb->sk) { 169 } else if (!skb->ignore_df && toobig && skb->sk) {
156 xfrm_local_error(skb, mtu); 170 xfrm_local_error(skb, mtu);
157 return -EMSGSIZE; 171 return -EMSGSIZE;
158 } 172 }
159 173
160 if (x->props.mode == XFRM_MODE_TUNNEL && 174 if (toobig || dst_allfrag(skb_dst(skb)))
161 ((skb->len > mtu && !skb_is_gso(skb)) || 175 return ip6_fragment(net, sk, skb,
162 dst_allfrag(skb_dst(skb)))) { 176 __xfrm6_output_finish);
163 return ip6_fragment(sk, skb, 177
164 x->outer_mode->afinfo->output_finish); 178skip_frag:
165 }
166 return x->outer_mode->afinfo->output_finish(sk, skb); 179 return x->outer_mode->afinfo->output_finish(sk, skb);
167} 180}
168 181
169int xfrm6_output(struct sock *sk, struct sk_buff *skb) 182int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
170{ 183{
171 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, 184 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
172 NULL, skb_dst(skb)->dev, __xfrm6_output, 185 net, sk, skb, NULL, skb_dst(skb)->dev,
186 __xfrm6_output,
173 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 187 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
174} 188}
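
Besides threading struct net through the NF_HOOK points, the __xfrm6_output rewrite confines fragmentation to tunnel mode and defines toobig so that GSO skbs are never fragmented here. The branch structure, restated as a standalone decision function (names mirror the hunk; illustrative only):

#include <stdbool.h>
#include <stdio.h>

enum action { REPORT_RXPMTU, LOCAL_ERROR, FRAGMENT, FINISH };

static enum action decide(bool tunnel_mode, bool toobig, bool dontfrag,
                          bool ignore_df, bool has_sk, bool allfrag)
{
    if (!tunnel_mode)
        return FINISH;              /* skip_frag */
    if (toobig && dontfrag)
        return REPORT_RXPMTU;       /* rxpmtu report, -EMSGSIZE */
    if (toobig && !ignore_df && has_sk)
        return LOCAL_ERROR;         /* xfrm_local_error, -EMSGSIZE */
    if (toobig || allfrag)
        return FRAGMENT;            /* ip6_fragment() */
    return FINISH;                  /* output_finish */
}

int main(void)
{
    /* a GSO skb is never "toobig", so it finishes un-fragmented */
    printf("%d\n", decide(true, false, false, false, true, false));
    return 0;
}
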
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 30caa289c5db..5643423fe67a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -20,7 +20,7 @@
20#include <net/ip.h> 20#include <net/ip.h>
21#include <net/ipv6.h> 21#include <net/ipv6.h>
22#include <net/ip6_route.h> 22#include <net/ip6_route.h>
23#include <net/vrf.h> 23#include <net/l3mdev.h>
24#if IS_ENABLED(CONFIG_IPV6_MIP6) 24#if IS_ENABLED(CONFIG_IPV6_MIP6)
25#include <net/mip6.h> 25#include <net/mip6.h>
26#endif 26#endif
@@ -37,6 +37,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
37 37
38 memset(&fl6, 0, sizeof(fl6)); 38 memset(&fl6, 0, sizeof(fl6));
39 fl6.flowi6_oif = oif; 39 fl6.flowi6_oif = oif;
40 fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
40 memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); 41 memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
41 if (saddr) 42 if (saddr)
42 memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); 43 memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
@@ -132,10 +133,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
132 133
133 nexthdr = nh[nhoff]; 134 nexthdr = nh[nhoff];
134 135
135 if (skb_dst(skb)) { 136 if (skb_dst(skb))
136 oif = vrf_master_ifindex(skb_dst(skb)->dev) ? 137 oif = l3mdev_fib_oif(skb_dst(skb)->dev);
137 : skb_dst(skb)->dev->ifindex;
138 }
139 138
140 memset(fl6, 0, sizeof(struct flowi6)); 139 memset(fl6, 0, sizeof(struct flowi6));
141 fl6->flowi6_mark = skb->mark; 140 fl6->flowi6_mark = skb->mark;
@@ -178,7 +177,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
178 return; 177 return;
179 178
180 case IPPROTO_ICMPV6: 179 case IPPROTO_ICMPV6:
181 if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { 180 if (!onlyproto && (nh + offset + 2 < skb->data ||
181 pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
182 u8 *icmp; 182 u8 *icmp;
183 183
184 nh = skb_network_header(skb); 184 nh = skb_network_header(skb);
@@ -192,7 +192,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
192#if IS_ENABLED(CONFIG_IPV6_MIP6) 192#if IS_ENABLED(CONFIG_IPV6_MIP6)
193 case IPPROTO_MH: 193 case IPPROTO_MH:
194 offset += ipv6_optlen(exthdr); 194 offset += ipv6_optlen(exthdr);
195 if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { 195 if (!onlyproto && (nh + offset + 3 < skb->data ||
196 pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
196 struct ip6_mh *mh; 197 struct ip6_mh *mh;
197 198
198 nh = skb_network_header(skb); 199 nh = skb_network_header(skb);
@@ -287,7 +288,7 @@ static struct dst_ops xfrm6_dst_ops = {
287 .destroy = xfrm6_dst_destroy, 288 .destroy = xfrm6_dst_destroy,
288 .ifdown = xfrm6_dst_ifdown, 289 .ifdown = xfrm6_dst_ifdown,
289 .local_out = __ip6_local_out, 290 .local_out = __ip6_local_out,
290 .gc_thresh = 32768, 291 .gc_thresh = INT_MAX,
291}; 292};
292 293
293static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { 294static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index fae6822cc367..e6aa48b5395c 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -2123,8 +2123,7 @@ static int irda_setsockopt(struct socket *sock, int level, int optname,
2123 } 2123 }
2124 2124
2125 /* Unregister any old registration */ 2125 /* Unregister any old registration */
2126 if (self->skey) 2126 irlmp_unregister_service(self->skey);
2127 irlmp_unregister_service(self->skey);
2128 2127
2129 self->skey = irlmp_register_service((__u16) opt); 2128 self->skey = irlmp_register_service((__u16) opt);
2130 break; 2129 break;
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 683346d2d633..a4237707f79d 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -335,8 +335,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
335 * specified, we cannot return before the IrCOMM link is 335 * specified, we cannot return before the IrCOMM link is
336 * ready 336 * ready
337 */ 337 */
338 if (!test_bit(ASYNCB_CLOSING, &port->flags) && 338 if ((do_clocal || tty_port_carrier_raised(port)) &&
339 (do_clocal || tty_port_carrier_raised(port)) &&
340 self->state == IRCOMM_TTY_READY) 339 self->state == IRCOMM_TTY_READY)
341 { 340 {
342 break; 341 break;
@@ -443,34 +442,6 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
443 /* Not really used by us, but lets do it anyway */ 442 /* Not really used by us, but lets do it anyway */
444 self->port.low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0; 443 self->port.low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0;
445 444
446 /*
447 * If the port is the middle of closing, bail out now
448 */
449 if (test_bit(ASYNCB_CLOSING, &self->port.flags)) {
450
451 /* Hm, why are we blocking on ASYNC_CLOSING if we
452 * do return -EAGAIN/-ERESTARTSYS below anyway?
453 * IMHO it's either not needed in the first place
454 * or for some reason we need to make sure the async
455 * closing has been finished - if so, wouldn't we
456 * probably better sleep uninterruptible?
457 */
458
459 if (wait_event_interruptible(self->port.close_wait,
460 !test_bit(ASYNCB_CLOSING, &self->port.flags))) {
461 net_warn_ratelimited("%s - got signal while blocking on ASYNC_CLOSING!\n",
462 __func__);
463 return -ERESTARTSYS;
464 }
465
466#ifdef SERIAL_DO_RESTART
467 return (self->port.flags & ASYNC_HUP_NOTIFY) ?
468 -EAGAIN : -ERESTARTSYS;
469#else
470 return -EAGAIN;
471#endif
472 }
473
474 /* Check if this is a "normal" ircomm device, or an irlpt device */ 445 /* Check if this is a "normal" ircomm device, or an irlpt device */
475 if (self->line < 0x10) { 446 if (self->line < 0x10) {
476 self->service_type = IRCOMM_3_WIRE | IRCOMM_9_WIRE; 447 self->service_type = IRCOMM_3_WIRE | IRCOMM_9_WIRE;
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index a26c401ef4a4..43964594aa12 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -1839,7 +1839,7 @@ static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off)
1839 for (element = hashbin_get_first(iter->hashbin); 1839 for (element = hashbin_get_first(iter->hashbin);
1840 element != NULL; 1840 element != NULL;
1841 element = hashbin_get_next(iter->hashbin)) { 1841 element = hashbin_get_next(iter->hashbin)) {
1842 if (!off || *off-- == 0) { 1842 if (!off || (*off)-- == 0) {
1843 /* NB: hashbin left locked */ 1843 /* NB: hashbin left locked */
1844 return element; 1844 return element;
1845 } 1845 }
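
The irlmp fix is an operator-precedence bug: postfix -- binds tighter than unary *, so the old *off-- stepped the pointer backwards and never decremented the offset it points at. A standalone demonstration of the corrected form:

#include <stdio.h>

int main(void)
{
    long v = 2;
    long *off = &v;

    /* mirrors the hashbin iteration test with the fixed parentheses */
    if (!off || (*off)-- == 0)
        ;
    printf("v = %ld\n", v);     /* 1: the pointee was decremented */
    return 0;
}
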
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 918151c11348..fcb2752419c6 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -95,11 +95,10 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify);
95/* Call Back functions */ 95/* Call Back functions */
96static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); 96static void iucv_callback_rx(struct iucv_path *, struct iucv_message *);
97static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); 97static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *);
98static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]); 98static void iucv_callback_connack(struct iucv_path *, u8 *);
99static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], 99static int iucv_callback_connreq(struct iucv_path *, u8 *, u8 *);
100 u8 ipuser[16]); 100static void iucv_callback_connrej(struct iucv_path *, u8 *);
101static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]); 101static void iucv_callback_shutdown(struct iucv_path *, u8 *);
102static void iucv_callback_shutdown(struct iucv_path *, u8 ipuser[16]);
103 102
104static struct iucv_sock_list iucv_sk_list = { 103static struct iucv_sock_list iucv_sk_list = {
105 .lock = __RW_LOCK_UNLOCKED(iucv_sk_list.lock), 104 .lock = __RW_LOCK_UNLOCKED(iucv_sk_list.lock),
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 2a6a1fdd62c0..7eaa000c9258 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -713,7 +713,7 @@ static struct notifier_block __refdata iucv_cpu_notifier = {
713 * 713 *
714 * Sever an iucv path to free up the pathid. Used internally. 714 * Sever an iucv path to free up the pathid. Used internally.
715 */ 715 */
716static int iucv_sever_pathid(u16 pathid, u8 userdata[16]) 716static int iucv_sever_pathid(u16 pathid, u8 *userdata)
717{ 717{
718 union iucv_param *parm; 718 union iucv_param *parm;
719 719
@@ -876,7 +876,7 @@ static struct notifier_block iucv_reboot_notifier = {
876 * Returns the result of the CP IUCV call. 876 * Returns the result of the CP IUCV call.
877 */ 877 */
878int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, 878int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
879 u8 userdata[16], void *private) 879 u8 *userdata, void *private)
880{ 880{
881 union iucv_param *parm; 881 union iucv_param *parm;
882 int rc; 882 int rc;
@@ -923,7 +923,7 @@ EXPORT_SYMBOL(iucv_path_accept);
923 * Returns the result of the CP IUCV call. 923 * Returns the result of the CP IUCV call.
924 */ 924 */
925int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, 925int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
926 u8 userid[8], u8 system[8], u8 userdata[16], 926 u8 *userid, u8 *system, u8 *userdata,
927 void *private) 927 void *private)
928{ 928{
929 union iucv_param *parm; 929 union iucv_param *parm;
@@ -985,7 +985,7 @@ EXPORT_SYMBOL(iucv_path_connect);
985 * 985 *
986 * Returns the result from the CP IUCV call. 986 * Returns the result from the CP IUCV call.
987 */ 987 */
988int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16]) 988int iucv_path_quiesce(struct iucv_path *path, u8 *userdata)
989{ 989{
990 union iucv_param *parm; 990 union iucv_param *parm;
991 int rc; 991 int rc;
@@ -1017,7 +1017,7 @@ EXPORT_SYMBOL(iucv_path_quiesce);
1017 * 1017 *
1018 * Returns the result from the CP IUCV call. 1018 * Returns the result from the CP IUCV call.
1019 */ 1019 */
1020int iucv_path_resume(struct iucv_path *path, u8 userdata[16]) 1020int iucv_path_resume(struct iucv_path *path, u8 *userdata)
1021{ 1021{
1022 union iucv_param *parm; 1022 union iucv_param *parm;
1023 int rc; 1023 int rc;
@@ -1047,7 +1047,7 @@ out:
1047 * 1047 *
1048 * Returns the result from the CP IUCV call. 1048 * Returns the result from the CP IUCV call.
1049 */ 1049 */
1050int iucv_path_sever(struct iucv_path *path, u8 userdata[16]) 1050int iucv_path_sever(struct iucv_path *path, u8 *userdata)
1051{ 1051{
1052 int rc; 1052 int rc;
1053 1053
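
The iucv prototype changes replace array parameters like u8 userdata[16] with u8 *: the compiler adjusts array parameters to pointers anyway, so the [16] documented nothing and invites wrong sizeof assumptions (a plausible motivation; the patch itself states none). A standalone demonstration:

#include <stdio.h>

typedef unsigned char u8;

/* `u8 userdata[16]` is adjusted to `u8 *userdata` by the compiler */
static void takes_array(u8 userdata[16])
{
    /* sizeof a decayed parameter is pointer-sized, not 16 */
    printf("inside: %zu\n", sizeof(userdata));
}

int main(void)
{
    u8 buf[16];

    printf("caller: %zu\n", sizeof(buf));   /* 16 */
    takes_array(buf);                       /* 8 on LP64 targets */
    return 0;
}
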
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 83a70688784b..f9c9ecb0cdd3 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -261,7 +261,7 @@ static int pfkey_broadcast(struct sk_buff *skb,
261 261
262 err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); 262 err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
263 263
264 /* Error is cleare after succecful sending to at least one 264 /* Error is cleared after successful sending to at least one
265 * registered KM */ 265 * registered KM */
266 if ((broadcast_flags & BROADCAST_REGISTERED) && err) 266 if ((broadcast_flags & BROADCAST_REGISTERED) && err)
267 err = err2; 267 err = err2;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 68aa9ffd4ae4..5871537af387 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -321,4 +321,7 @@ do { \
321#define l2tp_dbg(ptr, type, fmt, ...) \ 321#define l2tp_dbg(ptr, type, fmt, ...) \
322 l2tp_printk(ptr, type, pr_debug, fmt, ##__VA_ARGS__) 322 l2tp_printk(ptr, type, pr_debug, fmt, ##__VA_ARGS__)
323 323
324#define MODULE_ALIAS_L2TP_PWTYPE(type) \
325 MODULE_ALIAS("net-l2tp-type-" __stringify(type))
326
324#endif /* _L2TP_CORE_H_ */ 327#endif /* _L2TP_CORE_H_ */
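
MODULE_ALIAS_L2TP_PWTYPE(type) stringifies the pseudowire type into a "net-l2tp-type-<N>" module alias, exactly the string the new request_module() call in l2tp_netlink.c (further down) asks modprobe to resolve, so creating a session of a not-yet-loaded type autoloads its module. A user-space rendering of the expansion:

#include <stdio.h>

/* same two-step stringify trick the kernel's __stringify uses */
#define __stringify_1(x)        #x
#define __stringify(x)          __stringify_1(x)
#define L2TP_PWTYPE_ALIAS(type) "net-l2tp-type-" __stringify(type)

int main(void)
{
    /* l2tp_eth.c registers pw_type 5 this way (see its hunk below) */
    puts(L2TP_PWTYPE_ALIAS(5));     /* prints: net-l2tp-type-5 */
    return 0;
}
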
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 4b552873b556..e253c26f31ac 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -358,3 +358,4 @@ MODULE_LICENSE("GPL");
358MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); 358MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
359MODULE_DESCRIPTION("L2TP ethernet pseudowire driver"); 359MODULE_DESCRIPTION("L2TP ethernet pseudowire driver");
360MODULE_VERSION("1.0"); 360MODULE_VERSION("1.0");
361MODULE_ALIAS_L2TP_PWTYPE(5);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 79649937ec71..ec22078b0914 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -655,3 +655,4 @@ MODULE_VERSION("1.0");
655 * enums 655 * enums
656 */ 656 */
657MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP); 657MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP);
658MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_L2TP);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index d1ded3777815..aca38d8aed8e 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -801,3 +801,4 @@ MODULE_VERSION("1.0");
801 * enums 801 * enums
802 */ 802 */
803MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 2, IPPROTO_L2TP); 803MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 2, IPPROTO_L2TP);
804MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_L2TP);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 9e13c2ff8789..f93c5be612a7 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -576,6 +576,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
576 if (info->attrs[L2TP_ATTR_MRU]) 576 if (info->attrs[L2TP_ATTR_MRU])
577 cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]); 577 cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
578 578
579#ifdef CONFIG_MODULES
580 if (l2tp_nl_cmd_ops[cfg.pw_type] == NULL) {
581 genl_unlock();
582 request_module("net-l2tp-type-%u", cfg.pw_type);
583 genl_lock();
584 }
585#endif
579 if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) || 586 if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
580 (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) { 587 (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
581 ret = -EPROTONOSUPPORT; 588 ret = -EPROTONOSUPPORT;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index f56c9f69e9f2..1ad18c55064c 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1863,3 +1863,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP");
1863MODULE_LICENSE("GPL"); 1863MODULE_LICENSE("GPL");
1864MODULE_VERSION(PPPOL2TP_DRV_VERSION); 1864MODULE_VERSION(PPPOL2TP_DRV_VERSION);
1865MODULE_ALIAS("pppox-proto-" __stringify(PX_PROTO_OL2TP)); 1865MODULE_ALIAS("pppox-proto-" __stringify(PX_PROTO_OL2TP));
1866MODULE_ALIAS_L2TP_PWTYPE(11);
diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig
new file mode 100644
index 000000000000..5d47325037bc
--- /dev/null
+++ b/net/l3mdev/Kconfig
@@ -0,0 +1,10 @@
1#
2# Configuration for L3 master device support
3#
4
5config NET_L3_MASTER_DEV
6 bool "L3 Master device support"
7 depends on INET || IPV6
8 ---help---
9 This module provides glue between core networking code and device
10 drivers to support L3 master devices like VRF.
diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile
new file mode 100644
index 000000000000..84a53a6f609a
--- /dev/null
+++ b/net/l3mdev/Makefile
@@ -0,0 +1,5 @@
1#
2# Makefile for the L3 device API
3#
4
5obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
new file mode 100644
index 000000000000..8e5ead366e7f
--- /dev/null
+++ b/net/l3mdev/l3mdev.c
@@ -0,0 +1,92 @@
1/*
2 * net/l3mdev/l3mdev.c - L3 master device implementation
3 * Copyright (c) 2015 Cumulus Networks
4 * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 */
11
12#include <linux/netdevice.h>
13#include <net/l3mdev.h>
14
15/**
16 * l3mdev_master_ifindex_rcu - get index of L3 master device
17 * @dev: targeted interface
18 */
19
20int l3mdev_master_ifindex_rcu(struct net_device *dev)
21{
22 int ifindex = 0;
23
24 if (!dev)
25 return 0;
26
27 if (netif_is_l3_master(dev)) {
28 ifindex = dev->ifindex;
29 } else if (netif_is_l3_slave(dev)) {
30 struct net_device *master;
31
32 master = netdev_master_upper_dev_get_rcu(dev);
33 if (master)
34 ifindex = master->ifindex;
35 }
36
37 return ifindex;
38}
39EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
40
41/**
42 * l3mdev_fib_table_rcu - get FIB table id associated with an L3
43 * master interface
44 * @dev: targeted interface
45 */
46
47u32 l3mdev_fib_table_rcu(const struct net_device *dev)
48{
49 u32 tb_id = 0;
50
51 if (!dev)
52 return 0;
53
54 if (netif_is_l3_master(dev)) {
55 if (dev->l3mdev_ops->l3mdev_fib_table)
56 tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev);
57 } else if (netif_is_l3_slave(dev)) {
58 /* Users of netdev_master_upper_dev_get_rcu need non-const,
59 * but current inet_*type functions take a const
60 */
61 struct net_device *_dev = (struct net_device *) dev;
62 const struct net_device *master;
63
64 master = netdev_master_upper_dev_get_rcu(_dev);
65 if (master &&
66 master->l3mdev_ops->l3mdev_fib_table)
67 tb_id = master->l3mdev_ops->l3mdev_fib_table(master);
68 }
69
70 return tb_id;
71}
72EXPORT_SYMBOL_GPL(l3mdev_fib_table_rcu);
73
74u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
75{
76 struct net_device *dev;
77 u32 tb_id = 0;
78
79 if (!ifindex)
80 return 0;
81
82 rcu_read_lock();
83
84 dev = dev_get_by_index_rcu(net, ifindex);
85 if (dev)
86 tb_id = l3mdev_fib_table_rcu(dev);
87
88 rcu_read_unlock();
89
90 return tb_id;
91}
92EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
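
A hedged kernel-context sketch of how a caller is expected to use the new helpers: the _rcu variants must run under rcu_read_lock() and return 0 when the device has no L3 master, so callers supply their own fallback. The RT_TABLE_MAIN fallback below is illustrative, not something this patch prescribes:

static u32 example_resolve_fib_table(struct net_device *dev)
{
    u32 tb_id;

    rcu_read_lock();
    tb_id = l3mdev_fib_table_rcu(dev);  /* 0: no VRF/L3 master */
    rcu_read_unlock();

    return tb_id ? : RT_TABLE_MAIN;     /* illustrative fallback */
}
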
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 783e891b7525..f9137a8341f4 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -27,7 +27,6 @@ mac80211-y := \
27 key.o \ 27 key.o \
28 util.o \ 28 util.o \
29 wme.o \ 29 wme.o \
30 event.o \
31 chan.o \ 30 chan.o \
32 trace.o mlme.o \ 31 trace.o mlme.o \
33 tdls.o \ 32 tdls.o \
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 5c564a68fb50..10ad4ac1fa0b 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -79,7 +79,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
79 (int)reason); 79 (int)reason);
80 80
81 if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, 81 if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP,
82 &sta->sta, tid, NULL, 0)) 82 &sta->sta, tid, NULL, 0, false))
83 sdata_info(sta->sdata, 83 sdata_info(sta->sdata,
84 "HW problem - can not stop rx aggregation for %pM tid %d\n", 84 "HW problem - can not stop rx aggregation for %pM tid %d\n",
85 sta->sta.addr, tid); 85 sta->sta.addr, tid);
@@ -189,6 +189,7 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
189 struct ieee80211_local *local = sdata->local; 189 struct ieee80211_local *local = sdata->local;
190 struct sk_buff *skb; 190 struct sk_buff *skb;
191 struct ieee80211_mgmt *mgmt; 191 struct ieee80211_mgmt *mgmt;
192 bool amsdu = ieee80211_hw_check(&local->hw, SUPPORTS_AMSDU_IN_AMPDU);
192 u16 capab; 193 u16 capab;
193 194
194 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); 195 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
@@ -217,7 +218,8 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
217 mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; 218 mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
218 mgmt->u.action.u.addba_resp.dialog_token = dialog_token; 219 mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
219 220
220 capab = (u16)(policy << 1); /* bit 1 aggregation policy */ 221 capab = (u16)(amsdu << 0); /* bit 0 A-MSDU support */
222 capab |= (u16)(policy << 1); /* bit 1 aggregation policy */
221 capab |= (u16)(tid << 2); /* bit 5:2 TID number */ 223 capab |= (u16)(tid << 2); /* bit 5:2 TID number */
222 capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */ 224 capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */
223 225
@@ -321,7 +323,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
321 __skb_queue_head_init(&tid_agg_rx->reorder_buf[i]); 323 __skb_queue_head_init(&tid_agg_rx->reorder_buf[i]);
322 324
323 ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, 325 ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START,
324 &sta->sta, tid, &start_seq_num, 0); 326 &sta->sta, tid, &start_seq_num, 0, false);
325 ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n", 327 ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n",
326 sta->sta.addr, tid, ret); 328 sta->sta.addr, tid, ret);
327 if (ret) { 329 if (ret) {
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index c8ba2e77737c..a758eb84e8f0 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -97,7 +97,8 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
97 mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; 97 mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ;
98 98
99 mgmt->u.action.u.addba_req.dialog_token = dialog_token; 99 mgmt->u.action.u.addba_req.dialog_token = dialog_token;
100 capab = (u16)(1 << 1); /* bit 1 aggregation policy */ 100 capab = (u16)(1 << 0); /* bit 0 A-MSDU support */
101 capab |= (u16)(1 << 1); /* bit 1 aggregation policy */
101 capab |= (u16)(tid << 2); /* bit 5:2 TID number */ 102 capab |= (u16)(tid << 2); /* bit 5:2 TID number */
102 capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggregation */ 103 capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggregation */
103 104
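
The ADDBA capability field packed here carries, per the comments: bit 0 A-MSDU support (newly advertised), bit 1 the aggregation policy, bits 5:2 the TID, and bits 15:6 the buffer size. A standalone pack/unpack round-trip:

#include <stdint.h>
#include <stdio.h>

/* bit 0 A-MSDU, bit 1 policy, bits 5:2 TID, bits 15:6 buffer size */
static uint16_t addba_capab(int amsdu, int policy, int tid, int buf_size)
{
    return (uint16_t)((amsdu & 1) | ((policy & 1) << 1) |
                      ((tid & 0xf) << 2) | ((buf_size & 0x3ff) << 6));
}

int main(void)
{
    uint16_t capab = addba_capab(1, 1, 5, 64);

    printf("capab=0x%04x tid=%u buf=%u\n", capab,
           (capab >> 2) & 0xf, (capab >> 6) & 0x3ff);
    return 0;
}
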
@@ -331,7 +332,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
331 return -EALREADY; 332 return -EALREADY;
332 ret = drv_ampdu_action(local, sta->sdata, 333 ret = drv_ampdu_action(local, sta->sdata,
333 IEEE80211_AMPDU_TX_STOP_FLUSH_CONT, 334 IEEE80211_AMPDU_TX_STOP_FLUSH_CONT,
334 &sta->sta, tid, NULL, 0); 335 &sta->sta, tid, NULL, 0, false);
335 WARN_ON_ONCE(ret); 336 WARN_ON_ONCE(ret);
336 return 0; 337 return 0;
337 } 338 }
@@ -381,7 +382,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
381 tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST; 382 tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST;
382 383
383 ret = drv_ampdu_action(local, sta->sdata, action, 384 ret = drv_ampdu_action(local, sta->sdata, action,
384 &sta->sta, tid, NULL, 0); 385 &sta->sta, tid, NULL, 0, false);
385 386
386 /* HW shall not deny going back to legacy */ 387 /* HW shall not deny going back to legacy */
387 if (WARN_ON(ret)) { 388 if (WARN_ON(ret)) {
@@ -469,7 +470,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
469 start_seq_num = sta->tid_seq[tid] >> 4; 470 start_seq_num = sta->tid_seq[tid] >> 4;
470 471
471 ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, 472 ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START,
472 &sta->sta, tid, &start_seq_num, 0); 473 &sta->sta, tid, &start_seq_num, 0, false);
473 if (ret) { 474 if (ret) {
474 ht_dbg(sdata, 475 ht_dbg(sdata,
475 "BA request denied - HW unavailable for %pM tid %d\n", 476 "BA request denied - HW unavailable for %pM tid %d\n",
@@ -693,7 +694,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
693 694
694 drv_ampdu_action(local, sta->sdata, 695 drv_ampdu_action(local, sta->sdata,
695 IEEE80211_AMPDU_TX_OPERATIONAL, 696 IEEE80211_AMPDU_TX_OPERATIONAL,
696 &sta->sta, tid, NULL, tid_tx->buf_size); 697 &sta->sta, tid, NULL, tid_tx->buf_size,
698 tid_tx->amsdu);
697 699
698 /* 700 /*
699 * synchronize with TX path, while splicing the TX path 701 * synchronize with TX path, while splicing the TX path
@@ -918,8 +920,10 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
918 struct tid_ampdu_tx *tid_tx; 920 struct tid_ampdu_tx *tid_tx;
919 u16 capab, tid; 921 u16 capab, tid;
920 u8 buf_size; 922 u8 buf_size;
923 bool amsdu;
921 924
922 capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); 925 capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
926 amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK;
923 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; 927 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
924 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; 928 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
925 929
@@ -968,6 +972,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
968 } 972 }
969 973
970 tid_tx->buf_size = buf_size; 974 tid_tx->buf_size = buf_size;
975 tid_tx->amsdu = amsdu;
971 976
972 if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)) 977 if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))
973 ieee80211_agg_tx_operational(local, sta, tid); 978 ieee80211_agg_tx_operational(local, sta, tid);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7a77a1470f25..c2bd1b6a6922 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -17,7 +17,6 @@
17#include <net/cfg80211.h> 17#include <net/cfg80211.h>
18#include "ieee80211_i.h" 18#include "ieee80211_i.h"
19#include "driver-ops.h" 19#include "driver-ops.h"
20#include "cfg.h"
21#include "rate.h" 20#include "rate.h"
22#include "mesh.h" 21#include "mesh.h"
23#include "wme.h" 22#include "wme.h"
@@ -469,45 +468,6 @@ void sta_set_rate_info_tx(struct sta_info *sta,
469 rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; 468 rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI;
470} 469}
471 470
472void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
473{
474 rinfo->flags = 0;
475
476 if (sta->last_rx_rate_flag & RX_FLAG_HT) {
477 rinfo->flags |= RATE_INFO_FLAGS_MCS;
478 rinfo->mcs = sta->last_rx_rate_idx;
479 } else if (sta->last_rx_rate_flag & RX_FLAG_VHT) {
480 rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS;
481 rinfo->nss = sta->last_rx_rate_vht_nss;
482 rinfo->mcs = sta->last_rx_rate_idx;
483 } else {
484 struct ieee80211_supported_band *sband;
485 int shift = ieee80211_vif_get_shift(&sta->sdata->vif);
486 u16 brate;
487
488 sband = sta->local->hw.wiphy->bands[
489 ieee80211_get_sdata_band(sta->sdata)];
490 brate = sband->bitrates[sta->last_rx_rate_idx].bitrate;
491 rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
492 }
493
494 if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI)
495 rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI;
496
497 if (sta->last_rx_rate_flag & RX_FLAG_5MHZ)
498 rinfo->bw = RATE_INFO_BW_5;
499 else if (sta->last_rx_rate_flag & RX_FLAG_10MHZ)
500 rinfo->bw = RATE_INFO_BW_10;
501 else if (sta->last_rx_rate_flag & RX_FLAG_40MHZ)
502 rinfo->bw = RATE_INFO_BW_40;
503 else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ)
504 rinfo->bw = RATE_INFO_BW_80;
505 else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ)
506 rinfo->bw = RATE_INFO_BW_160;
507 else
508 rinfo->bw = RATE_INFO_BW_20;
509}
510
511static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, 471static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
512 int idx, u8 *mac, struct station_info *sinfo) 472 int idx, u8 *mac, struct station_info *sinfo)
513{ 473{
@@ -981,7 +941,7 @@ static int sta_apply_auth_flags(struct ieee80211_local *local,
981 * well. Some drivers require rate control initialized 941 * well. Some drivers require rate control initialized
982 * before drv_sta_state() is called. 942 * before drv_sta_state() is called.
983 */ 943 */
984 if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) 944 if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
985 rate_control_rate_init(sta); 945 rate_control_rate_init(sta);
986 946
987 ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); 947 ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
@@ -1120,8 +1080,11 @@ static int sta_apply_parameters(struct ieee80211_local *local,
1120 local->hw.queues >= IEEE80211_NUM_ACS) 1080 local->hw.queues >= IEEE80211_NUM_ACS)
1121 sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME); 1081 sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME);
1122 1082
1123 /* auth flags will be set later for TDLS stations */ 1083 /* auth flags will be set later for TDLS,
1124 if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { 1084 * and for unassociated stations that move to associated */
1085 if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
1086 !((mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) &&
1087 (set & BIT(NL80211_STA_FLAG_ASSOCIATED)))) {
1125 ret = sta_apply_auth_flags(local, sta, mask, set); 1088 ret = sta_apply_auth_flags(local, sta, mask, set);
1126 if (ret) 1089 if (ret)
1127 return ret; 1090 return ret;
@@ -1135,6 +1098,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
1135 } 1098 }
1136 1099
1137 if (mask & BIT(NL80211_STA_FLAG_MFP)) { 1100 if (mask & BIT(NL80211_STA_FLAG_MFP)) {
1101 sta->sta.mfp = !!(set & BIT(NL80211_STA_FLAG_MFP));
1138 if (set & BIT(NL80211_STA_FLAG_MFP)) 1102 if (set & BIT(NL80211_STA_FLAG_MFP))
1139 set_sta_flag(sta, WLAN_STA_MFP); 1103 set_sta_flag(sta, WLAN_STA_MFP);
1140 else 1104 else
@@ -1156,6 +1120,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
1156 set_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH); 1120 set_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH);
1157 1121
1158 if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && 1122 if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
1123 !sdata->u.mgd.tdls_wider_bw_prohibited &&
1159 ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && 1124 ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) &&
1160 params->ext_capab_len >= 8 && 1125 params->ext_capab_len >= 8 &&
1161 params->ext_capab[7] & WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED) 1126 params->ext_capab[7] & WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED)
@@ -1212,7 +1177,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
1212 sta_apply_mesh_params(local, sta, params); 1177 sta_apply_mesh_params(local, sta, params);
1213 1178
1214 /* set the STA state after all sta info from usermode has been set */ 1179 /* set the STA state after all sta info from usermode has been set */
1215 if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { 1180 if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
1181 set & BIT(NL80211_STA_FLAG_ASSOCIATED)) {
1216 ret = sta_apply_auth_flags(local, sta, mask, set); 1182 ret = sta_apply_auth_flags(local, sta, mask, set);
1217 if (ret) 1183 if (ret)
1218 return ret; 1184 return ret;
@@ -1254,12 +1220,14 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
1254 * defaults -- if userspace wants something else we'll 1220 * defaults -- if userspace wants something else we'll
1255 * change it accordingly in sta_apply_parameters() 1221 * change it accordingly in sta_apply_parameters()
1256 */ 1222 */
1257 if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) { 1223 if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) &&
1224 !(params->sta_flags_set & (BIT(NL80211_STA_FLAG_AUTHENTICATED) |
1225 BIT(NL80211_STA_FLAG_ASSOCIATED)))) {
1258 sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); 1226 sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
1259 sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); 1227 sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
1260 } else {
1261 sta->sta.tdls = true;
1262 } 1228 }
1229 if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
1230 sta->sta.tdls = true;
1263 1231
1264 err = sta_apply_parameters(local, sta, params); 1232 err = sta_apply_parameters(local, sta, params);
1265 if (err) { 1233 if (err) {
@@ -1268,10 +1236,12 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
1268 } 1236 }
1269 1237
1270 /* 1238 /*
1271 * for TDLS, rate control should be initialized only when 1239 * for TDLS and for unassociated stations, rate control should be
1272 * rates are known and station is marked authorized 1240 * initialized only when rates are known and station is marked
1241 * authorized/associated
1273 */ 1242 */
1274 if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) 1243 if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
1244 test_sta_flag(sta, WLAN_STA_ASSOC))
1275 rate_control_rate_init(sta); 1245 rate_control_rate_init(sta);
1276 1246
1277 layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN || 1247 layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
@@ -1346,7 +1316,10 @@ static int ieee80211_change_station(struct wiphy *wiphy,
1346 break; 1316 break;
1347 case NL80211_IFTYPE_AP: 1317 case NL80211_IFTYPE_AP:
1348 case NL80211_IFTYPE_AP_VLAN: 1318 case NL80211_IFTYPE_AP_VLAN:
1349 statype = CFG80211_STA_AP_CLIENT; 1319 if (test_sta_flag(sta, WLAN_STA_ASSOC))
1320 statype = CFG80211_STA_AP_CLIENT;
1321 else
1322 statype = CFG80211_STA_AP_CLIENT_UNASSOC;
1350 break; 1323 break;
1351 default: 1324 default:
1352 err = -EOPNOTSUPP; 1325 err = -EOPNOTSUPP;
@@ -1415,7 +1388,7 @@ static int ieee80211_change_station(struct wiphy *wiphy,
1415 1388
1416 if (sdata->vif.type == NL80211_IFTYPE_STATION && 1389 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
1417 params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { 1390 params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) {
1418 ieee80211_recalc_ps(local, -1); 1391 ieee80211_recalc_ps(local);
1419 ieee80211_recalc_ps_vif(sdata); 1392 ieee80211_recalc_ps_vif(sdata);
1420 } 1393 }
1421 1394
@@ -2037,12 +2010,12 @@ ieee80211_sched_scan_start(struct wiphy *wiphy,
2037static int 2010static int
2038ieee80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev) 2011ieee80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev)
2039{ 2012{
2040 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 2013 struct ieee80211_local *local = wiphy_priv(wiphy);
2041 2014
2042 if (!sdata->local->ops->sched_scan_stop) 2015 if (!local->ops->sched_scan_stop)
2043 return -EOPNOTSUPP; 2016 return -EOPNOTSUPP;
2044 2017
2045 return ieee80211_request_sched_scan_stop(sdata); 2018 return ieee80211_request_sched_scan_stop(local);
2046} 2019}
2047 2020
2048static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev, 2021static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev,
@@ -2450,7 +2423,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
2450 if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) 2423 if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
2451 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); 2424 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
2452 2425
2453 ieee80211_recalc_ps(local, -1); 2426 ieee80211_recalc_ps(local);
2454 ieee80211_recalc_ps_vif(sdata); 2427 ieee80211_recalc_ps_vif(sdata);
2455 2428
2456 return 0; 2429 return 0;
@@ -3522,18 +3495,32 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy,
3522 u16 frame_type, bool reg) 3495 u16 frame_type, bool reg)
3523{ 3496{
3524 struct ieee80211_local *local = wiphy_priv(wiphy); 3497 struct ieee80211_local *local = wiphy_priv(wiphy);
3498 struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
3525 3499
3526 switch (frame_type) { 3500 switch (frame_type) {
3527 case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ: 3501 case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ:
3528 if (reg) 3502 if (reg) {
3529 local->probe_req_reg++; 3503 local->probe_req_reg++;
3530 else 3504 sdata->vif.probe_req_reg++;
3531 local->probe_req_reg--; 3505 } else {
3506 if (local->probe_req_reg)
3507 local->probe_req_reg--;
3508
3509 if (sdata->vif.probe_req_reg)
3510 sdata->vif.probe_req_reg--;
3511 }
3532 3512
3533 if (!local->open_count) 3513 if (!local->open_count)
3534 break; 3514 break;
3535 3515
3536 ieee80211_queue_work(&local->hw, &local->reconfig_filter); 3516 if (sdata->vif.probe_req_reg == 1)
3517 drv_config_iface_filter(local, sdata, FIF_PROBE_REQ,
3518 FIF_PROBE_REQ);
3519 else if (sdata->vif.probe_req_reg == 0)
3520 drv_config_iface_filter(local, sdata, 0,
3521 FIF_PROBE_REQ);
3522
3523 ieee80211_configure_filter(local);
3537 break; 3524 break;
3538 default: 3525 default:
3539 break; 3526 break;
diff --git a/net/mac80211/cfg.h b/net/mac80211/cfg.h
deleted file mode 100644
index 2d51f62dc76c..000000000000
--- a/net/mac80211/cfg.h
+++ /dev/null
@@ -1,9 +0,0 @@
1/*
2 * mac80211 configuration hooks for cfg80211
3 */
4#ifndef __CFG_H
5#define __CFG_H
6
7extern const struct cfg80211_ops mac80211_config_ops;
8
9#endif /* __CFG_H */
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index ced6bf3be8d6..4d2aaebd4f97 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -123,6 +123,8 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
 	FLAG(SUPPORTS_CLONED_SKBS),
 	FLAG(SINGLE_SCAN_ON_ALL_BANDS),
 	FLAG(TDLS_WIDER_BW),
+	FLAG(SUPPORTS_AMSDU_IN_AMPDU),
+	FLAG(BEACON_TX_STATUS),
 
 	/* keep last for the build bug below */
 	(void *)0x1
@@ -149,7 +151,7 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf,
 
 	for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) {
 		if (test_bit(i, local->hw.flags))
-			pos += scnprintf(pos, end - pos, "%s",
+			pos += scnprintf(pos, end - pos, "%s\n",
 					 hw_flag_names[i]);
 	}
 
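
The one-character hwflags_read() fix above matters because the flag names are accumulated into a single buffer; without the newline, every set flag would be concatenated onto one unreadable line. A self-contained sketch of the same bounded-append loop (plain snprintf standing in for the kernel's scnprintf, which returns the bytes actually written rather than the would-be length):

#include <stdio.h>

static const char *flag_names[] = {
	"SUPPORTS_CLONED_SKBS",
	"SINGLE_SCAN_ON_ALL_BANDS",
	"TDLS_WIDER_BW",
};

int main(void)
{
	char buf[256];
	char *pos = buf;
	const char *end = buf + sizeof(buf);
	unsigned int i;

	for (i = 0; i < sizeof(flag_names) / sizeof(flag_names[0]); i++)
		pos += snprintf(pos, end - pos, "%s\n", flag_names[i]);

	fputs(buf, stdout);	/* one flag name per line */
	return 0;
}
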
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 702ca122c498..7961e7d0b61e 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -2,6 +2,7 @@
  * Copyright 2003-2005	Devicescape Software, Inc.
  * Copyright (c) 2006	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2015	Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -34,6 +35,14 @@ static const struct file_operations key_ ##name## _ops = { \
 	.llseek = generic_file_llseek, \
 }
 
+#define KEY_OPS_W(name) \
+static const struct file_operations key_ ##name## _ops = { \
+	.read = key_##name##_read, \
+	.write = key_##name##_write, \
+	.open = simple_open, \
+	.llseek = generic_file_llseek, \
+}
+
 #define KEY_FILE(name, format) \
 		 KEY_READ_##format(name) \
 		 KEY_OPS(name)
@@ -74,6 +83,41 @@ static ssize_t key_algorithm_read(struct file *file,
 }
 KEY_OPS(algorithm);
 
+static ssize_t key_tx_spec_write(struct file *file, const char __user *userbuf,
+				 size_t count, loff_t *ppos)
+{
+	struct ieee80211_key *key = file->private_data;
+	u64 pn;
+	int ret;
+
+	switch (key->conf.cipher) {
+	case WLAN_CIPHER_SUITE_WEP40:
+	case WLAN_CIPHER_SUITE_WEP104:
+		return -EINVAL;
+	case WLAN_CIPHER_SUITE_TKIP:
+		/* not supported yet */
+		return -EOPNOTSUPP;
+	case WLAN_CIPHER_SUITE_CCMP:
+	case WLAN_CIPHER_SUITE_CCMP_256:
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+	case WLAN_CIPHER_SUITE_BIP_CMAC_256:
+	case WLAN_CIPHER_SUITE_BIP_GMAC_128:
+	case WLAN_CIPHER_SUITE_BIP_GMAC_256:
+	case WLAN_CIPHER_SUITE_GCMP:
+	case WLAN_CIPHER_SUITE_GCMP_256:
+		ret = kstrtou64_from_user(userbuf, count, 16, &pn);
+		if (ret)
+			return ret;
+		/* PN is a 48-bit counter */
+		if (pn >= (1ULL << 48))
+			return -ERANGE;
+		atomic64_set(&key->conf.tx_pn, pn);
+		return count;
+	default:
+		return 0;
+	}
+}
+
 static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
 				size_t count, loff_t *ppos)
 {
@@ -110,7 +154,7 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
 	}
 	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
 }
-KEY_OPS(tx_spec);
+KEY_OPS_W(tx_spec);
 
 static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
 				size_t count, loff_t *ppos)
@@ -278,6 +322,9 @@ KEY_OPS(key);
 #define DEBUGFS_ADD(name) \
 	debugfs_create_file(#name, 0400, key->debugfs.dir, \
 			    key, &key_##name##_ops);
+#define DEBUGFS_ADD_W(name) \
+	debugfs_create_file(#name, 0600, key->debugfs.dir, \
+			    key, &key_##name##_ops);
 
 void ieee80211_debugfs_key_add(struct ieee80211_key *key)
 {
@@ -310,7 +357,7 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key)
 	DEBUGFS_ADD(keyidx);
 	DEBUGFS_ADD(hw_key_idx);
 	DEBUGFS_ADD(algorithm);
-	DEBUGFS_ADD(tx_spec);
+	DEBUGFS_ADD_W(tx_spec);
 	DEBUGFS_ADD(rx_spec);
 	DEBUGFS_ADD(replays);
 	DEBUGFS_ADD(icverrors);
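
key_tx_spec_write() above makes the per-key tx_spec debugfs file writable (mode 0600 via the new DEBUGFS_ADD_W) and clamps the parsed hex value to the 48-bit packet-number space before storing it. The same validation, as a runnable userspace sketch (function and message names are illustrative only):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* mirrors the hex parse + 48-bit range check in key_tx_spec_write() */
static int parse_pn(const char *s, uint64_t *pn)
{
	char *end;

	errno = 0;
	*pn = strtoull(s, &end, 16);
	if (errno || end == s)
		return -EINVAL;
	if (*pn >= (1ULL << 48))	/* PN is a 48-bit counter */
		return -ERANGE;
	return 0;
}

int main(int argc, char **argv)
{
	uint64_t pn;

	if (argc < 2 || parse_pn(argv[1], &pn))
		return 1;
	printf("would set TX PN to 0x%012llx\n", (unsigned long long)pn);
	return 0;
}
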
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 1021e87c051f..37ea30e0754c 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -114,14 +114,6 @@ static ssize_t ieee80211_if_fmt_##name( \
 	return scnprintf(buf, buflen, "%pM\n", sdata->field);		\
 }
 
-#define IEEE80211_IF_FMT_DEC_DIV_16(name, field)			\
-static ssize_t ieee80211_if_fmt_##name(					\
-	const struct ieee80211_sub_if_data *sdata,			\
-	char *buf, int buflen)						\
-{									\
-	return scnprintf(buf, buflen, "%d\n", sdata->field / 16);	\
-}
-
 #define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, field)			\
 static ssize_t ieee80211_if_fmt_##name(					\
 	const struct ieee80211_sub_if_data *sdata,			\
@@ -247,8 +239,6 @@ IEEE80211_IF_FILE_R(hw_queues);
 /* STA attributes */
 IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
 IEEE80211_IF_FILE(aid, u.mgd.aid, DEC);
-IEEE80211_IF_FILE(last_beacon, u.mgd.last_beacon_signal, DEC);
-IEEE80211_IF_FILE(ave_beacon, u.mgd.ave_beacon_signal, DEC_DIV_16);
 IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS);
 
 static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
@@ -455,6 +445,34 @@ static ssize_t ieee80211_if_parse_uapsd_max_sp_len(
 }
 IEEE80211_IF_FILE_RW(uapsd_max_sp_len);
 
+static ssize_t ieee80211_if_fmt_tdls_wider_bw(
+	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+	const struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	bool tdls_wider_bw;
+
+	tdls_wider_bw = ieee80211_hw_check(&sdata->local->hw, TDLS_WIDER_BW) &&
+			!ifmgd->tdls_wider_bw_prohibited;
+
+	return snprintf(buf, buflen, "%d\n", tdls_wider_bw);
+}
+
+static ssize_t ieee80211_if_parse_tdls_wider_bw(
+	struct ieee80211_sub_if_data *sdata, const char *buf, int buflen)
+{
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	u8 val;
+	int ret;
+
+	ret = kstrtou8(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	ifmgd->tdls_wider_bw_prohibited = !val;
+	return buflen;
+}
+IEEE80211_IF_FILE_RW(tdls_wider_bw);
+
 /* AP attributes */
 IEEE80211_IF_FILE(num_mcast_sta, u.ap.num_mcast_sta, ATOMIC);
 IEEE80211_IF_FILE(num_sta_ps, u.ap.ps.num_sta_ps, ATOMIC);
@@ -606,14 +624,13 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(bssid);
 	DEBUGFS_ADD(aid);
-	DEBUGFS_ADD(last_beacon);
-	DEBUGFS_ADD(ave_beacon);
 	DEBUGFS_ADD(beacon_timeout);
 	DEBUGFS_ADD_MODE(smps, 0600);
 	DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
 	DEBUGFS_ADD_MODE(beacon_loss, 0200);
 	DEBUGFS_ADD_MODE(uapsd_queues, 0600);
 	DEBUGFS_ADD_MODE(uapsd_max_sp_len, 0600);
+	DEBUGFS_ADD_MODE(tdls_wider_bw, 0600);
 }
 
 static void add_ap_files(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 06d52935036d..a39512f09f9e 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -50,7 +50,6 @@ static const struct file_operations sta_ ##name## _ops = { \
 	STA_OPS(name)
 
 STA_FILE(aid, sta.aid, D);
-STA_FILE(last_ack_signal, last_ack_signal, D);
 
 static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
 			      size_t count, loff_t *ppos)
@@ -366,11 +365,10 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
 	DEBUGFS_ADD(agg_status);
 	DEBUGFS_ADD(ht_capa);
 	DEBUGFS_ADD(vht_capa);
-	DEBUGFS_ADD(last_ack_signal);
 
-	DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates);
-	DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments);
-	DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count);
+	DEBUGFS_ADD_COUNTER(rx_duplicates, rx_stats.num_duplicates);
+	DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments);
+	DEBUGFS_ADD_COUNTER(tx_filtered, status_stats.filtered);
 
 	if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
 		debugfs_create_x32("driver_buffered_tids", 0400,
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index 267c3b1ca047..ca1fe5576103 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright 2015 Intel Deutschland GmbH
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -8,6 +10,102 @@
 #include "trace.h"
 #include "driver-ops.h"
 
+int drv_start(struct ieee80211_local *local)
+{
+	int ret;
+
+	might_sleep();
+
+	if (WARN_ON(local->started))
+		return -EALREADY;
+
+	trace_drv_start(local);
+	local->started = true;
+	/* allow rx frames */
+	smp_mb();
+	ret = local->ops->start(&local->hw);
+	trace_drv_return_int(local, ret);
+
+	if (ret)
+		local->started = false;
+
+	return ret;
+}
+
+void drv_stop(struct ieee80211_local *local)
+{
+	might_sleep();
+
+	if (WARN_ON(!local->started))
+		return;
+
+	trace_drv_stop(local);
+	local->ops->stop(&local->hw);
+	trace_drv_return_void(local);
+
+	/* sync away all work on the tasklet before clearing started */
+	tasklet_disable(&local->tasklet);
+	tasklet_enable(&local->tasklet);
+
+	barrier();
+
+	local->started = false;
+}
+
+int drv_add_interface(struct ieee80211_local *local,
+		      struct ieee80211_sub_if_data *sdata)
+{
+	int ret;
+
+	might_sleep();
+
+	if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+		    (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+		     !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) &&
+		     !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))))
+		return -EINVAL;
+
+	trace_drv_add_interface(local, sdata);
+	ret = local->ops->add_interface(&local->hw, &sdata->vif);
+	trace_drv_return_int(local, ret);
+
+	if (ret == 0)
+		sdata->flags |= IEEE80211_SDATA_IN_DRIVER;
+
+	return ret;
+}
+
+int drv_change_interface(struct ieee80211_local *local,
+			 struct ieee80211_sub_if_data *sdata,
+			 enum nl80211_iftype type, bool p2p)
+{
+	int ret;
+
+	might_sleep();
+
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
+
+	trace_drv_change_interface(local, sdata, type, p2p);
+	ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
+	trace_drv_return_int(local, ret);
+	return ret;
+}
+
+void drv_remove_interface(struct ieee80211_local *local,
+			  struct ieee80211_sub_if_data *sdata)
+{
+	might_sleep();
+
+	if (!check_sdata_in_driver(sdata))
+		return;
+
+	trace_drv_remove_interface(local, sdata);
+	local->ops->remove_interface(&local->hw, &sdata->vif);
+	sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER;
+	trace_drv_return_void(local);
+}
+
 __must_check
 int drv_sta_state(struct ieee80211_local *local,
 		  struct ieee80211_sub_if_data *sdata,
@@ -39,3 +137,173 @@ int drv_sta_state(struct ieee80211_local *local,
 	trace_drv_return_int(local, ret);
 	return ret;
 }
+
+void drv_sta_rc_update(struct ieee80211_local *local,
+		       struct ieee80211_sub_if_data *sdata,
+		       struct ieee80211_sta *sta, u32 changed)
+{
+	sdata = get_bss_sdata(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
+
+	WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED &&
+		(sdata->vif.type != NL80211_IFTYPE_ADHOC &&
+		 sdata->vif.type != NL80211_IFTYPE_MESH_POINT));
+
+	trace_drv_sta_rc_update(local, sdata, sta, changed);
+	if (local->ops->sta_rc_update)
+		local->ops->sta_rc_update(&local->hw, &sdata->vif,
+					  sta, changed);
+
+	trace_drv_return_void(local);
+}
+
+int drv_conf_tx(struct ieee80211_local *local,
+		struct ieee80211_sub_if_data *sdata, u16 ac,
+		const struct ieee80211_tx_queue_params *params)
+{
+	int ret = -EOPNOTSUPP;
+
+	might_sleep();
+
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
+
+	if (WARN_ONCE(params->cw_min == 0 ||
+		      params->cw_min > params->cw_max,
+		      "%s: invalid CW_min/CW_max: %d/%d\n",
+		      sdata->name, params->cw_min, params->cw_max))
+		return -EINVAL;
+
+	trace_drv_conf_tx(local, sdata, ac, params);
+	if (local->ops->conf_tx)
+		ret = local->ops->conf_tx(&local->hw, &sdata->vif,
+					  ac, params);
+	trace_drv_return_int(local, ret);
+	return ret;
+}
+
+u64 drv_get_tsf(struct ieee80211_local *local,
+		struct ieee80211_sub_if_data *sdata)
+{
+	u64 ret = -1ULL;
+
+	might_sleep();
+
+	if (!check_sdata_in_driver(sdata))
+		return ret;
+
+	trace_drv_get_tsf(local, sdata);
+	if (local->ops->get_tsf)
+		ret = local->ops->get_tsf(&local->hw, &sdata->vif);
+	trace_drv_return_u64(local, ret);
+	return ret;
+}
+
+void drv_set_tsf(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 u64 tsf)
+{
+	might_sleep();
+
+	if (!check_sdata_in_driver(sdata))
+		return;
+
+	trace_drv_set_tsf(local, sdata, tsf);
+	if (local->ops->set_tsf)
+		local->ops->set_tsf(&local->hw, &sdata->vif, tsf);
+	trace_drv_return_void(local);
+}
+
+void drv_reset_tsf(struct ieee80211_local *local,
+		   struct ieee80211_sub_if_data *sdata)
+{
+	might_sleep();
+
+	if (!check_sdata_in_driver(sdata))
+		return;
+
+	trace_drv_reset_tsf(local, sdata);
+	if (local->ops->reset_tsf)
+		local->ops->reset_tsf(&local->hw, &sdata->vif);
+	trace_drv_return_void(local);
+}
+
+int drv_switch_vif_chanctx(struct ieee80211_local *local,
+			   struct ieee80211_vif_chanctx_switch *vifs,
+			   int n_vifs, enum ieee80211_chanctx_switch_mode mode)
+{
+	int ret = 0;
+	int i;
+
+	might_sleep();
+
+	if (!local->ops->switch_vif_chanctx)
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < n_vifs; i++) {
+		struct ieee80211_chanctx *new_ctx =
+			container_of(vifs[i].new_ctx,
+				     struct ieee80211_chanctx,
+				     conf);
+		struct ieee80211_chanctx *old_ctx =
+			container_of(vifs[i].old_ctx,
+				     struct ieee80211_chanctx,
+				     conf);
+
+		WARN_ON_ONCE(!old_ctx->driver_present);
+		WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS &&
+			      new_ctx->driver_present) ||
+			     (mode == CHANCTX_SWMODE_REASSIGN_VIF &&
+			      !new_ctx->driver_present));
+	}
+
+	trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode);
+	ret = local->ops->switch_vif_chanctx(&local->hw,
+					     vifs, n_vifs, mode);
+	trace_drv_return_int(local, ret);
+
+	if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) {
+		for (i = 0; i < n_vifs; i++) {
+			struct ieee80211_chanctx *new_ctx =
+				container_of(vifs[i].new_ctx,
+					     struct ieee80211_chanctx,
+					     conf);
+			struct ieee80211_chanctx *old_ctx =
+				container_of(vifs[i].old_ctx,
+					     struct ieee80211_chanctx,
+					     conf);
+
+			new_ctx->driver_present = true;
+			old_ctx->driver_present = false;
+		}
+	}
+
+	return ret;
+}
+
+int drv_ampdu_action(struct ieee80211_local *local,
+		     struct ieee80211_sub_if_data *sdata,
+		     enum ieee80211_ampdu_mlme_action action,
+		     struct ieee80211_sta *sta, u16 tid,
+		     u16 *ssn, u8 buf_size, bool amsdu)
+{
+	int ret = -EOPNOTSUPP;
+
+	might_sleep();
+
+	sdata = get_bss_sdata(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
+
+	trace_drv_ampdu_action(local, sdata, action, sta, tid,
+			       ssn, buf_size, amsdu);
+
+	if (local->ops->ampdu_action)
+		ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action,
+					       sta, tid, ssn, buf_size, amsdu);
+
+	trace_drv_return_int(local, ret);
+
+	return ret;
+}
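
All of the wrappers moved out of line above share one shape: an optional might_sleep()/sanity check, a trace of the call, the optional driver callback, and a trace of the return value. A compact userspace sketch of that shape with stand-in types (nothing here is real mac80211 API; printf stands in for the tracepoints):

#include <stdio.h>

#define EOPNOTSUPP 95

struct ops { int (*conf_tx)(int ac); };
struct local { const struct ops *ops; };

static int drv_conf_tx_sketch(struct local *local, int ac)
{
	int ret = -EOPNOTSUPP;	/* default when the driver lacks the hook */

	printf("trace: conf_tx ac=%d\n", ac);	/* trace_drv_conf_tx() */
	if (local->ops->conf_tx)
		ret = local->ops->conf_tx(ac);
	printf("trace: ret=%d\n", ret);		/* trace_drv_return_int() */
	return ret;
}

static int fake_conf_tx(int ac) { (void)ac; return 0; }

int main(void)
{
	const struct ops ops = { .conf_tx = fake_conf_tx };
	struct local local = { .ops = &ops };

	return drv_conf_tx_sketch(&local, 2);
}
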
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 02d91332d7dd..154ce4b13406 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -66,36 +66,8 @@ static inline int drv_get_et_sset_count(struct ieee80211_sub_if_data *sdata,
 	return rv;
 }
 
-static inline int drv_start(struct ieee80211_local *local)
-{
-	int ret;
-
-	might_sleep();
-
-	trace_drv_start(local);
-	local->started = true;
-	smp_mb();
-	ret = local->ops->start(&local->hw);
-	trace_drv_return_int(local, ret);
-	return ret;
-}
-
-static inline void drv_stop(struct ieee80211_local *local)
-{
-	might_sleep();
-
-	trace_drv_stop(local);
-	local->ops->stop(&local->hw);
-	trace_drv_return_void(local);
-
-	/* sync away all work on the tasklet before clearing started */
-	tasklet_disable(&local->tasklet);
-	tasklet_enable(&local->tasklet);
-
-	barrier();
-
-	local->started = false;
-}
+int drv_start(struct ieee80211_local *local);
+void drv_stop(struct ieee80211_local *local);
 
 #ifdef CONFIG_PM
 static inline int drv_suspend(struct ieee80211_local *local,
@@ -137,59 +109,15 @@ static inline void drv_set_wakeup(struct ieee80211_local *local,
 }
 #endif
 
-static inline int drv_add_interface(struct ieee80211_local *local,
-				    struct ieee80211_sub_if_data *sdata)
-{
-	int ret;
-
-	might_sleep();
-
-	if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
-		    (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
-		     !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) &&
-		     !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))))
-		return -EINVAL;
-
-	trace_drv_add_interface(local, sdata);
-	ret = local->ops->add_interface(&local->hw, &sdata->vif);
-	trace_drv_return_int(local, ret);
-
-	if (ret == 0)
-		sdata->flags |= IEEE80211_SDATA_IN_DRIVER;
-
-	return ret;
-}
-
-static inline int drv_change_interface(struct ieee80211_local *local,
-				       struct ieee80211_sub_if_data *sdata,
-				       enum nl80211_iftype type, bool p2p)
-{
-	int ret;
-
-	might_sleep();
-
-	if (!check_sdata_in_driver(sdata))
-		return -EIO;
-
-	trace_drv_change_interface(local, sdata, type, p2p);
-	ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
-	trace_drv_return_int(local, ret);
-	return ret;
-}
+int drv_add_interface(struct ieee80211_local *local,
+		      struct ieee80211_sub_if_data *sdata);
 
-static inline void drv_remove_interface(struct ieee80211_local *local,
-					struct ieee80211_sub_if_data *sdata)
-{
-	might_sleep();
-
-	if (!check_sdata_in_driver(sdata))
-		return;
-
-	trace_drv_remove_interface(local, sdata);
-	local->ops->remove_interface(&local->hw, &sdata->vif);
-	sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER;
-	trace_drv_return_void(local);
-}
+int drv_change_interface(struct ieee80211_local *local,
+			 struct ieee80211_sub_if_data *sdata,
+			 enum nl80211_iftype type, bool p2p);
+
+void drv_remove_interface(struct ieee80211_local *local,
+			  struct ieee80211_sub_if_data *sdata);
 
 static inline int drv_config(struct ieee80211_local *local, u32 changed)
 {
@@ -260,6 +188,22 @@ static inline void drv_configure_filter(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
+static inline void drv_config_iface_filter(struct ieee80211_local *local,
+					   struct ieee80211_sub_if_data *sdata,
+					   unsigned int filter_flags,
+					   unsigned int changed_flags)
+{
+	might_sleep();
+
+	trace_drv_config_iface_filter(local, sdata, filter_flags,
+				      changed_flags);
+	if (local->ops->config_iface_filter)
+		local->ops->config_iface_filter(&local->hw, &sdata->vif,
+						filter_flags,
+						changed_flags);
+	trace_drv_return_void(local);
+}
+
 static inline int drv_set_tim(struct ieee80211_local *local,
 			      struct ieee80211_sta *sta, bool set)
 {
@@ -580,25 +524,9 @@ int drv_sta_state(struct ieee80211_local *local,
 		  enum ieee80211_sta_state old_state,
 		  enum ieee80211_sta_state new_state);
 
-static inline void drv_sta_rc_update(struct ieee80211_local *local,
-				     struct ieee80211_sub_if_data *sdata,
-				     struct ieee80211_sta *sta, u32 changed)
-{
-	sdata = get_bss_sdata(sdata);
-	if (!check_sdata_in_driver(sdata))
-		return;
-
-	WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED &&
-		(sdata->vif.type != NL80211_IFTYPE_ADHOC &&
-		 sdata->vif.type != NL80211_IFTYPE_MESH_POINT));
-
-	trace_drv_sta_rc_update(local, sdata, sta, changed);
-	if (local->ops->sta_rc_update)
-		local->ops->sta_rc_update(&local->hw, &sdata->vif,
-					  sta, changed);
-
-	trace_drv_return_void(local);
-}
+void drv_sta_rc_update(struct ieee80211_local *local,
+		       struct ieee80211_sub_if_data *sdata,
+		       struct ieee80211_sta *sta, u32 changed);
 
 static inline void drv_sta_rate_tbl_update(struct ieee80211_local *local,
 					   struct ieee80211_sub_if_data *sdata,
@@ -630,76 +558,17 @@ static inline void drv_sta_statistics(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
-static inline int drv_conf_tx(struct ieee80211_local *local,
-			      struct ieee80211_sub_if_data *sdata, u16 ac,
-			      const struct ieee80211_tx_queue_params *params)
-{
-	int ret = -EOPNOTSUPP;
-
-	might_sleep();
-
-	if (!check_sdata_in_driver(sdata))
-		return -EIO;
-
-	if (WARN_ONCE(params->cw_min == 0 ||
-		      params->cw_min > params->cw_max,
-		      "%s: invalid CW_min/CW_max: %d/%d\n",
-		      sdata->name, params->cw_min, params->cw_max))
-		return -EINVAL;
-
-	trace_drv_conf_tx(local, sdata, ac, params);
-	if (local->ops->conf_tx)
-		ret = local->ops->conf_tx(&local->hw, &sdata->vif,
-					  ac, params);
-	trace_drv_return_int(local, ret);
-	return ret;
-}
-
-static inline u64 drv_get_tsf(struct ieee80211_local *local,
-			      struct ieee80211_sub_if_data *sdata)
-{
-	u64 ret = -1ULL;
-
-	might_sleep();
-
-	if (!check_sdata_in_driver(sdata))
-		return ret;
-
-	trace_drv_get_tsf(local, sdata);
-	if (local->ops->get_tsf)
-		ret = local->ops->get_tsf(&local->hw, &sdata->vif);
-	trace_drv_return_u64(local, ret);
-	return ret;
-}
-
-static inline void drv_set_tsf(struct ieee80211_local *local,
-			       struct ieee80211_sub_if_data *sdata,
-			       u64 tsf)
-{
-	might_sleep();
-
-	if (!check_sdata_in_driver(sdata))
-		return;
-
-	trace_drv_set_tsf(local, sdata, tsf);
-	if (local->ops->set_tsf)
-		local->ops->set_tsf(&local->hw, &sdata->vif, tsf);
-	trace_drv_return_void(local);
-}
-
-static inline void drv_reset_tsf(struct ieee80211_local *local,
-				 struct ieee80211_sub_if_data *sdata)
-{
-	might_sleep();
-
-	if (!check_sdata_in_driver(sdata))
-		return;
-
-	trace_drv_reset_tsf(local, sdata);
-	if (local->ops->reset_tsf)
-		local->ops->reset_tsf(&local->hw, &sdata->vif);
-	trace_drv_return_void(local);
-}
+int drv_conf_tx(struct ieee80211_local *local,
+		struct ieee80211_sub_if_data *sdata, u16 ac,
+		const struct ieee80211_tx_queue_params *params);
+
+u64 drv_get_tsf(struct ieee80211_local *local,
+		struct ieee80211_sub_if_data *sdata);
+void drv_set_tsf(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 u64 tsf);
+void drv_reset_tsf(struct ieee80211_local *local,
+		   struct ieee80211_sub_if_data *sdata);
 
 static inline int drv_tx_last_beacon(struct ieee80211_local *local)
 {
@@ -714,30 +583,11 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local)
 	return ret;
 }
 
-static inline int drv_ampdu_action(struct ieee80211_local *local,
-				   struct ieee80211_sub_if_data *sdata,
-				   enum ieee80211_ampdu_mlme_action action,
-				   struct ieee80211_sta *sta, u16 tid,
-				   u16 *ssn, u8 buf_size)
-{
-	int ret = -EOPNOTSUPP;
-
-	might_sleep();
-
-	sdata = get_bss_sdata(sdata);
-	if (!check_sdata_in_driver(sdata))
-		return -EIO;
-
-	trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size);
-
-	if (local->ops->ampdu_action)
-		ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action,
-					       sta, tid, ssn, buf_size);
-
-	trace_drv_return_int(local, ret);
-
-	return ret;
-}
+int drv_ampdu_action(struct ieee80211_local *local,
+		     struct ieee80211_sub_if_data *sdata,
+		     enum ieee80211_ampdu_mlme_action action,
+		     struct ieee80211_sta *sta, u16 tid,
+		     u16 *ssn, u8 buf_size, bool amsdu);
 
 static inline int drv_get_survey(struct ieee80211_local *local, int idx,
 				 struct survey_info *survey)
@@ -993,6 +843,8 @@ static inline int drv_add_chanctx(struct ieee80211_local *local,
 {
 	int ret = -EOPNOTSUPP;
 
+	might_sleep();
+
 	trace_drv_add_chanctx(local, ctx);
 	if (local->ops->add_chanctx)
 		ret = local->ops->add_chanctx(&local->hw, &ctx->conf);
@@ -1006,6 +858,8 @@ static inline int drv_add_chanctx(struct ieee80211_local *local,
 static inline void drv_remove_chanctx(struct ieee80211_local *local,
 				      struct ieee80211_chanctx *ctx)
 {
+	might_sleep();
+
 	if (WARN_ON(!ctx->driver_present))
 		return;
 
@@ -1020,6 +874,8 @@ static inline void drv_change_chanctx(struct ieee80211_local *local,
 				      struct ieee80211_chanctx *ctx,
 				      u32 changed)
 {
+	might_sleep();
+
 	trace_drv_change_chanctx(local, ctx, changed);
 	if (local->ops->change_chanctx) {
 		WARN_ON_ONCE(!ctx->driver_present);
@@ -1053,6 +909,8 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local,
 					    struct ieee80211_sub_if_data *sdata,
 					    struct ieee80211_chanctx *ctx)
 {
+	might_sleep();
+
 	if (!check_sdata_in_driver(sdata))
 		return;
 
@@ -1066,64 +924,17 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
-static inline int
-drv_switch_vif_chanctx(struct ieee80211_local *local,
-		       struct ieee80211_vif_chanctx_switch *vifs,
-		       int n_vifs,
-		       enum ieee80211_chanctx_switch_mode mode)
-{
-	int ret = 0;
-	int i;
-
-	if (!local->ops->switch_vif_chanctx)
-		return -EOPNOTSUPP;
-
-	for (i = 0; i < n_vifs; i++) {
-		struct ieee80211_chanctx *new_ctx =
-			container_of(vifs[i].new_ctx,
-				     struct ieee80211_chanctx,
-				     conf);
-		struct ieee80211_chanctx *old_ctx =
-			container_of(vifs[i].old_ctx,
-				     struct ieee80211_chanctx,
-				     conf);
-
-		WARN_ON_ONCE(!old_ctx->driver_present);
-		WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS &&
-			      new_ctx->driver_present) ||
-			     (mode == CHANCTX_SWMODE_REASSIGN_VIF &&
-			      !new_ctx->driver_present));
-	}
-
-	trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode);
-	ret = local->ops->switch_vif_chanctx(&local->hw,
-					     vifs, n_vifs, mode);
-	trace_drv_return_int(local, ret);
-
-	if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) {
-		for (i = 0; i < n_vifs; i++) {
-			struct ieee80211_chanctx *new_ctx =
-				container_of(vifs[i].new_ctx,
-					     struct ieee80211_chanctx,
-					     conf);
-			struct ieee80211_chanctx *old_ctx =
-				container_of(vifs[i].old_ctx,
-					     struct ieee80211_chanctx,
-					     conf);
-
-			new_ctx->driver_present = true;
-			old_ctx->driver_present = false;
-		}
-	}
-
-	return ret;
-}
+int drv_switch_vif_chanctx(struct ieee80211_local *local,
+			   struct ieee80211_vif_chanctx_switch *vifs,
+			   int n_vifs, enum ieee80211_chanctx_switch_mode mode);
 
 static inline int drv_start_ap(struct ieee80211_local *local,
 			       struct ieee80211_sub_if_data *sdata)
 {
 	int ret = 0;
 
+	might_sleep();
+
 	if (!check_sdata_in_driver(sdata))
 		return -EIO;
 
diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c
index 188faab11c24..9cc986deda61 100644
--- a/net/mac80211/ethtool.c
+++ b/net/mac80211/ethtool.c
@@ -40,7 +40,7 @@ static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = {
40 "rx_duplicates", "rx_fragments", "rx_dropped", 40 "rx_duplicates", "rx_fragments", "rx_dropped",
41 "tx_packets", "tx_bytes", 41 "tx_packets", "tx_bytes",
42 "tx_filtered", "tx_retry_failed", "tx_retries", 42 "tx_filtered", "tx_retry_failed", "tx_retries",
43 "beacon_loss", "sta_state", "txrate", "rxrate", "signal", 43 "sta_state", "txrate", "rxrate", "signal",
44 "channel", "noise", "ch_time", "ch_time_busy", 44 "channel", "noise", "ch_time", "ch_time_busy",
45 "ch_time_ext_busy", "ch_time_rx", "ch_time_tx" 45 "ch_time_ext_busy", "ch_time_rx", "ch_time_tx"
46}; 46};
@@ -77,20 +77,19 @@ static void ieee80211_get_stats(struct net_device *dev,
 
 	memset(data, 0, sizeof(u64) * STA_STATS_LEN);
 
 #define ADD_STA_STATS(sta)				\
 	do {						\
-		data[i++] += sta->rx_packets;		\
-		data[i++] += sta->rx_bytes;		\
-		data[i++] += sta->num_duplicates;	\
-		data[i++] += sta->rx_fragments;		\
-		data[i++] += sta->rx_dropped;		\
+		data[i++] += sta->rx_stats.packets;	\
+		data[i++] += sta->rx_stats.bytes;	\
+		data[i++] += sta->rx_stats.num_duplicates; \
+		data[i++] += sta->rx_stats.fragments;	\
+		data[i++] += sta->rx_stats.dropped;	\
 							\
 		data[i++] += sinfo.tx_packets;		\
 		data[i++] += sinfo.tx_bytes;		\
-		data[i++] += sta->tx_filtered_count;	\
-		data[i++] += sta->tx_retry_failed;	\
-		data[i++] += sta->tx_retry_count;	\
-		data[i++] += sta->beacon_loss_count;	\
+		data[i++] += sta->status_stats.filtered; \
+		data[i++] += sta->status_stats.retry_failed; \
+		data[i++] += sta->status_stats.retry_count; \
 	} while (0)
 
 	/* For Managed stations, find the single station based on BSSID
diff --git a/net/mac80211/event.c b/net/mac80211/event.c
deleted file mode 100644
index 01ae759518f6..000000000000
--- a/net/mac80211/event.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * mac80211 - events
- */
-#include <net/cfg80211.h>
-#include "ieee80211_i.h"
-
-/*
- * Indicate a failed Michael MIC to userspace. If the caller knows the TSC of
- * the frame that generated the MIC failure (i.e., if it was provided by the
- * driver or is still in the frame), it should provide that information.
- */
-void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx,
-				     struct ieee80211_hdr *hdr, const u8 *tsc,
-				     gfp_t gfp)
-{
-	cfg80211_michael_mic_failure(sdata->dev, hdr->addr2,
-				     (hdr->addr1[0] & 0x01) ?
-				     NL80211_KEYTYPE_GROUP :
-				     NL80211_KEYTYPE_PAIRWISE,
-				     keyidx, tsc, gfp);
-}
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 7f72bc9bae2e..337bb5d78003 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -188,7 +188,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
 	 * keep them at 0
 	 */
 	pos = ieee80211_ie_build_ht_oper(pos, &sband->ht_cap,
-					 chandef, 0);
+					 chandef, 0, false);
 
 	/* add VHT capability and information IEs */
 	if (chandef->width != NL80211_CHAN_WIDTH_20 &&
@@ -229,7 +229,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	struct cfg80211_chan_def chandef;
 	struct ieee80211_channel *chan;
 	struct beacon_data *presp;
-	enum nl80211_bss_scan_width scan_width;
+	struct cfg80211_inform_bss bss_meta = {};
 	bool have_higher_than_11mbit;
 	bool radar_required;
 	int err;
@@ -356,7 +356,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	else
 		sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
 
-	ieee80211_set_wmm_default(sdata, true);
+	ieee80211_set_wmm_default(sdata, true, false);
 
 	sdata->vif.bss_conf.ibss_joined = true;
 	sdata->vif.bss_conf.ibss_creator = creator;
@@ -383,10 +383,11 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	mod_timer(&ifibss->timer,
 		  round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
 
-	scan_width = cfg80211_chandef_to_scan_width(&chandef);
-	bss = cfg80211_inform_bss_width_frame(local->hw.wiphy, chan,
-					      scan_width, mgmt,
-					      presp->head_len, 0, GFP_KERNEL);
+	bss_meta.chan = chan;
+	bss_meta.scan_width = cfg80211_chandef_to_scan_width(&chandef);
+	bss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta, mgmt,
+					     presp->head_len, GFP_KERNEL);
+
 	cfg80211_put_bss(local->hw.wiphy, bss);
 	netif_carrier_on(sdata->dev);
 	cfg80211_ibss_joined(sdata->dev, ifibss->bssid, chan, GFP_KERNEL);
@@ -646,7 +647,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
 		return NULL;
 	}
 
-	sta->last_rx = jiffies;
+	sta->rx_stats.last_rx = jiffies;
 
 	/* make sure mandatory rates are always added */
 	sband = local->hw.wiphy->bands[band];
@@ -668,7 +669,8 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
 
 	list_for_each_entry_rcu(sta, &local->sta_list, list) {
 		if (sta->sdata == sdata &&
-		    time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL,
+		    time_after(sta->rx_stats.last_rx +
+			       IEEE80211_IBSS_MERGE_INTERVAL,
 			       jiffies)) {
 			active++;
 			break;
@@ -1234,7 +1236,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
 	if (!sta)
 		return;
 
-	sta->last_rx = jiffies;
+	sta->rx_stats.last_rx = jiffies;
 
 	/* make sure mandatory rates are always added */
 	sband = local->hw.wiphy->bands[band];
@@ -1252,7 +1254,7 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta, *tmp;
 	unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT;
-	unsigned long exp_rsn_time = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT;
+	unsigned long exp_rsn = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT;
 
 	mutex_lock(&local->sta_mtx);
 
@@ -1260,8 +1262,8 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
 		if (sdata != sta->sdata)
 			continue;
 
-		if (time_after(jiffies, sta->last_rx + exp_time) ||
-		    (time_after(jiffies, sta->last_rx + exp_rsn_time) &&
+		if (time_after(jiffies, sta->rx_stats.last_rx + exp_time) ||
+		    (time_after(jiffies, sta->rx_stats.last_rx + exp_rsn) &&
 		     sta->sta_state != IEEE80211_STA_AUTHORIZED)) {
 			sta_dbg(sta->sdata, "expiring inactive %sSTA %pM\n",
 				sta->sta_state != IEEE80211_STA_AUTHORIZED ?
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 6e52659f923f..d832bd59236b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -34,6 +34,8 @@
34#include "sta_info.h" 34#include "sta_info.h"
35#include "debug.h" 35#include "debug.h"
36 36
37extern const struct cfg80211_ops mac80211_config_ops;
38
37struct ieee80211_local; 39struct ieee80211_local;
38 40
39/* Maximum number of broadcast/multicast frames to buffer when some of the 41/* Maximum number of broadcast/multicast frames to buffer when some of the
@@ -419,6 +421,8 @@ struct ieee80211_sta_tx_tspec {
 	bool downgraded;
 };
 
+DECLARE_EWMA(beacon_signal, 16, 4)
+
 struct ieee80211_if_managed {
 	struct timer_list timer;
 	struct timer_list conn_mon_timer;
@@ -490,16 +494,7 @@ struct ieee80211_if_managed {
 
 	s16 p2p_noa_index;
 
-	/* Signal strength from the last Beacon frame in the current BSS. */
-	int last_beacon_signal;
-
-	/*
-	 * Weighted average of the signal strength from Beacon frames in the
-	 * current BSS. This is in units of 1/16 of the signal unit to maintain
-	 * accuracy and to speed up calculations, i.e., the value need to be
-	 * divided by 16 to get the actual value.
-	 */
-	int ave_beacon_signal;
+	struct ewma_beacon_signal ave_beacon_signal;
 
 	/*
 	 * Number of Beacon frames used in ave_beacon_signal. This can be used
@@ -508,6 +503,9 @@ struct ieee80211_if_managed {
 	 */
 	unsigned int count_beacon_signal;
 
+	/* Number of times beacon loss was invoked. */
+	unsigned int beacon_loss_count;
+
 	/*
 	 * Last Beacon frame signal strength average (ave_beacon_signal / 16)
 	 * that triggered a cqm event. 0 indicates that no event has been
@@ -535,6 +533,7 @@ struct ieee80211_if_managed {
 	struct sk_buff *teardown_skb; /* A copy to send through the AP */
 	spinlock_t teardown_lock; /* To lock changing teardown_skb */
 	bool tdls_chan_switch_prohibited;
+	bool tdls_wider_bw_prohibited;
 
 	/* WMM-AC TSPEC support */
 	struct ieee80211_sta_tx_tspec tx_tspec[IEEE80211_NUM_ACS];
@@ -1311,7 +1310,6 @@ struct ieee80211_local {
 	struct work_struct dynamic_ps_enable_work;
 	struct work_struct dynamic_ps_disable_work;
 	struct timer_list dynamic_ps_timer;
-	struct notifier_block network_latency_notifier;
 	struct notifier_block ifa_notifier;
 	struct notifier_block ifa6_notifier;
 
@@ -1497,10 +1495,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
 			   struct cfg80211_disassoc_request *req);
 void ieee80211_send_pspoll(struct ieee80211_local *local,
 			   struct ieee80211_sub_if_data *sdata);
-void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency);
+void ieee80211_recalc_ps(struct ieee80211_local *local);
 void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata);
-int ieee80211_max_network_latency(struct notifier_block *nb,
-				  unsigned long data, void *dummy);
 int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
@@ -1577,7 +1573,7 @@ __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
 				     struct cfg80211_sched_scan_request *req);
 int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
 				       struct cfg80211_sched_scan_request *req);
-int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata);
+int ieee80211_request_sched_scan_stop(struct ieee80211_local *local);
 void ieee80211_sched_scan_end(struct ieee80211_local *local);
 void ieee80211_sched_scan_stopped_work(struct work_struct *work);
 
@@ -1641,6 +1637,9 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
 struct sk_buff *
 ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
 			      struct sk_buff *skb, u32 info_flags);
+void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
+			  struct ieee80211_supported_band *sband,
+			  int retry_count, int shift, bool send_to_cooked);
 
 void ieee80211_check_fast_xmit(struct sta_info *sta);
 void ieee80211_check_fast_xmit_all(struct ieee80211_local *local);
@@ -1769,11 +1768,8 @@ extern const void *const mac80211_wiphy_privid; /* for wiphy privid */
 int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
 			     int rate, int erp, int short_preamble,
 			     int shift);
-void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx,
-				     struct ieee80211_hdr *hdr, const u8 *tsc,
-				     gfp_t gfp);
 void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
-			       bool bss_notify);
+			       bool bss_notify, bool enable_qos);
 void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
 		    struct sta_info *sta, struct sk_buff *skb);
 
@@ -1853,7 +1849,7 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work);
 void ieee80211_dynamic_ps_timer(unsigned long data);
 void ieee80211_send_nullfunc(struct ieee80211_local *local,
 			     struct ieee80211_sub_if_data *sdata,
-			     int powersave);
+			     bool powersave);
 void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_hdr *hdr);
 void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
@@ -1966,7 +1962,7 @@ u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
 			      u16 cap);
 u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
 			       const struct cfg80211_chan_def *chandef,
-			       u16 prot_mode);
+			       u16 prot_mode, bool rifs_mode);
 u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
 			       u32 cap);
 u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
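
The DECLARE_EWMA(beacon_signal, 16, 4) added in this header replaces the hand-rolled /16 fixed-point average with the generic exponentially weighted moving average helpers from <linux/average.h>; the macro generates struct ewma_beacon_signal plus _init/_add/_read accessors. A kernel-style usage sketch, not a standalone program (the negation reflects that the EWMA stores unsigned values while the signal is a negative dBm figure, which is how the mlme code appears to feed it):

#include <linux/average.h>

DECLARE_EWMA(beacon_signal, 16, 4)	/* emits struct ewma_beacon_signal */

static void beacon_signal_reset(struct ewma_beacon_signal *avg)
{
	ewma_beacon_signal_init(avg);		/* zero the average */
}

static void beacon_signal_sample(struct ewma_beacon_signal *avg, int signal_dbm)
{
	ewma_beacon_signal_add(avg, -signal_dbm);	/* store as positive */
}

static int beacon_signal_avg(struct ewma_beacon_signal *avg)
{
	return -(int)ewma_beacon_signal_read(avg);	/* back to dBm */
}
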
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 6964fc6a8ea2..d0dc1bfaeec2 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -661,11 +661,13 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 		}
 
 		/*
-		 * set default queue parameters so drivers don't
+		 * Set default queue parameters so drivers don't
 		 * need to initialise the hardware if the hardware
-		 * doesn't start up with sane defaults
+		 * doesn't start up with sane defaults.
+		 * Enable QoS for anything but station interfaces.
 		 */
-		ieee80211_set_wmm_default(sdata, true);
+		ieee80211_set_wmm_default(sdata, true,
+			sdata->vif.type != NL80211_IFTYPE_STATION);
 	}
 
 	set_bit(SDATA_STATE_RUNNING, &sdata->state);
@@ -709,7 +711,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 	if (hw_reconf_flags)
 		ieee80211_hw_config(local, hw_reconf_flags);
 
-	ieee80211_recalc_ps(local, -1);
+	ieee80211_recalc_ps(local);
 
 	if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
 	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
@@ -1016,7 +1018,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		drv_remove_interface(local, sdata);
 	}
 
-	ieee80211_recalc_ps(local, -1);
+	ieee80211_recalc_ps(local);
 
 	if (cancel_scan)
 		flush_delayed_work(&local->scan_work);
@@ -1204,7 +1206,7 @@ static void ieee80211_iface_work(struct work_struct *work)
 	if (!ieee80211_sdata_running(sdata))
 		return;
 
-	if (local->scanning)
+	if (test_bit(SCAN_SW_SCANNING, &local->scanning))
 		return;
 
 	if (!ieee80211_can_run_worker(local))
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ff79a13d231d..858f6b1cb149 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -20,7 +20,6 @@
 #include <linux/if_arp.h>
 #include <linux/rtnetlink.h>
 #include <linux/bitmap.h>
-#include <linux/pm_qos.h>
 #include <linux/inetdevice.h>
 #include <net/net_namespace.h>
 #include <net/cfg80211.h>
@@ -32,7 +31,6 @@
32#include "mesh.h" 31#include "mesh.h"
33#include "wep.h" 32#include "wep.h"
34#include "led.h" 33#include "led.h"
35#include "cfg.h"
36#include "debugfs.h" 34#include "debugfs.h"
37 35
38void ieee80211_configure_filter(struct ieee80211_local *local) 36void ieee80211_configure_filter(struct ieee80211_local *local)
@@ -283,7 +281,7 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
 	local->in_reconfig = true;
 	barrier();
 
-	schedule_work(&local->restart_work);
+	queue_work(system_freezable_wq, &local->restart_work);
 }
 EXPORT_SYMBOL(ieee80211_restart_hw);
 
@@ -543,7 +541,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 			   NL80211_FEATURE_HT_IBSS |
 			   NL80211_FEATURE_VIF_TXPOWER |
 			   NL80211_FEATURE_MAC_ON_CREATE |
-			   NL80211_FEATURE_USERSPACE_MPM;
+			   NL80211_FEATURE_USERSPACE_MPM |
+			   NL80211_FEATURE_FULL_AP_CLIENT_STATE;
 
 	if (!ops->hw_scan)
 		wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
@@ -1082,13 +1081,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	rtnl_unlock();
 
-	local->network_latency_notifier.notifier_call =
-		ieee80211_max_network_latency;
-	result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY,
-				     &local->network_latency_notifier);
-	if (result)
-		goto fail_pm_qos;
-
 #ifdef CONFIG_INET
 	local->ifa_notifier.notifier_call = ieee80211_ifa_changed;
 	result = register_inetaddr_notifier(&local->ifa_notifier);
@@ -1113,10 +1105,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 #endif
 #if defined(CONFIG_INET) || defined(CONFIG_IPV6)
  fail_ifa:
-	pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY,
-			       &local->network_latency_notifier);
 #endif
- fail_pm_qos:
 	rtnl_lock();
 	rate_control_deinitialize(local);
 	ieee80211_remove_interfaces(local);
@@ -1142,8 +1131,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 	tasklet_kill(&local->tx_pending_tasklet);
 	tasklet_kill(&local->tasklet);
 
-	pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY,
-			       &local->network_latency_notifier);
 #ifdef CONFIG_INET
 	unregister_inetaddr_notifier(&local->ifa_notifier);
 #endif
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index e06a5ca7c9a9..fa28500f28fd 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -94,6 +94,9 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
 	ieee80211_ht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan,
 				     ie->ht_operation, &sta_chan_def);
 
+	ieee80211_vht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan,
+				      ie->vht_operation, &sta_chan_def);
+
 	if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef,
 					 &sta_chan_def))
 		return false;
@@ -436,8 +439,6 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_chanctx_conf *chanctx_conf;
 	struct ieee80211_channel *channel;
-	enum nl80211_channel_type channel_type =
-		cfg80211_get_chandef_type(&sdata->vif.bss_conf.chandef);
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_sta_ht_cap *ht_cap;
 	u8 *pos;
@@ -454,7 +455,10 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
 	sband = local->hw.wiphy->bands[channel->band];
 	ht_cap = &sband->ht_cap;
 
-	if (!ht_cap->ht_supported || channel_type == NL80211_CHAN_NO_HT)
+	if (!ht_cap->ht_supported ||
+	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
+	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
+	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
 		return 0;
 
 	if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_operation))
@@ -462,7 +466,70 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
462 466
463 pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation)); 467 pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation));
464 ieee80211_ie_build_ht_oper(pos, ht_cap, &sdata->vif.bss_conf.chandef, 468 ieee80211_ie_build_ht_oper(pos, ht_cap, &sdata->vif.bss_conf.chandef,
465 sdata->vif.bss_conf.ht_operation_mode); 469 sdata->vif.bss_conf.ht_operation_mode,
470 false);
471
472 return 0;
473}
474
475int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata,
476 struct sk_buff *skb)
477{
478 struct ieee80211_local *local = sdata->local;
479 enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
480 struct ieee80211_supported_band *sband;
481 u8 *pos;
482
483 sband = local->hw.wiphy->bands[band];
484 if (!sband->vht_cap.vht_supported ||
485 sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
486 sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
487 sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
488 return 0;
489
490 if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_cap))
491 return -ENOMEM;
492
493 pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_cap));
494 ieee80211_ie_build_vht_cap(pos, &sband->vht_cap, sband->vht_cap.cap);
495
496 return 0;
497}
498
499int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
500 struct sk_buff *skb)
501{
502 struct ieee80211_local *local = sdata->local;
503 struct ieee80211_chanctx_conf *chanctx_conf;
504 struct ieee80211_channel *channel;
505 struct ieee80211_supported_band *sband;
506 struct ieee80211_sta_vht_cap *vht_cap;
507 u8 *pos;
508
509 rcu_read_lock();
510 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
511 if (WARN_ON(!chanctx_conf)) {
512 rcu_read_unlock();
513 return -EINVAL;
514 }
515 channel = chanctx_conf->def.chan;
516 rcu_read_unlock();
517
518 sband = local->hw.wiphy->bands[channel->band];
519 vht_cap = &sband->vht_cap;
520
521 if (!vht_cap->vht_supported ||
522 sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
523 sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
524 sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
525 return 0;
526
527 if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_operation))
528 return -ENOMEM;
529
530 pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation));
531 ieee80211_ie_build_vht_oper(pos, vht_cap,
532 &sdata->vif.bss_conf.chandef);
466 533
467 return 0; 534 return 0;
468} 535}
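
The two mesh helpers added above follow the usual IE-append pattern: check the skb tailroom for the element ID and length octets plus the fixed body, then skb_put() and build. A minimal sketch of that pattern in kernel C; struct example_ie and example_add_ie are illustrative stand-ins, not mac80211 symbols:

#include <linux/errno.h>
#include <linux/skbuff.h>

struct example_ie {		/* stand-in for a fixed-size 802.11 IE body */
	u8 field_a;
	u8 field_b;
};

static int example_add_ie(struct sk_buff *skb)
{
	u8 *pos;

	/* 2 bytes cover the element ID and length octets */
	if (skb_tailroom(skb) < 2 + sizeof(struct example_ie))
		return -ENOMEM;

	pos = skb_put(skb, 2 + sizeof(struct example_ie));
	/* a real builder (e.g. ieee80211_ie_build_vht_oper) fills pos here */
	(void)pos;
	return 0;
}

mesh_add_vht_cap_ie() and mesh_add_vht_oper_ie() above are concrete instances of this; the -ENOMEM path is why ieee80211_mesh_build_beacon() later grows tail_len by the two VHT element sizes.
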
@@ -540,9 +607,9 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
540 * 607 *
541 * Return the header length. 608 * Return the header length.
542 */ 609 */
543int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, 610unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata,
544 struct ieee80211s_hdr *meshhdr, 611 struct ieee80211s_hdr *meshhdr,
545 const char *addr4or5, const char *addr6) 612 const char *addr4or5, const char *addr6)
546{ 613{
547 if (WARN_ON(!addr4or5 && addr6)) 614 if (WARN_ON(!addr4or5 && addr6))
548 return 0; 615 return 0;
@@ -637,6 +704,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
637 2 + ifmsh->mesh_id_len + 704 2 + ifmsh->mesh_id_len +
638 2 + sizeof(struct ieee80211_meshconf_ie) + 705 2 + sizeof(struct ieee80211_meshconf_ie) +
639 2 + sizeof(__le16) + /* awake window */ 706 2 + sizeof(__le16) + /* awake window */
707 2 + sizeof(struct ieee80211_vht_cap) +
708 2 + sizeof(struct ieee80211_vht_operation) +
640 ifmsh->ie_len; 709 ifmsh->ie_len;
641 710
642 bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL); 711 bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL);
@@ -718,6 +787,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
718 mesh_add_meshid_ie(sdata, skb) || 787 mesh_add_meshid_ie(sdata, skb) ||
719 mesh_add_meshconf_ie(sdata, skb) || 788 mesh_add_meshconf_ie(sdata, skb) ||
720 mesh_add_awake_window_ie(sdata, skb) || 789 mesh_add_awake_window_ie(sdata, skb) ||
790 mesh_add_vht_cap_ie(sdata, skb) ||
791 mesh_add_vht_oper_ie(sdata, skb) ||
721 mesh_add_vendor_ies(sdata, skb)) 792 mesh_add_vendor_ies(sdata, skb))
722 goto out_free; 793 goto out_free;
723 794
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 50c8473cf9dc..a1596344c3ba 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -207,9 +207,9 @@ struct mesh_rmc {
207/* Various */ 207/* Various */
208int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, 208int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
209 const u8 *da, const u8 *sa); 209 const u8 *da, const u8 *sa);
210int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, 210unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata,
211 struct ieee80211s_hdr *meshhdr, 211 struct ieee80211s_hdr *meshhdr,
212 const char *addr4or5, const char *addr6); 212 const char *addr4or5, const char *addr6);
213int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, 213int mesh_rmc_check(struct ieee80211_sub_if_data *sdata,
214 const u8 *addr, struct ieee80211s_hdr *mesh_hdr); 214 const u8 *addr, struct ieee80211s_hdr *mesh_hdr);
215bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, 215bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
@@ -227,6 +227,10 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata,
227 struct sk_buff *skb); 227 struct sk_buff *skb);
228int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, 228int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
229 struct sk_buff *skb); 229 struct sk_buff *skb);
230int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata,
231 struct sk_buff *skb);
232int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
233 struct sk_buff *skb);
230void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); 234void mesh_rmc_free(struct ieee80211_sub_if_data *sdata);
231int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); 235int mesh_rmc_init(struct ieee80211_sub_if_data *sdata);
232void ieee80211s_init(void); 236void ieee80211s_init(void);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index d80e0a4c16cf..c6be0b4f4058 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -329,7 +329,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
329 if (sta->mesh->fail_avg >= 100) 329 if (sta->mesh->fail_avg >= 100)
330 return MAX_METRIC; 330 return MAX_METRIC;
331 331
332 sta_set_rate_info_tx(sta, &sta->last_tx_rate, &rinfo); 332 sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
333 rate = cfg80211_calculate_bitrate(&rinfo); 333 rate = cfg80211_calculate_bitrate(&rinfo);
334 if (WARN_ON(!rate)) 334 if (WARN_ON(!rate))
335 return MAX_METRIC; 335 return MAX_METRIC;
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 58384642e03c..bd3d55eb21d4 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -60,7 +60,9 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata,
60{ 60{
61 s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold; 61 s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold;
62 return rssi_threshold == 0 || 62 return rssi_threshold == 0 ||
63 (sta && (s8) -ewma_signal_read(&sta->avg_signal) > rssi_threshold); 63 (sta &&
64 (s8)-ewma_signal_read(&sta->rx_stats.avg_signal) >
65 rssi_threshold);
64} 66}
65 67
66/** 68/**
@@ -226,6 +228,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
226 2 + sizeof(struct ieee80211_meshconf_ie) + 228 2 + sizeof(struct ieee80211_meshconf_ie) +
227 2 + sizeof(struct ieee80211_ht_cap) + 229 2 + sizeof(struct ieee80211_ht_cap) +
228 2 + sizeof(struct ieee80211_ht_operation) + 230 2 + sizeof(struct ieee80211_ht_operation) +
231 2 + sizeof(struct ieee80211_vht_cap) +
232 2 + sizeof(struct ieee80211_vht_operation) +
229 2 + 8 + /* peering IE */ 233 2 + 8 + /* peering IE */
230 sdata->u.mesh.ie_len); 234 sdata->u.mesh.ie_len);
231 if (!skb) 235 if (!skb)
@@ -306,7 +310,9 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
306 310
307 if (action != WLAN_SP_MESH_PEERING_CLOSE) { 311 if (action != WLAN_SP_MESH_PEERING_CLOSE) {
308 if (mesh_add_ht_cap_ie(sdata, skb) || 312 if (mesh_add_ht_cap_ie(sdata, skb) ||
309 mesh_add_ht_oper_ie(sdata, skb)) 313 mesh_add_ht_oper_ie(sdata, skb) ||
314 mesh_add_vht_cap_ie(sdata, skb) ||
315 mesh_add_vht_oper_ie(sdata, skb))
310 goto free; 316 goto free;
311 } 317 }
312 318
@@ -386,7 +392,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
386 rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates); 392 rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates);
387 393
388 spin_lock_bh(&sta->mesh->plink_lock); 394 spin_lock_bh(&sta->mesh->plink_lock);
389 sta->last_rx = jiffies; 395 sta->rx_stats.last_rx = jiffies;
390 396
391 /* rates and capabilities don't change during peering */ 397 /* rates and capabilities don't change during peering */
392 if (sta->mesh->plink_state == NL80211_PLINK_ESTAB && 398 if (sta->mesh->plink_state == NL80211_PLINK_ESTAB &&
@@ -402,6 +408,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
402 elems->ht_cap_elem, sta)) 408 elems->ht_cap_elem, sta))
403 changed |= IEEE80211_RC_BW_CHANGED; 409 changed |= IEEE80211_RC_BW_CHANGED;
404 410
411 ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
412 elems->vht_cap_elem, sta);
413
405 if (bw != sta->sta.bandwidth) 414 if (bw != sta->sta.bandwidth)
406 changed |= IEEE80211_RC_BW_CHANGED; 415 changed |= IEEE80211_RC_BW_CHANGED;
407 416
@@ -677,6 +686,9 @@ static bool llid_in_use(struct ieee80211_sub_if_data *sdata,
677 686
678 rcu_read_lock(); 687 rcu_read_lock();
679 list_for_each_entry_rcu(sta, &local->sta_list, list) { 688 list_for_each_entry_rcu(sta, &local->sta_list, list) {
689 if (sdata != sta->sdata)
690 continue;
691
680 if (!memcmp(&sta->mesh->llid, &llid, sizeof(llid))) { 692 if (!memcmp(&sta->mesh->llid, &llid, sizeof(llid))) {
681 in_use = true; 693 in_use = true;
682 break; 694 break;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cd7e55e08a23..b140cc6651f4 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -20,7 +20,6 @@
20#include <linux/etherdevice.h> 20#include <linux/etherdevice.h>
21#include <linux/moduleparam.h> 21#include <linux/moduleparam.h>
22#include <linux/rtnetlink.h> 22#include <linux/rtnetlink.h>
23#include <linux/pm_qos.h>
24#include <linux/crc32.h> 23#include <linux/crc32.h>
25#include <linux/slab.h> 24#include <linux/slab.h>
26#include <linux/export.h> 25#include <linux/export.h>
@@ -82,13 +81,6 @@ MODULE_PARM_DESC(probe_wait_ms,
82 " before disconnecting (reason 4)."); 81 " before disconnecting (reason 4).");
83 82
84/* 83/*
85 * Weight given to the latest Beacon frame when calculating average signal
86 * strength for Beacon frames received in the current BSS. This must be
87 * between 1 and 15.
88 */
89#define IEEE80211_SIGNAL_AVE_WEIGHT 3
90
91/*
92 * How many Beacon frames need to have been used in average signal strength 84 * How many Beacon frames need to have been used in average signal strength
93 * before starting to indicate signal change events. 85 * before starting to indicate signal change events.
94 */ 86 */
@@ -943,7 +935,7 @@ void ieee80211_send_pspoll(struct ieee80211_local *local,
943 935
944void ieee80211_send_nullfunc(struct ieee80211_local *local, 936void ieee80211_send_nullfunc(struct ieee80211_local *local,
945 struct ieee80211_sub_if_data *sdata, 937 struct ieee80211_sub_if_data *sdata,
946 int powersave) 938 bool powersave)
947{ 939{
948 struct sk_buff *skb; 940 struct sk_buff *skb;
949 struct ieee80211_hdr_3addr *nullfunc; 941 struct ieee80211_hdr_3addr *nullfunc;
@@ -1427,7 +1419,7 @@ static void ieee80211_enable_ps(struct ieee80211_local *local,
1427 msecs_to_jiffies(conf->dynamic_ps_timeout)); 1419 msecs_to_jiffies(conf->dynamic_ps_timeout));
1428 } else { 1420 } else {
1429 if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) 1421 if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
1430 ieee80211_send_nullfunc(local, sdata, 1); 1422 ieee80211_send_nullfunc(local, sdata, true);
1431 1423
1432 if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) && 1424 if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
1433 ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) 1425 ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
@@ -1483,7 +1475,7 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata)
1483} 1475}
1484 1476
1485/* need to hold RTNL or interface lock */ 1477/* need to hold RTNL or interface lock */
1486void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) 1478void ieee80211_recalc_ps(struct ieee80211_local *local)
1487{ 1479{
1488 struct ieee80211_sub_if_data *sdata, *found = NULL; 1480 struct ieee80211_sub_if_data *sdata, *found = NULL;
1489 int count = 0; 1481 int count = 0;
@@ -1512,48 +1504,23 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
1512 } 1504 }
1513 1505
1514 if (count == 1 && ieee80211_powersave_allowed(found)) { 1506 if (count == 1 && ieee80211_powersave_allowed(found)) {
1507 u8 dtimper = found->u.mgd.dtim_period;
1515 s32 beaconint_us; 1508 s32 beaconint_us;
1516 1509
1517 if (latency < 0)
1518 latency = pm_qos_request(PM_QOS_NETWORK_LATENCY);
1519
1520 beaconint_us = ieee80211_tu_to_usec( 1510 beaconint_us = ieee80211_tu_to_usec(
1521 found->vif.bss_conf.beacon_int); 1511 found->vif.bss_conf.beacon_int);
1522 1512
1523 timeout = local->dynamic_ps_forced_timeout; 1513 timeout = local->dynamic_ps_forced_timeout;
1524 if (timeout < 0) { 1514 if (timeout < 0)
1525 /* 1515 timeout = 100;
1526 * Go to full PSM if the user configures a very low
1527 * latency requirement.
1528 * The 2000 second value is there for compatibility
1529 * until the PM_QOS_NETWORK_LATENCY is configured
1530 * with real values.
1531 */
1532 if (latency > (1900 * USEC_PER_MSEC) &&
1533 latency != (2000 * USEC_PER_SEC))
1534 timeout = 0;
1535 else
1536 timeout = 100;
1537 }
1538 local->hw.conf.dynamic_ps_timeout = timeout; 1516 local->hw.conf.dynamic_ps_timeout = timeout;
1539 1517
1540 if (beaconint_us > latency) { 1518 /* If the TIM IE is invalid, pretend the value is 1 */
1541 local->ps_sdata = NULL; 1519 if (!dtimper)
1542 } else { 1520 dtimper = 1;
1543 int maxslp = 1; 1521
1544 u8 dtimper = found->u.mgd.dtim_period; 1522 local->hw.conf.ps_dtim_period = dtimper;
1545 1523 local->ps_sdata = found;
1546 /* If the TIM IE is invalid, pretend the value is 1 */
1547 if (!dtimper)
1548 dtimper = 1;
1549 else if (dtimper > 1)
1550 maxslp = min_t(int, dtimper,
1551 latency / beaconint_us);
1552
1553 local->hw.conf.max_sleep_period = maxslp;
1554 local->hw.conf.ps_dtim_period = dtimper;
1555 local->ps_sdata = found;
1556 }
1557 } else { 1524 } else {
1558 local->ps_sdata = NULL; 1525 local->ps_sdata = NULL;
1559 } 1526 }
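
With the PM-QoS network-latency input removed, ieee80211_recalc_ps() above no longer scales the sleep period against a latency budget; the dynamic-PS timeout simply defaults to 100 ms and the DTIM period is taken as-is. The beacon interval is still converted from time units; a small user-space sketch of that conversion (1 TU = 1024 µs, mirroring the ieee80211_tu_to_usec() call kept above):

#include <stdio.h>

/* one 802.11 time unit (TU) is 1024 microseconds */
static unsigned long tu_to_usec(unsigned long tu)
{
	return tu * 1024;
}

int main(void)
{
	unsigned long beacon_int_tu = 100;	/* a typical beacon interval */

	printf("beacon interval: %lu us\n", tu_to_usec(beacon_int_tu));
	return 0;
}
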
@@ -1642,7 +1609,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
1642 msecs_to_jiffies( 1609 msecs_to_jiffies(
1643 local->hw.conf.dynamic_ps_timeout)); 1610 local->hw.conf.dynamic_ps_timeout));
1644 } else { 1611 } else {
1645 ieee80211_send_nullfunc(local, sdata, 1); 1612 ieee80211_send_nullfunc(local, sdata, true);
1646 /* Flush to get the tx status of nullfunc frame */ 1613 /* Flush to get the tx status of nullfunc frame */
1647 ieee80211_flush_queues(local, sdata, false); 1614 ieee80211_flush_queues(local, sdata, false);
1648 } 1615 }
@@ -1777,10 +1744,10 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
1777 struct ieee80211_sub_if_data *sdata, 1744 struct ieee80211_sub_if_data *sdata,
1778 const u8 *wmm_param, size_t wmm_param_len) 1745 const u8 *wmm_param, size_t wmm_param_len)
1779{ 1746{
1780 struct ieee80211_tx_queue_params params; 1747 struct ieee80211_tx_queue_params params[IEEE80211_NUM_ACS];
1781 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 1748 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1782 size_t left; 1749 size_t left;
1783 int count; 1750 int count, ac;
1784 const u8 *pos; 1751 const u8 *pos;
1785 u8 uapsd_queues = 0; 1752 u8 uapsd_queues = 0;
1786 1753
@@ -1814,25 +1781,24 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
1814 int aci = (pos[0] >> 5) & 0x03; 1781 int aci = (pos[0] >> 5) & 0x03;
1815 int acm = (pos[0] >> 4) & 0x01; 1782 int acm = (pos[0] >> 4) & 0x01;
1816 bool uapsd = false; 1783 bool uapsd = false;
1817 int queue;
1818 1784
1819 switch (aci) { 1785 switch (aci) {
1820 case 1: /* AC_BK */ 1786 case 1: /* AC_BK */
1821 queue = 3; 1787 ac = IEEE80211_AC_BK;
1822 if (acm) 1788 if (acm)
1823 sdata->wmm_acm |= BIT(1) | BIT(2); /* BK/- */ 1789 sdata->wmm_acm |= BIT(1) | BIT(2); /* BK/- */
1824 if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) 1790 if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK)
1825 uapsd = true; 1791 uapsd = true;
1826 break; 1792 break;
1827 case 2: /* AC_VI */ 1793 case 2: /* AC_VI */
1828 queue = 1; 1794 ac = IEEE80211_AC_VI;
1829 if (acm) 1795 if (acm)
1830 sdata->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */ 1796 sdata->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */
1831 if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI) 1797 if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI)
1832 uapsd = true; 1798 uapsd = true;
1833 break; 1799 break;
1834 case 3: /* AC_VO */ 1800 case 3: /* AC_VO */
1835 queue = 0; 1801 ac = IEEE80211_AC_VO;
1836 if (acm) 1802 if (acm)
1837 sdata->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */ 1803 sdata->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */
1838 if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) 1804 if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO)
@@ -1840,7 +1806,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
1840 break; 1806 break;
1841 case 0: /* AC_BE */ 1807 case 0: /* AC_BE */
1842 default: 1808 default:
1843 queue = 2; 1809 ac = IEEE80211_AC_BE;
1844 if (acm) 1810 if (acm)
1845 sdata->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */ 1811 sdata->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */
1846 if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE) 1812 if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE)
@@ -1848,25 +1814,41 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
1848 break; 1814 break;
1849 } 1815 }
1850 1816
1851 params.aifs = pos[0] & 0x0f; 1817 params[ac].aifs = pos[0] & 0x0f;
1852 params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); 1818
1853 params.cw_min = ecw2cw(pos[1] & 0x0f); 1819 if (params[ac].aifs < 2) {
1854 params.txop = get_unaligned_le16(pos + 2); 1820 sdata_info(sdata,
1855 params.acm = acm; 1821 "AP has invalid WMM params (AIFSN=%d for ACI %d), will use 2\n",
1856 params.uapsd = uapsd; 1822 params[ac].aifs, aci);
1823 params[ac].aifs = 2;
1824 }
1825 params[ac].cw_max = ecw2cw((pos[1] & 0xf0) >> 4);
1826 params[ac].cw_min = ecw2cw(pos[1] & 0x0f);
1827 params[ac].txop = get_unaligned_le16(pos + 2);
1828 params[ac].acm = acm;
1829 params[ac].uapsd = uapsd;
1857 1830
1831 if (params[ac].cw_min > params[ac].cw_max) {
1832 sdata_info(sdata,
1833 "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n",
1834 params[ac].cw_min, params[ac].cw_max, aci);
1835 return false;
1836 }
1837 }
1838
1839 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
1858 mlme_dbg(sdata, 1840 mlme_dbg(sdata,
1859 "WMM queue=%d aci=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d, downgraded=%d\n", 1841 "WMM AC=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d, downgraded=%d\n",
1860 queue, aci, acm, 1842 ac, params[ac].acm,
1861 params.aifs, params.cw_min, params.cw_max, 1843 params[ac].aifs, params[ac].cw_min, params[ac].cw_max,
1862 params.txop, params.uapsd, 1844 params[ac].txop, params[ac].uapsd,
1863 ifmgd->tx_tspec[queue].downgraded); 1845 ifmgd->tx_tspec[ac].downgraded);
1864 sdata->tx_conf[queue] = params; 1846 sdata->tx_conf[ac] = params[ac];
1865 if (!ifmgd->tx_tspec[queue].downgraded && 1847 if (!ifmgd->tx_tspec[ac].downgraded &&
1866 drv_conf_tx(local, sdata, queue, &params)) 1848 drv_conf_tx(local, sdata, ac, &params[ac]))
1867 sdata_err(sdata, 1849 sdata_err(sdata,
1868 "failed to set TX queue parameters for queue %d\n", 1850 "failed to set TX queue parameters for AC %d\n",
1869 queue); 1851 ac);
1870 } 1852 }
1871 1853
1872 /* enable WMM or activate new settings */ 1854 /* enable WMM or activate new settings */
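
The rewritten parser above collects all four AC parameter records before applying any of them, so one malformed record can reject the whole element. Each record packs AIFSN into the low nibble of its first byte and ECWmin/ECWmax as exponents into the second; a self-contained sketch of the decoding (plain C, made-up byte values):

#include <stdio.h>

/* contention window from its exponent form, as mac80211's ecw2cw() does */
static unsigned int ecw2cw(unsigned int ecw)
{
	return (1u << ecw) - 1;		/* ECW=4 -> CW=15 */
}

int main(void)
{
	unsigned char rec[4] = { 0x27, 0xa4, 0x00, 0x00 };	/* made up */
	unsigned int aifsn = rec[0] & 0x0f;
	unsigned int cw_min = ecw2cw(rec[1] & 0x0f);
	unsigned int cw_max = ecw2cw((rec[1] & 0xf0) >> 4);
	unsigned int txop = rec[2] | (rec[3] << 8);	/* little-endian u16 */

	if (aifsn < 2)		/* the hunk clamps an invalid AIFSN to 2 */
		aifsn = 2;
	if (cw_min > cw_max)	/* the hunk rejects the whole element */
		return 1;
	printf("aifs=%u cw=%u..%u txop=%u\n", aifsn, cw_min, cw_max, txop);
	return 0;
}

Buffering into params[IEEE80211_NUM_ACS] is what allows the early return false on a bad CWmin/CWmax pair without leaving the hardware half-configured.
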
@@ -2004,7 +1986,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
2004 ieee80211_bss_info_change_notify(sdata, bss_info_changed); 1986 ieee80211_bss_info_change_notify(sdata, bss_info_changed);
2005 1987
2006 mutex_lock(&local->iflist_mtx); 1988 mutex_lock(&local->iflist_mtx);
2007 ieee80211_recalc_ps(local, -1); 1989 ieee80211_recalc_ps(local);
2008 mutex_unlock(&local->iflist_mtx); 1990 mutex_unlock(&local->iflist_mtx);
2009 1991
2010 ieee80211_recalc_smps(sdata); 1992 ieee80211_recalc_smps(sdata);
@@ -2110,7 +2092,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
2110 ieee80211_bss_info_change_notify(sdata, changed); 2092 ieee80211_bss_info_change_notify(sdata, changed);
2111 2093
2112 /* disassociated - set to defaults now */ 2094 /* disassociated - set to defaults now */
2113 ieee80211_set_wmm_default(sdata, false); 2095 ieee80211_set_wmm_default(sdata, false, false);
2114 2096
2115 del_timer_sync(&sdata->u.mgd.conn_mon_timer); 2097 del_timer_sync(&sdata->u.mgd.conn_mon_timer);
2116 del_timer_sync(&sdata->u.mgd.bcn_mon_timer); 2098 del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
@@ -2172,7 +2154,7 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
2172 __ieee80211_stop_poll(sdata); 2154 __ieee80211_stop_poll(sdata);
2173 2155
2174 mutex_lock(&local->iflist_mtx); 2156 mutex_lock(&local->iflist_mtx);
2175 ieee80211_recalc_ps(local, -1); 2157 ieee80211_recalc_ps(local);
2176 mutex_unlock(&local->iflist_mtx); 2158 mutex_unlock(&local->iflist_mtx);
2177 2159
2178 if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) 2160 if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
@@ -2275,7 +2257,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
2275 2257
2276 if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { 2258 if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) {
2277 ifmgd->nullfunc_failed = false; 2259 ifmgd->nullfunc_failed = false;
2278 ieee80211_send_nullfunc(sdata->local, sdata, 0); 2260 ieee80211_send_nullfunc(sdata->local, sdata, false);
2279 } else { 2261 } else {
2280 int ssid_len; 2262 int ssid_len;
2281 2263
@@ -2348,7 +2330,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
2348 goto out; 2330 goto out;
2349 2331
2350 mutex_lock(&sdata->local->iflist_mtx); 2332 mutex_lock(&sdata->local->iflist_mtx);
2351 ieee80211_recalc_ps(sdata->local, -1); 2333 ieee80211_recalc_ps(sdata->local);
2352 mutex_unlock(&sdata->local->iflist_mtx); 2334 mutex_unlock(&sdata->local->iflist_mtx);
2353 2335
2354 ifmgd->probe_send_count = 0; 2336 ifmgd->probe_send_count = 0;
@@ -2453,15 +2435,9 @@ static void ieee80211_beacon_connection_loss_work(struct work_struct *work)
2453 container_of(work, struct ieee80211_sub_if_data, 2435 container_of(work, struct ieee80211_sub_if_data,
2454 u.mgd.beacon_connection_loss_work); 2436 u.mgd.beacon_connection_loss_work);
2455 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 2437 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2456 struct sta_info *sta;
2457 2438
2458 if (ifmgd->associated) { 2439 if (ifmgd->associated)
2459 rcu_read_lock(); 2440 ifmgd->beacon_loss_count++;
2460 sta = sta_info_get(sdata, ifmgd->bssid);
2461 if (sta)
2462 sta->beacon_loss_count++;
2463 rcu_read_unlock();
2464 }
2465 2441
2466 if (ifmgd->connection_loss) { 2442 if (ifmgd->connection_loss) {
2467 sdata_info(sdata, "Connection to AP %pM lost\n", 2443 sdata_info(sdata, "Connection to AP %pM lost\n",
@@ -3051,8 +3027,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
3051 3027
3052 rate_control_rate_init(sta); 3028 rate_control_rate_init(sta);
3053 3029
3054 if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) 3030 if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) {
3055 set_sta_flag(sta, WLAN_STA_MFP); 3031 set_sta_flag(sta, WLAN_STA_MFP);
3032 sta->sta.mfp = true;
3033 } else {
3034 sta->sta.mfp = false;
3035 }
3056 3036
3057 sta->sta.wme = elems.wmm_param && local->hw.queues >= IEEE80211_NUM_ACS; 3037 sta->sta.wme = elems.wmm_param && local->hw.queues >= IEEE80211_NUM_ACS;
3058 3038
@@ -3079,11 +3059,21 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
3079 */ 3059 */
3080 ifmgd->wmm_last_param_set = -1; 3060 ifmgd->wmm_last_param_set = -1;
3081 3061
3082 if (!(ifmgd->flags & IEEE80211_STA_DISABLE_WMM) && elems.wmm_param) 3062 if (ifmgd->flags & IEEE80211_STA_DISABLE_WMM) {
3083 ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, 3063 ieee80211_set_wmm_default(sdata, false, false);
3084 elems.wmm_param_len); 3064 } else if (!ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
3085 else 3065 elems.wmm_param_len)) {
3086 ieee80211_set_wmm_default(sdata, false); 3066 /* still enable QoS since we might have HT/VHT */
3067 ieee80211_set_wmm_default(sdata, false, true);
3068 /* set the disable-WMM flag in this case to disable
3069 * tracking WMM parameter changes in the beacon if
3070 * the parameters weren't actually valid. Doing so
3071 * avoids changing parameters very strangely when
3072 * the AP is going back and forth between valid and
3073 * invalid parameters.
3074 */
3075 ifmgd->flags |= IEEE80211_STA_DISABLE_WMM;
3076 }
3087 changed |= BSS_CHANGED_QOS; 3077 changed |= BSS_CHANGED_QOS;
3088 3078
3089 /* set AID and assoc capability, 3079 /* set AID and assoc capability,
@@ -3262,16 +3252,6 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
3262 if (ifmgd->associated && 3252 if (ifmgd->associated &&
3263 ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) 3253 ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
3264 ieee80211_reset_ap_probe(sdata); 3254 ieee80211_reset_ap_probe(sdata);
3265
3266 if (ifmgd->auth_data && !ifmgd->auth_data->bss->proberesp_ies &&
3267 ether_addr_equal(mgmt->bssid, ifmgd->auth_data->bss->bssid)) {
3268 /* got probe response, continue with auth */
3269 sdata_info(sdata, "direct probe responded\n");
3270 ifmgd->auth_data->tries = 0;
3271 ifmgd->auth_data->timeout = jiffies;
3272 ifmgd->auth_data->timeout_started = true;
3273 run_again(sdata, ifmgd->auth_data->timeout);
3274 }
3275} 3255}
3276 3256
3277/* 3257/*
@@ -3374,24 +3354,21 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
3374 bssid = ifmgd->associated->bssid; 3354 bssid = ifmgd->associated->bssid;
3375 3355
3376 /* Track average RSSI from the Beacon frames of the current AP */ 3356 /* Track average RSSI from the Beacon frames of the current AP */
3377 ifmgd->last_beacon_signal = rx_status->signal;
3378 if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) { 3357 if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) {
3379 ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE; 3358 ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE;
3380 ifmgd->ave_beacon_signal = rx_status->signal * 16; 3359 ewma_beacon_signal_init(&ifmgd->ave_beacon_signal);
3381 ifmgd->last_cqm_event_signal = 0; 3360 ifmgd->last_cqm_event_signal = 0;
3382 ifmgd->count_beacon_signal = 1; 3361 ifmgd->count_beacon_signal = 1;
3383 ifmgd->last_ave_beacon_signal = 0; 3362 ifmgd->last_ave_beacon_signal = 0;
3384 } else { 3363 } else {
3385 ifmgd->ave_beacon_signal =
3386 (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 +
3387 (16 - IEEE80211_SIGNAL_AVE_WEIGHT) *
3388 ifmgd->ave_beacon_signal) / 16;
3389 ifmgd->count_beacon_signal++; 3364 ifmgd->count_beacon_signal++;
3390 } 3365 }
3391 3366
3367 ewma_beacon_signal_add(&ifmgd->ave_beacon_signal, -rx_status->signal);
3368
3392 if (ifmgd->rssi_min_thold != ifmgd->rssi_max_thold && 3369 if (ifmgd->rssi_min_thold != ifmgd->rssi_max_thold &&
3393 ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) { 3370 ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) {
3394 int sig = ifmgd->ave_beacon_signal; 3371 int sig = -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal);
3395 int last_sig = ifmgd->last_ave_beacon_signal; 3372 int last_sig = ifmgd->last_ave_beacon_signal;
3396 struct ieee80211_event event = { 3373 struct ieee80211_event event = {
3397 .type = RSSI_EVENT, 3374 .type = RSSI_EVENT,
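
The open-coded average (the removed IEEE80211_SIGNAL_AVE_WEIGHT arithmetic) gives way to the kernel's generated EWMA helpers, with the signal stored negated so the unsigned average stays positive. A sketch of the pattern, assuming the DECLARE_EWMA(name, factor, weight) macro from <linux/average.h>; the factor/weight values and surrounding struct are illustrative, not mac80211's actual declaration:

#include <linux/average.h>
#include <linux/printk.h>

DECLARE_EWMA(beacon_signal, 4, 4)	/* generates ewma_beacon_signal_*() */

struct example_mgd {
	struct ewma_beacon_signal ave_beacon_signal;
};

static void example_reset(struct example_mgd *m)
{
	/* the RESET_SIGNAL_AVE path above re-inits the average like this */
	ewma_beacon_signal_init(&m->ave_beacon_signal);
}

static void example_track(struct example_mgd *m, int signal_dbm)
{
	/* store the negated dBm value so the unsigned average stays positive */
	ewma_beacon_signal_add(&m->ave_beacon_signal, -signal_dbm);

	/* negate again on read to recover dBm */
	pr_debug("avg beacon signal: %d dBm\n",
		 (int)-ewma_beacon_signal_read(&m->ave_beacon_signal));
}
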
@@ -3418,10 +3395,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
3418 if (bss_conf->cqm_rssi_thold && 3395 if (bss_conf->cqm_rssi_thold &&
3419 ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT && 3396 ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT &&
3420 !(sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) { 3397 !(sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) {
3421 int sig = ifmgd->ave_beacon_signal / 16; 3398 int sig = -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal);
3422 int last_event = ifmgd->last_cqm_event_signal; 3399 int last_event = ifmgd->last_cqm_event_signal;
3423 int thold = bss_conf->cqm_rssi_thold; 3400 int thold = bss_conf->cqm_rssi_thold;
3424 int hyst = bss_conf->cqm_rssi_hyst; 3401 int hyst = bss_conf->cqm_rssi_hyst;
3402
3425 if (sig < thold && 3403 if (sig < thold &&
3426 (last_event == 0 || sig < last_event - hyst)) { 3404 (last_event == 0 || sig < last_event - hyst)) {
3427 ifmgd->last_cqm_event_signal = sig; 3405 ifmgd->last_cqm_event_signal = sig;
@@ -3456,31 +3434,27 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
3456 len - baselen, false, &elems, 3434 len - baselen, false, &elems,
3457 care_about_ies, ncrc); 3435 care_about_ies, ncrc);
3458 3436
3459 if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) { 3437 if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
3460 bool directed_tim = ieee80211_check_tim(elems.tim, 3438 ieee80211_check_tim(elems.tim, elems.tim_len, ifmgd->aid)) {
3461 elems.tim_len, 3439 if (local->hw.conf.dynamic_ps_timeout > 0) {
3462 ifmgd->aid); 3440 if (local->hw.conf.flags & IEEE80211_CONF_PS) {
3463 if (directed_tim) { 3441 local->hw.conf.flags &= ~IEEE80211_CONF_PS;
3464 if (local->hw.conf.dynamic_ps_timeout > 0) { 3442 ieee80211_hw_config(local,
3465 if (local->hw.conf.flags & IEEE80211_CONF_PS) { 3443 IEEE80211_CONF_CHANGE_PS);
3466 local->hw.conf.flags &= ~IEEE80211_CONF_PS;
3467 ieee80211_hw_config(local,
3468 IEEE80211_CONF_CHANGE_PS);
3469 }
3470 ieee80211_send_nullfunc(local, sdata, 0);
3471 } else if (!local->pspolling && sdata->u.mgd.powersave) {
3472 local->pspolling = true;
3473
3474 /*
3475 * Here it is assumed that the driver will be
3476 * able to send a ps-poll frame and receive a
3477 * response even though power save mode is
3478 * enabled, but some drivers might need
3479 * to disable power save here. This needs
3480 * to be investigated.
3481 */
3482 ieee80211_send_pspoll(local, sdata);
3483 } 3444 }
3445 ieee80211_send_nullfunc(local, sdata, false);
3446 } else if (!local->pspolling && sdata->u.mgd.powersave) {
3447 local->pspolling = true;
3448
3449 /*
3450 * Here is assumed that the driver will be
3451 * able to send ps-poll frame and receive a
3452 * response even though power save mode is
3453 * enabled, but some drivers might require
3454 * to disable power save here. This needs
3455 * to be investigated.
3456 */
3457 ieee80211_send_pspoll(local, sdata);
3484 } 3458 }
3485 } 3459 }
3486 3460
@@ -3567,7 +3541,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
3567 ifmgd->have_beacon = true; 3541 ifmgd->have_beacon = true;
3568 3542
3569 mutex_lock(&local->iflist_mtx); 3543 mutex_lock(&local->iflist_mtx);
3570 ieee80211_recalc_ps(local, -1); 3544 ieee80211_recalc_ps(local);
3571 mutex_unlock(&local->iflist_mtx); 3545 mutex_unlock(&local->iflist_mtx);
3572 3546
3573 ieee80211_recalc_ps_vif(sdata); 3547 ieee80211_recalc_ps_vif(sdata);
@@ -3717,12 +3691,14 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
3717 reason); 3691 reason);
3718} 3692}
3719 3693
3720static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) 3694static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
3721{ 3695{
3722 struct ieee80211_local *local = sdata->local; 3696 struct ieee80211_local *local = sdata->local;
3723 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 3697 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
3724 struct ieee80211_mgd_auth_data *auth_data = ifmgd->auth_data; 3698 struct ieee80211_mgd_auth_data *auth_data = ifmgd->auth_data;
3725 u32 tx_flags = 0; 3699 u32 tx_flags = 0;
3700 u16 trans = 1;
3701 u16 status = 0;
3726 3702
3727 sdata_assert_lock(sdata); 3703 sdata_assert_lock(sdata);
3728 3704
@@ -3746,54 +3722,27 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
3746 3722
3747 drv_mgd_prepare_tx(local, sdata); 3723 drv_mgd_prepare_tx(local, sdata);
3748 3724
3749 if (auth_data->bss->proberesp_ies) { 3725 sdata_info(sdata, "send auth to %pM (try %d/%d)\n",
3750 u16 trans = 1; 3726 auth_data->bss->bssid, auth_data->tries,
3751 u16 status = 0; 3727 IEEE80211_AUTH_MAX_TRIES);
3752
3753 sdata_info(sdata, "send auth to %pM (try %d/%d)\n",
3754 auth_data->bss->bssid, auth_data->tries,
3755 IEEE80211_AUTH_MAX_TRIES);
3756
3757 auth_data->expected_transaction = 2;
3758 3728
3759 if (auth_data->algorithm == WLAN_AUTH_SAE) { 3729 auth_data->expected_transaction = 2;
3760 trans = auth_data->sae_trans;
3761 status = auth_data->sae_status;
3762 auth_data->expected_transaction = trans;
3763 }
3764 3730
3765 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) 3731 if (auth_data->algorithm == WLAN_AUTH_SAE) {
3766 tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | 3732 trans = auth_data->sae_trans;
3767 IEEE80211_TX_INTFL_MLME_CONN_TX; 3733 status = auth_data->sae_status;
3768 3734 auth_data->expected_transaction = trans;
3769 ieee80211_send_auth(sdata, trans, auth_data->algorithm, status, 3735 }
3770 auth_data->data, auth_data->data_len,
3771 auth_data->bss->bssid,
3772 auth_data->bss->bssid, NULL, 0, 0,
3773 tx_flags);
3774 } else {
3775 const u8 *ssidie;
3776 3736
3777 sdata_info(sdata, "direct probe to %pM (try %d/%i)\n", 3737 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
3778 auth_data->bss->bssid, auth_data->tries, 3738 tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
3779 IEEE80211_AUTH_MAX_TRIES); 3739 IEEE80211_TX_INTFL_MLME_CONN_TX;
3780 3740
3781 rcu_read_lock(); 3741 ieee80211_send_auth(sdata, trans, auth_data->algorithm, status,
3782 ssidie = ieee80211_bss_get_ie(auth_data->bss, WLAN_EID_SSID); 3742 auth_data->data, auth_data->data_len,
3783 if (!ssidie) { 3743 auth_data->bss->bssid,
3784 rcu_read_unlock(); 3744 auth_data->bss->bssid, NULL, 0, 0,
3785 return -EINVAL; 3745 tx_flags);
3786 }
3787 /*
3788 * Direct probe is sent to broadcast address as some APs
3789 * will not answer to direct packet in unassociated state.
3790 */
3791 ieee80211_send_probe_req(sdata, sdata->vif.addr, NULL,
3792 ssidie + 2, ssidie[1],
3793 NULL, 0, (u32) -1, true, 0,
3794 auth_data->bss->channel, false);
3795 rcu_read_unlock();
3796 }
3797 3746
3798 if (tx_flags == 0) { 3747 if (tx_flags == 0) {
3799 auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; 3748 auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
@@ -3874,8 +3823,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
3874 bool status_acked = ifmgd->status_acked; 3823 bool status_acked = ifmgd->status_acked;
3875 3824
3876 ifmgd->status_received = false; 3825 ifmgd->status_received = false;
3877 if (ifmgd->auth_data && 3826 if (ifmgd->auth_data && ieee80211_is_auth(fc)) {
3878 (ieee80211_is_probe_req(fc) || ieee80211_is_auth(fc))) {
3879 if (status_acked) { 3827 if (status_acked) {
3880 ifmgd->auth_data->timeout = 3828 ifmgd->auth_data->timeout =
3881 jiffies + IEEE80211_AUTH_TIMEOUT_SHORT; 3829 jiffies + IEEE80211_AUTH_TIMEOUT_SHORT;
@@ -3906,7 +3854,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
3906 * so let's just kill the auth data 3854 * so let's just kill the auth data
3907 */ 3855 */
3908 ieee80211_destroy_auth_data(sdata, false); 3856 ieee80211_destroy_auth_data(sdata, false);
3909 } else if (ieee80211_probe_auth(sdata)) { 3857 } else if (ieee80211_auth(sdata)) {
3910 u8 bssid[ETH_ALEN]; 3858 u8 bssid[ETH_ALEN];
3911 struct ieee80211_event event = { 3859 struct ieee80211_event event = {
3912 .type = MLME_EVENT, 3860 .type = MLME_EVENT,
@@ -4197,21 +4145,6 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
4197 rcu_read_unlock(); 4145 rcu_read_unlock();
4198} 4146}
4199 4147
4200int ieee80211_max_network_latency(struct notifier_block *nb,
4201 unsigned long data, void *dummy)
4202{
4203 s32 latency_usec = (s32) data;
4204 struct ieee80211_local *local =
4205 container_of(nb, struct ieee80211_local,
4206 network_latency_notifier);
4207
4208 mutex_lock(&local->iflist_mtx);
4209 ieee80211_recalc_ps(local, latency_usec);
4210 mutex_unlock(&local->iflist_mtx);
4211
4212 return NOTIFY_OK;
4213}
4214
4215static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, 4148static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata,
4216 struct cfg80211_bss *cbss) 4149 struct cfg80211_bss *cbss)
4217{ 4150{
@@ -4613,7 +4546,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
4613 if (err) 4546 if (err)
4614 goto err_clear; 4547 goto err_clear;
4615 4548
4616 err = ieee80211_probe_auth(sdata); 4549 err = ieee80211_auth(sdata);
4617 if (err) { 4550 if (err) {
4618 sta_info_destroy_addr(sdata, req->bss->bssid); 4551 sta_info_destroy_addr(sdata, req->bss->bssid);
4619 goto err_clear; 4552 goto err_clear;
@@ -4635,44 +4568,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
4635 return err; 4568 return err;
4636} 4569}
4637 4570
4638static bool ieee80211_usable_wmm_params(struct ieee80211_sub_if_data *sdata,
4639 const u8 *wmm_param, int len)
4640{
4641 const u8 *pos;
4642 size_t left;
4643
4644 if (len < 8)
4645 return false;
4646
4647 if (wmm_param[5] != 1 /* version */)
4648 return false;
4649
4650 pos = wmm_param + 8;
4651 left = len - 8;
4652
4653 for (; left >= 4; left -= 4, pos += 4) {
4654 u8 aifsn = pos[0] & 0x0f;
4655 u8 ecwmin = pos[1] & 0x0f;
4656 u8 ecwmax = (pos[1] & 0xf0) >> 4;
4657 int aci = (pos[0] >> 5) & 0x03;
4658
4659 if (aifsn < 2) {
4660 sdata_info(sdata,
4661 "AP has invalid WMM params (AIFSN=%d for ACI %d), disabling WMM\n",
4662 aifsn, aci);
4663 return false;
4664 }
4665 if (ecwmin > ecwmax) {
4666 sdata_info(sdata,
4667 "AP has invalid WMM params (ECWmin/max=%d/%d for ACI %d), disabling WMM\n",
4668 ecwmin, ecwmax, aci);
4669 return false;
4670 }
4671 }
4672
4673 return true;
4674}
4675
4676int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, 4571int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4677 struct cfg80211_assoc_request *req) 4572 struct cfg80211_assoc_request *req)
4678{ 4573{
@@ -4737,39 +4632,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4737 4632
4738 assoc_data->wmm = bss->wmm_used && 4633 assoc_data->wmm = bss->wmm_used &&
4739 (local->hw.queues >= IEEE80211_NUM_ACS); 4634 (local->hw.queues >= IEEE80211_NUM_ACS);
4740 if (assoc_data->wmm) {
4741 /* try to check validity of WMM params IE */
4742 const struct cfg80211_bss_ies *ies;
4743 const u8 *wp, *start, *end;
4744
4745 rcu_read_lock();
4746 ies = rcu_dereference(req->bss->ies);
4747 start = ies->data;
4748 end = start + ies->len;
4749
4750 while (true) {
4751 wp = cfg80211_find_vendor_ie(
4752 WLAN_OUI_MICROSOFT,
4753 WLAN_OUI_TYPE_MICROSOFT_WMM,
4754 start, end - start);
4755 if (!wp)
4756 break;
4757 start = wp + wp[1] + 2;
4758 /* if this IE is too short, try the next */
4759 if (wp[1] <= 4)
4760 continue;
4761 /* if this IE is WMM params, we found what we wanted */
4762 if (wp[6] == 1)
4763 break;
4764 }
4765
4766 if (!wp || !ieee80211_usable_wmm_params(sdata, wp + 2,
4767 wp[1] - 2)) {
4768 assoc_data->wmm = false;
4769 ifmgd->flags |= IEEE80211_STA_DISABLE_WMM;
4770 }
4771 rcu_read_unlock();
4772 }
4773 4635
4774 /* 4636 /*
4775 * IEEE802.11n does not allow TKIP/WEP as pairwise ciphers in HT mode. 4637 * IEEE802.11n does not allow TKIP/WEP as pairwise ciphers in HT mode.
@@ -5028,6 +4890,25 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
5028 return 0; 4890 return 0;
5029 } 4891 }
5030 4892
4893 if (ifmgd->assoc_data &&
4894 ether_addr_equal(ifmgd->assoc_data->bss->bssid, req->bssid)) {
4895 sdata_info(sdata,
4896 "aborting association with %pM by local choice (Reason: %u=%s)\n",
4897 req->bssid, req->reason_code,
4898 ieee80211_get_reason_code_string(req->reason_code));
4899
4900 drv_mgd_prepare_tx(sdata->local, sdata);
4901 ieee80211_send_deauth_disassoc(sdata, req->bssid,
4902 IEEE80211_STYPE_DEAUTH,
4903 req->reason_code, tx,
4904 frame_buf);
4905 ieee80211_destroy_assoc_data(sdata, false);
4906 ieee80211_report_disconnect(sdata, frame_buf,
4907 sizeof(frame_buf), true,
4908 req->reason_code);
4909 return 0;
4910 }
4911
5031 if (ifmgd->associated && 4912 if (ifmgd->associated &&
5032 ether_addr_equal(ifmgd->associated->bssid, req->bssid)) { 4913 ether_addr_equal(ifmgd->associated->bssid, req->bssid)) {
5033 sdata_info(sdata, 4914 sdata_info(sdata,
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index 573b81a1fb2d..0be0aadfc559 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -75,7 +75,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata,
75 if (!sta) 75 if (!sta)
76 return; 76 return;
77 77
78 sta->last_rx = jiffies; 78 sta->rx_stats.last_rx = jiffies;
79 79
80 /* Add only mandatory rates for now */ 80 /* Add only mandatory rates for now */
81 sband = local->hw.wiphy->bands[band]; 81 sband = local->hw.wiphy->bands[band];
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index f2c75cf491fc..04401037140e 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -57,7 +57,7 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
57 * to send a new nullfunc frame to inform the AP that we 57 * to send a new nullfunc frame to inform the AP that we
58 * are again sleeping. 58 * are again sleeping.
59 */ 59 */
60 ieee80211_send_nullfunc(local, sdata, 1); 60 ieee80211_send_nullfunc(local, sdata, true);
61} 61}
62 62
63/* inform AP that we are awake again, unless power save is enabled */ 63/* inform AP that we are awake again, unless power save is enabled */
@@ -66,7 +66,7 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
66 struct ieee80211_local *local = sdata->local; 66 struct ieee80211_local *local = sdata->local;
67 67
68 if (!local->ps_sdata) 68 if (!local->ps_sdata)
69 ieee80211_send_nullfunc(local, sdata, 0); 69 ieee80211_send_nullfunc(local, sdata, false);
70 else if (local->offchannel_ps_enabled) { 70 else if (local->offchannel_ps_enabled) {
71 /* 71 /*
72 * In !IEEE80211_HW_PS_NULLFUNC_STACK case the hardware 72 * In !IEEE80211_HW_PS_NULLFUNC_STACK case the hardware
@@ -93,7 +93,7 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
93 * restart the timer now and send a nullfunc frame to inform 93 * restart the timer now and send a nullfunc frame to inform
94 * the AP that we are awake. 94 * the AP that we are awake.
95 */ 95 */
96 ieee80211_send_nullfunc(local, sdata, 0); 96 ieee80211_send_nullfunc(local, sdata, false);
97 mod_timer(&local->dynamic_ps_timer, jiffies + 97 mod_timer(&local->dynamic_ps_timer, jiffies +
98 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); 98 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
99 } 99 }
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index b676b9fa707b..00a43a70e1fc 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -6,6 +6,13 @@
6#include "driver-ops.h" 6#include "driver-ops.h"
7#include "led.h" 7#include "led.h"
8 8
9static void ieee80211_sched_scan_cancel(struct ieee80211_local *local)
10{
11 if (ieee80211_request_sched_scan_stop(local))
12 return;
13 cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
14}
15
9int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) 16int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
10{ 17{
11 struct ieee80211_local *local = hw_to_local(hw); 18 struct ieee80211_local *local = hw_to_local(hw);
@@ -23,7 +30,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
23 30
24 ieee80211_del_virtual_monitor(local); 31 ieee80211_del_virtual_monitor(local);
25 32
26 if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) { 33 if (ieee80211_hw_check(hw, AMPDU_AGGREGATION) &&
34 !(wowlan && wowlan->any)) {
27 mutex_lock(&local->sta_mtx); 35 mutex_lock(&local->sta_mtx);
28 list_for_each_entry(sta, &local->sta_list, list) { 36 list_for_each_entry(sta, &local->sta_list, list) {
29 set_sta_flag(sta, WLAN_STA_BLOCK_BA); 37 set_sta_flag(sta, WLAN_STA_BLOCK_BA);
@@ -33,6 +41,10 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
33 mutex_unlock(&local->sta_mtx); 41 mutex_unlock(&local->sta_mtx);
34 } 42 }
35 43
44 /* keep sched_scan only in case of 'any' trigger */
45 if (!(wowlan && wowlan->any))
46 ieee80211_sched_scan_cancel(local);
47
36 ieee80211_stop_queues_by_reason(hw, 48 ieee80211_stop_queues_by_reason(hw,
37 IEEE80211_MAX_QUEUE_MAP, 49 IEEE80211_MAX_QUEUE_MAP,
38 IEEE80211_QUEUE_STOP_REASON_SUSPEND, 50 IEEE80211_QUEUE_STOP_REASON_SUSPEND,
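
The suspend path above now cancels a scheduled scan except when the only WoWLAN trigger is 'any', in which case the scan is presumably left for the device to keep servicing while suspended. A tiny user-space sketch of the guard, with a stand-in for cfg80211's trigger struct:

#include <stdbool.h>
#include <stddef.h>

struct wowlan_triggers {	/* stand-in for struct cfg80211_wowlan */
	bool any;		/* wake up on any activity */
};

static bool keep_sched_scan(const struct wowlan_triggers *wowlan)
{
	/* wowlan may be NULL when suspending without any triggers set */
	return wowlan != NULL && wowlan->any;
}

int main(void)
{
	struct wowlan_triggers t = { .any = true };

	return keep_sched_scan(&t) ? 0 : 1;
}
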
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 9ce8883d5f44..a4e2f4e67f94 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -305,7 +305,10 @@ static void __rate_control_send_low(struct ieee80211_hw *hw,
305 info->control.rates[0].idx = i; 305 info->control.rates[0].idx = i;
306 break; 306 break;
307 } 307 }
308 WARN_ON_ONCE(i == sband->n_bitrates); 308 WARN_ONCE(i == sband->n_bitrates,
309 "no supported rates (0x%x) in rate_mask 0x%x with flags 0x%x\n",
310 sta ? sta->supp_rates[sband->band] : -1,
311 rate_mask, rate_flags);
309 312
310 info->control.rates[0].count = 313 info->control.rates[0].count =
311 (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 314 (info->flags & IEEE80211_TX_CTL_NO_ACK) ?
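
WARN_ONCE() differs from the replaced WARN_ON_ONCE() only in taking a format string, so the one-time backtrace also captures the rate masks that produced the empty selection. A minimal kernel-C sketch of the two forms (illustrative arguments):

#include <linux/bug.h>
#include <linux/types.h>

static void example_warn(int i, int n_bitrates, u32 rate_mask)
{
	/* old form: one-shot backtrace, no context in the log */
	WARN_ON_ONCE(i == n_bitrates);

	/* new form: one-shot backtrace plus the state that triggered it */
	WARN_ONCE(i == n_bitrates,
		  "no supported rates in rate_mask 0x%x\n", rate_mask);
}
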
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 1db5f7c3318a..820b0abc9c0d 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -85,12 +85,10 @@ minstrel_stats_open(struct inode *inode, struct file *file)
85 file->private_data = ms; 85 file->private_data = ms;
86 p = ms->buf; 86 p = ms->buf;
87 p += sprintf(p, "\n"); 87 p += sprintf(p, "\n");
88 p += sprintf(p, "best __________rate_________ ______" 88 p += sprintf(p,
89 "statistics______ ________last_______ " 89 "best __________rate_________ ________statistics________ ________last_______ ______sum-of________\n");
90 "______sum-of________\n"); 90 p += sprintf(p,
91 p += sprintf(p, "rate [name idx airtime max_tp] [ ø(tp) ø(prob) " 91 "rate [name idx airtime max_tp] [avg(tp) avg(prob) sd(prob)] [prob.|retry|suc|att] [#success | #attempts]\n");
92 "sd(prob)] [prob.|retry|suc|att] "
93 "[#success | #attempts]\n");
94 92
95 for (i = 0; i < mi->n_rates; i++) { 93 for (i = 0; i < mi->n_rates; i++) {
96 struct minstrel_rate *mr = &mi->r[i]; 94 struct minstrel_rate *mr = &mi->r[i];
@@ -112,7 +110,7 @@ minstrel_stats_open(struct inode *inode, struct file *file)
112 prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); 110 prob = MINSTREL_TRUNC(mrs->cur_prob * 1000);
113 eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); 111 eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
114 112
115 p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u" 113 p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u"
116 " %3u.%1u %3u %3u %-3u " 114 " %3u.%1u %3u %3u %-3u "
117 "%9llu %-9llu\n", 115 "%9llu %-9llu\n",
118 tp_max / 10, tp_max % 10, 116 tp_max / 10, tp_max % 10,
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 6822ce0f95e5..5320e35ed3d0 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -86,7 +86,7 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
86 prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); 86 prob = MINSTREL_TRUNC(mrs->cur_prob * 1000);
87 eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); 87 eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
88 88
89 p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u" 89 p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u"
90 " %3u.%1u %3u %3u %-3u " 90 " %3u.%1u %3u %3u %-3u "
91 "%9llu %-9llu\n", 91 "%9llu %-9llu\n",
92 tp_max / 10, tp_max % 10, 92 tp_max / 10, tp_max % 10,
@@ -129,12 +129,10 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file)
129 p = ms->buf; 129 p = ms->buf;
130 130
131 p += sprintf(p, "\n"); 131 p += sprintf(p, "\n");
132 p += sprintf(p, " best ____________rate__________ " 132 p += sprintf(p,
133 "______statistics______ ________last_______ " 133 " best ____________rate__________ ________statistics________ ________last_______ ______sum-of________\n");
134 "______sum-of________\n"); 134 p += sprintf(p,
135 p += sprintf(p, "mode guard # rate [name idx airtime max_tp] " 135 "mode guard # rate [name idx airtime max_tp] [avg(tp) avg(prob) sd(prob)] [prob.|retry|suc|att] [#success | #attempts]\n");
136 "[ ø(tp) ø(prob) sd(prob)] [prob.|retry|suc|att] [#success | "
137 "#attempts]\n");
138 136
139 p = minstrel_ht_stats_dump(mi, MINSTREL_CCK_GROUP, p); 137 p = minstrel_ht_stats_dump(mi, MINSTREL_CCK_GROUP, p);
140 for (i = 0; i < MINSTREL_CCK_GROUP; i++) 138 for (i = 0; i < MINSTREL_CCK_GROUP; i++)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5bc0b88d9eb1..8bae5de0dc44 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1113,16 +1113,16 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
1113 is_multicast_ether_addr(hdr->addr1)) 1113 is_multicast_ether_addr(hdr->addr1))
1114 return RX_CONTINUE; 1114 return RX_CONTINUE;
1115 1115
1116 if (rx->sta) { 1116 if (!rx->sta)
1117 if (unlikely(ieee80211_has_retry(hdr->frame_control) && 1117 return RX_CONTINUE;
1118 rx->sta->last_seq_ctrl[rx->seqno_idx] == 1118
1119 hdr->seq_ctrl)) { 1119 if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
1120 I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); 1120 rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) {
1121 rx->sta->num_duplicates++; 1121 I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount);
1122 return RX_DROP_UNUSABLE; 1122 rx->sta->rx_stats.num_duplicates++;
1123 } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { 1123 return RX_DROP_UNUSABLE;
1124 rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; 1124 } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) {
1125 } 1125 rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl;
1126 } 1126 }
1127 1127
1128 return RX_CONTINUE; 1128 return RX_CONTINUE;
@@ -1396,51 +1396,56 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1396 NL80211_IFTYPE_ADHOC); 1396 NL80211_IFTYPE_ADHOC);
1397 if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) && 1397 if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) &&
1398 test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { 1398 test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
1399 sta->last_rx = jiffies; 1399 sta->rx_stats.last_rx = jiffies;
1400 if (ieee80211_is_data(hdr->frame_control) && 1400 if (ieee80211_is_data(hdr->frame_control) &&
1401 !is_multicast_ether_addr(hdr->addr1)) { 1401 !is_multicast_ether_addr(hdr->addr1)) {
1402 sta->last_rx_rate_idx = status->rate_idx; 1402 sta->rx_stats.last_rate_idx =
1403 sta->last_rx_rate_flag = status->flag; 1403 status->rate_idx;
1404 sta->last_rx_rate_vht_flag = status->vht_flag; 1404 sta->rx_stats.last_rate_flag =
1405 sta->last_rx_rate_vht_nss = status->vht_nss; 1405 status->flag;
1406 sta->rx_stats.last_rate_vht_flag =
1407 status->vht_flag;
1408 sta->rx_stats.last_rate_vht_nss =
1409 status->vht_nss;
1406 } 1410 }
1407 } 1411 }
1408 } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { 1412 } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) {
1409 sta->last_rx = jiffies; 1413 sta->rx_stats.last_rx = jiffies;
1410 } else if (!is_multicast_ether_addr(hdr->addr1)) { 1414 } else if (!is_multicast_ether_addr(hdr->addr1)) {
1411 /* 1415 /*
1412 * Mesh beacons will update last_rx if they are found to 1416 * Mesh beacons will update last_rx if they are found to
1413 * match the current local configuration when processed. 1417 * match the current local configuration when processed.
1414 */ 1418 */
1415 sta->last_rx = jiffies; 1419 sta->rx_stats.last_rx = jiffies;
1416 if (ieee80211_is_data(hdr->frame_control)) { 1420 if (ieee80211_is_data(hdr->frame_control)) {
1417 sta->last_rx_rate_idx = status->rate_idx; 1421 sta->rx_stats.last_rate_idx = status->rate_idx;
1418 sta->last_rx_rate_flag = status->flag; 1422 sta->rx_stats.last_rate_flag = status->flag;
1419 sta->last_rx_rate_vht_flag = status->vht_flag; 1423 sta->rx_stats.last_rate_vht_flag = status->vht_flag;
1420 sta->last_rx_rate_vht_nss = status->vht_nss; 1424 sta->rx_stats.last_rate_vht_nss = status->vht_nss;
1421 } 1425 }
1422 } 1426 }
1423 1427
1424 if (rx->sdata->vif.type == NL80211_IFTYPE_STATION) 1428 if (rx->sdata->vif.type == NL80211_IFTYPE_STATION)
1425 ieee80211_sta_rx_notify(rx->sdata, hdr); 1429 ieee80211_sta_rx_notify(rx->sdata, hdr);
1426 1430
1427 sta->rx_fragments++; 1431 sta->rx_stats.fragments++;
1428 sta->rx_bytes += rx->skb->len; 1432 sta->rx_stats.bytes += rx->skb->len;
1429 if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { 1433 if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
1430 sta->last_signal = status->signal; 1434 sta->rx_stats.last_signal = status->signal;
1431 ewma_signal_add(&sta->avg_signal, -status->signal); 1435 ewma_signal_add(&sta->rx_stats.avg_signal, -status->signal);
1432 } 1436 }
1433 1437
1434 if (status->chains) { 1438 if (status->chains) {
1435 sta->chains = status->chains; 1439 sta->rx_stats.chains = status->chains;
1436 for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { 1440 for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) {
1437 int signal = status->chain_signal[i]; 1441 int signal = status->chain_signal[i];
1438 1442
1439 if (!(status->chains & BIT(i))) 1443 if (!(status->chains & BIT(i)))
1440 continue; 1444 continue;
1441 1445
1442 sta->chain_signal_last[i] = signal; 1446 sta->rx_stats.chain_signal_last[i] = signal;
1443 ewma_signal_add(&sta->chain_signal_avg[i], -signal); 1447 ewma_signal_add(&sta->rx_stats.chain_signal_avg[i],
1448 -signal);
1444 } 1449 }
1445 } 1450 }
1446 1451
@@ -1500,7 +1505,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1500 * Update counter and free packet here to avoid 1505 * Update counter and free packet here to avoid
1501 * counting this as a dropped packet. 1506 * counting this as a dropped packet.
1502 */ 1507 */
1503 sta->rx_packets++; 1508 sta->rx_stats.packets++;
1504 dev_kfree_skb(rx->skb); 1509 dev_kfree_skb(rx->skb);
1505 return RX_QUEUED; 1510 return RX_QUEUED;
1506 } 1511 }
@@ -1922,7 +1927,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1922 ieee80211_led_rx(rx->local); 1927 ieee80211_led_rx(rx->local);
1923 out_no_led: 1928 out_no_led:
1924 if (rx->sta) 1929 if (rx->sta)
1925 rx->sta->rx_packets++; 1930 rx->sta->rx_stats.packets++;
1926 return RX_CONTINUE; 1931 return RX_CONTINUE;
1927} 1932}
1928 1933
@@ -2376,7 +2381,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
2376 * for non-QoS-data frames. Here we know it's a data 2381 * for non-QoS-data frames. Here we know it's a data
2377 * frame, so count MSDUs. 2382 * frame, so count MSDUs.
2378 */ 2383 */
2379 rx->sta->rx_msdu[rx->seqno_idx]++; 2384 rx->sta->rx_stats.msdu[rx->seqno_idx]++;
2380 } 2385 }
2381 2386
2382 /* 2387 /*
@@ -2413,7 +2418,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
2413 skb_queue_tail(&local->skb_queue_tdls_chsw, rx->skb); 2418 skb_queue_tail(&local->skb_queue_tdls_chsw, rx->skb);
2414 schedule_work(&local->tdls_chsw_work); 2419 schedule_work(&local->tdls_chsw_work);
2415 if (rx->sta) 2420 if (rx->sta)
2416 rx->sta->rx_packets++; 2421 rx->sta->rx_stats.packets++;
2417 2422
2418 return RX_QUEUED; 2423 return RX_QUEUED;
2419 } 2424 }
@@ -2875,7 +2880,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2875 2880
2876 handled: 2881 handled:
2877 if (rx->sta) 2882 if (rx->sta)
2878 rx->sta->rx_packets++; 2883 rx->sta->rx_stats.packets++;
2879 dev_kfree_skb(rx->skb); 2884 dev_kfree_skb(rx->skb);
2880 return RX_QUEUED; 2885 return RX_QUEUED;
2881 2886
@@ -2884,7 +2889,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2884 skb_queue_tail(&sdata->skb_queue, rx->skb); 2889 skb_queue_tail(&sdata->skb_queue, rx->skb);
2885 ieee80211_queue_work(&local->hw, &sdata->work); 2890 ieee80211_queue_work(&local->hw, &sdata->work);
2886 if (rx->sta) 2891 if (rx->sta)
2887 rx->sta->rx_packets++; 2892 rx->sta->rx_stats.packets++;
2888 return RX_QUEUED; 2893 return RX_QUEUED;
2889} 2894}
2890 2895
@@ -2911,7 +2916,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
2911 if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig, 2916 if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig,
2912 rx->skb->data, rx->skb->len, 0)) { 2917 rx->skb->data, rx->skb->len, 0)) {
2913 if (rx->sta) 2918 if (rx->sta)
2914 rx->sta->rx_packets++; 2919 rx->sta->rx_stats.packets++;
2915 dev_kfree_skb(rx->skb); 2920 dev_kfree_skb(rx->skb);
2916 return RX_QUEUED; 2921 return RX_QUEUED;
2917 } 2922 }
@@ -3030,7 +3035,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
3030 skb_queue_tail(&sdata->skb_queue, rx->skb); 3035 skb_queue_tail(&sdata->skb_queue, rx->skb);
3031 ieee80211_queue_work(&rx->local->hw, &sdata->work); 3036 ieee80211_queue_work(&rx->local->hw, &sdata->work);
3032 if (rx->sta) 3037 if (rx->sta)
3033 rx->sta->rx_packets++; 3038 rx->sta->rx_stats.packets++;
3034 3039
3035 return RX_QUEUED; 3040 return RX_QUEUED;
3036} 3041}
@@ -3112,7 +3117,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
3112 case RX_DROP_MONITOR: 3117 case RX_DROP_MONITOR:
3113 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); 3118 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
3114 if (rx->sta) 3119 if (rx->sta)
3115 rx->sta->rx_dropped++; 3120 rx->sta->rx_stats.dropped++;
3116 /* fall through */ 3121 /* fall through */
3117 case RX_CONTINUE: { 3122 case RX_CONTINUE: {
3118 struct ieee80211_rate *rate = NULL; 3123 struct ieee80211_rate *rate = NULL;
@@ -3132,7 +3137,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
3132 case RX_DROP_UNUSABLE: 3137 case RX_DROP_UNUSABLE:
3133 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); 3138 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
3134 if (rx->sta) 3139 if (rx->sta)
3135 rx->sta->rx_dropped++; 3140 rx->sta->rx_stats.dropped++;
3136 dev_kfree_skb(rx->skb); 3141 dev_kfree_skb(rx->skb);
3137 break; 3142 break;
3138 case RX_QUEUED: 3143 case RX_QUEUED:
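
Note on the rx.c hunks above: every per-station RX counter now lives under
sta->rx_stats and is written only from the RX path, hence no locking. A
minimal sketch of the resulting update pattern -- the helper name
sta_rx_account is hypothetical, the field accesses mirror the hunks:

	static void sta_rx_account(struct sta_info *sta, struct sk_buff *skb,
				   struct ieee80211_rx_status *status)
	{
		sta->rx_stats.fragments++;
		sta->rx_stats.bytes += skb->len;
		if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
			sta->rx_stats.last_signal = status->signal;
			/* ewma stores the negated signal; readers negate again */
			ewma_signal_add(&sta->rx_stats.avg_signal, -status->signal);
		}
	}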
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 11d0901ebb7b..4aeca4b0c3cb 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -16,7 +16,6 @@
16#include <linux/if_arp.h> 16#include <linux/if_arp.h>
17#include <linux/etherdevice.h> 17#include <linux/etherdevice.h>
18#include <linux/rtnetlink.h> 18#include <linux/rtnetlink.h>
19#include <linux/pm_qos.h>
20#include <net/sch_generic.h> 19#include <net/sch_generic.h>
21#include <linux/slab.h> 20#include <linux/slab.h>
22#include <linux/export.h> 21#include <linux/export.h>
@@ -67,24 +66,23 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
67 struct cfg80211_bss *cbss; 66 struct cfg80211_bss *cbss;
68 struct ieee80211_bss *bss; 67 struct ieee80211_bss *bss;
69 int clen, srlen; 68 int clen, srlen;
70 enum nl80211_bss_scan_width scan_width; 69 struct cfg80211_inform_bss bss_meta = {};
71 s32 signal = 0;
72 bool signal_valid; 70 bool signal_valid;
73 71
74 if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) 72 if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
75 signal = rx_status->signal * 100; 73 bss_meta.signal = rx_status->signal * 100;
76 else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC)) 74 else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC))
77 signal = (rx_status->signal * 100) / local->hw.max_signal; 75 bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal;
78 76
79 scan_width = NL80211_BSS_CHAN_WIDTH_20; 77 bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_20;
80 if (rx_status->flag & RX_FLAG_5MHZ) 78 if (rx_status->flag & RX_FLAG_5MHZ)
81 scan_width = NL80211_BSS_CHAN_WIDTH_5; 79 bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_5;
82 if (rx_status->flag & RX_FLAG_10MHZ) 80 if (rx_status->flag & RX_FLAG_10MHZ)
83 scan_width = NL80211_BSS_CHAN_WIDTH_10; 81 bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_10;
84 82
85 cbss = cfg80211_inform_bss_width_frame(local->hw.wiphy, channel, 83 bss_meta.chan = channel;
86 scan_width, mgmt, len, signal, 84 cbss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta,
87 GFP_ATOMIC); 85 mgmt, len, GFP_ATOMIC);
88 if (!cbss) 86 if (!cbss)
89 return NULL; 87 return NULL;
90 /* In case the signal is invalid update the status */ 88 /* In case the signal is invalid update the status */
@@ -1142,10 +1140,10 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
1142 return ret; 1140 return ret;
1143} 1141}
1144 1142
1145int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) 1143int ieee80211_request_sched_scan_stop(struct ieee80211_local *local)
1146{ 1144{
1147 struct ieee80211_local *local = sdata->local; 1145 struct ieee80211_sub_if_data *sched_scan_sdata;
1148 int ret = 0; 1146 int ret = -ENOENT;
1149 1147
1150 mutex_lock(&local->mtx); 1148 mutex_lock(&local->mtx);
1151 1149
@@ -1157,8 +1155,10 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata)
1157 /* We don't want to restart sched scan anymore. */ 1155 /* We don't want to restart sched scan anymore. */
1158 RCU_INIT_POINTER(local->sched_scan_req, NULL); 1156 RCU_INIT_POINTER(local->sched_scan_req, NULL);
1159 1157
1160 if (rcu_access_pointer(local->sched_scan_sdata)) { 1158 sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata,
1161 ret = drv_sched_scan_stop(local, sdata); 1159 lockdep_is_held(&local->mtx));
1160 if (sched_scan_sdata) {
1161 ret = drv_sched_scan_stop(local, sched_scan_sdata);
1162 if (!ret) 1162 if (!ret)
1163 RCU_INIT_POINTER(local->sched_scan_sdata, NULL); 1163 RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
1164 } 1164 }
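
The scan.c change above swaps cfg80211_inform_bss_width_frame() for
cfg80211_inform_bss_frame_data(), which bundles channel, scan width and
signal into one struct cfg80211_inform_bss instead of discrete arguments.
A hedged sketch of a minimal caller, using only the fields this hunk sets:

	struct cfg80211_inform_bss bss_meta = {
		.chan = channel,
		.scan_width = NL80211_BSS_CHAN_WIDTH_20,
		.signal = rx_status->signal * 100,	/* SIGNAL_DBM case */
	};

	cbss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta,
					      mgmt, len, GFP_ATOMIC);

The second hunk changes ieee80211_request_sched_scan_stop() to take the
ieee80211_local rather than an sdata, deriving the sched-scan sdata under
local->mtx via rcu_dereference_protected() so drv_sched_scan_stop() is
called on the interface the scan actually runs on.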
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 64f1936350c6..f91d1873218c 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -303,7 +303,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
303 struct ieee80211_local *local = sdata->local; 303 struct ieee80211_local *local = sdata->local;
304 struct ieee80211_hw *hw = &local->hw; 304 struct ieee80211_hw *hw = &local->hw;
305 struct sta_info *sta; 305 struct sta_info *sta;
306 struct timespec uptime;
307 int i; 306 int i;
308 307
309 sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp); 308 sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp);
@@ -332,18 +331,17 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
332 memcpy(sta->sta.addr, addr, ETH_ALEN); 331 memcpy(sta->sta.addr, addr, ETH_ALEN);
333 sta->local = local; 332 sta->local = local;
334 sta->sdata = sdata; 333 sta->sdata = sdata;
335 sta->last_rx = jiffies; 334 sta->rx_stats.last_rx = jiffies;
336 335
337 sta->sta_state = IEEE80211_STA_NONE; 336 sta->sta_state = IEEE80211_STA_NONE;
338 337
339 /* Mark TID as unreserved */ 338 /* Mark TID as unreserved */
340 sta->reserved_tid = IEEE80211_TID_UNRESERVED; 339 sta->reserved_tid = IEEE80211_TID_UNRESERVED;
341 340
342 ktime_get_ts(&uptime); 341 sta->last_connected = ktime_get_seconds();
343 sta->last_connected = uptime.tv_sec; 342 ewma_signal_init(&sta->rx_stats.avg_signal);
344 ewma_signal_init(&sta->avg_signal); 343 for (i = 0; i < ARRAY_SIZE(sta->rx_stats.chain_signal_avg); i++)
345 for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++) 344 ewma_signal_init(&sta->rx_stats.chain_signal_avg[i]);
346 ewma_signal_init(&sta->chain_signal_avg[i]);
347 345
348 if (local->ops->wake_tx_queue) { 346 if (local->ops->wake_tx_queue) {
349 void *txq_data; 347 void *txq_data;
@@ -1068,7 +1066,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
1068 if (sdata != sta->sdata) 1066 if (sdata != sta->sdata)
1069 continue; 1067 continue;
1070 1068
1071 if (time_after(jiffies, sta->last_rx + exp_time)) { 1069 if (time_after(jiffies, sta->rx_stats.last_rx + exp_time)) {
1072 sta_dbg(sta->sdata, "expiring inactive STA %pM\n", 1070 sta_dbg(sta->sdata, "expiring inactive STA %pM\n",
1073 sta->sta.addr); 1071 sta->sta.addr);
1074 1072
@@ -1808,12 +1806,50 @@ u8 sta_info_tx_streams(struct sta_info *sta)
1808 >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1; 1806 >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1;
1809} 1807}
1810 1808
1809static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
1810{
1811 rinfo->flags = 0;
1812
1813 if (sta->rx_stats.last_rate_flag & RX_FLAG_HT) {
1814 rinfo->flags |= RATE_INFO_FLAGS_MCS;
1815 rinfo->mcs = sta->rx_stats.last_rate_idx;
1816 } else if (sta->rx_stats.last_rate_flag & RX_FLAG_VHT) {
1817 rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS;
1818 rinfo->nss = sta->rx_stats.last_rate_vht_nss;
1819 rinfo->mcs = sta->rx_stats.last_rate_idx;
1820 } else {
1821 struct ieee80211_supported_band *sband;
1822 int shift = ieee80211_vif_get_shift(&sta->sdata->vif);
1823 u16 brate;
1824
1825 sband = sta->local->hw.wiphy->bands[
1826 ieee80211_get_sdata_band(sta->sdata)];
1827 brate = sband->bitrates[sta->rx_stats.last_rate_idx].bitrate;
1828 rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
1829 }
1830
1831 if (sta->rx_stats.last_rate_flag & RX_FLAG_SHORT_GI)
1832 rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI;
1833
1834 if (sta->rx_stats.last_rate_flag & RX_FLAG_5MHZ)
1835 rinfo->bw = RATE_INFO_BW_5;
1836 else if (sta->rx_stats.last_rate_flag & RX_FLAG_10MHZ)
1837 rinfo->bw = RATE_INFO_BW_10;
1838 else if (sta->rx_stats.last_rate_flag & RX_FLAG_40MHZ)
1839 rinfo->bw = RATE_INFO_BW_40;
1840 else if (sta->rx_stats.last_rate_vht_flag & RX_VHT_FLAG_80MHZ)
1841 rinfo->bw = RATE_INFO_BW_80;
1842 else if (sta->rx_stats.last_rate_vht_flag & RX_VHT_FLAG_160MHZ)
1843 rinfo->bw = RATE_INFO_BW_160;
1844 else
1845 rinfo->bw = RATE_INFO_BW_20;
1846}
1847
1811void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) 1848void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
1812{ 1849{
1813 struct ieee80211_sub_if_data *sdata = sta->sdata; 1850 struct ieee80211_sub_if_data *sdata = sta->sdata;
1814 struct ieee80211_local *local = sdata->local; 1851 struct ieee80211_local *local = sdata->local;
1815 struct rate_control_ref *ref = NULL; 1852 struct rate_control_ref *ref = NULL;
1816 struct timespec uptime;
1817 u32 thr = 0; 1853 u32 thr = 0;
1818 int i, ac; 1854 int i, ac;
1819 1855
@@ -1835,51 +1871,54 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
1835 BIT(NL80211_STA_INFO_STA_FLAGS) | 1871 BIT(NL80211_STA_INFO_STA_FLAGS) |
1836 BIT(NL80211_STA_INFO_BSS_PARAM) | 1872 BIT(NL80211_STA_INFO_BSS_PARAM) |
1837 BIT(NL80211_STA_INFO_CONNECTED_TIME) | 1873 BIT(NL80211_STA_INFO_CONNECTED_TIME) |
1838 BIT(NL80211_STA_INFO_RX_DROP_MISC) | 1874 BIT(NL80211_STA_INFO_RX_DROP_MISC);
1839 BIT(NL80211_STA_INFO_BEACON_LOSS); 1875
1876 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
1877 sinfo->beacon_loss_count = sdata->u.mgd.beacon_loss_count;
1878 sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_LOSS);
1879 }
1840 1880
1841 ktime_get_ts(&uptime); 1881 sinfo->connected_time = ktime_get_seconds() - sta->last_connected;
1842 sinfo->connected_time = uptime.tv_sec - sta->last_connected; 1882 sinfo->inactive_time =
1843 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); 1883 jiffies_to_msecs(jiffies - sta->rx_stats.last_rx);
1844 1884
1845 if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) | 1885 if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) |
1846 BIT(NL80211_STA_INFO_TX_BYTES)))) { 1886 BIT(NL80211_STA_INFO_TX_BYTES)))) {
1847 sinfo->tx_bytes = 0; 1887 sinfo->tx_bytes = 0;
1848 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) 1888 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
1849 sinfo->tx_bytes += sta->tx_bytes[ac]; 1889 sinfo->tx_bytes += sta->tx_stats.bytes[ac];
1850 sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64); 1890 sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64);
1851 } 1891 }
1852 1892
1853 if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_PACKETS))) { 1893 if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_PACKETS))) {
1854 sinfo->tx_packets = 0; 1894 sinfo->tx_packets = 0;
1855 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) 1895 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
1856 sinfo->tx_packets += sta->tx_packets[ac]; 1896 sinfo->tx_packets += sta->tx_stats.packets[ac];
1857 sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); 1897 sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
1858 } 1898 }
1859 1899
1860 if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) | 1900 if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) |
1861 BIT(NL80211_STA_INFO_RX_BYTES)))) { 1901 BIT(NL80211_STA_INFO_RX_BYTES)))) {
1862 sinfo->rx_bytes = sta->rx_bytes; 1902 sinfo->rx_bytes = sta->rx_stats.bytes;
1863 sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); 1903 sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64);
1864 } 1904 }
1865 1905
1866 if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) { 1906 if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) {
1867 sinfo->rx_packets = sta->rx_packets; 1907 sinfo->rx_packets = sta->rx_stats.packets;
1868 sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); 1908 sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS);
1869 } 1909 }
1870 1910
1871 if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_RETRIES))) { 1911 if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_RETRIES))) {
1872 sinfo->tx_retries = sta->tx_retry_count; 1912 sinfo->tx_retries = sta->status_stats.retry_count;
1873 sinfo->filled |= BIT(NL80211_STA_INFO_TX_RETRIES); 1913 sinfo->filled |= BIT(NL80211_STA_INFO_TX_RETRIES);
1874 } 1914 }
1875 1915
1876 if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_FAILED))) { 1916 if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_FAILED))) {
1877 sinfo->tx_failed = sta->tx_retry_failed; 1917 sinfo->tx_failed = sta->status_stats.retry_failed;
1878 sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED); 1918 sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED);
1879 } 1919 }
1880 1920
1881 sinfo->rx_dropped_misc = sta->rx_dropped; 1921 sinfo->rx_dropped_misc = sta->rx_stats.dropped;
1882 sinfo->beacon_loss_count = sta->beacon_loss_count;
1883 1922
1884 if (sdata->vif.type == NL80211_IFTYPE_STATION && 1923 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
1885 !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { 1924 !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) {
@@ -1891,33 +1930,35 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
1891 if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || 1930 if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) ||
1892 ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) { 1931 ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) {
1893 if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { 1932 if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) {
1894 sinfo->signal = (s8)sta->last_signal; 1933 sinfo->signal = (s8)sta->rx_stats.last_signal;
1895 sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); 1934 sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
1896 } 1935 }
1897 1936
1898 if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) { 1937 if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) {
1899 sinfo->signal_avg = 1938 sinfo->signal_avg =
1900 (s8) -ewma_signal_read(&sta->avg_signal); 1939 -ewma_signal_read(&sta->rx_stats.avg_signal);
1901 sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); 1940 sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG);
1902 } 1941 }
1903 } 1942 }
1904 1943
1905 if (sta->chains && 1944 if (sta->rx_stats.chains &&
1906 !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | 1945 !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) |
1907 BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { 1946 BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) {
1908 sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | 1947 sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) |
1909 BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); 1948 BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
1910 1949
1911 sinfo->chains = sta->chains; 1950 sinfo->chains = sta->rx_stats.chains;
1912 for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { 1951 for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) {
1913 sinfo->chain_signal[i] = sta->chain_signal_last[i]; 1952 sinfo->chain_signal[i] =
1953 sta->rx_stats.chain_signal_last[i];
1914 sinfo->chain_signal_avg[i] = 1954 sinfo->chain_signal_avg[i] =
1915 (s8) -ewma_signal_read(&sta->chain_signal_avg[i]); 1955 -ewma_signal_read(&sta->rx_stats.chain_signal_avg[i]);
1916 } 1956 }
1917 } 1957 }
1918 1958
1919 if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE))) { 1959 if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE))) {
1920 sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); 1960 sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate,
1961 &sinfo->txrate);
1921 sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); 1962 sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
1922 } 1963 }
1923 1964
@@ -1932,12 +1973,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
1932 1973
1933 if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { 1974 if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) {
1934 tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); 1975 tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU);
1935 tidstats->rx_msdu = sta->rx_msdu[i]; 1976 tidstats->rx_msdu = sta->rx_stats.msdu[i];
1936 } 1977 }
1937 1978
1938 if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { 1979 if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) {
1939 tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); 1980 tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU);
1940 tidstats->tx_msdu = sta->tx_msdu[i]; 1981 tidstats->tx_msdu = sta->tx_stats.msdu[i];
1941 } 1982 }
1942 1983
1943 if (!(tidstats->filled & 1984 if (!(tidstats->filled &
@@ -1945,7 +1986,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
1945 ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { 1986 ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
1946 tidstats->filled |= 1987 tidstats->filled |=
1947 BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); 1988 BIT(NL80211_TID_STATS_TX_MSDU_RETRIES);
1948 tidstats->tx_msdu_retries = sta->tx_msdu_retries[i]; 1989 tidstats->tx_msdu_retries =
1990 sta->status_stats.msdu_retries[i];
1949 } 1991 }
1950 1992
1951 if (!(tidstats->filled & 1993 if (!(tidstats->filled &
@@ -1953,7 +1995,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
1953 ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { 1995 ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
1954 tidstats->filled |= 1996 tidstats->filled |=
1955 BIT(NL80211_TID_STATS_TX_MSDU_FAILED); 1997 BIT(NL80211_TID_STATS_TX_MSDU_FAILED);
1956 tidstats->tx_msdu_failed = sta->tx_msdu_failed[i]; 1998 tidstats->tx_msdu_failed =
1999 sta->status_stats.msdu_failed[i];
1957 } 2000 }
1958 } 2001 }
1959 2002
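
Timekeeping side note on sta_info_alloc()/sta_set_sinfo(): the timespec
round-trip is replaced by a single monotonic accessor. The before/after,
as visible in the hunks:

	/* old: two statements and a stack struct */
	struct timespec uptime;
	ktime_get_ts(&uptime);
	sta->last_connected = uptime.tv_sec;

	/* new: one call, no intermediate struct */
	sta->last_connected = ktime_get_seconds();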
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index b087c71ff7fe..2cafb21b422f 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -133,6 +133,7 @@ enum ieee80211_agg_stop_reason {
133 * @buf_size: reorder buffer size at receiver 133 * @buf_size: reorder buffer size at receiver
134 * @failed_bar_ssn: ssn of the last failed BAR tx attempt 134 * @failed_bar_ssn: ssn of the last failed BAR tx attempt
135 * @bar_pending: BAR needs to be re-sent 135 * @bar_pending: BAR needs to be re-sent
 136 * @amsdu: support A-MSDU within A-MPDU
136 * 137 *
137 * This structure's lifetime is managed by RCU, assignments to 138 * This structure's lifetime is managed by RCU, assignments to
138 * the array holding it must hold the aggregation mutex. 139 * the array holding it must hold the aggregation mutex.
@@ -158,6 +159,7 @@ struct tid_ampdu_tx {
158 159
159 u16 failed_bar_ssn; 160 u16 failed_bar_ssn;
160 bool bar_pending; 161 bool bar_pending;
162 bool amsdu;
161}; 163};
162 164
163/** 165/**
@@ -342,12 +344,6 @@ DECLARE_EWMA(signal, 1024, 8)
342 * @rate_ctrl_lock: spinlock used to protect rate control data 344 * @rate_ctrl_lock: spinlock used to protect rate control data
343 * (data inside the algorithm, so serializes calls there) 345 * (data inside the algorithm, so serializes calls there)
344 * @rate_ctrl_priv: rate control private per-STA pointer 346 * @rate_ctrl_priv: rate control private per-STA pointer
345 * @last_tx_rate: rate used for last transmit, to report to userspace as
346 * "the" transmit rate
347 * @last_rx_rate_idx: rx status rate index of the last data packet
348 * @last_rx_rate_flag: rx status flag of the last data packet
349 * @last_rx_rate_vht_flag: rx status vht flag of the last data packet
350 * @last_rx_rate_vht_nss: rx status nss of last data packet
351 * @lock: used for locking all fields that require locking, see comments 347 * @lock: used for locking all fields that require locking, see comments
352 * in the header file. 348 * in the header file.
353 * @drv_deliver_wk: used for delivering frames after driver PS unblocking 349 * @drv_deliver_wk: used for delivering frames after driver PS unblocking
@@ -362,23 +358,9 @@ DECLARE_EWMA(signal, 1024, 8)
362 * the station when it leaves powersave or polls for frames 358 * the station when it leaves powersave or polls for frames
363 * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on 359 * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on
364 * @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on 360 * @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on
365 * @rx_packets: Number of MSDUs received from this STA
366 * @rx_bytes: Number of bytes received from this STA
367 * @last_rx: time (in jiffies) when last frame was received from this STA
368 * @last_connected: time (in seconds) when a station got connected 361 * @last_connected: time (in seconds) when a station got connected
369 * @num_duplicates: number of duplicate frames received from this STA
370 * @rx_fragments: number of received MPDUs
371 * @rx_dropped: number of dropped MPDUs from this STA
372 * @last_signal: signal of last received frame from this STA
373 * @avg_signal: moving average of signal of received frames from this STA
374 * @last_ack_signal: signal of last received Ack frame from this STA
375 * @last_seq_ctrl: last received seq/frag number from this STA (per TID 362 * @last_seq_ctrl: last received seq/frag number from this STA (per TID
376 * plus one for non-QoS frames) 363 * plus one for non-QoS frames)
377 * @tx_filtered_count: number of frames the hardware filtered for this STA
378 * @tx_retry_failed: number of frames that failed retry
379 * @tx_retry_count: total number of retries for frames to this STA
380 * @tx_packets: number of RX/TX MSDUs
381 * @tx_bytes: number of bytes transmitted to this STA
382 * @tid_seq: per-TID sequence numbers for sending to this STA 364 * @tid_seq: per-TID sequence numbers for sending to this STA
383 * @ampdu_mlme: A-MPDU state machine state 365 * @ampdu_mlme: A-MPDU state machine state
384 * @timer_to_tid: identity mapping to ID timers 366 * @timer_to_tid: identity mapping to ID timers
@@ -386,32 +368,22 @@ DECLARE_EWMA(signal, 1024, 8)
386 * @debugfs: debug filesystem info 368 * @debugfs: debug filesystem info
387 * @dead: set to true when sta is unlinked 369 * @dead: set to true when sta is unlinked
388 * @uploaded: set to true when sta is uploaded to the driver 370 * @uploaded: set to true when sta is uploaded to the driver
389 * @lost_packets: number of consecutive lost packets
390 * @sta: station information we share with the driver 371 * @sta: station information we share with the driver
391 * @sta_state: duplicates information about station state (for debug) 372 * @sta_state: duplicates information about station state (for debug)
392 * @beacon_loss_count: number of times beacon loss has triggered 373 * @beacon_loss_count: number of times beacon loss has triggered
393 * @rcu_head: RCU head used for freeing this station struct 374 * @rcu_head: RCU head used for freeing this station struct
394 * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, 375 * @cur_max_bandwidth: maximum bandwidth to use for TX to the station,
395 * taken from HT/VHT capabilities or VHT operating mode notification 376 * taken from HT/VHT capabilities or VHT operating mode notification
396 * @chains: chains ever used for RX from this station
397 * @chain_signal_last: last signal (per chain)
398 * @chain_signal_avg: signal average (per chain)
399 * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for 377 * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for
400 * AP only. 378 * AP only.
401 * @cipher_scheme: optional cipher scheme for this station 379 * @cipher_scheme: optional cipher scheme for this station
402 * @last_tdls_pkt_time: holds the time in jiffies of last TDLS pkt ACKed
403 * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) 380 * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED)
404 * @tx_msdu: MSDUs transmitted to this station, using IEEE80211_NUM_TID
405 * entry for non-QoS frames
 406 * @tx_msdu_retries: MSDU retries for transmissions to this station,
 407 * using IEEE80211_NUM_TID entry for non-QoS frames
 408 * @tx_msdu_failed: MSDU failures for transmissions to this station,
409 * using IEEE80211_NUM_TID entry for non-QoS frames
410 * @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID
411 * entry for non-QoS frames
412 * @fast_tx: TX fastpath information 381 * @fast_tx: TX fastpath information
413 * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to 382 * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to
414 * the BSS one. 383 * the BSS one.
384 * @tx_stats: TX statistics
385 * @rx_stats: RX statistics
386 * @status_stats: TX status statistics
415 */ 387 */
416struct sta_info { 388struct sta_info {
417 /* General information, mostly static */ 389 /* General information, mostly static */
@@ -455,42 +427,49 @@ struct sta_info {
455 unsigned long driver_buffered_tids; 427 unsigned long driver_buffered_tids;
456 unsigned long txq_buffered_tids; 428 unsigned long txq_buffered_tids;
457 429
458 /* Updated from RX path only, no locking requirements */
459 unsigned long rx_packets;
460 u64 rx_bytes;
461 unsigned long last_rx;
462 long last_connected; 430 long last_connected;
463 unsigned long num_duplicates;
464 unsigned long rx_fragments;
465 unsigned long rx_dropped;
466 int last_signal;
467 struct ewma_signal avg_signal;
468 int last_ack_signal;
469 431
470 u8 chains; 432 /* Updated from RX path only, no locking requirements */
471 s8 chain_signal_last[IEEE80211_MAX_CHAINS]; 433 struct {
472 struct ewma_signal chain_signal_avg[IEEE80211_MAX_CHAINS]; 434 unsigned long packets;
435 u64 bytes;
436 unsigned long last_rx;
437 unsigned long num_duplicates;
438 unsigned long fragments;
439 unsigned long dropped;
440 int last_signal;
441 struct ewma_signal avg_signal;
442 u8 chains;
443 s8 chain_signal_last[IEEE80211_MAX_CHAINS];
444 struct ewma_signal chain_signal_avg[IEEE80211_MAX_CHAINS];
445 int last_rate_idx;
446 u32 last_rate_flag;
447 u32 last_rate_vht_flag;
448 u8 last_rate_vht_nss;
449 u64 msdu[IEEE80211_NUM_TIDS + 1];
450 } rx_stats;
473 451
474 /* Plus 1 for non-QoS frames */ 452 /* Plus 1 for non-QoS frames */
475 __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; 453 __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1];
476 454
477 /* Updated from TX status path only, no locking requirements */ 455 /* Updated from TX status path only, no locking requirements */
478 unsigned long tx_filtered_count; 456 struct {
479 unsigned long tx_retry_failed, tx_retry_count; 457 unsigned long filtered;
458 unsigned long retry_failed, retry_count;
459 unsigned int lost_packets;
460 unsigned long last_tdls_pkt_time;
461 u64 msdu_retries[IEEE80211_NUM_TIDS + 1];
462 u64 msdu_failed[IEEE80211_NUM_TIDS + 1];
463 } status_stats;
480 464
481 /* Updated from TX path only, no locking requirements */ 465 /* Updated from TX path only, no locking requirements */
482 u64 tx_packets[IEEE80211_NUM_ACS]; 466 struct {
483 u64 tx_bytes[IEEE80211_NUM_ACS]; 467 u64 packets[IEEE80211_NUM_ACS];
484 struct ieee80211_tx_rate last_tx_rate; 468 u64 bytes[IEEE80211_NUM_ACS];
485 int last_rx_rate_idx; 469 struct ieee80211_tx_rate last_rate;
486 u32 last_rx_rate_flag; 470 u64 msdu[IEEE80211_NUM_TIDS + 1];
487 u32 last_rx_rate_vht_flag; 471 } tx_stats;
488 u8 last_rx_rate_vht_nss;
489 u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; 472 u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
490 u64 tx_msdu[IEEE80211_NUM_TIDS + 1];
491 u64 tx_msdu_retries[IEEE80211_NUM_TIDS + 1];
492 u64 tx_msdu_failed[IEEE80211_NUM_TIDS + 1];
493 u64 rx_msdu[IEEE80211_NUM_TIDS + 1];
494 473
495 /* 474 /*
496 * Aggregation information, locked with lock. 475 * Aggregation information, locked with lock.
@@ -507,15 +486,9 @@ struct sta_info {
507 486
508 enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; 487 enum ieee80211_sta_rx_bandwidth cur_max_bandwidth;
509 488
510 unsigned int lost_packets;
511 unsigned int beacon_loss_count;
512
513 enum ieee80211_smps_mode known_smps_mode; 489 enum ieee80211_smps_mode known_smps_mode;
514 const struct ieee80211_cipher_scheme *cipher_scheme; 490 const struct ieee80211_cipher_scheme *cipher_scheme;
515 491
516 /* TDLS timeout data */
517 unsigned long last_tdls_pkt_time;
518
519 u8 reserved_tid; 492 u8 reserved_tid;
520 493
521 struct cfg80211_chan_def tdls_chandef; 494 struct cfg80211_chan_def tdls_chandef;
@@ -686,8 +659,6 @@ static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata)
686void sta_set_rate_info_tx(struct sta_info *sta, 659void sta_set_rate_info_tx(struct sta_info *sta,
687 const struct ieee80211_tx_rate *rate, 660 const struct ieee80211_tx_rate *rate,
688 struct rate_info *rinfo); 661 struct rate_info *rinfo);
689void sta_set_rate_info_rx(struct sta_info *sta,
690 struct rate_info *rinfo);
691void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo); 662void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo);
692 663
693void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, 664void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
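
After this reshuffle the flat counters become three nested structs, grouped
by the only path that writes them (RX path, TX path, TX status path), which
encodes the lockless-update rule in the layout itself. A sketch of reader
code against the new layout -- sta_total_tx_bytes is a hypothetical helper,
the loop matches what sta_set_sinfo() does above:

	static u64 sta_total_tx_bytes(struct sta_info *sta)
	{
		u64 total = 0;
		int ac;

		/* TX counters are split per access category */
		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
			total += sta->tx_stats.bytes[ac];
		return total;
	}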
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 8ba583243509..5bad05e9af90 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -67,7 +67,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
67 IEEE80211_TX_INTFL_RETRANSMISSION; 67 IEEE80211_TX_INTFL_RETRANSMISSION;
68 info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; 68 info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS;
69 69
70 sta->tx_filtered_count++; 70 sta->status_stats.filtered++;
71 71
72 /* 72 /*
73 * Clear more-data bit on filtered frames, it might be set 73 * Clear more-data bit on filtered frames, it might be set
@@ -101,6 +101,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
101 * when it wakes up for the next time. 101 * when it wakes up for the next time.
102 */ 102 */
103 set_sta_flag(sta, WLAN_STA_CLEAR_PS_FILT); 103 set_sta_flag(sta, WLAN_STA_CLEAR_PS_FILT);
104 ieee80211_clear_fast_xmit(sta);
104 105
105 /* 106 /*
106 * This code races in the following way: 107 * This code races in the following way:
@@ -182,7 +183,7 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
182 struct ieee80211_sub_if_data *sdata = sta->sdata; 183 struct ieee80211_sub_if_data *sdata = sta->sdata;
183 184
184 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) 185 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
185 sta->last_rx = jiffies; 186 sta->rx_stats.last_rx = jiffies;
186 187
187 if (ieee80211_is_data_qos(mgmt->frame_control)) { 188 if (ieee80211_is_data_qos(mgmt->frame_control)) {
188 struct ieee80211_hdr *hdr = (void *) skb->data; 189 struct ieee80211_hdr *hdr = (void *) skb->data;
@@ -556,8 +557,9 @@ static void ieee80211_lost_packet(struct sta_info *sta,
556 !(info->flags & IEEE80211_TX_STAT_AMPDU)) 557 !(info->flags & IEEE80211_TX_STAT_AMPDU))
557 return; 558 return;
558 559
559 sta->lost_packets++; 560 sta->status_stats.lost_packets++;
560 if (!sta->sta.tdls && sta->lost_packets < STA_LOST_PKT_THRESHOLD) 561 if (!sta->sta.tdls &&
562 sta->status_stats.lost_packets < STA_LOST_PKT_THRESHOLD)
561 return; 563 return;
562 564
563 /* 565 /*
@@ -567,14 +569,15 @@ static void ieee80211_lost_packet(struct sta_info *sta,
567 * mechanism. 569 * mechanism.
568 */ 570 */
569 if (sta->sta.tdls && 571 if (sta->sta.tdls &&
570 (sta->lost_packets < STA_LOST_TDLS_PKT_THRESHOLD || 572 (sta->status_stats.lost_packets < STA_LOST_TDLS_PKT_THRESHOLD ||
571 time_before(jiffies, 573 time_before(jiffies,
572 sta->last_tdls_pkt_time + STA_LOST_TDLS_PKT_TIME))) 574 sta->status_stats.last_tdls_pkt_time +
575 STA_LOST_TDLS_PKT_TIME)))
573 return; 576 return;
574 577
575 cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr, 578 cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr,
576 sta->lost_packets, GFP_ATOMIC); 579 sta->status_stats.lost_packets, GFP_ATOMIC);
577 sta->lost_packets = 0; 580 sta->status_stats.lost_packets = 0;
578} 581}
579 582
580static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, 583static int ieee80211_tx_get_rates(struct ieee80211_hw *hw,
@@ -635,18 +638,18 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw,
635 sta = container_of(pubsta, struct sta_info, sta); 638 sta = container_of(pubsta, struct sta_info, sta);
636 639
637 if (!acked) 640 if (!acked)
638 sta->tx_retry_failed++; 641 sta->status_stats.retry_failed++;
639 sta->tx_retry_count += retry_count; 642 sta->status_stats.retry_count += retry_count;
640 643
641 if (acked) { 644 if (acked) {
642 sta->last_rx = jiffies; 645 sta->rx_stats.last_rx = jiffies;
643 646
644 if (sta->lost_packets) 647 if (sta->status_stats.lost_packets)
645 sta->lost_packets = 0; 648 sta->status_stats.lost_packets = 0;
646 649
647 /* Track when last TDLS packet was ACKed */ 650 /* Track when last TDLS packet was ACKed */
648 if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) 651 if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH))
649 sta->last_tdls_pkt_time = jiffies; 652 sta->status_stats.last_tdls_pkt_time = jiffies;
650 } else { 653 } else {
651 ieee80211_lost_packet(sta, info); 654 ieee80211_lost_packet(sta, info);
652 } 655 }
@@ -668,16 +671,70 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw,
668} 671}
669EXPORT_SYMBOL(ieee80211_tx_status_noskb); 672EXPORT_SYMBOL(ieee80211_tx_status_noskb);
670 673
671void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) 674void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
675 struct ieee80211_supported_band *sband,
676 int retry_count, int shift, bool send_to_cooked)
672{ 677{
673 struct sk_buff *skb2; 678 struct sk_buff *skb2;
679 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
680 struct ieee80211_sub_if_data *sdata;
681 struct net_device *prev_dev = NULL;
682 int rtap_len;
683
684 /* send frame to monitor interfaces now */
685 rtap_len = ieee80211_tx_radiotap_len(info);
686 if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) {
687 pr_err("ieee80211_tx_status: headroom too small\n");
688 dev_kfree_skb(skb);
689 return;
690 }
691 ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count,
692 rtap_len, shift);
693
694 /* XXX: is this sufficient for BPF? */
695 skb_set_mac_header(skb, 0);
696 skb->ip_summed = CHECKSUM_UNNECESSARY;
697 skb->pkt_type = PACKET_OTHERHOST;
698 skb->protocol = htons(ETH_P_802_2);
699 memset(skb->cb, 0, sizeof(skb->cb));
700
701 rcu_read_lock();
702 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
703 if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
704 if (!ieee80211_sdata_running(sdata))
705 continue;
706
707 if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) &&
708 !send_to_cooked)
709 continue;
710
711 if (prev_dev) {
712 skb2 = skb_clone(skb, GFP_ATOMIC);
713 if (skb2) {
714 skb2->dev = prev_dev;
715 netif_rx(skb2);
716 }
717 }
718
719 prev_dev = sdata->dev;
720 }
721 }
722 if (prev_dev) {
723 skb->dev = prev_dev;
724 netif_rx(skb);
725 skb = NULL;
726 }
727 rcu_read_unlock();
728 dev_kfree_skb(skb);
729}
730
731void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
732{
674 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 733 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
675 struct ieee80211_local *local = hw_to_local(hw); 734 struct ieee80211_local *local = hw_to_local(hw);
676 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 735 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
677 __le16 fc; 736 __le16 fc;
678 struct ieee80211_supported_band *sband; 737 struct ieee80211_supported_band *sband;
679 struct ieee80211_sub_if_data *sdata;
680 struct net_device *prev_dev = NULL;
681 struct sta_info *sta; 738 struct sta_info *sta;
682 struct rhash_head *tmp; 739 struct rhash_head *tmp;
683 int retry_count; 740 int retry_count;
@@ -685,7 +742,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
685 bool send_to_cooked; 742 bool send_to_cooked;
686 bool acked; 743 bool acked;
687 struct ieee80211_bar *bar; 744 struct ieee80211_bar *bar;
688 int rtap_len;
689 int shift = 0; 745 int shift = 0;
690 int tid = IEEE80211_NUM_TIDS; 746 int tid = IEEE80211_NUM_TIDS;
691 const struct bucket_table *tbl; 747 const struct bucket_table *tbl;
@@ -730,7 +786,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
730 if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) && 786 if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) &&
731 (ieee80211_is_data(hdr->frame_control)) && 787 (ieee80211_is_data(hdr->frame_control)) &&
732 (rates_idx != -1)) 788 (rates_idx != -1))
733 sta->last_tx_rate = info->status.rates[rates_idx]; 789 sta->tx_stats.last_rate =
790 info->status.rates[rates_idx];
734 791
735 if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && 792 if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) &&
736 (ieee80211_is_data_qos(fc))) { 793 (ieee80211_is_data_qos(fc))) {
@@ -776,13 +833,15 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
776 return; 833 return;
777 } else { 834 } else {
778 if (!acked) 835 if (!acked)
779 sta->tx_retry_failed++; 836 sta->status_stats.retry_failed++;
780 sta->tx_retry_count += retry_count; 837 sta->status_stats.retry_count += retry_count;
781 838
782 if (ieee80211_is_data_present(fc)) { 839 if (ieee80211_is_data_present(fc)) {
783 if (!acked) 840 if (!acked)
784 sta->tx_msdu_failed[tid]++; 841 sta->status_stats.msdu_failed[tid]++;
785 sta->tx_msdu_retries[tid] += retry_count; 842
843 sta->status_stats.msdu_retries[tid] +=
844 retry_count;
786 } 845 }
787 } 846 }
788 847
@@ -800,19 +859,17 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
800 859
801 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { 860 if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
802 if (info->flags & IEEE80211_TX_STAT_ACK) { 861 if (info->flags & IEEE80211_TX_STAT_ACK) {
803 if (sta->lost_packets) 862 if (sta->status_stats.lost_packets)
804 sta->lost_packets = 0; 863 sta->status_stats.lost_packets = 0;
805 864
806 /* Track when last TDLS packet was ACKed */ 865 /* Track when last TDLS packet was ACKed */
807 if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) 866 if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH))
808 sta->last_tdls_pkt_time = jiffies; 867 sta->status_stats.last_tdls_pkt_time =
868 jiffies;
809 } else { 869 } else {
810 ieee80211_lost_packet(sta, info); 870 ieee80211_lost_packet(sta, info);
811 } 871 }
812 } 872 }
813
814 if (acked)
815 sta->last_ack_signal = info->status.ack_signal;
816 } 873 }
817 874
818 rcu_read_unlock(); 875 rcu_read_unlock();
@@ -878,51 +935,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
878 return; 935 return;
879 } 936 }
880 937
881 /* send frame to monitor interfaces now */ 938 /* send to monitor interfaces */
882 rtap_len = ieee80211_tx_radiotap_len(info); 939 ieee80211_tx_monitor(local, skb, sband, retry_count, shift, send_to_cooked);
883 if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) {
884 pr_err("ieee80211_tx_status: headroom too small\n");
885 dev_kfree_skb(skb);
886 return;
887 }
888 ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count,
889 rtap_len, shift);
890
891 /* XXX: is this sufficient for BPF? */
892 skb_set_mac_header(skb, 0);
893 skb->ip_summed = CHECKSUM_UNNECESSARY;
894 skb->pkt_type = PACKET_OTHERHOST;
895 skb->protocol = htons(ETH_P_802_2);
896 memset(skb->cb, 0, sizeof(skb->cb));
897
898 rcu_read_lock();
899 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
900 if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
901 if (!ieee80211_sdata_running(sdata))
902 continue;
903
904 if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) &&
905 !send_to_cooked)
906 continue;
907
908 if (prev_dev) {
909 skb2 = skb_clone(skb, GFP_ATOMIC);
910 if (skb2) {
911 skb2->dev = prev_dev;
912 netif_rx(skb2);
913 }
914 }
915
916 prev_dev = sdata->dev;
917 }
918 }
919 if (prev_dev) {
920 skb->dev = prev_dev;
921 netif_rx(skb);
922 skb = NULL;
923 }
924 rcu_read_unlock();
925 dev_kfree_skb(skb);
926} 940}
927EXPORT_SYMBOL(ieee80211_tx_status); 941EXPORT_SYMBOL(ieee80211_tx_status);
928 942
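
The large status.c hunk hoists the radiotap/monitor delivery loop out of
ieee80211_tx_status() into ieee80211_tx_monitor(), so the beacon path in
tx.c below can reuse it. The contract, as visible here: the helper consumes
the skb -- the last monitor interface gets the original via netif_rx(),
earlier ones get clones, and anything left over is freed.

	/* sketch of the call; skb must carry headroom for the radiotap header */
	ieee80211_tx_monitor(local, skb, sband, retry_count, shift,
			     send_to_cooked);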
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 4e202d0679b2..c9eeb3f12808 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -41,9 +41,11 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata,
41 struct sk_buff *skb) 41 struct sk_buff *skb)
42{ 42{
43 struct ieee80211_local *local = sdata->local; 43 struct ieee80211_local *local = sdata->local;
44 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
44 bool chan_switch = local->hw.wiphy->features & 45 bool chan_switch = local->hw.wiphy->features &
45 NL80211_FEATURE_TDLS_CHANNEL_SWITCH; 46 NL80211_FEATURE_TDLS_CHANNEL_SWITCH;
46 bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW); 47 bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) &&
48 !ifmgd->tdls_wider_bw_prohibited;
47 enum ieee80211_band band = ieee80211_get_sdata_band(sdata); 49 enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
48 struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; 50 struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
49 bool vht = sband && sband->vht_cap.vht_supported; 51 bool vht = sband && sband->vht_cap.vht_supported;
@@ -331,8 +333,8 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
331 333
332 /* proceed to downgrade the chandef until usable or the same */ 334 /* proceed to downgrade the chandef until usable or the same */
333 while (uc.width > max_width && 335 while (uc.width > max_width &&
334 !cfg80211_reg_can_beacon(sdata->local->hw.wiphy, 336 !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc,
335 &uc, sdata->wdev.iftype)) 337 sdata->wdev.iftype))
336 ieee80211_chandef_downgrade(&uc); 338 ieee80211_chandef_downgrade(&uc);
337 339
338 if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) { 340 if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) {
@@ -589,12 +591,19 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
589 offset = noffset; 591 offset = noffset;
590 } 592 }
591 593
592 /* if HT support is only added in TDLS, we need an HT-operation IE */ 594 /*
595 * if HT support is only added in TDLS, we need an HT-operation IE.
596 * add the IE as required by IEEE802.11-2012 9.23.3.2.
597 */
593 if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) { 598 if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) {
599 u16 prot = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED |
600 IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT |
601 IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT;
602
594 pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation)); 603 pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation));
595 /* send an empty HT operation IE */
596 ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap, 604 ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap,
597 &sdata->vif.bss_conf.chandef, 0); 605 &sdata->vif.bss_conf.chandef, prot,
606 true);
598 } 607 }
599 608
600 ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); 609 ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
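
Two behavioural notes on the tdls.c hunks, both visible above: wider-band
TDLS is now additionally gated on the per-interface tdls_wider_bw_prohibited
flag (introduced elsewhere in this series), and the HT-operation IE sent in
the setup confirm is no longer empty -- it advertises the most conservative
protection mode, per IEEE802.11-2012 9.23.3.2:

	u16 prot = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED |
		   IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT |
		   IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT;

	ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap,
				   &sdata->vif.bss_conf.chandef, prot, true);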
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 6f14591d8ca9..56c6d6cfa5a1 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -33,11 +33,11 @@
33 __field(u32, chan_width) \ 33 __field(u32, chan_width) \
34 __field(u32, center_freq1) \ 34 __field(u32, center_freq1) \
35 __field(u32, center_freq2) 35 __field(u32, center_freq2)
36#define CHANDEF_ASSIGN(c) \ 36#define CHANDEF_ASSIGN(c) \
37 __entry->control_freq = (c)->chan ? (c)->chan->center_freq : 0; \ 37 __entry->control_freq = (c) ? ((c)->chan ? (c)->chan->center_freq : 0) : 0; \
38 __entry->chan_width = (c)->width; \ 38 __entry->chan_width = (c) ? (c)->width : 0; \
39 __entry->center_freq1 = (c)->center_freq1; \ 39 __entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \
40 __entry->center_freq2 = (c)->center_freq2; 40 __entry->center_freq2 = (c) ? (c)->center_freq2 : 0;
41#define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz" 41#define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz"
42#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ 42#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \
43 __entry->center_freq1, __entry->center_freq2 43 __entry->center_freq1, __entry->center_freq2
@@ -325,7 +325,6 @@ TRACE_EVENT(drv_config,
325 __field(u32, flags) 325 __field(u32, flags)
326 __field(int, power_level) 326 __field(int, power_level)
327 __field(int, dynamic_ps_timeout) 327 __field(int, dynamic_ps_timeout)
328 __field(int, max_sleep_period)
329 __field(u16, listen_interval) 328 __field(u16, listen_interval)
330 __field(u8, long_frame_max_tx_count) 329 __field(u8, long_frame_max_tx_count)
331 __field(u8, short_frame_max_tx_count) 330 __field(u8, short_frame_max_tx_count)
@@ -339,7 +338,6 @@ TRACE_EVENT(drv_config,
339 __entry->flags = local->hw.conf.flags; 338 __entry->flags = local->hw.conf.flags;
340 __entry->power_level = local->hw.conf.power_level; 339 __entry->power_level = local->hw.conf.power_level;
341 __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout; 340 __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout;
342 __entry->max_sleep_period = local->hw.conf.max_sleep_period;
343 __entry->listen_interval = local->hw.conf.listen_interval; 341 __entry->listen_interval = local->hw.conf.listen_interval;
344 __entry->long_frame_max_tx_count = 342 __entry->long_frame_max_tx_count =
345 local->hw.conf.long_frame_max_tx_count; 343 local->hw.conf.long_frame_max_tx_count;
@@ -497,6 +495,36 @@ TRACE_EVENT(drv_configure_filter,
497 ) 495 )
498); 496);
499 497
498TRACE_EVENT(drv_config_iface_filter,
499 TP_PROTO(struct ieee80211_local *local,
500 struct ieee80211_sub_if_data *sdata,
501 unsigned int filter_flags,
502 unsigned int changed_flags),
503
504 TP_ARGS(local, sdata, filter_flags, changed_flags),
505
506 TP_STRUCT__entry(
507 LOCAL_ENTRY
508 VIF_ENTRY
509 __field(unsigned int, filter_flags)
510 __field(unsigned int, changed_flags)
511 ),
512
513 TP_fast_assign(
514 LOCAL_ASSIGN;
515 VIF_ASSIGN;
516 __entry->filter_flags = filter_flags;
517 __entry->changed_flags = changed_flags;
518 ),
519
520 TP_printk(
521 LOCAL_PR_FMT VIF_PR_FMT
522 " filter_flags: %#x changed_flags: %#x",
523 LOCAL_PR_ARG, VIF_PR_ARG, __entry->filter_flags,
524 __entry->changed_flags
525 )
526);
527
500TRACE_EVENT(drv_set_tim, 528TRACE_EVENT(drv_set_tim,
501 TP_PROTO(struct ieee80211_local *local, 529 TP_PROTO(struct ieee80211_local *local,
502 struct ieee80211_sta *sta, bool set), 530 struct ieee80211_sta *sta, bool set),
@@ -944,9 +972,9 @@ TRACE_EVENT(drv_ampdu_action,
944 struct ieee80211_sub_if_data *sdata, 972 struct ieee80211_sub_if_data *sdata,
945 enum ieee80211_ampdu_mlme_action action, 973 enum ieee80211_ampdu_mlme_action action,
946 struct ieee80211_sta *sta, u16 tid, 974 struct ieee80211_sta *sta, u16 tid,
947 u16 *ssn, u8 buf_size), 975 u16 *ssn, u8 buf_size, bool amsdu),
948 976
949 TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size), 977 TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size, amsdu),
950 978
951 TP_STRUCT__entry( 979 TP_STRUCT__entry(
952 LOCAL_ENTRY 980 LOCAL_ENTRY
@@ -955,6 +983,7 @@ TRACE_EVENT(drv_ampdu_action,
955 __field(u16, tid) 983 __field(u16, tid)
956 __field(u16, ssn) 984 __field(u16, ssn)
957 __field(u8, buf_size) 985 __field(u8, buf_size)
986 __field(bool, amsdu)
958 VIF_ENTRY 987 VIF_ENTRY
959 ), 988 ),
960 989
@@ -966,12 +995,13 @@ TRACE_EVENT(drv_ampdu_action,
966 __entry->tid = tid; 995 __entry->tid = tid;
967 __entry->ssn = ssn ? *ssn : 0; 996 __entry->ssn = ssn ? *ssn : 0;
968 __entry->buf_size = buf_size; 997 __entry->buf_size = buf_size;
998 __entry->amsdu = amsdu;
969 ), 999 ),
970 1000
971 TP_printk( 1001 TP_printk(
972 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d", 1002 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d amsdu:%d",
973 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, 1003 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action,
974 __entry->tid, __entry->buf_size 1004 __entry->tid, __entry->buf_size, __entry->amsdu
975 ) 1005 )
976); 1006);
977 1007
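
The CHANDEF_ASSIGN change makes the tracepoint macros safe to invoke with a
NULL chandef pointer, not just a NULL channel: every field is guarded the
same way, so a missing chandef traces as all zeroes instead of a NULL
dereference. The pattern, per field:

	__entry->chan_width = (c) ? (c)->width : 0;	/* NULL chandef -> 0 */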
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 84e0e8c7fb23..bdc224d5053a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -757,9 +757,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
757 if (txrc.reported_rate.idx < 0) { 757 if (txrc.reported_rate.idx < 0) {
758 txrc.reported_rate = tx->rate; 758 txrc.reported_rate = tx->rate;
759 if (tx->sta && ieee80211_is_data(hdr->frame_control)) 759 if (tx->sta && ieee80211_is_data(hdr->frame_control))
760 tx->sta->last_tx_rate = txrc.reported_rate; 760 tx->sta->tx_stats.last_rate = txrc.reported_rate;
761 } else if (tx->sta) 761 } else if (tx->sta)
762 tx->sta->last_tx_rate = txrc.reported_rate; 762 tx->sta->tx_stats.last_rate = txrc.reported_rate;
763 763
764 if (ratetbl) 764 if (ratetbl)
765 return TX_CONTINUE; 765 return TX_CONTINUE;
@@ -824,7 +824,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
824 hdr->seq_ctrl = cpu_to_le16(tx->sdata->sequence_number); 824 hdr->seq_ctrl = cpu_to_le16(tx->sdata->sequence_number);
825 tx->sdata->sequence_number += 0x10; 825 tx->sdata->sequence_number += 0x10;
826 if (tx->sta) 826 if (tx->sta)
827 tx->sta->tx_msdu[IEEE80211_NUM_TIDS]++; 827 tx->sta->tx_stats.msdu[IEEE80211_NUM_TIDS]++;
828 return TX_CONTINUE; 828 return TX_CONTINUE;
829 } 829 }
830 830
@@ -840,7 +840,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
840 840
841 qc = ieee80211_get_qos_ctl(hdr); 841 qc = ieee80211_get_qos_ctl(hdr);
842 tid = *qc & IEEE80211_QOS_CTL_TID_MASK; 842 tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
843 tx->sta->tx_msdu[tid]++; 843 tx->sta->tx_stats.msdu[tid]++;
844 844
845 if (!tx->sta->sta.txq[0]) 845 if (!tx->sta->sta.txq[0])
846 hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); 846 hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
@@ -994,10 +994,10 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
994 994
995 skb_queue_walk(&tx->skbs, skb) { 995 skb_queue_walk(&tx->skbs, skb) {
996 ac = skb_get_queue_mapping(skb); 996 ac = skb_get_queue_mapping(skb);
997 tx->sta->tx_bytes[ac] += skb->len; 997 tx->sta->tx_stats.bytes[ac] += skb->len;
998 } 998 }
999 if (ac >= 0) 999 if (ac >= 0)
1000 tx->sta->tx_packets[ac]++; 1000 tx->sta->tx_stats.packets[ac]++;
1001 1001
1002 return TX_CONTINUE; 1002 return TX_CONTINUE;
1003} 1003}
@@ -1218,8 +1218,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
1218 1218
1219 if (!tx->sta) 1219 if (!tx->sta)
1220 info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; 1220 info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
1221 else if (test_and_clear_sta_flag(tx->sta, WLAN_STA_CLEAR_PS_FILT)) 1221 else if (test_and_clear_sta_flag(tx->sta, WLAN_STA_CLEAR_PS_FILT)) {
1222 info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; 1222 info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
1223 ieee80211_check_fast_xmit(tx->sta);
1224 }
1223 1225
1224 info->flags |= IEEE80211_TX_CTL_FIRST_FRAGMENT; 1226 info->flags |= IEEE80211_TX_CTL_FIRST_FRAGMENT;
1225 1227
@@ -2451,7 +2453,8 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
2451 2453
2452 if (test_sta_flag(sta, WLAN_STA_PS_STA) || 2454 if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
2453 test_sta_flag(sta, WLAN_STA_PS_DRIVER) || 2455 test_sta_flag(sta, WLAN_STA_PS_DRIVER) ||
2454 test_sta_flag(sta, WLAN_STA_PS_DELIVER)) 2456 test_sta_flag(sta, WLAN_STA_PS_DELIVER) ||
2457 test_sta_flag(sta, WLAN_STA_CLEAR_PS_FILT))
2455 goto out; 2458 goto out;
2456 2459
2457 if (sdata->noack_map) 2460 if (sdata->noack_map)
@@ -2767,7 +2770,8 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
2767 2770
2768 if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { 2771 if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
2769 *ieee80211_get_qos_ctl(hdr) = tid; 2772 *ieee80211_get_qos_ctl(hdr) = tid;
2770 hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); 2773 if (!sta->sta.txq[0])
2774 hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
2771 } else { 2775 } else {
2772 info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; 2776 info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
2773 hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); 2777 hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
@@ -2775,10 +2779,10 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
2775 } 2779 }
2776 2780
2777 if (skb_shinfo(skb)->gso_size) 2781 if (skb_shinfo(skb)->gso_size)
2778 sta->tx_msdu[tid] += 2782 sta->tx_stats.msdu[tid] +=
2779 DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); 2783 DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
2780 else 2784 else
2781 sta->tx_msdu[tid]++; 2785 sta->tx_stats.msdu[tid]++;
2782 2786
2783 info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; 2787 info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
2784 2788
@@ -2809,8 +2813,8 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
2809 /* statistics normally done by ieee80211_tx_h_stats (but that 2813 /* statistics normally done by ieee80211_tx_h_stats (but that
2810 * has to consider fragmentation, so is more complex) 2814 * has to consider fragmentation, so is more complex)
2811 */ 2815 */
2812 sta->tx_bytes[skb_get_queue_mapping(skb)] += skb->len; 2816 sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
2813 sta->tx_packets[skb_get_queue_mapping(skb)]++; 2817 sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
2814 2818
2815 if (fast_tx->pn_offs) { 2819 if (fast_tx->pn_offs) {
2816 u64 pn; 2820 u64 pn;
@@ -3512,6 +3516,12 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
3512{ 3516{
3513 struct ieee80211_mutable_offsets offs = {}; 3517 struct ieee80211_mutable_offsets offs = {};
3514 struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false); 3518 struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false);
3519 struct sk_buff *copy;
3520 struct ieee80211_supported_band *sband;
3521 int shift;
3522
3523 if (!bcn)
3524 return bcn;
3515 3525
3516 if (tim_offset) 3526 if (tim_offset)
3517 *tim_offset = offs.tim_offset; 3527 *tim_offset = offs.tim_offset;
@@ -3519,6 +3529,19 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
3519 if (tim_length) 3529 if (tim_length)
3520 *tim_length = offs.tim_length; 3530 *tim_length = offs.tim_length;
3521 3531
3532 if (ieee80211_hw_check(hw, BEACON_TX_STATUS) ||
3533 !hw_to_local(hw)->monitors)
3534 return bcn;
3535
3536 /* send a copy to monitor interfaces */
3537 copy = skb_copy(bcn, GFP_ATOMIC);
3538 if (!copy)
3539 return bcn;
3540
3541 shift = ieee80211_vif_get_shift(vif);
3542 sband = hw->wiphy->bands[ieee80211_get_sdata_band(vif_to_sdata(vif))];
3543 ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false);
3544
3522 return bcn; 3545 return bcn;
3523} 3546}
3524EXPORT_SYMBOL(ieee80211_beacon_get_tim); 3547EXPORT_SYMBOL(ieee80211_beacon_get_tim);
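
ieee80211_beacon_get_tim() now also mirrors beacons to monitor interfaces
via the ieee80211_tx_monitor() helper introduced in status.c, unless the
hardware reports beacon TX status itself or no monitors exist. Control flow
of the added code, in sketch form:

	if (ieee80211_hw_check(hw, BEACON_TX_STATUS) ||
	    !hw_to_local(hw)->monitors)
		return bcn;			/* nothing to mirror */

	copy = skb_copy(bcn, GFP_ATOMIC);	/* monitor gets its own copy */
	if (copy)
		ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1,
				     shift, false);
	return bcn;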
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 1104421bc525..74058020b7d6 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -4,6 +4,7 @@
4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
5 * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> 5 * Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
6 * Copyright 2013-2014 Intel Mobile Communications GmbH 6 * Copyright 2013-2014 Intel Mobile Communications GmbH
7 * Copyright (C) 2015 Intel Deutschland GmbH
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
@@ -1104,13 +1105,13 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
1104} 1105}
1105 1106
1106void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, 1107void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
1107 bool bss_notify) 1108 bool bss_notify, bool enable_qos)
1108{ 1109{
1109 struct ieee80211_local *local = sdata->local; 1110 struct ieee80211_local *local = sdata->local;
1110 struct ieee80211_tx_queue_params qparam; 1111 struct ieee80211_tx_queue_params qparam;
1111 struct ieee80211_chanctx_conf *chanctx_conf; 1112 struct ieee80211_chanctx_conf *chanctx_conf;
1112 int ac; 1113 int ac;
1113 bool use_11b, enable_qos; 1114 bool use_11b;
1114 bool is_ocb; /* Use alternative EDCA parameters if dot11OCBActivated=true */ 1115 bool is_ocb; /* Use alternative EDCA parameters if dot11OCBActivated=true */
1115 int aCWmin, aCWmax; 1116 int aCWmin, aCWmax;
1116 1117
@@ -1129,13 +1130,6 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
1129 !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE); 1130 !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE);
1130 rcu_read_unlock(); 1131 rcu_read_unlock();
1131 1132
1132 /*
1133 * By default disable QoS in STA mode for old access points, which do
1134 * not support 802.11e. New APs will provide proper queue parameters,
1135 * that we will configure later.
1136 */
1137 enable_qos = (sdata->vif.type != NL80211_IFTYPE_STATION);
1138
1139 is_ocb = (sdata->vif.type == NL80211_IFTYPE_OCB); 1133 is_ocb = (sdata->vif.type == NL80211_IFTYPE_OCB);
1140 1134
1141 /* Set defaults according to 802.11-2007 Table 7-37 */ 1135 /* Set defaults according to 802.11-2007 Table 7-37 */
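Aside: the defaults this function derives from aCWmin/aCWmax follow the usual 802.11 EDCA pattern from Table 7-37: voice and video get shortened contention windows, background gets a longer AIFS. A sketch of that computation under the common OFDM parameter set (aCWmin=15, aCWmax=1023); the per-AC values mac80211 actually programs also depend on use_11b and OCB, which are omitted here:

#include <stdio.h>

struct edca { int aifsn, cw_min, cw_max; };

static void edca_defaults(int aCWmin, int aCWmax, struct edca q[4])
{
        q[0] = (struct edca){ 2, (aCWmin + 1) / 4 - 1,
                                 (aCWmin + 1) / 2 - 1 };  /* AC_VO */
        q[1] = (struct edca){ 2, (aCWmin + 1) / 2 - 1, aCWmin }; /* AC_VI */
        q[2] = (struct edca){ 3, aCWmin, aCWmax };        /* AC_BE */
        q[3] = (struct edca){ 7, aCWmin, aCWmax };        /* AC_BK */
}

int main(void)
{
        struct edca q[4];

        edca_defaults(15, 1023, q);
        for (int i = 0; i < 4; i++)
                printf("ac%d: aifsn=%d cw=%d..%d\n",
                       i, q[i].aifsn, q[i].cw_min, q[i].cw_max);
        return 0;
}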
@@ -1664,7 +1658,6 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
1664 1658
1665 local->resuming = false; 1659 local->resuming = false;
1666 local->suspended = false; 1660 local->suspended = false;
1667 local->started = false;
1668 local->in_reconfig = false; 1661 local->in_reconfig = false;
1669 1662
1670 /* scheduled scan clearly can't be running any more, but tell 1663 /* scheduled scan clearly can't be running any more, but tell
@@ -1754,6 +1747,18 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1754#endif 1747#endif
1755 1748
1756 /* 1749 /*
1750 * In case of hw_restart during suspend (without wowlan),
1751 * cancel restart work, as we are reconfiguring the device
1752 * anyway.
1753 * Note that restart_work is scheduled on a frozen workqueue,
1754 * so we can't deadlock in this case.
1755 */
1756 if (suspended && local->in_reconfig && !reconfig_due_to_wowlan)
1757 cancel_work_sync(&local->restart_work);
1758
1759 local->started = false;
1760
1761 /*
1757 * Upon resume hardware can sometimes be goofy due to 1762 * Upon resume hardware can sometimes be goofy due to
1758 * various platform / driver / bus issues, so restarting 1763 * various platform / driver / bus issues, so restarting
1759 * the device may at times not work immediately. Propagate 1764 * the device may at times not work immediately. Propagate
@@ -1951,7 +1956,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1951 } 1956 }
1952 } 1957 }
1953 1958
1954 ieee80211_recalc_ps(local, -1); 1959 ieee80211_recalc_ps(local);
1955 1960
1956 /* 1961 /*
1957 * The sta might be in psm against the ap (e.g. because 1962 * The sta might be in psm against the ap (e.g. because
@@ -1966,7 +1971,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1966 if (!sdata->u.mgd.associated) 1971 if (!sdata->u.mgd.associated)
1967 continue; 1972 continue;
1968 1973
1969 ieee80211_send_nullfunc(local, sdata, 0); 1974 ieee80211_send_nullfunc(local, sdata, false);
1970 } 1975 }
1971 } 1976 }
1972 1977
@@ -1996,6 +2001,29 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1996 if (ieee80211_sdata_running(sdata)) 2001 if (ieee80211_sdata_running(sdata))
1997 ieee80211_enable_keys(sdata); 2002 ieee80211_enable_keys(sdata);
1998 2003
2004 /* Reconfigure sched scan if it was interrupted by FW restart */
2005 mutex_lock(&local->mtx);
2006 sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata,
2007 lockdep_is_held(&local->mtx));
2008 sched_scan_req = rcu_dereference_protected(local->sched_scan_req,
2009 lockdep_is_held(&local->mtx));
2010 if (sched_scan_sdata && sched_scan_req)
2011 /*
2012 * Sched scan stopped, but we don't want to report it. Instead,
2013 * we're trying to reschedule. However, if more than one scan
2014 * plan was set, we cannot reschedule since we don't know which
2015 * scan plan was currently running (and some scan plans may have
2016 * already finished).
2017 */
2018 if (sched_scan_req->n_scan_plans > 1 ||
2019 __ieee80211_request_sched_scan_start(sched_scan_sdata,
2020 sched_scan_req))
2021 sched_scan_stopped = true;
2022 mutex_unlock(&local->mtx);
2023
2024 if (sched_scan_stopped)
2025 cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
2026
1999 wake_up: 2027 wake_up:
2000 local->in_reconfig = false; 2028 local->in_reconfig = false;
2001 barrier(); 2029 barrier();
@@ -2017,8 +2045,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
2017 mutex_lock(&local->sta_mtx); 2045 mutex_lock(&local->sta_mtx);
2018 2046
2019 list_for_each_entry(sta, &local->sta_list, list) { 2047 list_for_each_entry(sta, &local->sta_list, list) {
2020 ieee80211_sta_tear_down_BA_sessions( 2048 if (!local->resuming)
2021 sta, AGG_STOP_LOCAL_REQUEST); 2049 ieee80211_sta_tear_down_BA_sessions(
2050 sta, AGG_STOP_LOCAL_REQUEST);
2022 clear_sta_flag(sta, WLAN_STA_BLOCK_BA); 2051 clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
2023 } 2052 }
2024 2053
@@ -2030,28 +2059,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
2030 false); 2059 false);
2031 2060
2032 /* 2061 /*
2033 * Reconfigure sched scan if it was interrupted by FW restart or
2034 * suspend.
2035 */
2036 mutex_lock(&local->mtx);
2037 sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata,
2038 lockdep_is_held(&local->mtx));
2039 sched_scan_req = rcu_dereference_protected(local->sched_scan_req,
2040 lockdep_is_held(&local->mtx));
2041 if (sched_scan_sdata && sched_scan_req)
2042 /*
2043 * Sched scan stopped, but we don't want to report it. Instead,
2044 * we're trying to reschedule.
2045 */
2046 if (__ieee80211_request_sched_scan_start(sched_scan_sdata,
2047 sched_scan_req))
2048 sched_scan_stopped = true;
2049 mutex_unlock(&local->mtx);
2050
2051 if (sched_scan_stopped)
2052 cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
2053
2054 /*
2055 * If this is for hw restart things are still running. 2062 * If this is for hw restart things are still running.
2056 * We may want to change that later, however. 2063 * We may want to change that later, however.
2057 */ 2064 */
@@ -2135,7 +2142,13 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata)
2135 chanctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf, 2142 chanctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
2136 lockdep_is_held(&local->chanctx_mtx)); 2143 lockdep_is_held(&local->chanctx_mtx));
2137 2144
2138 if (WARN_ON_ONCE(!chanctx_conf)) 2145 /*
 2146 * This function can be called from a work item, so it is possible
 2147 * that the chanctx_conf has been removed in the meantime (due to a
 2148 * disconnection, for example).
 2149 * Nothing should be done in that case.
2150 */
2151 if (!chanctx_conf)
2139 goto unlock; 2152 goto unlock;
2140 2153
2141 chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); 2154 chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf);
@@ -2272,7 +2285,7 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
2272 2285
2273u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, 2286u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
2274 const struct cfg80211_chan_def *chandef, 2287 const struct cfg80211_chan_def *chandef,
2275 u16 prot_mode) 2288 u16 prot_mode, bool rifs_mode)
2276{ 2289{
2277 struct ieee80211_ht_operation *ht_oper; 2290 struct ieee80211_ht_operation *ht_oper;
2278 /* Build HT Information */ 2291 /* Build HT Information */
@@ -2300,6 +2313,9 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
2300 chandef->width != NL80211_CHAN_WIDTH_20) 2313 chandef->width != NL80211_CHAN_WIDTH_20)
2301 ht_oper->ht_param |= IEEE80211_HT_PARAM_CHAN_WIDTH_ANY; 2314 ht_oper->ht_param |= IEEE80211_HT_PARAM_CHAN_WIDTH_ANY;
2302 2315
2316 if (rifs_mode)
2317 ht_oper->ht_param |= IEEE80211_HT_PARAM_RIFS_MODE;
2318
2303 ht_oper->operation_mode = cpu_to_le16(prot_mode); 2319 ht_oper->operation_mode = cpu_to_le16(prot_mode);
2304 ht_oper->stbc_param = 0x0000; 2320 ht_oper->stbc_param = 0x0000;
2305 2321
@@ -2324,6 +2340,8 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
2324 if (chandef->center_freq2) 2340 if (chandef->center_freq2)
2325 vht_oper->center_freq_seg2_idx = 2341 vht_oper->center_freq_seg2_idx =
2326 ieee80211_frequency_to_channel(chandef->center_freq2); 2342 ieee80211_frequency_to_channel(chandef->center_freq2);
2343 else
2344 vht_oper->center_freq_seg2_idx = 0x00;
2327 2345
2328 switch (chandef->width) { 2346 switch (chandef->width) {
2329 case NL80211_CHAN_WIDTH_160: 2347 case NL80211_CHAN_WIDTH_160:
@@ -2541,7 +2559,7 @@ int ieee80211_ave_rssi(struct ieee80211_vif *vif)
2541 /* non-managed type interfaces */ 2559 /* non-managed type interfaces */
2542 return 0; 2560 return 0;
2543 } 2561 }
2544 return ifmgd->ave_beacon_signal / 16; 2562 return -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal);
2545} 2563}
2546EXPORT_SYMBOL_GPL(ieee80211_ave_rssi); 2564EXPORT_SYMBOL_GPL(ieee80211_ave_rssi);
2547 2565
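Aside: ieee80211_ave_rssi() now reads an exponentially weighted moving average instead of a hand-rolled /16 accumulator. A generic sketch of the update that DECLARE_EWMA-style averages perform; the precision and weight values here are illustrative, not the ones mac80211 declares:

#include <stdio.h>

#define EWMA_PRECISION 10 /* average kept left-shifted for resolution */
#define EWMA_WEIGHT    3  /* each sample moves avg 1/8 of the way */

struct ewma { long internal; };

static void ewma_add(struct ewma *e, long val)
{
        long scaled = val << EWMA_PRECISION;

        if (!e->internal)
                e->internal = scaled;
        else
                e->internal += (scaled - e->internal) / (1 << EWMA_WEIGHT);
}

static long ewma_read(const struct ewma *e)
{
        return e->internal >> EWMA_PRECISION;
}

int main(void)
{
        struct ewma sig = { 0 };

        /* beacon signal is stored negated (e.g. -42 dBm -> 42),
         * hence the minus sign on the read side */
        for (int i = 0; i < 8; i++)
                ewma_add(&sig, 42);
        ewma_add(&sig, 60); /* one weak beacon nudges the average */
        printf("ave rssi = %ld dBm\n", -ewma_read(&sig));
        return 0;
}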
@@ -2951,6 +2969,13 @@ ieee80211_extend_noa_desc(struct ieee80211_noa_data *data, u32 tsf, int i)
2951 if (end > 0) 2969 if (end > 0)
2952 return false; 2970 return false;
2953 2971
2972 /* One shot NOA */
2973 if (data->count[i] == 1)
2974 return false;
2975
2976 if (data->desc[i].interval == 0)
2977 return false;
2978
2954 /* End time is in the past, check for repetitions */ 2979 /* End time is in the past, check for repetitions */
2955 skip = DIV_ROUND_UP(-end, data->desc[i].interval); 2980 skip = DIV_ROUND_UP(-end, data->desc[i].interval);
2956 if (data->count[i] < 255) { 2981 if (data->count[i] < 255) {
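Aside: the repetition handling just below the new guards uses DIV_ROUND_UP to find how many whole intervals fit into the elapsed time; it is also why the new interval == 0 check matters, since the division would otherwise fault. A worked example with made-up values:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        /* desc end lies 2500us in the past, interval is 1000us:
         * three repetitions must be skipped to land in the future */
        int end = -2500, interval = 1000;
        int skip = DIV_ROUND_UP(-end, interval);

        printf("skip %d, new end %d\n", skip, end + skip * interval);
        return 0;
}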
@@ -3298,9 +3323,11 @@ void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata,
3298 if (sta) { 3323 if (sta) {
3299 txqi->txq.sta = &sta->sta; 3324 txqi->txq.sta = &sta->sta;
3300 sta->sta.txq[tid] = &txqi->txq; 3325 sta->sta.txq[tid] = &txqi->txq;
3326 txqi->txq.tid = tid;
3301 txqi->txq.ac = ieee802_1d_to_ac[tid & 7]; 3327 txqi->txq.ac = ieee802_1d_to_ac[tid & 7];
3302 } else { 3328 } else {
3303 sdata->vif.txq = &txqi->txq; 3329 sdata->vif.txq = &txqi->txq;
3330 txqi->txq.tid = 0;
3304 txqi->txq.ac = IEEE80211_AC_BE; 3331 txqi->txq.ac = IEEE80211_AC_BE;
3305 } 3332 }
3306} 3333}
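Aside: the txq initialization above maps a TID to an access category through ieee802_1d_to_ac. A sketch of that 802.1d priority mapping; the table follows the usual 802.1d-to-WMM convention and should be read as illustrative of mac80211's table rather than a verbatim copy:

#include <stdio.h>

enum { AC_VO, AC_VI, AC_BE, AC_BK };

/* 802.1d priority (TID 0..7) -> WMM access category */
static const int ieee802_1d_to_ac[8] = {
        AC_BE, /* 0: best effort     */
        AC_BK, /* 1: background      */
        AC_BK, /* 2: spare           */
        AC_BE, /* 3: excellent effort */
        AC_VI, /* 4: video           */
        AC_VI, /* 5: video           */
        AC_VO, /* 6: voice           */
        AC_VO, /* 7: network control */
};

int main(void)
{
        /* TIDs 8..15 wrap onto 0..7 via the & 7 in the original */
        for (int tid = 0; tid < 16; tid++)
                printf("tid %2d -> ac %d\n", tid, ieee802_1d_to_ac[tid & 7]);
        return 0;
}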
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index feb547dc8643..d824c38971ed 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -174,9 +174,12 @@ mic_fail_no_key:
174 * a driver that supports HW encryption. Send up the key idx only if 174 * a driver that supports HW encryption. Send up the key idx only if
175 * the key is set. 175 * the key is set.
176 */ 176 */
177 mac80211_ev_michael_mic_failure(rx->sdata, 177 cfg80211_michael_mic_failure(rx->sdata->dev, hdr->addr2,
178 rx->key ? rx->key->conf.keyidx : -1, 178 is_multicast_ether_addr(hdr->addr1) ?
179 (void *) skb->data, NULL, GFP_ATOMIC); 179 NL80211_KEYTYPE_GROUP :
180 NL80211_KEYTYPE_PAIRWISE,
181 rx->key ? rx->key->conf.keyidx : -1,
182 NULL, GFP_ATOMIC);
180 return RX_DROP_UNUSABLE; 183 return RX_DROP_UNUSABLE;
181} 184}
182 185
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c
index c865ebb2ace2..57b5e94471af 100644
--- a/net/mac802154/cfg.c
+++ b/net/mac802154/cfg.c
@@ -266,6 +266,195 @@ ieee802154_set_ackreq_default(struct wpan_phy *wpan_phy,
266 return 0; 266 return 0;
267} 267}
268 268
269#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
270static void
271ieee802154_get_llsec_table(struct wpan_phy *wpan_phy,
272 struct wpan_dev *wpan_dev,
273 struct ieee802154_llsec_table **table)
274{
275 struct net_device *dev = wpan_dev->netdev;
276 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
277
278 *table = &sdata->sec.table;
279}
280
281static void
282ieee802154_lock_llsec_table(struct wpan_phy *wpan_phy,
283 struct wpan_dev *wpan_dev)
284{
285 struct net_device *dev = wpan_dev->netdev;
286 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
287
288 mutex_lock(&sdata->sec_mtx);
289}
290
291static void
292ieee802154_unlock_llsec_table(struct wpan_phy *wpan_phy,
293 struct wpan_dev *wpan_dev)
294{
295 struct net_device *dev = wpan_dev->netdev;
296 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
297
298 mutex_unlock(&sdata->sec_mtx);
299}
300
301static int
302ieee802154_set_llsec_params(struct wpan_phy *wpan_phy,
303 struct wpan_dev *wpan_dev,
304 const struct ieee802154_llsec_params *params,
305 int changed)
306{
307 struct net_device *dev = wpan_dev->netdev;
308 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
309 int res;
310
311 mutex_lock(&sdata->sec_mtx);
312 res = mac802154_llsec_set_params(&sdata->sec, params, changed);
313 mutex_unlock(&sdata->sec_mtx);
314
315 return res;
316}
317
318static int
319ieee802154_get_llsec_params(struct wpan_phy *wpan_phy,
320 struct wpan_dev *wpan_dev,
321 struct ieee802154_llsec_params *params)
322{
323 struct net_device *dev = wpan_dev->netdev;
324 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
325 int res;
326
327 mutex_lock(&sdata->sec_mtx);
328 res = mac802154_llsec_get_params(&sdata->sec, params);
329 mutex_unlock(&sdata->sec_mtx);
330
331 return res;
332}
333
334static int
335ieee802154_add_llsec_key(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
336 const struct ieee802154_llsec_key_id *id,
337 const struct ieee802154_llsec_key *key)
338{
339 struct net_device *dev = wpan_dev->netdev;
340 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
341 int res;
342
343 mutex_lock(&sdata->sec_mtx);
344 res = mac802154_llsec_key_add(&sdata->sec, id, key);
345 mutex_unlock(&sdata->sec_mtx);
346
347 return res;
348}
349
350static int
351ieee802154_del_llsec_key(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
352 const struct ieee802154_llsec_key_id *id)
353{
354 struct net_device *dev = wpan_dev->netdev;
355 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
356 int res;
357
358 mutex_lock(&sdata->sec_mtx);
359 res = mac802154_llsec_key_del(&sdata->sec, id);
360 mutex_unlock(&sdata->sec_mtx);
361
362 return res;
363}
364
365static int
366ieee802154_add_seclevel(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
367 const struct ieee802154_llsec_seclevel *sl)
368{
369 struct net_device *dev = wpan_dev->netdev;
370 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
371 int res;
372
373 mutex_lock(&sdata->sec_mtx);
374 res = mac802154_llsec_seclevel_add(&sdata->sec, sl);
375 mutex_unlock(&sdata->sec_mtx);
376
377 return res;
378}
379
380static int
381ieee802154_del_seclevel(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
382 const struct ieee802154_llsec_seclevel *sl)
383{
384 struct net_device *dev = wpan_dev->netdev;
385 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
386 int res;
387
388 mutex_lock(&sdata->sec_mtx);
389 res = mac802154_llsec_seclevel_del(&sdata->sec, sl);
390 mutex_unlock(&sdata->sec_mtx);
391
392 return res;
393}
394
395static int
396ieee802154_add_device(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
397 const struct ieee802154_llsec_device *dev_desc)
398{
399 struct net_device *dev = wpan_dev->netdev;
400 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
401 int res;
402
403 mutex_lock(&sdata->sec_mtx);
404 res = mac802154_llsec_dev_add(&sdata->sec, dev_desc);
405 mutex_unlock(&sdata->sec_mtx);
406
407 return res;
408}
409
410static int
411ieee802154_del_device(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
412 __le64 extended_addr)
413{
414 struct net_device *dev = wpan_dev->netdev;
415 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
416 int res;
417
418 mutex_lock(&sdata->sec_mtx);
419 res = mac802154_llsec_dev_del(&sdata->sec, extended_addr);
420 mutex_unlock(&sdata->sec_mtx);
421
422 return res;
423}
424
425static int
426ieee802154_add_devkey(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
427 __le64 extended_addr,
428 const struct ieee802154_llsec_device_key *key)
429{
430 struct net_device *dev = wpan_dev->netdev;
431 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
432 int res;
433
434 mutex_lock(&sdata->sec_mtx);
435 res = mac802154_llsec_devkey_add(&sdata->sec, extended_addr, key);
436 mutex_unlock(&sdata->sec_mtx);
437
438 return res;
439}
440
441static int
442ieee802154_del_devkey(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
443 __le64 extended_addr,
444 const struct ieee802154_llsec_device_key *key)
445{
446 struct net_device *dev = wpan_dev->netdev;
447 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
448 int res;
449
450 mutex_lock(&sdata->sec_mtx);
451 res = mac802154_llsec_devkey_del(&sdata->sec, extended_addr, key);
452 mutex_unlock(&sdata->sec_mtx);
453
454 return res;
455}
456#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
457
269const struct cfg802154_ops mac802154_config_ops = { 458const struct cfg802154_ops mac802154_config_ops = {
270 .add_virtual_intf_deprecated = ieee802154_add_iface_deprecated, 459 .add_virtual_intf_deprecated = ieee802154_add_iface_deprecated,
271 .del_virtual_intf_deprecated = ieee802154_del_iface_deprecated, 460 .del_virtual_intf_deprecated = ieee802154_del_iface_deprecated,
@@ -284,4 +473,20 @@ const struct cfg802154_ops mac802154_config_ops = {
284 .set_max_frame_retries = ieee802154_set_max_frame_retries, 473 .set_max_frame_retries = ieee802154_set_max_frame_retries,
285 .set_lbt_mode = ieee802154_set_lbt_mode, 474 .set_lbt_mode = ieee802154_set_lbt_mode,
286 .set_ackreq_default = ieee802154_set_ackreq_default, 475 .set_ackreq_default = ieee802154_set_ackreq_default,
476#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
477 .get_llsec_table = ieee802154_get_llsec_table,
478 .lock_llsec_table = ieee802154_lock_llsec_table,
479 .unlock_llsec_table = ieee802154_unlock_llsec_table,
480 /* TODO above */
481 .set_llsec_params = ieee802154_set_llsec_params,
482 .get_llsec_params = ieee802154_get_llsec_params,
483 .add_llsec_key = ieee802154_add_llsec_key,
484 .del_llsec_key = ieee802154_del_llsec_key,
485 .add_seclevel = ieee802154_add_seclevel,
486 .del_seclevel = ieee802154_del_seclevel,
487 .add_device = ieee802154_add_device,
488 .del_device = ieee802154_del_device,
489 .add_devkey = ieee802154_add_devkey,
490 .del_devkey = ieee802154_del_devkey,
491#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
287}; 492};
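Aside: every llsec op added above follows the same shape — resolve the sub-interface from the wpan_dev, take sec_mtx, delegate to the mac802154_llsec helper, unlock, return the result. A condensed userspace sketch of that lock-wrap pattern, with a pthread mutex standing in for the kernel mutex and a toy table for sdata->sec:

#include <pthread.h>
#include <stdio.h>

struct sec_table { int keys; pthread_mutex_t mtx; };

static int table_key_add_locked(struct sec_table *t)
{
        t->keys++; /* real code would validate and insert the key */
        return 0;
}

/* The cfg802154 entry point only adds locking around the helper,
 * mirroring ieee802154_add_llsec_key() above. */
static int key_add(struct sec_table *t)
{
        int res;

        pthread_mutex_lock(&t->mtx);
        res = table_key_add_locked(t);
        pthread_mutex_unlock(&t->mtx);

        return res;
}

int main(void)
{
        struct sec_table t = { 0, PTHREAD_MUTEX_INITIALIZER };

        printf("res=%d keys=%d\n", key_add(&t), t.keys);
        return 0;
}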
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index ed26952f9e14..7079cd32a7ad 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -367,12 +367,11 @@ static int mac802154_set_header_security(struct ieee802154_sub_if_data *sdata,
367 return 0; 367 return 0;
368} 368}
369 369
370static int mac802154_header_create(struct sk_buff *skb, 370static int ieee802154_header_create(struct sk_buff *skb,
371 struct net_device *dev, 371 struct net_device *dev,
372 unsigned short type, 372 const struct ieee802154_addr *daddr,
373 const void *daddr, 373 const struct ieee802154_addr *saddr,
374 const void *saddr, 374 unsigned len)
375 unsigned len)
376{ 375{
377 struct ieee802154_hdr hdr; 376 struct ieee802154_hdr hdr;
378 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); 377 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
@@ -423,24 +422,89 @@ static int mac802154_header_create(struct sk_buff *skb,
423 return hlen; 422 return hlen;
424} 423}
425 424
425static const struct wpan_dev_header_ops ieee802154_header_ops = {
426 .create = ieee802154_header_create,
427};
428
 429/* This header create functionality assumes at most an 8 byte array
 430 * for the source and destination pointers. To fit this onto the
 431 * 802.15.4 data frame header, only extended addressing and intra-PAN
 432 * communication are used here; the fc fields are mostly fallback
 433 * handling. This provides dev_hard_header for dgram sockets.
 434 */
435static int mac802154_header_create(struct sk_buff *skb,
436 struct net_device *dev,
437 unsigned short type,
438 const void *daddr,
439 const void *saddr,
440 unsigned len)
441{
442 struct ieee802154_hdr hdr;
443 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
444 struct wpan_dev *wpan_dev = &sdata->wpan_dev;
445 struct ieee802154_mac_cb cb = { };
446 int hlen;
447
448 if (!daddr)
449 return -EINVAL;
450
451 memset(&hdr.fc, 0, sizeof(hdr.fc));
452 hdr.fc.type = IEEE802154_FC_TYPE_DATA;
453 hdr.fc.ack_request = wpan_dev->ackreq;
454 hdr.seq = atomic_inc_return(&dev->ieee802154_ptr->dsn) & 0xFF;
455
 456 /* TODO currently a workaround: pass a zeroed cb block so that
 457 * security parameter defaults are set according to the MIB.
 458 */
459 if (mac802154_set_header_security(sdata, &hdr, &cb) < 0)
460 return -EINVAL;
461
462 hdr.dest.pan_id = wpan_dev->pan_id;
463 hdr.dest.mode = IEEE802154_ADDR_LONG;
464 ieee802154_be64_to_le64(&hdr.dest.extended_addr, daddr);
465
466 hdr.source.pan_id = hdr.dest.pan_id;
467 hdr.source.mode = IEEE802154_ADDR_LONG;
468
469 if (!saddr)
470 hdr.source.extended_addr = wpan_dev->extended_addr;
471 else
472 ieee802154_be64_to_le64(&hdr.source.extended_addr, saddr);
473
474 hlen = ieee802154_hdr_push(skb, &hdr);
475 if (hlen < 0)
476 return -EINVAL;
477
478 skb_reset_mac_header(skb);
479 skb->mac_len = hlen;
480
481 if (len > ieee802154_max_payload(&hdr))
482 return -EMSGSIZE;
483
484 return hlen;
485}
486
426static int 487static int
427mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr) 488mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr)
428{ 489{
429 struct ieee802154_hdr hdr; 490 struct ieee802154_hdr hdr;
430 struct ieee802154_addr *addr = (struct ieee802154_addr *)haddr;
431 491
432 if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) { 492 if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) {
433 pr_debug("malformed packet\n"); 493 pr_debug("malformed packet\n");
434 return 0; 494 return 0;
435 } 495 }
436 496
437 *addr = hdr.source; 497 if (hdr.source.mode == IEEE802154_ADDR_LONG) {
438 return sizeof(*addr); 498 ieee802154_le64_to_be64(haddr, &hdr.source.extended_addr);
499 return IEEE802154_EXTENDED_ADDR_LEN;
500 }
501
502 return 0;
439} 503}
440 504
441static struct header_ops mac802154_header_ops = { 505static const struct header_ops mac802154_header_ops = {
442 .create = mac802154_header_create, 506 .create = mac802154_header_create,
443 .parse = mac802154_header_parse, 507 .parse = mac802154_header_parse,
444}; 508};
445 509
446static const struct net_device_ops mac802154_wpan_ops = { 510static const struct net_device_ops mac802154_wpan_ops = {
@@ -471,9 +535,29 @@ static void ieee802154_if_setup(struct net_device *dev)
471 dev->addr_len = IEEE802154_EXTENDED_ADDR_LEN; 535 dev->addr_len = IEEE802154_EXTENDED_ADDR_LEN;
472 memset(dev->broadcast, 0xff, IEEE802154_EXTENDED_ADDR_LEN); 536 memset(dev->broadcast, 0xff, IEEE802154_EXTENDED_ADDR_LEN);
473 537
 474 dev->hard_header_len = MAC802154_FRAME_HARD_HEADER_LEN; 538 /* Base hard_header_len on IEEE802154_MIN_HEADER_LEN. AF_PACKET
 475 dev->needed_tailroom = 2 + 16; /* FCS + MIC */ 539 * will not send frames without any payload, but ack frames
 476 dev->mtu = IEEE802154_MTU; 540 * have no payload, so subtract one so that a 3 byte
 541 * frame can still be sent. The xmit callback assumes at least a
 542 * hard header where the two byte fc and the sequence field are set.
 543 */
544 dev->hard_header_len = IEEE802154_MIN_HEADER_LEN - 1;
 545 /* The auth_tag is for security and lives in the private payload
 546 * room of the mac frame, which sits between payload and FCS field.
 547 */
548 dev->needed_tailroom = IEEE802154_MAX_AUTH_TAG_LEN +
549 IEEE802154_FCS_LEN;
 550 /* The mtu is the payload size without the mac header in this case.
 551 * The header has dynamic length with a minimum of hard_header_len,
 552 * so let mtu be the maximum payload size, which is
 553 * IEEE802154_MTU - IEEE802154_FCS_LEN - hard_header_len: the FCS
 554 * is set by hardware or by ndo_start_xmit, and the minimum mac
 555 * header can be accounted for inside the driver layer. Whatever
 556 * part of the mac header exceeds hard_header_len is counted as
 557 * payload.
 558 */
559 dev->mtu = IEEE802154_MTU - IEEE802154_FCS_LEN -
560 dev->hard_header_len;
477 dev->tx_queue_len = 300; 561 dev->tx_queue_len = 300;
478 dev->flags = IFF_NOARP | IFF_BROADCAST; 562 dev->flags = IFF_NOARP | IFF_BROADCAST;
479} 563}
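Aside: to make the comment above concrete, the 802.15.4 PHY limit is 127 bytes (aMaxPHYPacketSize) and the FCS is 2 bytes, so the MTU is whatever the PHY can carry minus FCS and the minimal hard header. The sketch below only evaluates that formula; the MIN_HEADER_LEN value is an assumption derived from the "minimum header minus one" rule in the comment (fc 2 bytes + seq 1 byte), not a quote of the kernel constant:

#include <stdio.h>

#define IEEE802154_MTU        127 /* aMaxPHYPacketSize */
#define IEEE802154_FCS_LEN      2
/* assumed: fc(2) + seq(1) = 3-byte minimal header, minus one so
 * AF_PACKET (which insists on >= 1 byte of payload) can emit it */
#define MIN_HEADER_LEN          3

int main(void)
{
        int hard_header_len = MIN_HEADER_LEN - 1;
        int mtu = IEEE802154_MTU - IEEE802154_FCS_LEN - hard_header_len;

        printf("hard_header_len=%d mtu=%d\n", hard_header_len, mtu);
        return 0;
}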
@@ -513,6 +597,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
513 sdata->dev->netdev_ops = &mac802154_wpan_ops; 597 sdata->dev->netdev_ops = &mac802154_wpan_ops;
514 sdata->dev->ml_priv = &mac802154_mlme_wpan; 598 sdata->dev->ml_priv = &mac802154_mlme_wpan;
515 wpan_dev->promiscuous_mode = false; 599 wpan_dev->promiscuous_mode = false;
600 wpan_dev->header_ops = &ieee802154_header_ops;
516 601
517 mutex_init(&sdata->sec_mtx); 602 mutex_init(&sdata->sec_mtx);
518 603
@@ -550,7 +635,8 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
550 if (!ndev) 635 if (!ndev)
551 return ERR_PTR(-ENOMEM); 636 return ERR_PTR(-ENOMEM);
552 637
553 ndev->needed_headroom = local->hw.extra_tx_headroom; 638 ndev->needed_headroom = local->hw.extra_tx_headroom +
639 IEEE802154_MAX_HEADER_LEN;
554 640
555 ret = dev_alloc_name(ndev, ndev->name); 641 ret = dev_alloc_name(ndev, ndev->name);
556 if (ret < 0) 642 if (ret < 0)
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 985e9394e2af..a13d02b7cee4 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -55,7 +55,7 @@ void mac802154_llsec_destroy(struct mac802154_llsec *sec)
55 55
56 msl = container_of(sl, struct mac802154_llsec_seclevel, level); 56 msl = container_of(sl, struct mac802154_llsec_seclevel, level);
57 list_del(&sl->list); 57 list_del(&sl->list);
58 kfree(msl); 58 kzfree(msl);
59 } 59 }
60 60
61 list_for_each_entry_safe(dev, dn, &sec->table.devices, list) { 61 list_for_each_entry_safe(dev, dn, &sec->table.devices, list) {
@@ -72,7 +72,7 @@ void mac802154_llsec_destroy(struct mac802154_llsec *sec)
72 mkey = container_of(key->key, struct mac802154_llsec_key, key); 72 mkey = container_of(key->key, struct mac802154_llsec_key, key);
73 list_del(&key->list); 73 list_del(&key->list);
74 llsec_key_put(mkey); 74 llsec_key_put(mkey);
75 kfree(key); 75 kzfree(key);
76 } 76 }
77} 77}
78 78
@@ -161,7 +161,7 @@ err_tfm:
161 if (key->tfm[i]) 161 if (key->tfm[i])
162 crypto_free_aead(key->tfm[i]); 162 crypto_free_aead(key->tfm[i]);
163 163
164 kfree(key); 164 kzfree(key);
165 return NULL; 165 return NULL;
166} 166}
167 167
@@ -176,7 +176,7 @@ static void llsec_key_release(struct kref *ref)
176 crypto_free_aead(key->tfm[i]); 176 crypto_free_aead(key->tfm[i]);
177 177
178 crypto_free_blkcipher(key->tfm0); 178 crypto_free_blkcipher(key->tfm0);
179 kfree(key); 179 kzfree(key);
180} 180}
181 181
182static struct mac802154_llsec_key* 182static struct mac802154_llsec_key*
@@ -267,7 +267,7 @@ int mac802154_llsec_key_add(struct mac802154_llsec *sec,
267 return 0; 267 return 0;
268 268
269fail: 269fail:
270 kfree(new); 270 kzfree(new);
271 return -ENOMEM; 271 return -ENOMEM;
272} 272}
273 273
@@ -347,10 +347,10 @@ static void llsec_dev_free(struct mac802154_llsec_device *dev)
347 devkey); 347 devkey);
348 348
349 list_del(&pos->list); 349 list_del(&pos->list);
350 kfree(devkey); 350 kzfree(devkey);
351 } 351 }
352 352
353 kfree(dev); 353 kzfree(dev);
354} 354}
355 355
356int mac802154_llsec_dev_add(struct mac802154_llsec *sec, 356int mac802154_llsec_dev_add(struct mac802154_llsec *sec,
@@ -401,6 +401,7 @@ int mac802154_llsec_dev_del(struct mac802154_llsec *sec, __le64 device_addr)
401 401
402 hash_del_rcu(&pos->bucket_s); 402 hash_del_rcu(&pos->bucket_s);
403 hash_del_rcu(&pos->bucket_hw); 403 hash_del_rcu(&pos->bucket_hw);
404 list_del_rcu(&pos->dev.list);
404 call_rcu(&pos->rcu, llsec_dev_free_rcu); 405 call_rcu(&pos->rcu, llsec_dev_free_rcu);
405 406
406 return 0; 407 return 0;
@@ -680,7 +681,7 @@ llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
680 681
681 rc = crypto_aead_encrypt(req); 682 rc = crypto_aead_encrypt(req);
682 683
683 kfree(req); 684 kzfree(req);
684 685
685 return rc; 686 return rc;
686} 687}
@@ -880,7 +881,7 @@ llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
880 881
881 rc = crypto_aead_decrypt(req); 882 rc = crypto_aead_decrypt(req);
882 883
883 kfree(req); 884 kzfree(req);
884 skb_trim(skb, skb->len - authlen); 885 skb_trim(skb, skb->len - authlen);
885 886
886 return rc; 887 return rc;
@@ -920,7 +921,7 @@ llsec_update_devkey_record(struct mac802154_llsec_device *dev,
920 if (!devkey) 921 if (!devkey)
921 list_add_rcu(&next->devkey.list, &dev->dev.keys); 922 list_add_rcu(&next->devkey.list, &dev->dev.keys);
922 else 923 else
923 kfree(next); 924 kzfree(next);
924 925
925 spin_unlock_bh(&dev->lock); 926 spin_unlock_bh(&dev->lock);
926 } 927 }
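Aside: the kfree-to-kzfree conversions above ensure key material is scrubbed before the memory returns to the allocator. A userspace analogue; explicit_bzero is the glibc/BSD primitive that, unlike a plain memset, the compiler may not optimize away, and it is shown here as the closest equivalent, not kzfree itself:

#define _DEFAULT_SOURCE
#include <stdlib.h>
#include <string.h>

/* free() a secret-bearing buffer after wiping it, kzfree-style */
static void zfree(void *p, size_t len)
{
        if (!p)
                return;
        explicit_bzero(p, len); /* survives dead-store elimination */
        free(p);
}

int main(void)
{
        char *key = malloc(16);

        if (!key)
                return 1;
        memcpy(key, "0123456789abcdef", 16);
        zfree(key, 16);
        return 0;
}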
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index d1c33c1d6b9b..42e96729dae6 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -87,6 +87,10 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
87 87
88 skb->dev = sdata->dev; 88 skb->dev = sdata->dev;
89 89
 90 /* TODO this should be moved after the netif_receive_skb call;
 91 * otherwise wireshark shows a mac header with security fields
 92 * while the payload is already decrypted.
 93 */
90 rc = mac802154_llsec_decrypt(&sdata->sec, skb); 94 rc = mac802154_llsec_decrypt(&sdata->sec, skb);
91 if (rc) { 95 if (rc) {
92 pr_debug("decryption failed: %i\n", rc); 96 pr_debug("decryption failed: %i\n", rc);
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 7ed439172f30..3827f359b336 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -77,9 +77,6 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
77 put_unaligned_le16(crc, skb_put(skb, 2)); 77 put_unaligned_le16(crc, skb_put(skb, 2));
78 } 78 }
79 79
80 if (skb_cow_head(skb, local->hw.extra_tx_headroom))
81 goto err_tx;
82
83 /* Stop the netif queue on each sub_if_data object. */ 80 /* Stop the netif queue on each sub_if_data object. */
84 ieee802154_stop_queue(&local->hw); 81 ieee802154_stop_queue(&local->hw);
85 82
@@ -121,6 +118,10 @@ ieee802154_subif_start_xmit(struct sk_buff *skb, struct net_device *dev)
121 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); 118 struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
122 int rc; 119 int rc;
123 120
 121 /* TODO we should move this into the wpan_dev_hard_header and
 122 * dev_hard_header functions; otherwise wireshark shows a mac header
 123 * with security fields while the payload is not yet encrypted.
 124 */
124 rc = mac802154_llsec_encrypt(&sdata->sec, skb); 125 rc = mac802154_llsec_encrypt(&sdata->sec, skb);
125 if (rc) { 126 if (rc) {
126 netdev_warn(dev, "encryption failed: %i\n", rc); 127 netdev_warn(dev, "encryption failed: %i\n", rc);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index bb185a28de98..c70d750148b6 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -19,36 +19,13 @@
19#include <net/ipv6.h> 19#include <net/ipv6.h>
20#include <net/addrconf.h> 20#include <net/addrconf.h>
21#endif 21#endif
22#include <net/nexthop.h>
22#include "internal.h" 23#include "internal.h"
23 24
24#define LABEL_NOT_SPECIFIED (1<<20) 25/* Maximum number of labels to look ahead at when selecting a path of
25#define MAX_NEW_LABELS 2 26 * a multipath route
26 27 */
27/* This maximum ha length copied from the definition of struct neighbour */ 28#define MAX_MP_SELECT_LABELS 4
28#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
29
30enum mpls_payload_type {
31 MPT_UNSPEC, /* IPv4 or IPv6 */
32 MPT_IPV4 = 4,
33 MPT_IPV6 = 6,
34
35 /* Other types not implemented:
36 * - Pseudo-wire with or without control word (RFC4385)
37 * - GAL (RFC5586)
38 */
39};
40
41struct mpls_route { /* next hop label forwarding entry */
42 struct net_device __rcu *rt_dev;
43 struct rcu_head rt_rcu;
44 u32 rt_label[MAX_NEW_LABELS];
45 u8 rt_protocol; /* routing protocol that set this entry */
46 u8 rt_payload_type;
47 u8 rt_labels;
48 u8 rt_via_alen;
49 u8 rt_via_table;
50 u8 rt_via[0];
51};
52 29
53static int zero = 0; 30static int zero = 0;
54static int label_limit = (1 << 20) - 1; 31static int label_limit = (1 << 20) - 1;
@@ -80,10 +57,24 @@ bool mpls_output_possible(const struct net_device *dev)
80} 57}
81EXPORT_SYMBOL_GPL(mpls_output_possible); 58EXPORT_SYMBOL_GPL(mpls_output_possible);
82 59
83static unsigned int mpls_rt_header_size(const struct mpls_route *rt) 60static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh)
61{
62 u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN);
63 int nh_index = nh - rt->rt_nh;
64
65 return nh0_via + rt->rt_max_alen * nh_index;
66}
67
68static const u8 *mpls_nh_via(const struct mpls_route *rt,
69 const struct mpls_nh *nh)
70{
71 return __mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh);
72}
73
74static unsigned int mpls_nh_header_size(const struct mpls_nh *nh)
84{ 75{
85 /* The size of the layer 2.5 labels to be added for this route */ 76 /* The size of the layer 2.5 labels to be added for this route */
86 return rt->rt_labels * sizeof(struct mpls_shim_hdr); 77 return nh->nh_labels * sizeof(struct mpls_shim_hdr);
87} 78}
88 79
89unsigned int mpls_dev_mtu(const struct net_device *dev) 80unsigned int mpls_dev_mtu(const struct net_device *dev)
@@ -105,6 +96,80 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
105} 96}
106EXPORT_SYMBOL_GPL(mpls_pkt_too_big); 97EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
107 98
99static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
100 struct sk_buff *skb, bool bos)
101{
102 struct mpls_entry_decoded dec;
103 struct mpls_shim_hdr *hdr;
104 bool eli_seen = false;
105 int label_index;
106 int nh_index = 0;
107 u32 hash = 0;
108
109 /* No need to look further into packet if there's only
110 * one path
111 */
112 if (rt->rt_nhn == 1)
113 goto out;
114
115 for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos;
116 label_index++) {
117 if (!pskb_may_pull(skb, sizeof(*hdr) * label_index))
118 break;
119
120 /* Read and decode the current label */
121 hdr = mpls_hdr(skb) + label_index;
122 dec = mpls_entry_decode(hdr);
123
124 /* RFC6790 - reserved labels MUST NOT be used as keys
125 * for the load-balancing function
126 */
127 if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) {
128 hash = jhash_1word(dec.label, hash);
129
130 /* The entropy label follows the entropy label
131 * indicator, so this means that the entropy
132 * label was just added to the hash - no need to
133 * go any deeper either in the label stack or in the
134 * payload
135 */
136 if (eli_seen)
137 break;
138 } else if (dec.label == MPLS_LABEL_ENTROPY) {
139 eli_seen = true;
140 }
141
142 bos = dec.bos;
143 if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index +
144 sizeof(struct iphdr))) {
145 const struct iphdr *v4hdr;
146
147 v4hdr = (const struct iphdr *)(mpls_hdr(skb) +
148 label_index);
149 if (v4hdr->version == 4) {
150 hash = jhash_3words(ntohl(v4hdr->saddr),
151 ntohl(v4hdr->daddr),
152 v4hdr->protocol, hash);
153 } else if (v4hdr->version == 6 &&
154 pskb_may_pull(skb, sizeof(*hdr) * label_index +
155 sizeof(struct ipv6hdr))) {
156 const struct ipv6hdr *v6hdr;
157
158 v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) +
159 label_index);
160
161 hash = __ipv6_addr_jhash(&v6hdr->saddr, hash);
162 hash = __ipv6_addr_jhash(&v6hdr->daddr, hash);
163 hash = jhash_1word(v6hdr->nexthdr, hash);
164 }
165 }
166 }
167
168 nh_index = hash % rt->rt_nhn;
169out:
170 return &rt->rt_nh[nh_index];
171}
172
108static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, 173static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
109 struct mpls_entry_decoded dec) 174 struct mpls_entry_decoded dec)
110{ 175{
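Aside: mpls_select_multipath() above hashes up to MAX_MP_SELECT_LABELS unreserved labels (skipping reserved ones per RFC 6790 and stopping once the entropy label itself has been folded in), optionally mixes in the IPv4/IPv6 flow at the bottom of stack, and reduces the hash modulo the nexthop count. A compact sketch of the label-stack part, with a trivial mixer standing in for jhash_1word and the packet walk reduced to an array of decoded labels:

#include <stdint.h>
#include <stdio.h>

#define MPLS_LABEL_ENTROPY            7
#define MPLS_LABEL_FIRST_UNRESERVED  16
#define MAX_MP_SELECT_LABELS          4

/* toy stand-in for jhash_1word() */
static uint32_t mix(uint32_t v, uint32_t h)
{
        return h ^ (v + 0x9e3779b9 + (h << 6) + (h >> 2));
}

static int select_nh(const uint32_t *labels, int n, int nhn)
{
        uint32_t hash = 0;
        int eli_seen = 0;

        if (nhn == 1) /* no need to look at the packet at all */
                return 0;

        for (int i = 0; i < n && i < MAX_MP_SELECT_LABELS; i++) {
                if (labels[i] >= MPLS_LABEL_FIRST_UNRESERVED) {
                        hash = mix(labels[i], hash);
                        if (eli_seen) /* entropy label just hashed */
                                break;
                } else if (labels[i] == MPLS_LABEL_ENTROPY) {
                        eli_seen = 1;
                }
        }
        return hash % nhn;
}

int main(void)
{
        uint32_t stack[] = { 100, 7, 4242 }; /* label, ELI, entropy */

        printf("nexthop %d of 3\n", select_nh(stack, 3, 3));
        return 0;
}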
@@ -159,6 +224,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
159 struct net *net = dev_net(dev); 224 struct net *net = dev_net(dev);
160 struct mpls_shim_hdr *hdr; 225 struct mpls_shim_hdr *hdr;
161 struct mpls_route *rt; 226 struct mpls_route *rt;
227 struct mpls_nh *nh;
162 struct mpls_entry_decoded dec; 228 struct mpls_entry_decoded dec;
163 struct net_device *out_dev; 229 struct net_device *out_dev;
164 struct mpls_dev *mdev; 230 struct mpls_dev *mdev;
@@ -196,8 +262,12 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
196 if (!rt) 262 if (!rt)
197 goto drop; 263 goto drop;
198 264
265 nh = mpls_select_multipath(rt, skb, dec.bos);
266 if (!nh)
267 goto drop;
268
199 /* Find the output device */ 269 /* Find the output device */
200 out_dev = rcu_dereference(rt->rt_dev); 270 out_dev = rcu_dereference(nh->nh_dev);
201 if (!mpls_output_possible(out_dev)) 271 if (!mpls_output_possible(out_dev))
202 goto drop; 272 goto drop;
203 273
@@ -212,7 +282,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
212 dec.ttl -= 1; 282 dec.ttl -= 1;
213 283
214 /* Verify the destination can hold the packet */ 284 /* Verify the destination can hold the packet */
215 new_header_size = mpls_rt_header_size(rt); 285 new_header_size = mpls_nh_header_size(nh);
216 mtu = mpls_dev_mtu(out_dev); 286 mtu = mpls_dev_mtu(out_dev);
217 if (mpls_pkt_too_big(skb, mtu - new_header_size)) 287 if (mpls_pkt_too_big(skb, mtu - new_header_size))
218 goto drop; 288 goto drop;
@@ -240,13 +310,14 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
240 /* Push the new labels */ 310 /* Push the new labels */
241 hdr = mpls_hdr(skb); 311 hdr = mpls_hdr(skb);
242 bos = dec.bos; 312 bos = dec.bos;
243 for (i = rt->rt_labels - 1; i >= 0; i--) { 313 for (i = nh->nh_labels - 1; i >= 0; i--) {
244 hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos); 314 hdr[i] = mpls_entry_encode(nh->nh_label[i],
315 dec.ttl, 0, bos);
245 bos = false; 316 bos = false;
246 } 317 }
247 } 318 }
248 319
249 err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb); 320 err = neigh_xmit(nh->nh_via_table, out_dev, mpls_nh_via(rt, nh), skb);
250 if (err) 321 if (err)
251 net_dbg_ratelimited("%s: packet transmission failed: %d\n", 322 net_dbg_ratelimited("%s: packet transmission failed: %d\n",
252 __func__, err); 323 __func__, err);
@@ -270,24 +341,33 @@ static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
270struct mpls_route_config { 341struct mpls_route_config {
271 u32 rc_protocol; 342 u32 rc_protocol;
272 u32 rc_ifindex; 343 u32 rc_ifindex;
273 u16 rc_via_table; 344 u8 rc_via_table;
274 u16 rc_via_alen; 345 u8 rc_via_alen;
275 u8 rc_via[MAX_VIA_ALEN]; 346 u8 rc_via[MAX_VIA_ALEN];
276 u32 rc_label; 347 u32 rc_label;
277 u32 rc_output_labels; 348 u8 rc_output_labels;
278 u32 rc_output_label[MAX_NEW_LABELS]; 349 u32 rc_output_label[MAX_NEW_LABELS];
279 u32 rc_nlflags; 350 u32 rc_nlflags;
280 enum mpls_payload_type rc_payload_type; 351 enum mpls_payload_type rc_payload_type;
281 struct nl_info rc_nlinfo; 352 struct nl_info rc_nlinfo;
353 struct rtnexthop *rc_mp;
354 int rc_mp_len;
282}; 355};
283 356
284static struct mpls_route *mpls_rt_alloc(size_t alen) 357static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen)
285{ 358{
359 u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN);
286 struct mpls_route *rt; 360 struct mpls_route *rt;
287 361
288 rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL); 362 rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh),
289 if (rt) 363 VIA_ALEN_ALIGN) +
290 rt->rt_via_alen = alen; 364 num_nh * max_alen_aligned,
365 GFP_KERNEL);
366 if (rt) {
367 rt->rt_nhn = num_nh;
368 rt->rt_max_alen = max_alen_aligned;
369 }
370
291 return rt; 371 return rt;
292} 372}
293 373
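Aside: mpls_rt_alloc() now lays out the route, the nexthop array, and the per-nexthop via storage in a single allocation. The via block starts at the first VIA_ALEN_ALIGN boundary after rt_nh[rt_nhn], and each nexthop's via sits rt_max_alen bytes further along, which is exactly the arithmetic __mpls_nh_via() recomputes. A sketch of the size/offset math with made-up structure sizes:

#include <stddef.h>
#include <stdio.h>

#define ALIGN(x, a)    (((x) + (a) - 1) & ~((size_t)(a) - 1))
#define VIA_ALEN_ALIGN sizeof(unsigned long)

int main(void)
{
        size_t rt_size = 24, nh_size = 32; /* illustrative sizes */
        int num_nh = 3;
        size_t max_alen = 6;               /* e.g. an ethernet via */
        size_t max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN);

        /* one allocation: route + nexthops, padded, then via block */
        size_t via0 = ALIGN(rt_size + num_nh * nh_size, VIA_ALEN_ALIGN);
        size_t total = via0 + num_nh * max_alen_aligned;

        printf("alloc %zu bytes\n", total);
        for (int i = 0; i < num_nh; i++)
                printf("nh %d via at offset %zu\n",
                       i, via0 + i * max_alen_aligned);
        return 0;
}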
@@ -312,25 +392,22 @@ static void mpls_notify_route(struct net *net, unsigned index,
312} 392}
313 393
314static void mpls_route_update(struct net *net, unsigned index, 394static void mpls_route_update(struct net *net, unsigned index,
315 struct net_device *dev, struct mpls_route *new, 395 struct mpls_route *new,
316 const struct nl_info *info) 396 const struct nl_info *info)
317{ 397{
318 struct mpls_route __rcu **platform_label; 398 struct mpls_route __rcu **platform_label;
319 struct mpls_route *rt, *old = NULL; 399 struct mpls_route *rt;
320 400
321 ASSERT_RTNL(); 401 ASSERT_RTNL();
322 402
323 platform_label = rtnl_dereference(net->mpls.platform_label); 403 platform_label = rtnl_dereference(net->mpls.platform_label);
324 rt = rtnl_dereference(platform_label[index]); 404 rt = rtnl_dereference(platform_label[index]);
325 if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) { 405 rcu_assign_pointer(platform_label[index], new);
326 rcu_assign_pointer(platform_label[index], new);
327 old = rt;
328 }
329 406
330 mpls_notify_route(net, index, old, new, info); 407 mpls_notify_route(net, index, rt, new, info);
331 408
332 /* If we removed a route free it now */ 409 /* If we removed a route free it now */
333 mpls_rt_free(old); 410 mpls_rt_free(rt);
334} 411}
335 412
336static unsigned find_free_label(struct net *net) 413static unsigned find_free_label(struct net *net)
@@ -350,7 +427,8 @@ static unsigned find_free_label(struct net *net)
350} 427}
351 428
352#if IS_ENABLED(CONFIG_INET) 429#if IS_ENABLED(CONFIG_INET)
353static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) 430static struct net_device *inet_fib_lookup_dev(struct net *net,
431 const void *addr)
354{ 432{
355 struct net_device *dev; 433 struct net_device *dev;
356 struct rtable *rt; 434 struct rtable *rt;
@@ -369,14 +447,16 @@ static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr)
369 return dev; 447 return dev;
370} 448}
371#else 449#else
372static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) 450static struct net_device *inet_fib_lookup_dev(struct net *net,
451 const void *addr)
373{ 452{
374 return ERR_PTR(-EAFNOSUPPORT); 453 return ERR_PTR(-EAFNOSUPPORT);
375} 454}
376#endif 455#endif
377 456
378#if IS_ENABLED(CONFIG_IPV6) 457#if IS_ENABLED(CONFIG_IPV6)
379static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) 458static struct net_device *inet6_fib_lookup_dev(struct net *net,
459 const void *addr)
380{ 460{
381 struct net_device *dev; 461 struct net_device *dev;
382 struct dst_entry *dst; 462 struct dst_entry *dst;
@@ -399,47 +479,234 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
399 return dev; 479 return dev;
400} 480}
401#else 481#else
402static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) 482static struct net_device *inet6_fib_lookup_dev(struct net *net,
483 const void *addr)
403{ 484{
404 return ERR_PTR(-EAFNOSUPPORT); 485 return ERR_PTR(-EAFNOSUPPORT);
405} 486}
406#endif 487#endif
407 488
408static struct net_device *find_outdev(struct net *net, 489static struct net_device *find_outdev(struct net *net,
409 struct mpls_route_config *cfg) 490 struct mpls_route *rt,
491 struct mpls_nh *nh, int oif)
410{ 492{
411 struct net_device *dev = NULL; 493 struct net_device *dev = NULL;
412 494
413 if (!cfg->rc_ifindex) { 495 if (!oif) {
414 switch (cfg->rc_via_table) { 496 switch (nh->nh_via_table) {
415 case NEIGH_ARP_TABLE: 497 case NEIGH_ARP_TABLE:
416 dev = inet_fib_lookup_dev(net, cfg->rc_via); 498 dev = inet_fib_lookup_dev(net, mpls_nh_via(rt, nh));
417 break; 499 break;
418 case NEIGH_ND_TABLE: 500 case NEIGH_ND_TABLE:
419 dev = inet6_fib_lookup_dev(net, cfg->rc_via); 501 dev = inet6_fib_lookup_dev(net, mpls_nh_via(rt, nh));
420 break; 502 break;
421 case NEIGH_LINK_TABLE: 503 case NEIGH_LINK_TABLE:
422 break; 504 break;
423 } 505 }
424 } else { 506 } else {
425 dev = dev_get_by_index(net, cfg->rc_ifindex); 507 dev = dev_get_by_index(net, oif);
426 } 508 }
427 509
428 if (!dev) 510 if (!dev)
429 return ERR_PTR(-ENODEV); 511 return ERR_PTR(-ENODEV);
430 512
 513 /* The caller is holding rtnl anyway, so release the dev reference */
514 dev_put(dev);
515
431 return dev; 516 return dev;
432} 517}
433 518
519static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
520 struct mpls_nh *nh, int oif)
521{
522 struct net_device *dev = NULL;
523 int err = -ENODEV;
524
525 dev = find_outdev(net, rt, nh, oif);
526 if (IS_ERR(dev)) {
527 err = PTR_ERR(dev);
528 dev = NULL;
529 goto errout;
530 }
531
532 /* Ensure this is a supported device */
533 err = -EINVAL;
534 if (!mpls_dev_get(dev))
535 goto errout;
536
537 RCU_INIT_POINTER(nh->nh_dev, dev);
538
539 return 0;
540
541errout:
542 return err;
543}
544
545static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
546 struct mpls_route *rt)
547{
548 struct net *net = cfg->rc_nlinfo.nl_net;
549 struct mpls_nh *nh = rt->rt_nh;
550 int err;
551 int i;
552
553 if (!nh)
554 return -ENOMEM;
555
556 err = -EINVAL;
557 /* Ensure only a supported number of labels are present */
558 if (cfg->rc_output_labels > MAX_NEW_LABELS)
559 goto errout;
560
561 nh->nh_labels = cfg->rc_output_labels;
562 for (i = 0; i < nh->nh_labels; i++)
563 nh->nh_label[i] = cfg->rc_output_label[i];
564
565 nh->nh_via_table = cfg->rc_via_table;
566 memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen);
567 nh->nh_via_alen = cfg->rc_via_alen;
568
569 err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex);
570 if (err)
571 goto errout;
572
573 return 0;
574
575errout:
576 return err;
577}
578
579static int mpls_nh_build(struct net *net, struct mpls_route *rt,
580 struct mpls_nh *nh, int oif,
581 struct nlattr *via, struct nlattr *newdst)
582{
583 int err = -ENOMEM;
584
585 if (!nh)
586 goto errout;
587
588 if (newdst) {
589 err = nla_get_labels(newdst, MAX_NEW_LABELS,
590 &nh->nh_labels, nh->nh_label);
591 if (err)
592 goto errout;
593 }
594
595 err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
596 __mpls_nh_via(rt, nh));
597 if (err)
598 goto errout;
599
600 err = mpls_nh_assign_dev(net, rt, nh, oif);
601 if (err)
602 goto errout;
603
604 return 0;
605
606errout:
607 return err;
608}
609
610static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
611 u8 cfg_via_alen, u8 *max_via_alen)
612{
613 int nhs = 0;
614 int remaining = len;
615
616 if (!rtnh) {
617 *max_via_alen = cfg_via_alen;
618 return 1;
619 }
620
621 *max_via_alen = 0;
622
623 while (rtnh_ok(rtnh, remaining)) {
624 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
625 int attrlen;
626
627 attrlen = rtnh_attrlen(rtnh);
628 nla = nla_find(attrs, attrlen, RTA_VIA);
629 if (nla && nla_len(nla) >=
630 offsetof(struct rtvia, rtvia_addr)) {
631 int via_alen = nla_len(nla) -
632 offsetof(struct rtvia, rtvia_addr);
633
634 if (via_alen <= MAX_VIA_ALEN)
635 *max_via_alen = max_t(u16, *max_via_alen,
636 via_alen);
637 }
638
639 nhs++;
640 rtnh = rtnh_next(rtnh, &remaining);
641 }
642
643 /* leftover implies invalid nexthop configuration, discard it */
644 return remaining > 0 ? 0 : nhs;
645}
646
647static int mpls_nh_build_multi(struct mpls_route_config *cfg,
648 struct mpls_route *rt)
649{
650 struct rtnexthop *rtnh = cfg->rc_mp;
651 struct nlattr *nla_via, *nla_newdst;
652 int remaining = cfg->rc_mp_len;
653 int nhs = 0;
654 int err = 0;
655
656 change_nexthops(rt) {
657 int attrlen;
658
659 nla_via = NULL;
660 nla_newdst = NULL;
661
662 err = -EINVAL;
663 if (!rtnh_ok(rtnh, remaining))
664 goto errout;
665
666 /* neither weighted multipath nor any flags
667 * are supported
668 */
669 if (rtnh->rtnh_hops || rtnh->rtnh_flags)
670 goto errout;
671
672 attrlen = rtnh_attrlen(rtnh);
673 if (attrlen > 0) {
674 struct nlattr *attrs = rtnh_attrs(rtnh);
675
676 nla_via = nla_find(attrs, attrlen, RTA_VIA);
677 nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST);
678 }
679
680 if (!nla_via)
681 goto errout;
682
683 err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
684 rtnh->rtnh_ifindex, nla_via,
685 nla_newdst);
686 if (err)
687 goto errout;
688
689 rtnh = rtnh_next(rtnh, &remaining);
690 nhs++;
691 } endfor_nexthops(rt);
692
693 rt->rt_nhn = nhs;
694
695 return 0;
696
697errout:
698 return err;
699}
700
434static int mpls_route_add(struct mpls_route_config *cfg) 701static int mpls_route_add(struct mpls_route_config *cfg)
435{ 702{
436 struct mpls_route __rcu **platform_label; 703 struct mpls_route __rcu **platform_label;
437 struct net *net = cfg->rc_nlinfo.nl_net; 704 struct net *net = cfg->rc_nlinfo.nl_net;
438 struct net_device *dev = NULL;
439 struct mpls_route *rt, *old; 705 struct mpls_route *rt, *old;
440 unsigned index;
441 int i;
442 int err = -EINVAL; 706 int err = -EINVAL;
707 u8 max_via_alen;
708 unsigned index;
709 int nhs;
443 710
444 index = cfg->rc_label; 711 index = cfg->rc_label;
445 712
@@ -457,27 +724,6 @@ static int mpls_route_add(struct mpls_route_config *cfg)
457 if (index >= net->mpls.platform_labels) 724 if (index >= net->mpls.platform_labels)
458 goto errout; 725 goto errout;
459 726
460 /* Ensure only a supported number of labels are present */
461 if (cfg->rc_output_labels > MAX_NEW_LABELS)
462 goto errout;
463
464 dev = find_outdev(net, cfg);
465 if (IS_ERR(dev)) {
466 err = PTR_ERR(dev);
467 dev = NULL;
468 goto errout;
469 }
470
471 /* Ensure this is a supported device */
472 err = -EINVAL;
473 if (!mpls_dev_get(dev))
474 goto errout;
475
476 err = -EINVAL;
477 if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
478 (dev->addr_len != cfg->rc_via_alen))
479 goto errout;
480
481 /* Append makes no sense with mpls */ 727 /* Append makes no sense with mpls */
482 err = -EOPNOTSUPP; 728 err = -EOPNOTSUPP;
483 if (cfg->rc_nlflags & NLM_F_APPEND) 729 if (cfg->rc_nlflags & NLM_F_APPEND)
@@ -497,28 +743,34 @@ static int mpls_route_add(struct mpls_route_config *cfg)
497 if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old) 743 if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old)
498 goto errout; 744 goto errout;
499 745
746 err = -EINVAL;
747 nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
748 cfg->rc_via_alen, &max_via_alen);
749 if (nhs == 0)
750 goto errout;
751
500 err = -ENOMEM; 752 err = -ENOMEM;
501 rt = mpls_rt_alloc(cfg->rc_via_alen); 753 rt = mpls_rt_alloc(nhs, max_via_alen);
502 if (!rt) 754 if (!rt)
503 goto errout; 755 goto errout;
504 756
505 rt->rt_labels = cfg->rc_output_labels;
506 for (i = 0; i < rt->rt_labels; i++)
507 rt->rt_label[i] = cfg->rc_output_label[i];
508 rt->rt_protocol = cfg->rc_protocol; 757 rt->rt_protocol = cfg->rc_protocol;
509 RCU_INIT_POINTER(rt->rt_dev, dev);
510 rt->rt_payload_type = cfg->rc_payload_type; 758 rt->rt_payload_type = cfg->rc_payload_type;
511 rt->rt_via_table = cfg->rc_via_table;
512 memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen);
513 759
514 mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo); 760 if (cfg->rc_mp)
761 err = mpls_nh_build_multi(cfg, rt);
762 else
763 err = mpls_nh_build_from_cfg(cfg, rt);
764 if (err)
765 goto freert;
766
767 mpls_route_update(net, index, rt, &cfg->rc_nlinfo);
515 768
516 dev_put(dev);
517 return 0; 769 return 0;
518 770
771freert:
772 mpls_rt_free(rt);
519errout: 773errout:
520 if (dev)
521 dev_put(dev);
522 return err; 774 return err;
523} 775}
524 776
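Aside: both mpls_count_nexthops() and mpls_nh_build_multi() walk RTA_MULTIPATH the standard rtnetlink way — treat the payload as packed struct rtnexthop records, each rtnh_len bytes long, and stop when the remaining byte count can no longer hold a valid record; any leftover bytes mean a malformed configuration. A sketch of that walk over a hand-built buffer; the structure is simplified and rtnh_ok/rtnh_next are re-implemented here rather than taken from the uapi header:

#include <stdio.h>

struct rtnexthop {
        unsigned short rtnh_len;
        unsigned char  rtnh_flags;
        unsigned char  rtnh_hops;
        int            rtnh_ifindex;
};

static int rtnh_ok(const struct rtnexthop *rtnh, int remaining)
{
        return remaining >= (int)sizeof(*rtnh) &&
               rtnh->rtnh_len >= sizeof(*rtnh) &&
               (int)rtnh->rtnh_len <= remaining;
}

static const struct rtnexthop *rtnh_next(const struct rtnexthop *rtnh,
                                         int *remaining)
{
        *remaining -= rtnh->rtnh_len;
        return (const struct rtnexthop *)((const char *)rtnh +
                                          rtnh->rtnh_len);
}

int main(void)
{
        struct rtnexthop recs[2] = {
                { sizeof(struct rtnexthop), 0, 0, 2 },
                { sizeof(struct rtnexthop), 0, 0, 3 },
        };
        int remaining = sizeof(recs), nhs = 0;
        const struct rtnexthop *p = recs;

        while (rtnh_ok(p, remaining)) {
                nhs++;
                p = rtnh_next(p, &remaining);
        }
        printf("%d nexthops, %d bytes left\n", nhs, remaining);
        return 0;
}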
@@ -538,7 +790,7 @@ static int mpls_route_del(struct mpls_route_config *cfg)
538 if (index >= net->mpls.platform_labels) 790 if (index >= net->mpls.platform_labels)
539 goto errout; 791 goto errout;
540 792
541 mpls_route_update(net, index, NULL, NULL, &cfg->rc_nlinfo); 793 mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);
542 794
543 err = 0; 795 err = 0;
544errout: 796errout:
@@ -635,9 +887,11 @@ static void mpls_ifdown(struct net_device *dev)
635 struct mpls_route *rt = rtnl_dereference(platform_label[index]); 887 struct mpls_route *rt = rtnl_dereference(platform_label[index]);
636 if (!rt) 888 if (!rt)
637 continue; 889 continue;
638 if (rtnl_dereference(rt->rt_dev) != dev) 890 for_nexthops(rt) {
639 continue; 891 if (rtnl_dereference(nh->nh_dev) != dev)
640 rt->rt_dev = NULL; 892 continue;
893 nh->nh_dev = NULL;
894 } endfor_nexthops(rt);
641 } 895 }
642 896
643 mdev = mpls_dev_get(dev); 897 mdev = mpls_dev_get(dev);
@@ -736,7 +990,7 @@ int nla_put_labels(struct sk_buff *skb, int attrtype,
736EXPORT_SYMBOL_GPL(nla_put_labels); 990EXPORT_SYMBOL_GPL(nla_put_labels);
737 991
738int nla_get_labels(const struct nlattr *nla, 992int nla_get_labels(const struct nlattr *nla,
739 u32 max_labels, u32 *labels, u32 label[]) 993 u32 max_labels, u8 *labels, u32 label[])
740{ 994{
741 unsigned len = nla_len(nla); 995 unsigned len = nla_len(nla);
742 unsigned nla_labels; 996 unsigned nla_labels;
@@ -781,6 +1035,48 @@ int nla_get_labels(const struct nlattr *nla,
781} 1035}
782EXPORT_SYMBOL_GPL(nla_get_labels); 1036EXPORT_SYMBOL_GPL(nla_get_labels);
783 1037
1038int nla_get_via(const struct nlattr *nla, u8 *via_alen,
1039 u8 *via_table, u8 via_addr[])
1040{
1041 struct rtvia *via = nla_data(nla);
1042 int err = -EINVAL;
1043 int alen;
1044
1045 if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
1046 goto errout;
1047 alen = nla_len(nla) -
1048 offsetof(struct rtvia, rtvia_addr);
1049 if (alen > MAX_VIA_ALEN)
1050 goto errout;
1051
1052 /* Validate the address family */
1053 switch (via->rtvia_family) {
1054 case AF_PACKET:
1055 *via_table = NEIGH_LINK_TABLE;
1056 break;
1057 case AF_INET:
1058 *via_table = NEIGH_ARP_TABLE;
1059 if (alen != 4)
1060 goto errout;
1061 break;
1062 case AF_INET6:
1063 *via_table = NEIGH_ND_TABLE;
1064 if (alen != 16)
1065 goto errout;
1066 break;
1067 default:
1068 /* Unsupported address family */
1069 goto errout;
1070 }
1071
1072 memcpy(via_addr, via->rtvia_addr, alen);
1073 *via_alen = alen;
1074 err = 0;
1075
1076errout:
1077 return err;
1078}
1079
784static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, 1080static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
785 struct mpls_route_config *cfg) 1081 struct mpls_route_config *cfg)
786{ 1082{
@@ -844,7 +1140,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
844 break; 1140 break;
845 case RTA_DST: 1141 case RTA_DST:
846 { 1142 {
847 u32 label_count; 1143 u8 label_count;
848 if (nla_get_labels(nla, 1, &label_count, 1144 if (nla_get_labels(nla, 1, &label_count,
849 &cfg->rc_label)) 1145 &cfg->rc_label))
850 goto errout; 1146 goto errout;
@@ -857,35 +1153,15 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
857 } 1153 }
858 case RTA_VIA: 1154 case RTA_VIA:
859 { 1155 {
860 struct rtvia *via = nla_data(nla); 1156 if (nla_get_via(nla, &cfg->rc_via_alen,
861 if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) 1157 &cfg->rc_via_table, cfg->rc_via))
862 goto errout;
863 cfg->rc_via_alen = nla_len(nla) -
864 offsetof(struct rtvia, rtvia_addr);
865 if (cfg->rc_via_alen > MAX_VIA_ALEN)
866 goto errout; 1158 goto errout;
867 1159 break;
868 /* Validate the address family */ 1160 }
869 switch(via->rtvia_family) { 1161 case RTA_MULTIPATH:
870 case AF_PACKET: 1162 {
871 cfg->rc_via_table = NEIGH_LINK_TABLE; 1163 cfg->rc_mp = nla_data(nla);
872 break; 1164 cfg->rc_mp_len = nla_len(nla);
873 case AF_INET:
874 cfg->rc_via_table = NEIGH_ARP_TABLE;
875 if (cfg->rc_via_alen != 4)
876 goto errout;
877 break;
878 case AF_INET6:
879 cfg->rc_via_table = NEIGH_ND_TABLE;
880 if (cfg->rc_via_alen != 16)
881 goto errout;
882 break;
883 default:
884 /* Unsupported address family */
885 goto errout;
886 }
887
888 memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen);
889 break; 1165 break;
890 } 1166 }
891 default: 1167 default:
@@ -946,16 +1222,52 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
946 rtm->rtm_type = RTN_UNICAST; 1222 rtm->rtm_type = RTN_UNICAST;
947 rtm->rtm_flags = 0; 1223 rtm->rtm_flags = 0;
948 1224
949 if (rt->rt_labels &&
950 nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label))
951 goto nla_put_failure;
952 if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen))
953 goto nla_put_failure;
954 dev = rtnl_dereference(rt->rt_dev);
955 if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
956 goto nla_put_failure;
957 if (nla_put_labels(skb, RTA_DST, 1, &label)) 1225 if (nla_put_labels(skb, RTA_DST, 1, &label))
958 goto nla_put_failure; 1226 goto nla_put_failure;
1227 if (rt->rt_nhn == 1) {
1228 const struct mpls_nh *nh = rt->rt_nh;
1229
1230 if (nh->nh_labels &&
1231 nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
1232 nh->nh_label))
1233 goto nla_put_failure;
1234 if (nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
1235 nh->nh_via_alen))
1236 goto nla_put_failure;
1237 dev = rtnl_dereference(nh->nh_dev);
1238 if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
1239 goto nla_put_failure;
1240 } else {
1241 struct rtnexthop *rtnh;
1242 struct nlattr *mp;
1243
1244 mp = nla_nest_start(skb, RTA_MULTIPATH);
1245 if (!mp)
1246 goto nla_put_failure;
1247
1248 for_nexthops(rt) {
1249 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1250 if (!rtnh)
1251 goto nla_put_failure;
1252
1253 dev = rtnl_dereference(nh->nh_dev);
1254 if (dev)
1255 rtnh->rtnh_ifindex = dev->ifindex;
1256 if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST,
1257 nh->nh_labels,
1258 nh->nh_label))
1259 goto nla_put_failure;
1260 if (nla_put_via(skb, nh->nh_via_table,
1261 mpls_nh_via(rt, nh),
1262 nh->nh_via_alen))
1263 goto nla_put_failure;
1264
1265 /* length of rtnetlink header + attributes */
1266 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
1267 } endfor_nexthops(rt);
1268
1269 nla_nest_end(skb, mp);
1270 }
959 1271
960 nlmsg_end(skb, nlh); 1272 nlmsg_end(skb, nlh);
961 return 0; 1273 return 0;
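The dump side above mirrors that layout with a reserve-then-backfill idiom: nla_nest_start() opens the RTA_MULTIPATH container, each nexthop reserves a bare struct rtnexthop via nla_reserve_nohdr(), appends its attributes, then back-fills rtnh_len from the current message position. Condensed (error handling elided; same calls as in the hunk):

	mp = nla_nest_start(skb, RTA_MULTIPATH);      /* open container */
	for_nexthops(rt) {
		rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
		/* ...emit RTA_NEWDST / RTA_VIA for this nexthop... */
		rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
	} endfor_nexthops(rt);
	nla_nest_end(skb, mp);                        /* patch nest size */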
@@ -1000,12 +1312,30 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
1000{ 1312{
1001 size_t payload = 1313 size_t payload =
1002 NLMSG_ALIGN(sizeof(struct rtmsg)) 1314 NLMSG_ALIGN(sizeof(struct rtmsg))
1003 + nla_total_size(2 + rt->rt_via_alen) /* RTA_VIA */
1004 + nla_total_size(4); /* RTA_DST */ 1315 + nla_total_size(4); /* RTA_DST */
1005 if (rt->rt_labels) /* RTA_NEWDST */ 1316
1006 payload += nla_total_size(rt->rt_labels * 4); 1317 if (rt->rt_nhn == 1) {
1007 if (rt->rt_dev) /* RTA_OIF */ 1318 struct mpls_nh *nh = rt->rt_nh;
1008 payload += nla_total_size(4); 1319
1320 if (nh->nh_dev)
1321 payload += nla_total_size(4); /* RTA_OIF */
1322 payload += nla_total_size(2 + nh->nh_via_alen); /* RTA_VIA */
1323 if (nh->nh_labels) /* RTA_NEWDST */
1324 payload += nla_total_size(nh->nh_labels * 4);
1325 } else {
1326 /* each nexthop is packed in an attribute */
1327 size_t nhsize = 0;
1328
1329 for_nexthops(rt) {
1330 nhsize += nla_total_size(sizeof(struct rtnexthop));
1331 nhsize += nla_total_size(2 + nh->nh_via_alen);
1332 if (nh->nh_labels)
1333 nhsize += nla_total_size(nh->nh_labels * 4);
1334 } endfor_nexthops(rt);
1335 /* nested attribute */
1336 payload += nla_total_size(nhsize);
1337 }
1338
1009 return payload; 1339 return payload;
1010} 1340}
1011 1341
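As a worked example of the sizing above, assuming the usual 4-byte netlink attribute header with 4-byte alignment (nla_total_size(n) == NLA_ALIGN(4 + n)) and sizeof(struct rtmsg) == 12, a single-nexthop route with a 6-byte link-layer via and two labels comes to:

#include <stdio.h>

#define NLA_ALIGN_X(x)        (((x) + 3) & ~3)
#define NLA_TOTAL_SIZE_X(pay) NLA_ALIGN_X(4 + (pay))

int main(void)
{
	int payload = 12                        /* NLMSG_ALIGN(rtmsg)  */
		    + NLA_TOTAL_SIZE_X(4)       /* RTA_DST    ->  8    */
		    + NLA_TOTAL_SIZE_X(4)       /* RTA_OIF    ->  8    */
		    + NLA_TOTAL_SIZE_X(2 + 6)   /* RTA_VIA    -> 12    */
		    + NLA_TOTAL_SIZE_X(2 * 4);  /* RTA_NEWDST -> 12    */
	printf("%d\n", payload);                /* prints 52           */
	return 0;
}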
@@ -1057,25 +1387,29 @@ static int resize_platform_label_table(struct net *net, size_t limit)
1057 /* In case the predefined labels need to be populated */ 1387 /* In case the predefined labels need to be populated */
1058 if (limit > MPLS_LABEL_IPV4NULL) { 1388 if (limit > MPLS_LABEL_IPV4NULL) {
1059 struct net_device *lo = net->loopback_dev; 1389 struct net_device *lo = net->loopback_dev;
1060 rt0 = mpls_rt_alloc(lo->addr_len); 1390 rt0 = mpls_rt_alloc(1, lo->addr_len);
1061 if (!rt0) 1391 if (!rt0)
1062 goto nort0; 1392 goto nort0;
1063 RCU_INIT_POINTER(rt0->rt_dev, lo); 1393 RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
1064 rt0->rt_protocol = RTPROT_KERNEL; 1394 rt0->rt_protocol = RTPROT_KERNEL;
1065 rt0->rt_payload_type = MPT_IPV4; 1395 rt0->rt_payload_type = MPT_IPV4;
1066 rt0->rt_via_table = NEIGH_LINK_TABLE; 1396 rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
1067 memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); 1397 rt0->rt_nh->nh_via_alen = lo->addr_len;
1398 memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
1399 lo->addr_len);
1068 } 1400 }
1069 if (limit > MPLS_LABEL_IPV6NULL) { 1401 if (limit > MPLS_LABEL_IPV6NULL) {
1070 struct net_device *lo = net->loopback_dev; 1402 struct net_device *lo = net->loopback_dev;
1071 rt2 = mpls_rt_alloc(lo->addr_len); 1403 rt2 = mpls_rt_alloc(1, lo->addr_len);
1072 if (!rt2) 1404 if (!rt2)
1073 goto nort2; 1405 goto nort2;
1074 RCU_INIT_POINTER(rt2->rt_dev, lo); 1406 RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
1075 rt2->rt_protocol = RTPROT_KERNEL; 1407 rt2->rt_protocol = RTPROT_KERNEL;
1076 rt2->rt_payload_type = MPT_IPV6; 1408 rt2->rt_payload_type = MPT_IPV6;
1077 rt2->rt_via_table = NEIGH_LINK_TABLE; 1409 rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
1078 memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len); 1410 rt2->rt_nh->nh_via_alen = lo->addr_len;
1411 memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
1412 lo->addr_len);
1079 } 1413 }
1080 1414
1081 rtnl_lock(); 1415 rtnl_lock();
@@ -1085,7 +1419,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
1085 1419
1086 /* Free any labels beyond the new table */ 1420 /* Free any labels beyond the new table */
1087 for (index = limit; index < old_limit; index++) 1421 for (index = limit; index < old_limit; index++)
1088 mpls_route_update(net, index, NULL, NULL, NULL); 1422 mpls_route_update(net, index, NULL, NULL);
1089 1423
1090 /* Copy over the old labels */ 1424 /* Copy over the old labels */
1091 cp_size = size; 1425 cp_size = size;
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 2681a4ba6c37..bde52ce88c94 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -21,6 +21,76 @@ struct mpls_dev {
21 21
22struct sk_buff; 22struct sk_buff;
23 23
24#define LABEL_NOT_SPECIFIED (1 << 20)
25#define MAX_NEW_LABELS 2
26
27/* This maximum hardware address (ha) length is copied from the definition of struct neighbour */
28#define VIA_ALEN_ALIGN sizeof(unsigned long)
29#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, VIA_ALEN_ALIGN))
30
31enum mpls_payload_type {
32 MPT_UNSPEC, /* IPv4 or IPv6 */
33 MPT_IPV4 = 4,
34 MPT_IPV6 = 6,
35
36 /* Other types not implemented:
37 * - Pseudo-wire with or without control word (RFC4385)
38 * - GAL (RFC5586)
39 */
40};
41
42struct mpls_nh { /* next hop label forwarding entry */
43 struct net_device __rcu *nh_dev;
44 u32 nh_label[MAX_NEW_LABELS];
45 u8 nh_labels;
46 u8 nh_via_alen;
47 u8 nh_via_table;
48};
49
50/* The route, nexthops and vias are stored together in the same memory
51 * block:
52 *
53 * +----------------------+
54 * | mpls_route |
55 * +----------------------+
56 * | mpls_nh 0 |
57 * +----------------------+
58 * | ... |
59 * +----------------------+
60 * | mpls_nh n-1 |
61 * +----------------------+
62 * | alignment padding |
63 * +----------------------+
64 * | via[rt_max_alen] 0 |
65 * +----------------------+
66 * | ... |
67 * +----------------------+
68 * | via[rt_max_alen] n-1 |
69 * +----------------------+
70 */
71struct mpls_route { /* next hop label forwarding entry */
72 struct rcu_head rt_rcu;
73 u8 rt_protocol;
74 u8 rt_payload_type;
75 u8 rt_max_alen;
76 unsigned int rt_nhn;
77 struct mpls_nh rt_nh[0];
78};
79
80#define for_nexthops(rt) { \
81 int nhsel; struct mpls_nh *nh; \
82 for (nhsel = 0, nh = (rt)->rt_nh; \
83 nhsel < (rt)->rt_nhn; \
84 nh++, nhsel++)
85
86#define change_nexthops(rt) { \
87 int nhsel; struct mpls_nh *nh; \
88 for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh); \
89 nhsel < (rt)->rt_nhn; \
90 nh++, nhsel++)
91
92#define endfor_nexthops(rt) }
93
24static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) 94static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
25{ 95{
26 return (struct mpls_shim_hdr *)skb_network_header(skb); 96 return (struct mpls_shim_hdr *)skb_network_header(skb);
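Given that layout, a route with n nexthops comes from a single allocation. A hedged sketch of such an allocator (hypothetical helper, not the actual mpls_rt_alloc(); kernel-style C using the definitions above):

static struct mpls_route *sketch_rt_alloc(unsigned int num_nh, u8 max_alen)
{
	u8 alen = ALIGN(max_alen, VIA_ALEN_ALIGN);
	struct mpls_route *rt;

	/* header + nexthop array, padded, then one via slot per nexthop */
	rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh),
			   VIA_ALEN_ALIGN) + num_nh * alen,
		     GFP_KERNEL);
	if (rt) {
		rt->rt_nhn = num_nh;
		rt->rt_max_alen = alen;
	}
	return rt;
}

Callers then walk the embedded array with the helper macros, e.g. for_nexthops(rt) { /* use nh->nh_labels, nh->nh_via_table, ... */ } endfor_nexthops(rt);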
@@ -52,8 +122,10 @@ static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *
52 122
53int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, 123int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels,
54 const u32 label[]); 124 const u32 label[]);
55int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels, 125int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels,
56 u32 label[]); 126 u32 label[]);
127int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
128 u8 via[]);
57bool mpls_output_possible(const struct net_device *dev); 129bool mpls_output_possible(const struct net_device *dev);
58unsigned int mpls_dev_mtu(const struct net_device *dev); 130unsigned int mpls_dev_mtu(const struct net_device *dev);
59bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu); 131bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 21e70bc9af98..67591aef9cae 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -37,7 +37,7 @@ static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
37 return en->labels * sizeof(struct mpls_shim_hdr); 37 return en->labels * sizeof(struct mpls_shim_hdr);
38} 38}
39 39
40int mpls_output(struct sock *sk, struct sk_buff *skb) 40int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
41{ 41{
42 struct mpls_iptunnel_encap *tun_encap_info; 42 struct mpls_iptunnel_encap *tun_encap_info;
43 struct mpls_shim_hdr *hdr; 43 struct mpls_shim_hdr *hdr;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 3e1b4abf1897..4692782b5280 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -354,7 +354,7 @@ config NF_CT_NETLINK_HELPER
354 select NETFILTER_NETLINK 354 select NETFILTER_NETLINK
355 depends on NF_CT_NETLINK 355 depends on NF_CT_NETLINK
356 depends on NETFILTER_NETLINK_QUEUE 356 depends on NETFILTER_NETLINK_QUEUE
357 depends on NETFILTER_NETLINK_QUEUE_CT 357 depends on NETFILTER_NETLINK_GLUE_CT
358 depends on NETFILTER_ADVANCED 358 depends on NETFILTER_ADVANCED
359 help 359 help
360 This option enables the user-space connection tracking helpers 360 This option enables the user-space connection tracking helpers
@@ -362,13 +362,14 @@ config NF_CT_NETLINK_HELPER
362 362
363 If unsure, say `N'. 363 If unsure, say `N'.
364 364
365config NETFILTER_NETLINK_QUEUE_CT 365config NETFILTER_NETLINK_GLUE_CT
366 bool "NFQUEUE integration with Connection Tracking" 366 bool "NFQUEUE and NFLOG integration with Connection Tracking"
367 default n 367 default n
368 depends on NETFILTER_NETLINK_QUEUE 368 depends on (NETFILTER_NETLINK_QUEUE || NETFILTER_NETLINK_LOG) && NF_CT_NETLINK
369 help 369 help
370 If this option is enabled, NFQUEUE can include Connection Tracking 370 If this option is enabled, NFQUEUE and NFLOG can include
371 information together with the packet is the enqueued via NFNETLINK. 371 Connection Tracking information together with the packet when
372 it is enqueued via NFNETLINK.
372 373
373config NF_NAT 374config NF_NAT
374 tristate 375 tristate
@@ -868,7 +869,7 @@ config NETFILTER_XT_TARGET_TEE
868 depends on IPV6 || IPV6=n 869 depends on IPV6 || IPV6=n
869 depends on !NF_CONNTRACK || NF_CONNTRACK 870 depends on !NF_CONNTRACK || NF_CONNTRACK
870 select NF_DUP_IPV4 871 select NF_DUP_IPV4
871 select NF_DUP_IPV6 if IP6_NF_IPTABLES 872 select NF_DUP_IPV6 if IP6_NF_IPTABLES != n
872 ---help--- 873 ---help---
873 This option adds a "TEE" target with which a packet can be cloned and 874 This option adds a "TEE" target with which a packet can be cloned and
874 this clone be rerouted to another nexthop. 875 this clone be rerouted to another nexthop.
@@ -881,7 +882,7 @@ config NETFILTER_XT_TARGET_TPROXY
881 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n 882 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
882 depends on IP_NF_MANGLE 883 depends on IP_NF_MANGLE
883 select NF_DEFRAG_IPV4 884 select NF_DEFRAG_IPV4
884 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES 885 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
885 help 886 help
886 This option adds a `TPROXY' target, which is somewhat similar to 887 This option adds a `TPROXY' target, which is somewhat similar to
887 REDIRECT. It can only be used in the mangle table and is useful 888 REDIRECT. It can only be used in the mangle table and is useful
@@ -1374,7 +1375,7 @@ config NETFILTER_XT_MATCH_SOCKET
1374 depends on IPV6 || IPV6=n 1375 depends on IPV6 || IPV6=n
1375 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n 1376 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
1376 select NF_DEFRAG_IPV4 1377 select NF_DEFRAG_IPV4
1377 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES 1378 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
1378 help 1379 help
1379 This option adds a `socket' match, which can be used to match 1380 This option adds a `socket' match, which can be used to match
1380 packets for which a TCP or UDP socket lookup finds a valid socket. 1381 packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 70d026d46fe7..7638c36b498c 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -10,8 +10,6 @@ obj-$(CONFIG_NETFILTER) = netfilter.o
10 10
11obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o 11obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
12obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o 12obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
13nfnetlink_queue-y := nfnetlink_queue_core.o
14nfnetlink_queue-$(CONFIG_NETFILTER_NETLINK_QUEUE_CT) += nfnetlink_queue_ct.o
15obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o 13obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
16obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o 14obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
17 15
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 8e47f8113495..f39276d1c2d7 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -152,6 +152,8 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
152#endif 152#endif
153 synchronize_net(); 153 synchronize_net();
154 nf_queue_nf_hook_drop(net, &entry->ops); 154 nf_queue_nf_hook_drop(net, &entry->ops);
155 /* another CPU might still be processing an nfqueue verdict that used reg */
156 synchronize_net();
155 kfree(entry); 157 kfree(entry);
156} 158}
157EXPORT_SYMBOL(nf_unregister_net_hook); 159EXPORT_SYMBOL(nf_unregister_net_hook);
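A rough timeline (hypothetical interleaving) of the race the second synchronize_net() closes:

/*
 *  CPU 0: nf_unregister_net_hook()     CPU 1: nfqueue reinject path
 *  ---------------------------------   ------------------------------
 *  unlink entry from the hook list
 *  synchronize_net()                   rcu_read_lock()
 *  nf_queue_nf_hook_drop(...)          verdict still uses entry->ops
 *                                      rcu_read_unlock()
 *  synchronize_net()   <- waits out the late user of "reg"
 *  kfree(entry)        <- now safe
 */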
@@ -269,7 +271,7 @@ unsigned int nf_iterate(struct list_head *head,
269 /* Optimization: we don't need to hold module 271 /* Optimization: we don't need to hold module
270 reference here, since function can't sleep. --RR */ 272 reference here, since function can't sleep. --RR */
271repeat: 273repeat:
272 verdict = (*elemp)->hook(*elemp, skb, state); 274 verdict = (*elemp)->hook((*elemp)->priv, skb, state);
273 if (verdict != NF_ACCEPT) { 275 if (verdict != NF_ACCEPT) {
274#ifdef CONFIG_NETFILTER_DEBUG 276#ifdef CONFIG_NETFILTER_DEBUG
275 if (unlikely((verdict & NF_VERDICT_MASK) 277 if (unlikely((verdict & NF_VERDICT_MASK)
@@ -313,8 +315,6 @@ next_hook:
313 int err = nf_queue(skb, elem, state, 315 int err = nf_queue(skb, elem, state,
314 verdict >> NF_VERDICT_QBITS); 316 verdict >> NF_VERDICT_QBITS);
315 if (err < 0) { 317 if (err < 0) {
316 if (err == -ECANCELED)
317 goto next_hook;
318 if (err == -ESRCH && 318 if (err == -ESRCH &&
319 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) 319 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
320 goto next_hook; 320 goto next_hook;
@@ -348,6 +348,12 @@ int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
348} 348}
349EXPORT_SYMBOL(skb_make_writable); 349EXPORT_SYMBOL(skb_make_writable);
350 350
351/* This needs to be compiled in any case to avoid dependencies between the
352 * nfnetlink_queue code and nf_conntrack.
353 */
354struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
355EXPORT_SYMBOL_GPL(nfnl_ct_hook);
356
351#if IS_ENABLED(CONFIG_NF_CONNTRACK) 357#if IS_ENABLED(CONFIG_NF_CONNTRACK)
352/* This does not belong here, but locally generated errors need it if connection 358/* This does not belong here, but locally generated errors need it if connection
353 tracking is in use: without this, the connection may not be in the hash table, and hence 359 tracking is in use: without this, the connection may not be in the hash table, and hence
@@ -385,9 +391,6 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
385} 391}
386EXPORT_SYMBOL(nf_conntrack_destroy); 392EXPORT_SYMBOL(nf_conntrack_destroy);
387 393
388struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly;
389EXPORT_SYMBOL_GPL(nfq_ct_hook);
390
391/* Built-in default zone used e.g. by modules. */ 394/* Built-in default zone used e.g. by modules. */
392const struct nf_conntrack_zone nf_ct_zone_dflt = { 395const struct nf_conntrack_zone nf_ct_zone_dflt = {
393 .id = NF_CT_DEFAULT_ZONE_ID, 396 .id = NF_CT_DEFAULT_ZONE_ID,
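The renamed nfnl_ct_hook follows the usual RCU pattern for hooks provided by an optional module. A minimal, self-contained sketch with hypothetical types (the real hook's members are not visible in this diff):

struct example_hook {
	void (*fn)(struct sk_buff *skb);
};

static struct example_hook __rcu *example_hook_ptr __read_mostly;

static void call_example_hook(struct sk_buff *skb)
{
	struct example_hook *h;

	rcu_read_lock();
	h = rcu_dereference(example_hook_ptr);
	if (h)              /* provider module may not be loaded */
		h->fn(skb);
	rcu_read_unlock();
}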
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d05e759ed0fa..b0bc475f641e 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,7 +33,7 @@
33#define mtype_gc IPSET_TOKEN(MTYPE, _gc) 33#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
34#define mtype MTYPE 34#define mtype MTYPE
35 35
36#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id)) 36#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
37 37
38static void 38static void
39mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) 39mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -67,12 +67,9 @@ mtype_destroy(struct ip_set *set)
67 del_timer_sync(&map->gc); 67 del_timer_sync(&map->gc);
68 68
69 ip_set_free(map->members); 69 ip_set_free(map->members);
70 if (set->dsize) { 70 if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
71 if (set->extensions & IPSET_EXT_DESTROY) 71 mtype_ext_cleanup(set);
72 mtype_ext_cleanup(set); 72 ip_set_free(map);
73 ip_set_free(map->extensions);
74 }
75 kfree(map);
76 73
77 set->data = NULL; 74 set->data = NULL;
78} 75}
@@ -92,16 +89,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
92{ 89{
93 const struct mtype *map = set->data; 90 const struct mtype *map = set->data;
94 struct nlattr *nested; 91 struct nlattr *nested;
92 size_t memsize = sizeof(*map) + map->memsize;
95 93
96 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 94 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
97 if (!nested) 95 if (!nested)
98 goto nla_put_failure; 96 goto nla_put_failure;
99 if (mtype_do_head(skb, map) || 97 if (mtype_do_head(skb, map) ||
100 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || 98 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
101 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, 99 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
102 htonl(sizeof(*map) +
103 map->memsize +
104 set->dsize * map->elements)))
105 goto nla_put_failure; 100 goto nla_put_failure;
106 if (unlikely(ip_set_put_flags(skb, set))) 101 if (unlikely(ip_set_put_flags(skb, set)))
107 goto nla_put_failure; 102 goto nla_put_failure;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 64a564334418..4783efff0bde 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -41,7 +41,6 @@ MODULE_ALIAS("ip_set_bitmap:ip");
41/* Type structure */ 41/* Type structure */
42struct bitmap_ip { 42struct bitmap_ip {
43 void *members; /* the set members */ 43 void *members; /* the set members */
44 void *extensions; /* data extensions */
45 u32 first_ip; /* host byte order, included in range */ 44 u32 first_ip; /* host byte order, included in range */
46 u32 last_ip; /* host byte order, included in range */ 45 u32 last_ip; /* host byte order, included in range */
47 u32 elements; /* number of max elements in the set */ 46 u32 elements; /* number of max elements in the set */
@@ -49,6 +48,8 @@ struct bitmap_ip {
49 size_t memsize; /* members size */ 48 size_t memsize; /* members size */
50 u8 netmask; /* subnet netmask */ 49 u8 netmask; /* subnet netmask */
51 struct timer_list gc; /* garbage collection */ 50 struct timer_list gc; /* garbage collection */
51 unsigned char extensions[0] /* data extensions */
52 __aligned(__alignof__(u64));
52}; 53};
53 54
54/* ADT structure for generic function args */ 55/* ADT structure for generic function args */
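The recurring ipset change in these hunks swaps a separately allocated extensions pointer for a trailing array co-allocated with the map, u64-aligned so any extension (counters, timeouts) lands naturally aligned. A hedged sketch of the idiom (hypothetical struct name):

struct example_map {
	void *members;
	size_t memsize;
	unsigned char extensions[0]          /* co-allocated per-element */
		__aligned(__alignof__(u64)); /* extension blob           */
};

static struct example_map *example_map_alloc(u32 elements, size_t dsize)
{
	/* one allocation covers the header and all extension slots */
	return ip_set_alloc(sizeof(struct example_map) + elements * dsize);
}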
@@ -224,13 +225,6 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
224 map->members = ip_set_alloc(map->memsize); 225 map->members = ip_set_alloc(map->memsize);
225 if (!map->members) 226 if (!map->members)
226 return false; 227 return false;
227 if (set->dsize) {
228 map->extensions = ip_set_alloc(set->dsize * elements);
229 if (!map->extensions) {
230 kfree(map->members);
231 return false;
232 }
233 }
234 map->first_ip = first_ip; 228 map->first_ip = first_ip;
235 map->last_ip = last_ip; 229 map->last_ip = last_ip;
236 map->elements = elements; 230 map->elements = elements;
@@ -316,13 +310,13 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
316 pr_debug("hosts %u, elements %llu\n", 310 pr_debug("hosts %u, elements %llu\n",
317 hosts, (unsigned long long)elements); 311 hosts, (unsigned long long)elements);
318 312
319 map = kzalloc(sizeof(*map), GFP_KERNEL); 313 set->dsize = ip_set_elem_len(set, tb, 0, 0);
314 map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
320 if (!map) 315 if (!map)
321 return -ENOMEM; 316 return -ENOMEM;
322 317
323 map->memsize = bitmap_bytes(0, elements - 1); 318 map->memsize = bitmap_bytes(0, elements - 1);
324 set->variant = &bitmap_ip; 319 set->variant = &bitmap_ip;
325 set->dsize = ip_set_elem_len(set, tb, 0);
326 if (!init_map_ip(set, map, first_ip, last_ip, 320 if (!init_map_ip(set, map, first_ip, last_ip,
327 elements, hosts, netmask)) { 321 elements, hosts, netmask)) {
328 kfree(map); 322 kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1430535118fb..29dde208381d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -47,24 +47,26 @@ enum {
47/* Type structure */ 47/* Type structure */
48struct bitmap_ipmac { 48struct bitmap_ipmac {
49 void *members; /* the set members */ 49 void *members; /* the set members */
50 void *extensions; /* MAC + data extensions */
51 u32 first_ip; /* host byte order, included in range */ 50 u32 first_ip; /* host byte order, included in range */
52 u32 last_ip; /* host byte order, included in range */ 51 u32 last_ip; /* host byte order, included in range */
53 u32 elements; /* number of max elements in the set */ 52 u32 elements; /* number of max elements in the set */
54 size_t memsize; /* members size */ 53 size_t memsize; /* members size */
55 struct timer_list gc; /* garbage collector */ 54 struct timer_list gc; /* garbage collector */
55 unsigned char extensions[0] /* MAC + data extensions */
56 __aligned(__alignof__(u64));
56}; 57};
57 58
58/* ADT structure for generic function args */ 59/* ADT structure for generic function args */
59struct bitmap_ipmac_adt_elem { 60struct bitmap_ipmac_adt_elem {
61 unsigned char ether[ETH_ALEN] __aligned(2);
60 u16 id; 62 u16 id;
61 unsigned char *ether; 63 u16 add_mac;
62}; 64};
63 65
64struct bitmap_ipmac_elem { 66struct bitmap_ipmac_elem {
65 unsigned char ether[ETH_ALEN]; 67 unsigned char ether[ETH_ALEN];
66 unsigned char filled; 68 unsigned char filled;
67} __attribute__ ((aligned)); 69} __aligned(__alignof__(u64));
68 70
69static inline u32 71static inline u32
70ip_to_id(const struct bitmap_ipmac *m, u32 ip) 72ip_to_id(const struct bitmap_ipmac *m, u32 ip)
@@ -72,11 +74,11 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
72 return ip - m->first_ip; 74 return ip - m->first_ip;
73} 75}
74 76
75static inline struct bitmap_ipmac_elem * 77#define get_elem(extensions, id, dsize) \
76get_elem(void *extensions, u16 id, size_t dsize) 78 (struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
77{ 79
78 return (struct bitmap_ipmac_elem *)(extensions + id * dsize); 80#define get_const_elem(extensions, id, dsize) \
79} 81 (const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
80 82
81/* Common functions */ 83/* Common functions */
82 84
@@ -88,10 +90,9 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
88 90
89 if (!test_bit(e->id, map->members)) 91 if (!test_bit(e->id, map->members))
90 return 0; 92 return 0;
91 elem = get_elem(map->extensions, e->id, dsize); 93 elem = get_const_elem(map->extensions, e->id, dsize);
92 if (elem->filled == MAC_FILLED) 94 if (e->add_mac && elem->filled == MAC_FILLED)
93 return !e->ether || 95 return ether_addr_equal(e->ether, elem->ether);
94 ether_addr_equal(e->ether, elem->ether);
95 /* Trigger kernel to fill out the ethernet address */ 96 /* Trigger kernel to fill out the ethernet address */
96 return -EAGAIN; 97 return -EAGAIN;
97} 98}
@@ -103,7 +104,7 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
103 104
104 if (!test_bit(id, map->members)) 105 if (!test_bit(id, map->members))
105 return 0; 106 return 0;
106 elem = get_elem(map->extensions, id, dsize); 107 elem = get_const_elem(map->extensions, id, dsize);
107 /* Timer not started for the incomplete elements */ 108 /* Timer not started for the incomplete elements */
108 return elem->filled == MAC_FILLED; 109 return elem->filled == MAC_FILLED;
109} 110}
@@ -133,7 +134,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
133 * and we can reuse it later when MAC is filled out, 134 * and we can reuse it later when MAC is filled out,
134 * possibly by the kernel 135 * possibly by the kernel
135 */ 136 */
136 if (e->ether) 137 if (e->add_mac)
137 ip_set_timeout_set(timeout, t); 138 ip_set_timeout_set(timeout, t);
138 else 139 else
139 *timeout = t; 140 *timeout = t;
@@ -150,7 +151,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
150 elem = get_elem(map->extensions, e->id, dsize); 151 elem = get_elem(map->extensions, e->id, dsize);
151 if (test_bit(e->id, map->members)) { 152 if (test_bit(e->id, map->members)) {
152 if (elem->filled == MAC_FILLED) { 153 if (elem->filled == MAC_FILLED) {
153 if (e->ether && 154 if (e->add_mac &&
154 (flags & IPSET_FLAG_EXIST) && 155 (flags & IPSET_FLAG_EXIST) &&
155 !ether_addr_equal(e->ether, elem->ether)) { 156 !ether_addr_equal(e->ether, elem->ether)) {
156 /* memcpy isn't atomic */ 157 /* memcpy isn't atomic */
@@ -159,7 +160,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
159 ether_addr_copy(elem->ether, e->ether); 160 ether_addr_copy(elem->ether, e->ether);
160 } 161 }
161 return IPSET_ADD_FAILED; 162 return IPSET_ADD_FAILED;
162 } else if (!e->ether) 163 } else if (!e->add_mac)
163 /* Already added without ethernet address */ 164 /* Already added without ethernet address */
164 return IPSET_ADD_FAILED; 165 return IPSET_ADD_FAILED;
165 /* Fill the MAC address and trigger the timer activation */ 166 /* Fill the MAC address and trigger the timer activation */
@@ -168,7 +169,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
168 ether_addr_copy(elem->ether, e->ether); 169 ether_addr_copy(elem->ether, e->ether);
169 elem->filled = MAC_FILLED; 170 elem->filled = MAC_FILLED;
170 return IPSET_ADD_START_STORED_TIMEOUT; 171 return IPSET_ADD_START_STORED_TIMEOUT;
171 } else if (e->ether) { 172 } else if (e->add_mac) {
172 /* We can store MAC too */ 173 /* We can store MAC too */
173 ether_addr_copy(elem->ether, e->ether); 174 ether_addr_copy(elem->ether, e->ether);
174 elem->filled = MAC_FILLED; 175 elem->filled = MAC_FILLED;
@@ -191,7 +192,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
191 u32 id, size_t dsize) 192 u32 id, size_t dsize)
192{ 193{
193 const struct bitmap_ipmac_elem *elem = 194 const struct bitmap_ipmac_elem *elem =
194 get_elem(map->extensions, id, dsize); 195 get_const_elem(map->extensions, id, dsize);
195 196
196 return nla_put_ipaddr4(skb, IPSET_ATTR_IP, 197 return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
197 htonl(map->first_ip + id)) || 198 htonl(map->first_ip + id)) ||
@@ -213,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
213{ 214{
214 struct bitmap_ipmac *map = set->data; 215 struct bitmap_ipmac *map = set->data;
215 ipset_adtfn adtfn = set->variant->adt[adt]; 216 ipset_adtfn adtfn = set->variant->adt[adt];
216 struct bitmap_ipmac_adt_elem e = { .id = 0 }; 217 struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 };
217 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 218 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
218 u32 ip; 219 u32 ip;
219 220
@@ -231,7 +232,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
231 return -EINVAL; 232 return -EINVAL;
232 233
233 e.id = ip_to_id(map, ip); 234 e.id = ip_to_id(map, ip);
234 e.ether = eth_hdr(skb)->h_source; 235 memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
235 236
236 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); 237 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
237} 238}
@@ -265,11 +266,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
265 return -IPSET_ERR_BITMAP_RANGE; 266 return -IPSET_ERR_BITMAP_RANGE;
266 267
267 e.id = ip_to_id(map, ip); 268 e.id = ip_to_id(map, ip);
268 if (tb[IPSET_ATTR_ETHER]) 269 if (tb[IPSET_ATTR_ETHER]) {
269 e.ether = nla_data(tb[IPSET_ATTR_ETHER]); 270 memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
270 else 271 e.add_mac = 1;
271 e.ether = NULL; 272 }
272
273 ret = adtfn(set, &e, &ext, &ext, flags); 273 ret = adtfn(set, &e, &ext, &ext, flags);
274 274
275 return ip_set_eexist(ret, flags) ? 0 : ret; 275 return ip_set_eexist(ret, flags) ? 0 : ret;
@@ -300,13 +300,6 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
300 map->members = ip_set_alloc(map->memsize); 300 map->members = ip_set_alloc(map->memsize);
301 if (!map->members) 301 if (!map->members)
302 return false; 302 return false;
303 if (set->dsize) {
304 map->extensions = ip_set_alloc(set->dsize * elements);
305 if (!map->extensions) {
306 kfree(map->members);
307 return false;
308 }
309 }
310 map->first_ip = first_ip; 303 map->first_ip = first_ip;
311 map->last_ip = last_ip; 304 map->last_ip = last_ip;
312 map->elements = elements; 305 map->elements = elements;
@@ -361,14 +354,15 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
361 if (elements > IPSET_BITMAP_MAX_RANGE + 1) 354 if (elements > IPSET_BITMAP_MAX_RANGE + 1)
362 return -IPSET_ERR_BITMAP_RANGE_SIZE; 355 return -IPSET_ERR_BITMAP_RANGE_SIZE;
363 356
364 map = kzalloc(sizeof(*map), GFP_KERNEL); 357 set->dsize = ip_set_elem_len(set, tb,
358 sizeof(struct bitmap_ipmac_elem),
359 __alignof__(struct bitmap_ipmac_elem));
360 map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
365 if (!map) 361 if (!map)
366 return -ENOMEM; 362 return -ENOMEM;
367 363
368 map->memsize = bitmap_bytes(0, elements - 1); 364 map->memsize = bitmap_bytes(0, elements - 1);
369 set->variant = &bitmap_ipmac; 365 set->variant = &bitmap_ipmac;
370 set->dsize = ip_set_elem_len(set, tb,
371 sizeof(struct bitmap_ipmac_elem));
372 if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { 366 if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
373 kfree(map); 367 kfree(map);
374 return -ENOMEM; 368 return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 5338ccd5da46..7f0c733358a4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -35,12 +35,13 @@ MODULE_ALIAS("ip_set_bitmap:port");
35/* Type structure */ 35/* Type structure */
36struct bitmap_port { 36struct bitmap_port {
37 void *members; /* the set members */ 37 void *members; /* the set members */
38 void *extensions; /* data extensions */
39 u16 first_port; /* host byte order, included in range */ 38 u16 first_port; /* host byte order, included in range */
40 u16 last_port; /* host byte order, included in range */ 39 u16 last_port; /* host byte order, included in range */
41 u32 elements; /* number of max elements in the set */ 40 u32 elements; /* number of max elements in the set */
42 size_t memsize; /* members size */ 41 size_t memsize; /* members size */
43 struct timer_list gc; /* garbage collection */ 42 struct timer_list gc; /* garbage collection */
43 unsigned char extensions[0] /* data extensions */
44 __aligned(__alignof__(u64));
44}; 45};
45 46
46/* ADT structure for generic function args */ 47/* ADT structure for generic function args */
@@ -209,13 +210,6 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
209 map->members = ip_set_alloc(map->memsize); 210 map->members = ip_set_alloc(map->memsize);
210 if (!map->members) 211 if (!map->members)
211 return false; 212 return false;
212 if (set->dsize) {
213 map->extensions = ip_set_alloc(set->dsize * map->elements);
214 if (!map->extensions) {
215 kfree(map->members);
216 return false;
217 }
218 }
219 map->first_port = first_port; 213 map->first_port = first_port;
220 map->last_port = last_port; 214 map->last_port = last_port;
221 set->timeout = IPSET_NO_TIMEOUT; 215 set->timeout = IPSET_NO_TIMEOUT;
@@ -232,6 +226,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
232{ 226{
233 struct bitmap_port *map; 227 struct bitmap_port *map;
234 u16 first_port, last_port; 228 u16 first_port, last_port;
229 u32 elements;
235 230
236 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || 231 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
237 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || 232 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -248,14 +243,15 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
248 last_port = tmp; 243 last_port = tmp;
249 } 244 }
250 245
251 map = kzalloc(sizeof(*map), GFP_KERNEL); 246 elements = last_port - first_port + 1;
247 set->dsize = ip_set_elem_len(set, tb, 0, 0);
248 map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
252 if (!map) 249 if (!map)
253 return -ENOMEM; 250 return -ENOMEM;
254 251
255 map->elements = last_port - first_port + 1; 252 map->elements = elements;
256 map->memsize = bitmap_bytes(0, map->elements); 253 map->memsize = bitmap_bytes(0, map->elements);
257 set->variant = &bitmap_port; 254 set->variant = &bitmap_port;
258 set->dsize = ip_set_elem_len(set, tb, 0);
259 if (!init_map_port(set, map, first_port, last_port)) { 255 if (!init_map_port(set, map, first_port, last_port)) {
260 kfree(map); 256 kfree(map);
261 return -ENOMEM; 257 return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 338b4047776f..54f3d7cb23e6 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -364,25 +364,27 @@ add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
364} 364}
365 365
366size_t 366size_t
367ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) 367ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
368 size_t align)
368{ 369{
369 enum ip_set_ext_id id; 370 enum ip_set_ext_id id;
370 size_t offset = len;
371 u32 cadt_flags = 0; 371 u32 cadt_flags = 0;
372 372
373 if (tb[IPSET_ATTR_CADT_FLAGS]) 373 if (tb[IPSET_ATTR_CADT_FLAGS])
374 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); 374 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
375 if (cadt_flags & IPSET_FLAG_WITH_FORCEADD) 375 if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
376 set->flags |= IPSET_CREATE_FLAG_FORCEADD; 376 set->flags |= IPSET_CREATE_FLAG_FORCEADD;
377 if (!align)
378 align = 1;
377 for (id = 0; id < IPSET_EXT_ID_MAX; id++) { 379 for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
378 if (!add_extension(id, cadt_flags, tb)) 380 if (!add_extension(id, cadt_flags, tb))
379 continue; 381 continue;
380 offset = ALIGN(offset, ip_set_extensions[id].align); 382 len = ALIGN(len, ip_set_extensions[id].align);
381 set->offset[id] = offset; 383 set->offset[id] = len;
382 set->extensions |= ip_set_extensions[id].type; 384 set->extensions |= ip_set_extensions[id].type;
383 offset += ip_set_extensions[id].len; 385 len += ip_set_extensions[id].len;
384 } 386 }
385 return offset; 387 return ALIGN(len, align);
386} 388}
387EXPORT_SYMBOL_GPL(ip_set_elem_len); 389EXPORT_SYMBOL_GPL(ip_set_elem_len);
388 390
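Worked example of the new alignment logic (assumed sizes, not taken from this diff): a 6-byte base element returned with 8-byte alignment, plus a 4-byte/4-aligned and an 8-byte/8-aligned extension:

/*
 *   len = 6                                   (base element)
 *   ext1: len = ALIGN(6, 4)  = 8;  offset[ext1] = 8;  len = 8 + 4 = 12
 *   ext2: len = ALIGN(12, 8) = 16; offset[ext2] = 16; len = 16 + 8 = 24
 *   return ALIGN(24, 8) = 24                  -> set->dsize
 */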
@@ -519,8 +521,7 @@ int
519ip_set_test(ip_set_id_t index, const struct sk_buff *skb, 521ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
520 const struct xt_action_param *par, struct ip_set_adt_opt *opt) 522 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
521{ 523{
522 struct ip_set *set = ip_set_rcu_get( 524 struct ip_set *set = ip_set_rcu_get(par->net, index);
523 dev_net(par->in ? par->in : par->out), index);
524 int ret = 0; 525 int ret = 0;
525 526
526 BUG_ON(!set); 527 BUG_ON(!set);
@@ -558,8 +559,7 @@ int
558ip_set_add(ip_set_id_t index, const struct sk_buff *skb, 559ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
559 const struct xt_action_param *par, struct ip_set_adt_opt *opt) 560 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
560{ 561{
561 struct ip_set *set = ip_set_rcu_get( 562 struct ip_set *set = ip_set_rcu_get(par->net, index);
562 dev_net(par->in ? par->in : par->out), index);
563 int ret; 563 int ret;
564 564
565 BUG_ON(!set); 565 BUG_ON(!set);
@@ -581,8 +581,7 @@ int
581ip_set_del(ip_set_id_t index, const struct sk_buff *skb, 581ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
582 const struct xt_action_param *par, struct ip_set_adt_opt *opt) 582 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
583{ 583{
584 struct ip_set *set = ip_set_rcu_get( 584 struct ip_set *set = ip_set_rcu_get(par->net, index);
585 dev_net(par->in ? par->in : par->out), index);
586 int ret = 0; 585 int ret = 0;
587 586
588 BUG_ON(!set); 587 BUG_ON(!set);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 691b54fcaf2a..e5336ab36d67 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -72,8 +72,9 @@ struct hbucket {
72 DECLARE_BITMAP(used, AHASH_MAX_TUNED); 72 DECLARE_BITMAP(used, AHASH_MAX_TUNED);
73 u8 size; /* size of the array */ 73 u8 size; /* size of the array */
74 u8 pos; /* position of the first free entry */ 74 u8 pos; /* position of the first free entry */
75 unsigned char value[0]; /* the array of the values */ 75 unsigned char value[0] /* the array of the values */
76} __attribute__ ((aligned)); 76 __aligned(__alignof__(u64));
77};
77 78
78/* The hash table: the table size stored here in order to make resizing easy */ 79/* The hash table: the table size stored here in order to make resizing easy */
79struct htable { 80struct htable {
@@ -475,7 +476,7 @@ static void
475mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) 476mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
476{ 477{
477 struct htable *t; 478 struct htable *t;
478 struct hbucket *n; 479 struct hbucket *n, *tmp;
479 struct mtype_elem *data; 480 struct mtype_elem *data;
480 u32 i, j, d; 481 u32 i, j, d;
481#ifdef IP_SET_HASH_WITH_NETS 482#ifdef IP_SET_HASH_WITH_NETS
@@ -510,9 +511,14 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
510 } 511 }
511 } 512 }
512 if (d >= AHASH_INIT_SIZE) { 513 if (d >= AHASH_INIT_SIZE) {
513 struct hbucket *tmp = kzalloc(sizeof(*tmp) + 514 if (d >= n->size) {
514 (n->size - AHASH_INIT_SIZE) * dsize, 515 rcu_assign_pointer(hbucket(t, i), NULL);
515 GFP_ATOMIC); 516 kfree_rcu(n, rcu);
517 continue;
518 }
519 tmp = kzalloc(sizeof(*tmp) +
520 (n->size - AHASH_INIT_SIZE) * dsize,
521 GFP_ATOMIC);
516 if (!tmp) 522 if (!tmp)
517 /* Still try to delete expired elements */ 523 /* Still try to delete expired elements */
518 continue; 524 continue;
@@ -522,7 +528,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
522 continue; 528 continue;
523 data = ahash_data(n, j, dsize); 529 data = ahash_data(n, j, dsize);
524 memcpy(tmp->value + d * dsize, data, dsize); 530 memcpy(tmp->value + d * dsize, data, dsize);
525 set_bit(j, tmp->used); 531 set_bit(d, tmp->used);
526 d++; 532 d++;
527 } 533 }
528 tmp->pos = d; 534 tmp->pos = d;
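The set_bit(j, ...) -> set_bit(d, ...) change matters because surviving entries are compacted into the replacement bucket, so the destination index d, not the source index j, names the occupied slot. Sketch with an assumed 4-slot bucket:

/*
 *   old bucket: used = 0 1 0 1   (slots 1 and 3 still alive)
 *   copy loop:  value[1] -> tmp->value[0], value[3] -> tmp->value[1]
 *   new bucket: used = 1 1 0 0   (bits track d)
 * Marking bit j instead would desynchronize the bitmap from the
 * compacted value array.
 */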
@@ -1323,12 +1329,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1323#endif 1329#endif
1324 set->variant = &IPSET_TOKEN(HTYPE, 4_variant); 1330 set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
1325 set->dsize = ip_set_elem_len(set, tb, 1331 set->dsize = ip_set_elem_len(set, tb,
1326 sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); 1332 sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
1333 __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
1327#ifndef IP_SET_PROTO_UNDEF 1334#ifndef IP_SET_PROTO_UNDEF
1328 } else { 1335 } else {
1329 set->variant = &IPSET_TOKEN(HTYPE, 6_variant); 1336 set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
1330 set->dsize = ip_set_elem_len(set, tb, 1337 set->dsize = ip_set_elem_len(set, tb,
1331 sizeof(struct IPSET_TOKEN(HTYPE, 6_elem))); 1338 sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
1339 __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
1332 } 1340 }
1333#endif 1341#endif
1334 if (tb[IPSET_ATTR_TIMEOUT]) { 1342 if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index a1fe5377a2b3..bbede95c9f68 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -31,7 +31,7 @@ struct set_elem {
31 struct rcu_head rcu; 31 struct rcu_head rcu;
32 struct list_head list; 32 struct list_head list;
33 ip_set_id_t id; 33 ip_set_id_t id;
34}; 34} __aligned(__alignof__(u64));
35 35
36struct set_adt_elem { 36struct set_adt_elem {
37 ip_set_id_t id; 37 ip_set_id_t id;
@@ -297,7 +297,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
297 ip_set_timeout_expired(ext_timeout(n, set)))) 297 ip_set_timeout_expired(ext_timeout(n, set))))
298 n = NULL; 298 n = NULL;
299 299
300 e = kzalloc(set->dsize, GFP_KERNEL); 300 e = kzalloc(set->dsize, GFP_ATOMIC);
301 if (!e) 301 if (!e)
302 return -ENOMEM; 302 return -ENOMEM;
303 e->id = d->id; 303 e->id = d->id;
@@ -618,7 +618,8 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
618 size = IP_SET_LIST_MIN_SIZE; 618 size = IP_SET_LIST_MIN_SIZE;
619 619
620 set->variant = &set_variant; 620 set->variant = &set_variant;
621 set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); 621 set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
622 __alignof__(struct set_elem));
622 if (!init_list_set(net, set, size)) 623 if (!init_list_set(net, set, size))
623 return -ENOMEM; 624 return -ENOMEM;
624 if (tb[IPSET_ATTR_TIMEOUT]) { 625 if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index dfd7b65b3d2a..0328f7250693 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -75,7 +75,7 @@ static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
75 * Allocate/initialize app incarnation and register it in proto apps. 75 * Allocate/initialize app incarnation and register it in proto apps.
76 */ 76 */
77static int 77static int
78ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, 78ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
79 __u16 port) 79 __u16 port)
80{ 80{
81 struct ip_vs_protocol *pp; 81 struct ip_vs_protocol *pp;
@@ -107,7 +107,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
107 } 107 }
108 } 108 }
109 109
110 ret = pp->register_app(net, inc); 110 ret = pp->register_app(ipvs, inc);
111 if (ret) 111 if (ret)
112 goto out; 112 goto out;
113 113
@@ -127,7 +127,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
127 * Release app incarnation 127 * Release app incarnation
128 */ 128 */
129static void 129static void
130ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) 130ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
131{ 131{
132 struct ip_vs_protocol *pp; 132 struct ip_vs_protocol *pp;
133 133
@@ -135,7 +135,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
135 return; 135 return;
136 136
137 if (pp->unregister_app) 137 if (pp->unregister_app)
138 pp->unregister_app(net, inc); 138 pp->unregister_app(ipvs, inc);
139 139
140 IP_VS_DBG(9, "%s App %s:%u unregistered\n", 140 IP_VS_DBG(9, "%s App %s:%u unregistered\n",
141 pp->name, inc->name, ntohs(inc->port)); 141 pp->name, inc->name, ntohs(inc->port));
@@ -175,14 +175,14 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
175 * Register an application incarnation in protocol applications 175 * Register an application incarnation in protocol applications
176 */ 176 */
177int 177int
178register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, 178register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
179 __u16 port) 179 __u16 port)
180{ 180{
181 int result; 181 int result;
182 182
183 mutex_lock(&__ip_vs_app_mutex); 183 mutex_lock(&__ip_vs_app_mutex);
184 184
185 result = ip_vs_app_inc_new(net, app, proto, port); 185 result = ip_vs_app_inc_new(ipvs, app, proto, port);
186 186
187 mutex_unlock(&__ip_vs_app_mutex); 187 mutex_unlock(&__ip_vs_app_mutex);
188 188
@@ -191,15 +191,11 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
191 191
192 192
193/* Register application for netns */ 193/* Register application for netns */
194struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app) 194struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
195{ 195{
196 struct netns_ipvs *ipvs = net_ipvs(net);
197 struct ip_vs_app *a; 196 struct ip_vs_app *a;
198 int err = 0; 197 int err = 0;
199 198
200 if (!ipvs)
201 return ERR_PTR(-ENOENT);
202
203 mutex_lock(&__ip_vs_app_mutex); 199 mutex_lock(&__ip_vs_app_mutex);
204 200
205 list_for_each_entry(a, &ipvs->app_list, a_list) { 201 list_for_each_entry(a, &ipvs->app_list, a_list) {
@@ -230,21 +226,17 @@ out_unlock:
230 * We are sure there are no app incarnations attached to services 226 * We are sure there are no app incarnations attached to services
231 * Caller should use synchronize_rcu() or rcu_barrier() 227 * Caller should use synchronize_rcu() or rcu_barrier()
232 */ 228 */
233void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) 229void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
234{ 230{
235 struct netns_ipvs *ipvs = net_ipvs(net);
236 struct ip_vs_app *a, *anxt, *inc, *nxt; 231 struct ip_vs_app *a, *anxt, *inc, *nxt;
237 232
238 if (!ipvs)
239 return;
240
241 mutex_lock(&__ip_vs_app_mutex); 233 mutex_lock(&__ip_vs_app_mutex);
242 234
243 list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { 235 list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
244 if (app && strcmp(app->name, a->name)) 236 if (app && strcmp(app->name, a->name))
245 continue; 237 continue;
246 list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { 238 list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
247 ip_vs_app_inc_release(net, inc); 239 ip_vs_app_inc_release(ipvs, inc);
248 } 240 }
249 241
250 list_del(&a->a_list); 242 list_del(&a->a_list);
@@ -611,17 +603,19 @@ static const struct file_operations ip_vs_app_fops = {
611}; 603};
612#endif 604#endif
613 605
614int __net_init ip_vs_app_net_init(struct net *net) 606int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
615{ 607{
616 struct netns_ipvs *ipvs = net_ipvs(net); 608 struct net *net = ipvs->net;
617 609
618 INIT_LIST_HEAD(&ipvs->app_list); 610 INIT_LIST_HEAD(&ipvs->app_list);
619 proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops); 611 proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops);
620 return 0; 612 return 0;
621} 613}
622 614
623void __net_exit ip_vs_app_net_cleanup(struct net *net) 615void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
624{ 616{
625 unregister_ip_vs_app(net, NULL /* all */); 617 struct net *net = ipvs->net;
618
619 unregister_ip_vs_app(ipvs, NULL /* all */);
626 remove_proc_entry("ip_vs_app", net->proc_net); 620 remove_proc_entry("ip_vs_app", net->proc_net);
627} 621}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index b0f7b626b56d..85ca189bdc3d 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -108,7 +108,7 @@ static inline void ct_write_unlock_bh(unsigned int key)
108/* 108/*
109 * Returns hash value for IPVS connection entry 109 * Returns hash value for IPVS connection entry
110 */ 110 */
111static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int proto, 111static unsigned int ip_vs_conn_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
112 const union nf_inet_addr *addr, 112 const union nf_inet_addr *addr,
113 __be16 port) 113 __be16 port)
114{ 114{
@@ -116,11 +116,11 @@ static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int pro
116 if (af == AF_INET6) 116 if (af == AF_INET6)
117 return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), 117 return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
118 (__force u32)port, proto, ip_vs_conn_rnd) ^ 118 (__force u32)port, proto, ip_vs_conn_rnd) ^
119 ((size_t)net>>8)) & ip_vs_conn_tab_mask; 119 ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask;
120#endif 120#endif
121 return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, 121 return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
122 ip_vs_conn_rnd) ^ 122 ip_vs_conn_rnd) ^
123 ((size_t)net>>8)) & ip_vs_conn_tab_mask; 123 ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask;
124} 124}
125 125
126static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, 126static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
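The hash salt switches from the struct net pointer to the netns_ipvs pointer; shifting right by 8 discards the low pointer bits, which are near-constant for slab allocations. A minimal sketch of the mixing (hypothetical helper):

static inline u32 ns_salt_sketch(const struct netns_ipvs *ipvs)
{
	/* low bits of a kmalloc'ed pointer carry little entropy */
	return (u32)((unsigned long)ipvs >> 8);
}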
@@ -141,14 +141,14 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
141 port = p->vport; 141 port = p->vport;
142 } 142 }
143 143
144 return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port); 144 return ip_vs_conn_hashkey(p->ipvs, p->af, p->protocol, addr, port);
145} 145}
146 146
147static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) 147static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
148{ 148{
149 struct ip_vs_conn_param p; 149 struct ip_vs_conn_param p;
150 150
151 ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol, 151 ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol,
152 &cp->caddr, cp->cport, NULL, 0, &p); 152 &cp->caddr, cp->cport, NULL, 0, &p);
153 153
154 if (cp->pe) { 154 if (cp->pe) {
@@ -279,7 +279,7 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
279 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && 279 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
280 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && 280 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
281 p->protocol == cp->protocol && 281 p->protocol == cp->protocol &&
282 ip_vs_conn_net_eq(cp, p->net)) { 282 cp->ipvs == p->ipvs) {
283 if (!__ip_vs_conn_get(cp)) 283 if (!__ip_vs_conn_get(cp))
284 continue; 284 continue;
285 /* HIT */ 285 /* HIT */
@@ -314,33 +314,34 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
314} 314}
315 315
316static int 316static int
317ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, 317ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs,
318 int af, const struct sk_buff *skb,
318 const struct ip_vs_iphdr *iph, 319 const struct ip_vs_iphdr *iph,
319 int inverse, struct ip_vs_conn_param *p) 320 struct ip_vs_conn_param *p)
320{ 321{
321 __be16 _ports[2], *pptr; 322 __be16 _ports[2], *pptr;
322 struct net *net = skb_net(skb);
323 323
324 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); 324 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
325 if (pptr == NULL) 325 if (pptr == NULL)
326 return 1; 326 return 1;
327 327
328 if (likely(!inverse)) 328 if (likely(!ip_vs_iph_inverse(iph)))
329 ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, 329 ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->saddr,
330 pptr[0], &iph->daddr, pptr[1], p); 330 pptr[0], &iph->daddr, pptr[1], p);
331 else 331 else
332 ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr, 332 ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->daddr,
333 pptr[1], &iph->saddr, pptr[0], p); 333 pptr[1], &iph->saddr, pptr[0], p);
334 return 0; 334 return 0;
335} 335}
336 336
337struct ip_vs_conn * 337struct ip_vs_conn *
338ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 338ip_vs_conn_in_get_proto(struct netns_ipvs *ipvs, int af,
339 const struct ip_vs_iphdr *iph, int inverse) 339 const struct sk_buff *skb,
340 const struct ip_vs_iphdr *iph)
340{ 341{
341 struct ip_vs_conn_param p; 342 struct ip_vs_conn_param p;
342 343
343 if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) 344 if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p))
344 return NULL; 345 return NULL;
345 346
346 return ip_vs_conn_in_get(&p); 347 return ip_vs_conn_in_get(&p);
@@ -359,7 +360,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
359 360
360 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { 361 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
361 if (unlikely(p->pe_data && p->pe->ct_match)) { 362 if (unlikely(p->pe_data && p->pe->ct_match)) {
362 if (!ip_vs_conn_net_eq(cp, p->net)) 363 if (cp->ipvs != p->ipvs)
363 continue; 364 continue;
364 if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { 365 if (p->pe == cp->pe && p->pe->ct_match(p, cp)) {
365 if (__ip_vs_conn_get(cp)) 366 if (__ip_vs_conn_get(cp))
@@ -377,7 +378,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
377 p->vport == cp->vport && p->cport == cp->cport && 378 p->vport == cp->vport && p->cport == cp->cport &&
378 cp->flags & IP_VS_CONN_F_TEMPLATE && 379 cp->flags & IP_VS_CONN_F_TEMPLATE &&
379 p->protocol == cp->protocol && 380 p->protocol == cp->protocol &&
380 ip_vs_conn_net_eq(cp, p->net)) { 381 cp->ipvs == p->ipvs) {
381 if (__ip_vs_conn_get(cp)) 382 if (__ip_vs_conn_get(cp))
382 goto out; 383 goto out;
383 } 384 }
@@ -418,7 +419,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
418 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && 419 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
419 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && 420 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
420 p->protocol == cp->protocol && 421 p->protocol == cp->protocol &&
421 ip_vs_conn_net_eq(cp, p->net)) { 422 cp->ipvs == p->ipvs) {
422 if (!__ip_vs_conn_get(cp)) 423 if (!__ip_vs_conn_get(cp))
423 continue; 424 continue;
424 /* HIT */ 425 /* HIT */
@@ -439,12 +440,13 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
439} 440}
440 441
441struct ip_vs_conn * 442struct ip_vs_conn *
442ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 443ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
443 const struct ip_vs_iphdr *iph, int inverse) 444 const struct sk_buff *skb,
445 const struct ip_vs_iphdr *iph)
444{ 446{
445 struct ip_vs_conn_param p; 447 struct ip_vs_conn_param p;
446 448
447 if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) 449 if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p))
448 return NULL; 450 return NULL;
449 451
450 return ip_vs_conn_out_get(&p); 452 return ip_vs_conn_out_get(&p);
@@ -638,7 +640,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
 	 * so we can make the assumption that the svc_af is the same as the
 	 * dest_af
 	 */
-	dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr,
+	dest = ip_vs_find_dest(cp->ipvs, cp->af, cp->af, &cp->daddr,
			       cp->dport, &cp->vaddr, cp->vport,
			       cp->protocol, cp->fwmark, cp->flags);
 	if (dest) {
@@ -668,7 +670,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
 #endif
 		ip_vs_bind_xmit(cp);
 
-		pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol);
+		pd = ip_vs_proto_data_get(cp->ipvs, cp->protocol);
 		if (pd && atomic_read(&pd->appcnt))
 			ip_vs_bind_app(cp, pd->pp);
 	}
@@ -746,7 +748,7 @@ static int expire_quiescent_template(struct netns_ipvs *ipvs,
 int ip_vs_check_template(struct ip_vs_conn *ct)
 {
 	struct ip_vs_dest *dest = ct->dest;
-	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
+	struct netns_ipvs *ipvs = ct->ipvs;
 
 	/*
 	 * Checking the dest server status.
@@ -800,8 +802,7 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
 static void ip_vs_conn_expire(unsigned long data)
 {
 	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
-	struct net *net = ip_vs_conn_net(cp);
-	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct netns_ipvs *ipvs = cp->ipvs;
 
 	/*
 	 * do I control anybody?
@@ -847,7 +848,7 @@ static void ip_vs_conn_expire(unsigned long data)
 		cp->timeout = 60*HZ;
 
 	if (ipvs->sync_state & IP_VS_STATE_MASTER)
-		ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
+		ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs));
 
 	ip_vs_conn_put(cp);
 }
@@ -875,8 +876,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
	       struct ip_vs_dest *dest, __u32 fwmark)
 {
 	struct ip_vs_conn *cp;
-	struct netns_ipvs *ipvs = net_ipvs(p->net);
-	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+	struct netns_ipvs *ipvs = p->ipvs;
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs,
							   p->protocol);
 
 	cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
@@ -887,7 +888,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
 
 	INIT_HLIST_NODE(&cp->c_list);
 	setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
-	ip_vs_conn_net_set(cp, p->net);
+	cp->ipvs = ipvs;
 	cp->af = p->af;
 	cp->daf = dest_af;
 	cp->protocol = p->protocol;
@@ -1061,7 +1062,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
 	size_t len = 0;
 	char dbuf[IP_VS_ADDRSTRLEN];
 
-	if (!ip_vs_conn_net_eq(cp, net))
+	if (!net_eq(cp->ipvs->net, net))
 		return 0;
 	if (cp->pe_data) {
 		pe_data[0] = ' ';
@@ -1146,7 +1147,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
 	const struct ip_vs_conn *cp = v;
 	struct net *net = seq_file_net(seq);
 
-	if (!ip_vs_conn_net_eq(cp, net))
+	if (!net_eq(cp->ipvs->net, net))
 		return 0;
 
 #ifdef CONFIG_IP_VS_IPV6
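The /proc dump paths still receive a struct net from seq_file_net(), so the filter dereferences the new back-pointer and compares namespaces. A minimal sketch of the idiom, using only names visible in this diff (net_eq() evaluates to true when CONFIG_NET_NS is off):

	/* does this connection belong to the netns being dumped? */
	static inline bool conn_in_dumped_netns(const struct ip_vs_conn *cp,
						const struct net *net)
	{
		return net_eq(cp->ipvs->net, net);
	}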
@@ -1240,7 +1241,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
 }
 
 /* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(struct net *net)
+void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
 {
 	int idx;
 	struct ip_vs_conn *cp, *cp_c;
@@ -1256,7 +1257,7 @@ void ip_vs_random_dropentry(struct net *net)
 		if (cp->flags & IP_VS_CONN_F_TEMPLATE)
 			/* connection template */
 			continue;
-		if (!ip_vs_conn_net_eq(cp, net))
+		if (cp->ipvs != ipvs)
 			continue;
 		if (cp->protocol == IPPROTO_TCP) {
 			switch(cp->state) {
@@ -1308,18 +1309,17 @@ void ip_vs_random_dropentry(struct net *net)
 /*
  * Flush all the connection entries in the ip_vs_conn_tab
  */
-static void ip_vs_conn_flush(struct net *net)
+static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
 {
 	int idx;
 	struct ip_vs_conn *cp, *cp_c;
-	struct netns_ipvs *ipvs = net_ipvs(net);
 
 flush_again:
 	rcu_read_lock();
 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
 
 		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
-			if (!ip_vs_conn_net_eq(cp, net))
+			if (cp->ipvs != ipvs)
 				continue;
 			IP_VS_DBG(4, "del connection\n");
 			ip_vs_conn_expire_now(cp);
@@ -1345,23 +1345,22 @@ flush_again:
 /*
  * per netns init and exit
  */
-int __net_init ip_vs_conn_net_init(struct net *net)
+int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
-
 	atomic_set(&ipvs->conn_count, 0);
 
-	proc_create("ip_vs_conn", 0, net->proc_net, &ip_vs_conn_fops);
-	proc_create("ip_vs_conn_sync", 0, net->proc_net, &ip_vs_conn_sync_fops);
+	proc_create("ip_vs_conn", 0, ipvs->net->proc_net, &ip_vs_conn_fops);
+	proc_create("ip_vs_conn_sync", 0, ipvs->net->proc_net,
		    &ip_vs_conn_sync_fops);
 	return 0;
 }
 
-void __net_exit ip_vs_conn_net_cleanup(struct net *net)
+void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)
 {
 	/* flush all the connection entries first */
-	ip_vs_conn_flush(net);
-	remove_proc_entry("ip_vs_conn", net->proc_net);
-	remove_proc_entry("ip_vs_conn_sync", net->proc_net);
+	ip_vs_conn_flush(ipvs);
+	remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
+	remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net);
 }
 
 int __init ip_vs_conn_init(void)
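With the per-netns init/cleanup now taking struct netns_ipvs, the struct net stays reachable as ipvs->net for proc registration. A hypothetical caller under the new signatures (the real call sites live in ip_vs_core.c's pernet ops and are not part of this hunk):

	static int __net_init example_pernet_init(struct net *net)
	{
		struct netns_ipvs *ipvs = net_ipvs(net);

		/* proc files are created under ipvs->net->proc_net */
		return ip_vs_conn_net_init(ipvs);
	}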
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 38fbc194b9cb..f57b4dcdb233 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -112,7 +112,7 @@ static inline void
 ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest = cp->dest;
-	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+	struct netns_ipvs *ipvs = cp->ipvs;
 
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		struct ip_vs_cpu_stats *s;
@@ -146,7 +146,7 @@ static inline void
 ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest = cp->dest;
-	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+	struct netns_ipvs *ipvs = cp->ipvs;
 
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		struct ip_vs_cpu_stats *s;
@@ -179,7 +179,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 static inline void
 ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
 {
-	struct netns_ipvs *ipvs = net_ipvs(svc->net);
+	struct netns_ipvs *ipvs = svc->ipvs;
 	struct ip_vs_cpu_stats *s;
 
 	s = this_cpu_ptr(cp->dest->stats.cpustats);
@@ -215,7 +215,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
			      const union nf_inet_addr *vaddr, __be16 vport,
			      struct ip_vs_conn_param *p)
 {
-	ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+	ip_vs_conn_fill_param(svc->ipvs, svc->af, protocol, caddr, cport, vaddr,
			      vport, p);
 	p->pe = rcu_dereference(svc->pe);
 	if (p->pe && p->pe->fill_param)
@@ -245,20 +245,30 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
 	union nf_inet_addr snet;	/* source network of the client,
					   after masking */
+	const union nf_inet_addr *src_addr, *dst_addr;
+
+	if (likely(!ip_vs_iph_inverse(iph))) {
+		src_addr = &iph->saddr;
+		dst_addr = &iph->daddr;
+	} else {
+		src_addr = &iph->daddr;
+		dst_addr = &iph->saddr;
+	}
+
 
 	/* Mask saddr with the netmask to adjust template granularity */
 #ifdef CONFIG_IP_VS_IPV6
 	if (svc->af == AF_INET6)
-		ipv6_addr_prefix(&snet.in6, &iph->saddr.in6,
+		ipv6_addr_prefix(&snet.in6, &src_addr->in6,
				 (__force __u32) svc->netmask);
 	else
 #endif
-		snet.ip = iph->saddr.ip & svc->netmask;
+		snet.ip = src_addr->ip & svc->netmask;
 
 	IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
		      "mnet %s\n",
-		      IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port),
-		      IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port),
+		      IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port),
+		      IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port),
		      IP_VS_DBG_ADDR(svc->af, &snet));
 
 	/*
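The src/dst swap above keys off an inverse flag carried in the parsed header. A sketch of the helpers this hunk assumes, with IP_VS_HDR_INVERSE and IP_VS_HDR_ICMP as ip_vs_iphdr.hdr_flags bits (names taken from the calls visible elsewhere in this diff):

	static inline bool ip_vs_iph_inverse(const struct ip_vs_iphdr *iph)
	{
		return !!(iph->hdr_flags & IP_VS_HDR_INVERSE);
	}

	static inline bool ip_vs_iph_icmp(const struct ip_vs_iphdr *iph)
	{
		return !!(iph->hdr_flags & IP_VS_HDR_ICMP);
	}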
@@ -276,7 +286,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	 */
 	{
 		int protocol = iph->protocol;
-		const union nf_inet_addr *vaddr = &iph->daddr;
+		const union nf_inet_addr *vaddr = dst_addr;
 		__be16 vport = 0;
 
 		if (dst_port == svc->port) {
@@ -366,8 +376,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	/*
 	 * Create a new connection according to the template
 	 */
-	ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
-			      src_port, &iph->daddr, dst_port, &param);
+	ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, src_addr,
+			      src_port, dst_addr, dst_port, &param);
 
 	cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest,
			    skb->mark);
@@ -418,7 +428,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	struct ip_vs_conn *cp = NULL;
 	struct ip_vs_scheduler *sched;
 	struct ip_vs_dest *dest;
-	__be16 _ports[2], *pptr;
+	__be16 _ports[2], *pptr, cport, vport;
+	const void *caddr, *vaddr;
 	unsigned int flags;
 
 	*ignored = 1;
@@ -429,14 +440,26 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	if (pptr == NULL)
 		return NULL;
 
+	if (likely(!ip_vs_iph_inverse(iph))) {
+		cport = pptr[0];
+		caddr = &iph->saddr;
+		vport = pptr[1];
+		vaddr = &iph->daddr;
+	} else {
+		cport = pptr[1];
+		caddr = &iph->daddr;
+		vport = pptr[0];
+		vaddr = &iph->saddr;
+	}
+
 	/*
 	 * FTPDATA needs this check when using local real server.
 	 * Never schedule Active FTPDATA connections from real server.
 	 * For LVS-NAT they must be already created. For other methods
 	 * with persistence the connection is created on SYN+ACK.
 	 */
-	if (pptr[0] == FTPDATA) {
-		IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
+	if (cport == FTPDATA) {
+		IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
			      "Not scheduling FTPDATA");
 		return NULL;
 	}
@@ -444,19 +467,25 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	/*
 	 * Do not schedule replies from local real server.
 	 */
-	if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
-	    (cp = pp->conn_in_get(svc->af, skb, iph, 1))) {
-		IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
-			      "Not scheduling reply for existing connection");
-		__ip_vs_conn_put(cp);
-		return NULL;
+	if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) {
+		iph->hdr_flags ^= IP_VS_HDR_INVERSE;
+		cp = pp->conn_in_get(svc->ipvs, svc->af, skb, iph);
+		iph->hdr_flags ^= IP_VS_HDR_INVERSE;
+
+		if (cp) {
+			IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
+				      "Not scheduling reply for existing"
+				      " connection");
+			__ip_vs_conn_put(cp);
+			return NULL;
+		}
 	}
 
 	/*
 	 * Persistent service
 	 */
 	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
-		return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored,
+		return ip_vs_sched_persist(svc, skb, cport, vport, ignored,
					   iph);
 
 	*ignored = 0;
@@ -464,7 +493,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	/*
 	 * Non-persistent service
 	 */
-	if (!svc->fwmark && pptr[1] != svc->port) {
+	if (!svc->fwmark && vport != svc->port) {
 		if (!svc->port)
 			pr_err("Schedule: port zero only supported "
			       "in persistent services, "
@@ -495,11 +524,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	{
 		struct ip_vs_conn_param p;
 
-		ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
-				      &iph->saddr, pptr[0], &iph->daddr,
-				      pptr[1], &p);
+		ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
+				      caddr, cport, vaddr, vport, &p);
 		cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
-				    dest->port ? dest->port : pptr[1],
+				    dest->port ? dest->port : vport,
				    flags, dest, skb->mark);
 		if (!cp) {
 			*ignored = -1;
@@ -519,6 +547,15 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	return cp;
 }
 
+static inline int ip_vs_addr_is_unicast(struct net *net, int af,
+					union nf_inet_addr *addr)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		return ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST;
+#endif
+	return (inet_addr_type(net, addr->ip) == RTN_UNICAST);
+}
 
 /*
  * Pass or drop the packet.
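The new helper centralizes the unicast test that ip_vs_leave() used to open-code under CONFIG_SYSCTL; inet_addr_type() classifies the address against the routing table of the given netns. A usage sketch with an illustrative, hypothetical address:

	union nf_inet_addr dst = { .ip = htonl(0x08080808) };	/* 8.8.8.8, illustrative only */

	if (ip_vs_addr_is_unicast(net, AF_INET, &dst))
		pr_debug("destination is a routable unicast address\n");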
@@ -528,33 +565,21 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
		struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
 {
-	__be16 _ports[2], *pptr;
-#ifdef CONFIG_SYSCTL
-	struct net *net;
-	struct netns_ipvs *ipvs;
-	int unicast;
-#endif
+	__be16 _ports[2], *pptr, dport;
+	struct netns_ipvs *ipvs = svc->ipvs;
+	struct net *net = ipvs->net;
 
 	pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
-	if (pptr == NULL) {
+	if (!pptr)
 		return NF_DROP;
-	}
-
-#ifdef CONFIG_SYSCTL
-	net = skb_net(skb);
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (svc->af == AF_INET6)
-		unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST;
-	else
-#endif
-		unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST);
+	dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0];
 
 	/* if it is fwmark-based service, the cache_bypass sysctl is up
	   and the destination is a non-local unicast, then create
	   a cache_bypass connection entry */
-	ipvs = net_ipvs(net);
-	if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
+	if (sysctl_cache_bypass(ipvs) && svc->fwmark &&
+	    !(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) &&
+	    ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) {
 		int ret;
 		struct ip_vs_conn *cp;
 		unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -566,7 +591,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
 		{
 			struct ip_vs_conn_param p;
-			ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
+			ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
					      &iph->saddr, pptr[0],
					      &iph->daddr, pptr[1], &p);
 			cp = ip_vs_conn_new(&p, svc->af, &daddr, 0,
@@ -590,7 +615,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		ip_vs_conn_put(cp);
 		return ret;
 	}
-#endif
 
 	/*
 	 * When the virtual ftp service is presented, packets destined
@@ -598,9 +622,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	 * listed in the ipvs table), pass the packets, because it is
 	 * not ipvs job to decide to drop the packets.
 	 */
-	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
+	if (svc->port == FTPPORT && dport != FTPPORT)
 		return NF_ACCEPT;
 
+	if (unlikely(ip_vs_iph_icmp(iph)))
+		return NF_DROP;
+
 	/*
 	 * Notify the client that the destination is unreachable, and
 	 * release the socket buffer.
@@ -610,11 +637,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	 */
 #ifdef CONFIG_IP_VS_IPV6
 	if (svc->af == AF_INET6) {
-		if (!skb->dev) {
-			struct net *net_ = dev_net(skb_dst(skb)->dev);
-
-			skb->dev = net_->loopback_dev;
-		}
+		if (!skb->dev)
+			skb->dev = net->loopback_dev;
 		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 	} else
 #endif
@@ -625,15 +649,13 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 
 #ifdef CONFIG_SYSCTL
 
-static int sysctl_snat_reroute(struct sk_buff *skb)
+static int sysctl_snat_reroute(struct netns_ipvs *ipvs)
 {
-	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
 	return ipvs->sysctl_snat_reroute;
 }
 
-static int sysctl_nat_icmp_send(struct net *net)
+static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	return ipvs->sysctl_nat_icmp_send;
 }
 
@@ -644,8 +666,8 @@ static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
 
 #else
 
-static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
-static int sysctl_nat_icmp_send(struct net *net) { return 0; }
+static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; }
+static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; }
 static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
 
 #endif
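The same accessor shape is assumed for the other knobs this patch calls, e.g. sysctl_cache_bypass() and sysctl_schedule_icmp(); a sketch mirroring the pair above (compile-time zero when CONFIG_SYSCTL is off):

	#ifdef CONFIG_SYSCTL
	static int sysctl_cache_bypass(struct netns_ipvs *ipvs)
	{
		return ipvs->sysctl_cache_bypass;
	}
	#else
	static int sysctl_cache_bypass(struct netns_ipvs *ipvs) { return 0; }
	#endif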
@@ -664,12 +686,13 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
 	return IP_DEFRAG_VS_OUT;
 }
 
-static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+static inline int ip_vs_gather_frags(struct netns_ipvs *ipvs,
+				     struct sk_buff *skb, u_int32_t user)
 {
 	int err;
 
 	local_bh_disable();
-	err = ip_defrag(skb, user);
+	err = ip_defrag(ipvs->net, skb, user);
 	local_bh_enable();
 	if (!err)
 		ip_send_check(ip_hdr(skb));
@@ -677,10 +700,10 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 	return err;
 }
 
-static int ip_vs_route_me_harder(int af, struct sk_buff *skb,
-				 unsigned int hooknum)
+static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af,
+				 struct sk_buff *skb, unsigned int hooknum)
 {
-	if (!sysctl_snat_reroute(skb))
+	if (!sysctl_snat_reroute(ipvs))
 		return 0;
 	/* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */
 	if (NF_INET_LOCAL_IN == hooknum)
@@ -690,12 +713,12 @@ static int ip_vs_route_me_harder(int af, struct sk_buff *skb,
 		struct dst_entry *dst = skb_dst(skb);
 
 		if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) &&
-		    ip6_route_me_harder(skb) != 0)
+		    ip6_route_me_harder(ipvs->net, skb) != 0)
 			return 1;
 	} else
 #endif
 		if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
-		    ip_route_me_harder(skb, RTN_LOCAL) != 0)
+		    ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0)
 			return 1;
 
 	return 0;
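These calls depend on the reroute helpers having grown a struct net parameter in the same kernel cycle; their prototypes as used here, inferred from the calls in this hunk:

	int ip_route_me_harder(struct net *net, struct sk_buff *skb,
			       unsigned int addr_type);
	int ip6_route_me_harder(struct net *net, struct sk_buff *skb);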
@@ -848,7 +871,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 #endif
 	ip_vs_nat_icmp(skb, pp, cp, 1);
 
-	if (ip_vs_route_me_harder(af, skb, hooknum))
+	if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
 		goto out;
 
 	/* do the statistics and put it back */
@@ -872,8 +895,8 @@ out:
  * Find any that might be relevant, check against existing connections.
  * Currently handles error types - unreachable, quench, ttl exceeded.
  */
-static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
-			  unsigned int hooknum)
+static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb,
+			  int *related, unsigned int hooknum)
 {
 	struct iphdr *iph;
 	struct icmphdr	_icmph, *ic;
@@ -888,7 +911,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
 
 	/* reassemble IP fragments */
 	if (ip_is_fragment(ip_hdr(skb))) {
-		if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
+		if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum)))
 			return NF_STOLEN;
 	}
 
@@ -934,10 +957,10 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
 	IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
		      "Checking outgoing ICMP for");
 
-	ip_vs_fill_ip4hdr(cih, &ciph);
-	ciph.len += offset;
+	ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph);
+
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(AF_INET, skb, &ciph, 1);
+	cp = pp->conn_out_get(ipvs, AF_INET, skb, &ciph);
 	if (!cp)
 		return NF_ACCEPT;
 
@@ -947,16 +970,16 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
 }
 
 #ifdef CONFIG_IP_VS_IPV6
-static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
-			     unsigned int hooknum, struct ip_vs_iphdr *ipvsh)
+static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
+			     int *related, unsigned int hooknum,
+			     struct ip_vs_iphdr *ipvsh)
 {
 	struct icmp6hdr	_icmph, *ic;
-	struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */
 	struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
 	union nf_inet_addr snet;
-	unsigned int writable;
+	unsigned int offset;
 
 	*related = 1;
 	ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
@@ -984,31 +1007,23 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
			      ic->icmp6_type, ntohs(icmpv6_id(ic)),
			      &ipvsh->saddr, &ipvsh->daddr);
 
-	/* Now find the contained IP header */
-	ciph.len = ipvsh->len + sizeof(_icmph);
-	ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
-	if (ip6h == NULL)
+	if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph),
+				     true, &ciph))
 		return NF_ACCEPT; /* The packet looks wrong, ignore */
-	ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */
-	ciph.daddr.in6 = ip6h->daddr;
-	/* skip possible IPv6 exthdrs of contained IPv6 packet */
-	ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
-	if (ciph.protocol < 0)
-		return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
 
 	pp = ip_vs_proto_get(ciph.protocol);
 	if (!pp)
 		return NF_ACCEPT;
 
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1);
+	cp = pp->conn_out_get(ipvs, AF_INET6, skb, &ciph);
 	if (!cp)
 		return NF_ACCEPT;
 
 	snet.in6 = ciph.saddr.in6;
-	writable = ciph.len;
+	offset = ciph.len;
 	return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
-				    pp, writable, sizeof(struct ipv6hdr),
+				    pp, offset, sizeof(struct ipv6hdr),
				    hooknum);
 }
 #endif
@@ -1093,7 +1108,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 {
 	struct ip_vs_protocol *pp = pd->pp;
 
-	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
+	IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");
 
 	if (!skb_make_writable(skb, iph->len))
 		goto drop;
@@ -1127,10 +1142,10 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 	 * if it came from this machine itself. So re-compute
 	 * the routing information.
 	 */
-	if (ip_vs_route_me_harder(af, skb, hooknum))
+	if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
 		goto drop;
 
-	IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
+	IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT");
 
 	ip_vs_out_stats(cp, skb);
 	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
@@ -1155,13 +1170,13 @@ drop:
  * Check if outgoing packet belongs to the established ip_vs_conn.
  */
 static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
 {
-	struct net *net = NULL;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
+	struct sock *sk;
 
 	EnterFunction(11);
 
@@ -1169,29 +1184,27 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	if (skb->ipvs_property)
 		return NF_ACCEPT;
 
+	sk = skb_to_full_sk(skb);
 	/* Bad... Do not break raw sockets */
-	if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+	if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
		     af == AF_INET)) {
-		struct sock *sk = skb->sk;
-		struct inet_sock *inet = inet_sk(skb->sk);
 
-		if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+		if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
 			return NF_ACCEPT;
 	}
 
 	if (unlikely(!skb_dst(skb)))
 		return NF_ACCEPT;
 
-	net = skb_net(skb);
-	if (!net_ipvs(net)->enable)
+	if (!ipvs->enable)
 		return NF_ACCEPT;
 
-	ip_vs_fill_iph_skb(af, skb, &iph);
+	ip_vs_fill_iph_skb(af, skb, false, &iph);
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
 		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
 			int related;
-			int verdict = ip_vs_out_icmp_v6(skb, &related,
+			int verdict = ip_vs_out_icmp_v6(ipvs, skb, &related,
							hooknum, &iph);
 
 			if (related)
@@ -1201,13 +1214,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 #endif
 	if (unlikely(iph.protocol == IPPROTO_ICMP)) {
 		int related;
-		int verdict = ip_vs_out_icmp(skb, &related, hooknum);
+		int verdict = ip_vs_out_icmp(ipvs, skb, &related, hooknum);
 
 		if (related)
 			return verdict;
 	}
 
-	pd = ip_vs_proto_data_get(net, iph.protocol);
+	pd = ip_vs_proto_data_get(ipvs, iph.protocol);
 	if (unlikely(!pd))
 		return NF_ACCEPT;
 	pp = pd->pp;
@@ -1217,21 +1230,21 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	if (af == AF_INET)
 #endif
 		if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) {
-			if (ip_vs_gather_frags(skb,
+			if (ip_vs_gather_frags(ipvs, skb,
					       ip_vs_defrag_user(hooknum)))
 				return NF_STOLEN;
 
-			ip_vs_fill_ip4hdr(skb_network_header(skb), &iph);
+			ip_vs_fill_iph_skb(AF_INET, skb, false, &iph);
 		}
 
 	/*
 	 * Check if the packet belongs to an existing entry
 	 */
-	cp = pp->conn_out_get(af, skb, &iph, 0);
+	cp = pp->conn_out_get(ipvs, af, skb, &iph);
 
 	if (likely(cp))
 		return handle_response(af, skb, pd, cp, &iph, hooknum);
-	if (sysctl_nat_icmp_send(net) &&
+	if (sysctl_nat_icmp_send(ipvs) &&
	    (pp->protocol == IPPROTO_TCP ||
	     pp->protocol == IPPROTO_UDP ||
	     pp->protocol == IPPROTO_SCTP)) {
@@ -1241,7 +1254,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
					 sizeof(_ports), _ports, &iph);
 		if (pptr == NULL)
 			return NF_ACCEPT;	/* Not for me */
-		if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
+		if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr,
					   pptr[0])) {
 			/*
 			 * Notify the real server: there is no
@@ -1258,7 +1271,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 #ifdef CONFIG_IP_VS_IPV6
 			if (af == AF_INET6) {
 				if (!skb->dev)
-					skb->dev = net->loopback_dev;
+					skb->dev = ipvs->net->loopback_dev;
 				icmpv6_send(skb,
					    ICMPV6_DEST_UNREACH,
					    ICMPV6_PORT_UNREACH,
@@ -1272,7 +1285,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 			}
 		}
 	}
-	IP_VS_DBG_PKT(12, af, pp, skb, 0,
+	IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
		      "ip_vs_out: packet continues traversal as normal");
 	return NF_ACCEPT;
 }
@@ -1283,10 +1296,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
  * Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_reply4(void *priv, struct sk_buff *skb,
	     const struct nf_hook_state *state)
 {
-	return ip_vs_out(ops->hooknum, skb, AF_INET);
+	return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
 }
 
 /*
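Every hook entry point below changes shape the same way: the nf_hook_ops argument becomes an opaque priv pointer and the hook number is read from nf_hook_state, whose ->net also supplies the per-netns ipvs. A sketch of the hook typedef these hunks conform to in this kernel generation:

	typedef unsigned int nf_hookfn(void *priv,
				       struct sk_buff *skb,
				       const struct nf_hook_state *state);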
@@ -1294,10 +1307,10 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
  * Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_reply4(void *priv, struct sk_buff *skb,
		   const struct nf_hook_state *state)
 {
-	return ip_vs_out(ops->hooknum, skb, AF_INET);
+	return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -1308,10 +1321,10 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
  * Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_reply6(void *priv, struct sk_buff *skb,
	     const struct nf_hook_state *state)
 {
-	return ip_vs_out(ops->hooknum, skb, AF_INET6);
+	return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
 }
 
 /*
@@ -1319,14 +1332,51 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
  * Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_reply6(void *priv, struct sk_buff *skb,
		   const struct nf_hook_state *state)
 {
-	return ip_vs_out(ops->hooknum, skb, AF_INET6);
+	return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
 }
 
 #endif
 
+static unsigned int
+ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+		      struct ip_vs_proto_data *pd,
+		      int *verdict, struct ip_vs_conn **cpp,
+		      struct ip_vs_iphdr *iph)
+{
+	struct ip_vs_protocol *pp = pd->pp;
+
+	if (!iph->fragoffs) {
+		/* No (second) fragments need to enter here, as nf_defrag_ipv6
+		 * replayed fragment zero will already have created the cp
+		 */
+
+		/* Schedule and create new connection entry into cpp */
+		if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph))
+			return 0;
+	}
+
+	if (unlikely(!*cpp)) {
+		/* sorry, all this trouble for a no-hit :) */
+		IP_VS_DBG_PKT(12, af, pp, skb, iph->off,
+			      "ip_vs_in: packet continues traversal as normal");
+		if (iph->fragoffs) {
+			/* Fragment that couldn't be mapped to a conn entry
+			 * is missing module nf_defrag_ipv6
+			 */
+			IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
+			IP_VS_DBG_PKT(7, af, pp, skb, iph->off,
+				      "unhandled fragment");
+		}
+		*verdict = NF_ACCEPT;
+		return 0;
+	}
+
+	return 1;
+}
+
 /*
  * Handle ICMP messages in the outside-to-inside direction (incoming).
  * Find any that might be relevant, check against existing connections,
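ip_vs_try_to_schedule() factors out the schedule-or-bail sequence so that ip_vs_in() and both incoming-ICMP paths can share it. The calling pattern, as used later in this patch:

	if (unlikely(!cp)) {
		int v;

		if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
			return v;	/* verdict already chosen by the helper */
	}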
@@ -1334,9 +1384,9 @@ ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
  * Currently handles error types - unreachable, quench, ttl exceeded.
  */
 static int
-ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
+ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
+	      unsigned int hooknum)
 {
-	struct net *net = NULL;
 	struct iphdr *iph;
 	struct icmphdr	_icmph, *ic;
 	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
@@ -1345,13 +1395,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 	struct ip_vs_protocol *pp;
 	struct ip_vs_proto_data *pd;
 	unsigned int offset, offset2, ihl, verdict;
-	bool ipip;
+	bool ipip, new_cp = false;
 
 	*related = 1;
 
 	/* reassemble IP fragments */
 	if (ip_is_fragment(ip_hdr(skb))) {
-		if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
+		if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum)))
 			return NF_STOLEN;
 	}
 
@@ -1385,8 +1435,6 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 	if (cih == NULL)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */
 
-	net = skb_net(skb);
-
 	/* Special case for errors for IPIP packets */
 	ipip = false;
 	if (cih->protocol == IPPROTO_IPIP) {
@@ -1402,7 +1450,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 		ipip = true;
 	}
 
-	pd = ip_vs_proto_data_get(net, cih->protocol);
+	pd = ip_vs_proto_data_get(ipvs, cih->protocol);
 	if (!pd)
 		return NF_ACCEPT;
 	pp = pd->pp;
@@ -1416,15 +1464,24 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
		      "Checking incoming ICMP for");
 
 	offset2 = offset;
-	ip_vs_fill_ip4hdr(cih, &ciph);
-	ciph.len += offset;
+	ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !ipip, &ciph);
 	offset = ciph.len;
+
 	/* The embedded headers contain source and dest in reverse order.
	 * For IPIP this is error for request, not for reply.
	 */
-	cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1);
-	if (!cp)
-		return NF_ACCEPT;
+	cp = pp->conn_in_get(ipvs, AF_INET, skb, &ciph);
+
+	if (!cp) {
+		int v;
+
+		if (!sysctl_schedule_icmp(ipvs))
+			return NF_ACCEPT;
+
+		if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
+			return v;
+		new_cp = true;
+	}
 
 	verdict = NF_DROP;
 
@@ -1455,7 +1512,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 			skb_reset_network_header(skb);
 			IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
				  &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
-			ipv4_update_pmtu(skb, dev_net(skb->dev),
+			ipv4_update_pmtu(skb, ipvs->net,
					 mtu, 0, 0, 0, 0);
 			/* Client uses PMTUD? */
 			if (!(frag_off & htons(IP_DF)))
@@ -1501,23 +1558,26 @@ ignore_ipip:
 	verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
 
 out:
-	__ip_vs_conn_put(cp);
+	if (likely(!new_cp))
+		__ip_vs_conn_put(cp);
+	else
+		ip_vs_conn_put(cp);
 
 	return verdict;
 }
 
 #ifdef CONFIG_IP_VS_IPV6
-static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
-			    unsigned int hooknum, struct ip_vs_iphdr *iph)
+static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
+			    int *related, unsigned int hooknum,
+			    struct ip_vs_iphdr *iph)
 {
-	struct net *net = NULL;
-	struct ipv6hdr _ip6h, *ip6h;
 	struct icmp6hdr	_icmph, *ic;
 	struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_proto_data *pd;
-	unsigned int offs_ciph, writable, verdict;
+	unsigned int offset, verdict;
+	bool new_cp = false;
 
 	*related = 1;
 
@@ -1546,21 +1606,11 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
			      ic->icmp6_type, ntohs(icmpv6_id(ic)),
			      &iph->saddr, &iph->daddr);
 
-	/* Now find the contained IP header */
-	ciph.len = iph->len + sizeof(_icmph);
-	offs_ciph = ciph.len; /* Save ip header offset */
-	ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
-	if (ip6h == NULL)
-		return NF_ACCEPT; /* The packet looks wrong, ignore */
-	ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */
-	ciph.daddr.in6 = ip6h->daddr;
-	/* skip possible IPv6 exthdrs of contained IPv6 packet */
-	ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
-	if (ciph.protocol < 0)
-		return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
-
-	net = skb_net(skb);
-	pd = ip_vs_proto_data_get(net, ciph.protocol);
+	offset = iph->len + sizeof(_icmph);
+	if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph))
+		return NF_ACCEPT;
+
+	pd = ip_vs_proto_data_get(ipvs, ciph.protocol);
 	if (!pd)
 		return NF_ACCEPT;
 	pp = pd->pp;
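The new_cp flag in the out: epilogue above picks between the two put variants. A sketch of the rationale, based on the refcounting in ip_vs_conn.c: __ip_vs_conn_put() only drops the reference, while ip_vs_conn_put() also (re)arms the expire timer that a freshly scheduled connection still needs:

	if (likely(!new_cp))
		__ip_vs_conn_put(cp);	/* looked-up conn: just release */
	else
		ip_vs_conn_put(cp);	/* new conn: release and start its timer */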
@@ -1569,36 +1619,49 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
 	if (ciph.fragoffs)
 		return NF_ACCEPT;
 
-	IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph,
+	IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
		      "Checking incoming ICMPv6 for");
 
 	/* The embedded headers contain source and dest in reverse order
	 * if not from localhost
	 */
-	cp = pp->conn_in_get(AF_INET6, skb, &ciph,
-			     (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1);
+	cp = pp->conn_in_get(ipvs, AF_INET6, skb, &ciph);
+
+	if (!cp) {
+		int v;
+
+		if (!sysctl_schedule_icmp(ipvs))
+			return NF_ACCEPT;
+
+		if (!ip_vs_try_to_schedule(ipvs, AF_INET6, skb, pd, &v, &cp, &ciph))
+			return v;
+
+		new_cp = true;
+	}
 
-	if (!cp)
-		return NF_ACCEPT;
 	/* VS/TUN, VS/DR and LOCALNODE just let it go */
 	if ((hooknum == NF_INET_LOCAL_OUT) &&
	    (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
-		__ip_vs_conn_put(cp);
-		return NF_ACCEPT;
+		verdict = NF_ACCEPT;
+		goto out;
 	}
 
 	/* do the statistics and put it back */
 	ip_vs_in_stats(cp, skb);
 
 	/* Need to mangle contained IPv6 header in ICMPv6 packet */
-	writable = ciph.len;
+	offset = ciph.len;
 	if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
	    IPPROTO_SCTP == ciph.protocol)
-		writable += 2 * sizeof(__u16); /* Also mangle ports */
+		offset += 2 * sizeof(__u16); /* Also mangle ports */
 
-	verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph);
+	verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph);
 
-	__ip_vs_conn_put(cp);
+out:
+	if (likely(!new_cp))
+		__ip_vs_conn_put(cp);
+	else
+		ip_vs_conn_put(cp);
 
 	return verdict;
 }
@@ -1610,16 +1673,15 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
  * and send it on its way...
  */
 static unsigned int
-ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
 {
-	struct net *net;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
 	int ret, pkts;
-	struct netns_ipvs *ipvs;
 	int conn_reuse_mode;
+	struct sock *sk;
 
 	/* Already marked as IPVS request or reply? */
 	if (skb->ipvs_property)
@@ -1633,7 +1695,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	if (unlikely((skb->pkt_type != PACKET_HOST &&
		      hooknum != NF_INET_LOCAL_OUT) ||
		     !skb_dst(skb))) {
-		ip_vs_fill_iph_skb(af, skb, &iph);
+		ip_vs_fill_iph_skb(af, skb, false, &iph);
 		IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
			      " ignored in hook %u\n",
			      skb->pkt_type, iph.protocol,
@@ -1641,20 +1703,17 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 		return NF_ACCEPT;
 	}
 	/* ipvs enabled in this netns ? */
-	net = skb_net(skb);
-	ipvs = net_ipvs(net);
 	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
 		return NF_ACCEPT;
 
-	ip_vs_fill_iph_skb(af, skb, &iph);
+	ip_vs_fill_iph_skb(af, skb, false, &iph);
 
 	/* Bad... Do not break raw sockets */
-	if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+	sk = skb_to_full_sk(skb);
+	if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
		     af == AF_INET)) {
-		struct sock *sk = skb->sk;
-		struct inet_sock *inet = inet_sk(skb->sk);
 
-		if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+		if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
 			return NF_ACCEPT;
 	}
 
@@ -1662,8 +1721,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	if (af == AF_INET6) {
 		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
 			int related;
-			int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
-						       &iph);
+			int verdict = ip_vs_in_icmp_v6(ipvs, skb, &related,
+						       hooknum, &iph);
 
 			if (related)
 				return verdict;
@@ -1672,21 +1731,30 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 #endif
 	if (unlikely(iph.protocol == IPPROTO_ICMP)) {
 		int related;
-		int verdict = ip_vs_in_icmp(skb, &related, hooknum);
+		int verdict = ip_vs_in_icmp(ipvs, skb, &related,
+					    hooknum);
 
 		if (related)
 			return verdict;
 	}
 
 	/* Protocol supported? */
-	pd = ip_vs_proto_data_get(net, iph.protocol);
-	if (unlikely(!pd))
+	pd = ip_vs_proto_data_get(ipvs, iph.protocol);
+	if (unlikely(!pd)) {
+		/* The only way we'll see this packet again is if it's
+		 * encapsulated, so mark it with ipvs_property=1 so we
+		 * skip it if we're ignoring tunneled packets
+		 */
+		if (sysctl_ignore_tunneled(ipvs))
+			skb->ipvs_property = 1;
+
 		return NF_ACCEPT;
+	}
 	pp = pd->pp;
 	/*
 	 * Check if the packet belongs to an existing connection entry
 	 */
-	cp = pp->conn_in_get(af, skb, &iph, 0);
+	cp = pp->conn_in_get(ipvs, af, skb, &iph);
 
 	conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
 	if (conn_reuse_mode && !iph.fragoffs &&
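sysctl_ignore_tunneled(), called above for unknown protocols, is assumed to be one more per-netns accessor in the style shown earlier in this patch; a sketch:

	static int sysctl_ignore_tunneled(struct netns_ipvs *ipvs)
	{
	#ifdef CONFIG_SYSCTL
		return ipvs->sysctl_ignore_tunneled;
	#else
		return 0;
	#endif
	}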
@@ -1700,32 +1768,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 		cp = NULL;
 	}
 
-	if (unlikely(!cp) && !iph.fragoffs) {
-		/* No (second) fragments need to enter here, as nf_defrag_ipv6
-		 * replayed fragment zero will already have created the cp
-		 */
+	if (unlikely(!cp)) {
 		int v;
 
-		/* Schedule and create new connection entry into &cp */
-		if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph))
+		if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
 			return v;
 	}
 
-	if (unlikely(!cp)) {
-		/* sorry, all this trouble for a no-hit :) */
-		IP_VS_DBG_PKT(12, af, pp, skb, 0,
-			      "ip_vs_in: packet continues traversal as normal");
-		if (iph.fragoffs) {
-			/* Fragment that couldn't be mapped to a conn entry
-			 * is missing module nf_defrag_ipv6
-			 */
-			IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
-			IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment");
-		}
-		return NF_ACCEPT;
-	}
+	IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");
 
-	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
 	/* Check the server status */
 	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		/* the destination server is not available */
@@ -1765,7 +1816,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	pkts = atomic_add_return(1, &cp->in_pkts);
 
 	if (ipvs->sync_state & IP_VS_STATE_MASTER)
-		ip_vs_sync_conn(net, cp, pkts);
+		ip_vs_sync_conn(ipvs, cp, pkts);
 
 	ip_vs_conn_put(cp);
 	return ret;
@@ -1776,10 +1827,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
  * Schedule and forward packets from remote clients
  */
 static unsigned int
-ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_remote_request4(void *priv, struct sk_buff *skb,
		      const struct nf_hook_state *state)
 {
-	return ip_vs_in(ops->hooknum, skb, AF_INET);
+	return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
 }
 
 /*
@@ -1787,10 +1838,10 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
  * Schedule and forward packets from local clients
  */
 static unsigned int
-ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_request4(void *priv, struct sk_buff *skb,
		     const struct nf_hook_state *state)
 {
-	return ip_vs_in(ops->hooknum, skb, AF_INET);
+	return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -1800,10 +1851,10 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
  * Schedule and forward packets from remote clients
  */
 static unsigned int
-ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_remote_request6(void *priv, struct sk_buff *skb,
		      const struct nf_hook_state *state)
 {
-	return ip_vs_in(ops->hooknum, skb, AF_INET6);
+	return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
 }
 
 /*
@@ -1811,10 +1862,10 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
  * Schedule and forward packets from local clients
  */
 static unsigned int
-ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_request6(void *priv, struct sk_buff *skb,
		     const struct nf_hook_state *state)
 {
-	return ip_vs_in(ops->hooknum, skb, AF_INET6);
+	return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
 }
 
 #endif
@@ -1830,46 +1881,40 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
1830 * and send them to ip_vs_in_icmp. 1881 * and send them to ip_vs_in_icmp.
1831 */ 1882 */
1832static unsigned int 1883static unsigned int
1833ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, 1884ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
1834 const struct nf_hook_state *state) 1885 const struct nf_hook_state *state)
1835{ 1886{
1836 int r; 1887 int r;
1837 struct net *net; 1888 struct netns_ipvs *ipvs = net_ipvs(state->net);
1838 struct netns_ipvs *ipvs;
1839 1889
1840 if (ip_hdr(skb)->protocol != IPPROTO_ICMP) 1890 if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
1841 return NF_ACCEPT; 1891 return NF_ACCEPT;
1842 1892
1843 /* ipvs enabled in this netns ? */ 1893 /* ipvs enabled in this netns ? */
1844 net = skb_net(skb);
1845 ipvs = net_ipvs(net);
1846 if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) 1894 if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
1847 return NF_ACCEPT; 1895 return NF_ACCEPT;
1848 1896
1849 return ip_vs_in_icmp(skb, &r, ops->hooknum); 1897 return ip_vs_in_icmp(ipvs, skb, &r, state->hook);
1850} 1898}
1851 1899
1852#ifdef CONFIG_IP_VS_IPV6 1900#ifdef CONFIG_IP_VS_IPV6
1853static unsigned int 1901static unsigned int
1854ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, 1902ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
1855 const struct nf_hook_state *state) 1903 const struct nf_hook_state *state)
1856{ 1904{
1857 int r; 1905 int r;
1858 struct net *net; 1906 struct netns_ipvs *ipvs = net_ipvs(state->net);
1859 struct netns_ipvs *ipvs;
1860 struct ip_vs_iphdr iphdr; 1907 struct ip_vs_iphdr iphdr;
1861 1908
1862 ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); 1909 ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
1863 if (iphdr.protocol != IPPROTO_ICMPV6) 1910 if (iphdr.protocol != IPPROTO_ICMPV6)
1864 return NF_ACCEPT; 1911 return NF_ACCEPT;
1865 1912
1866 /* ipvs enabled in this netns ? */ 1913 /* ipvs enabled in this netns ? */
1867 net = skb_net(skb);
1868 ipvs = net_ipvs(net);
1869 if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) 1914 if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
1870 return NF_ACCEPT; 1915 return NF_ACCEPT;
1871 1916
1872 return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr); 1917 return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr);
1873} 1918}
1874#endif 1919#endif
1875 1920
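The four request handlers and the two ICMP forwarders above all change shape the same way. For orientation, a minimal hypothetical sketch (example_hook is not part of the patch), assuming the 4.4-era netfilter prototype where a hook receives void *priv and reads both the hook number and the namespace from struct nf_hook_state:

static unsigned int
example_hook(void *priv, struct sk_buff *skb,
	     const struct nf_hook_state *state)
{
	/* Per-netns IPVS state now comes from the hook state, not from
	 * skb_net()/skb_sknet() on the packet itself. */
	struct netns_ipvs *ipvs = net_ipvs(state->net);

	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
		return NF_ACCEPT;

	/* state->hook replaces the old ops->hooknum argument. */
	return ip_vs_in(ipvs, state->hook, skb, AF_INET);
}
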
@@ -1878,7 +1923,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	/* After packet filtering, change source only for VS/NAT */
 	{
 		.hook = ip_vs_reply4,
-		.owner = THIS_MODULE,
 		.pf = NFPROTO_IPV4,
 		.hooknum = NF_INET_LOCAL_IN,
 		.priority = NF_IP_PRI_NAT_SRC - 2,
@@ -1888,7 +1932,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	 * applied to IPVS. */
 	{
 		.hook = ip_vs_remote_request4,
-		.owner = THIS_MODULE,
 		.pf = NFPROTO_IPV4,
 		.hooknum = NF_INET_LOCAL_IN,
 		.priority = NF_IP_PRI_NAT_SRC - 1,
@@ -1896,7 +1939,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	/* Before ip_vs_in, change source only for VS/NAT */
 	{
 		.hook = ip_vs_local_reply4,
-		.owner = THIS_MODULE,
 		.pf = NFPROTO_IPV4,
 		.hooknum = NF_INET_LOCAL_OUT,
 		.priority = NF_IP_PRI_NAT_DST + 1,
@@ -1904,7 +1946,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	/* After mangle, schedule and forward local requests */
 	{
 		.hook = ip_vs_local_request4,
-		.owner = THIS_MODULE,
 		.pf = NFPROTO_IPV4,
 		.hooknum = NF_INET_LOCAL_OUT,
 		.priority = NF_IP_PRI_NAT_DST + 2,
@@ -1913,7 +1954,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
 	{
 		.hook = ip_vs_forward_icmp,
-		.owner = THIS_MODULE,
 		.pf = NFPROTO_IPV4,
 		.hooknum = NF_INET_FORWARD,
 		.priority = 99,
@@ -1921,7 +1961,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	/* After packet filtering, change source only for VS/NAT */
 	{
 		.hook = ip_vs_reply4,
-		.owner = THIS_MODULE,
 		.pf = NFPROTO_IPV4,
 		.hooknum = NF_INET_FORWARD,
 		.priority = 100,
@@ -1930,7 +1969,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	/* After packet filtering, change source only for VS/NAT */
 	{
 		.hook = ip_vs_reply6,
-		.owner = THIS_MODULE,
 		.pf = NFPROTO_IPV6,
 		.hooknum = NF_INET_LOCAL_IN,
 		.priority = NF_IP6_PRI_NAT_SRC - 2,
@@ -1940,7 +1978,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	 * applied to IPVS. */
 	{
 		.hook = ip_vs_remote_request6,
-		.owner = THIS_MODULE,
 		.pf = NFPROTO_IPV6,
 		.hooknum = NF_INET_LOCAL_IN,
 		.priority = NF_IP6_PRI_NAT_SRC - 1,
@@ -1948,7 +1985,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	/* Before ip_vs_in, change source only for VS/NAT */
 	{
 		.hook = ip_vs_local_reply6,
-		.owner = THIS_MODULE,
 		.pf = NFPROTO_IPV6,
 		.hooknum = NF_INET_LOCAL_OUT,
 		.priority = NF_IP6_PRI_NAT_DST + 1,
@@ -1956,7 +1992,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	/* After mangle, schedule and forward local requests */
 	{
 		.hook = ip_vs_local_request6,
-		.owner = THIS_MODULE,
 		.pf = NFPROTO_IPV6,
 		.hooknum = NF_INET_LOCAL_OUT,
 		.priority = NF_IP6_PRI_NAT_DST + 2,
@@ -1965,7 +2000,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
 	{
 		.hook = ip_vs_forward_icmp_v6,
-		.owner = THIS_MODULE,
 		.pf = NFPROTO_IPV6,
 		.hooknum = NF_INET_FORWARD,
 		.priority = 99,
@@ -1973,7 +2007,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	/* After packet filtering, change source only for VS/NAT */
 	{
 		.hook = ip_vs_reply6,
-		.owner = THIS_MODULE,
 		.pf = NFPROTO_IPV6,
 		.hooknum = NF_INET_FORWARD,
 		.priority = 100,
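Every ip_vs_ops entry loses its .owner initializer in the hunks above. A minimal sketch of the resulting entry shape (example_hook is hypothetical; this assumes the 4.4-era netfilter core, which dropped per-hook module refcounting so nf_hook_ops no longer carries an owner field):

static struct nf_hook_ops example_ops[] __read_mostly = {
	{
		.hook     = example_hook,	/* no .owner = THIS_MODULE */
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_LOCAL_IN,
		.priority = NF_IP_PRI_NAT_SRC - 1,
	},
};
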
@@ -1999,22 +2032,22 @@ static int __net_init __ip_vs_init(struct net *net)
 	atomic_inc(&ipvs_netns_cnt);
 	net->ipvs = ipvs;
 
-	if (ip_vs_estimator_net_init(net) < 0)
+	if (ip_vs_estimator_net_init(ipvs) < 0)
 		goto estimator_fail;
 
-	if (ip_vs_control_net_init(net) < 0)
+	if (ip_vs_control_net_init(ipvs) < 0)
 		goto control_fail;
 
-	if (ip_vs_protocol_net_init(net) < 0)
+	if (ip_vs_protocol_net_init(ipvs) < 0)
 		goto protocol_fail;
 
-	if (ip_vs_app_net_init(net) < 0)
+	if (ip_vs_app_net_init(ipvs) < 0)
 		goto app_fail;
 
-	if (ip_vs_conn_net_init(net) < 0)
+	if (ip_vs_conn_net_init(ipvs) < 0)
 		goto conn_fail;
 
-	if (ip_vs_sync_net_init(net) < 0)
+	if (ip_vs_sync_net_init(ipvs) < 0)
 		goto sync_fail;
 
 	printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
@@ -2025,15 +2058,15 @@ static int __net_init __ip_vs_init(struct net *net)
 	 */
 
 sync_fail:
-	ip_vs_conn_net_cleanup(net);
+	ip_vs_conn_net_cleanup(ipvs);
 conn_fail:
-	ip_vs_app_net_cleanup(net);
+	ip_vs_app_net_cleanup(ipvs);
 app_fail:
-	ip_vs_protocol_net_cleanup(net);
+	ip_vs_protocol_net_cleanup(ipvs);
 protocol_fail:
-	ip_vs_control_net_cleanup(net);
+	ip_vs_control_net_cleanup(ipvs);
 control_fail:
-	ip_vs_estimator_net_cleanup(net);
+	ip_vs_estimator_net_cleanup(ipvs);
 estimator_fail:
 	net->ipvs = NULL;
 	return -ENOMEM;
@@ -2041,22 +2074,25 @@ estimator_fail:
 
 static void __net_exit __ip_vs_cleanup(struct net *net)
 {
-	ip_vs_service_net_cleanup(net);	/* ip_vs_flush() with locks */
-	ip_vs_conn_net_cleanup(net);
-	ip_vs_app_net_cleanup(net);
-	ip_vs_protocol_net_cleanup(net);
-	ip_vs_control_net_cleanup(net);
-	ip_vs_estimator_net_cleanup(net);
-	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	ip_vs_service_net_cleanup(ipvs);	/* ip_vs_flush() with locks */
+	ip_vs_conn_net_cleanup(ipvs);
+	ip_vs_app_net_cleanup(ipvs);
+	ip_vs_protocol_net_cleanup(ipvs);
+	ip_vs_control_net_cleanup(ipvs);
+	ip_vs_estimator_net_cleanup(ipvs);
+	IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
 	net->ipvs = NULL;
 }
 
 static void __net_exit __ip_vs_dev_cleanup(struct net *net)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	EnterFunction(2);
-	net_ipvs(net)->enable = 0;	/* Disable packet reception */
+	ipvs->enable = 0;	/* Disable packet reception */
 	smp_wmb();
-	ip_vs_sync_net_cleanup(net);
+	ip_vs_sync_net_cleanup(ipvs);
 	LeaveFunction(2);
 }
 
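The pernet callbacks above keep their struct net signature, as the pernet_operations API requires, and derive the ipvs pointer that every subsystem helper now takes. A condensed hypothetical sketch of that pattern, with only two subsystems and without the full goto-unwind chain of the real code:

static int __net_init example_ipvs_init(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);	/* net->ipvs, set up earlier */

	if (ip_vs_estimator_net_init(ipvs) < 0)
		return -ENOMEM;
	if (ip_vs_control_net_init(ipvs) < 0) {
		ip_vs_estimator_net_cleanup(ipvs);
		return -ENOMEM;
	}
	return 0;
}

static void __net_exit example_ipvs_exit(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	ip_vs_control_net_cleanup(ipvs);
	ip_vs_estimator_net_cleanup(ipvs);
	net->ipvs = NULL;
}
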
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 1a23e91d50d8..e7c1b052c2a3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -228,7 +228,7 @@ static void defense_work_handler(struct work_struct *work)
 
 	update_defense_level(ipvs);
 	if (atomic_read(&ipvs->dropentry))
-		ip_vs_random_dropentry(ipvs->net);
+		ip_vs_random_dropentry(ipvs);
 	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 }
 #endif
@@ -263,7 +263,7 @@ static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
 *	Returns hash value for virtual service
 */
 static inline unsigned int
-ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
+ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
 		  const union nf_inet_addr *addr, __be16 port)
 {
 	register unsigned int porth = ntohs(port);
@@ -276,7 +276,7 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
 			addr->ip6[2]^addr->ip6[3];
 #endif
 	ahash = ntohl(addr_fold);
-	ahash ^= ((size_t) net >> 8);
+	ahash ^= ((size_t) ipvs >> 8);
 
 	return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
 	       IP_VS_SVC_TAB_MASK;
@@ -285,9 +285,9 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
 /*
 *	Returns hash value of fwmark for virtual service lookup
 */
-static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
+static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark)
 {
-	return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
+	return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
 }
 
 /*
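Both hash helpers above substitute the ipvs pointer where the struct net pointer used to be mixed into the key; services of all namespaces share one global table, so the pointer bits keep identical fwmarks or addresses in different namespaces from always colliding on the same bucket. A standalone restatement of the fwmark variant (the shift by 8 presumably discards low pointer bits that are constant under allocator alignment):

static inline unsigned int example_fwm_hashkey(struct netns_ipvs *ipvs,
					       __u32 fwmark)
{
	return (((size_t)ipvs >> 8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
}
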
@@ -309,14 +309,14 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 		/*
 		 *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
 		 */
-		hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
+		hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
 					 &svc->addr, svc->port);
 		hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
 	} else {
 		/*
 		 *  Hash it by fwmark in svc_fwm_table
 		 */
-		hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
+		hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
 		hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
 	}
 
@@ -357,21 +357,21 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 *	Get service by {netns, proto,addr,port} in the service table.
 */
 static inline struct ip_vs_service *
-__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+__ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
 		     const union nf_inet_addr *vaddr, __be16 vport)
 {
 	unsigned int hash;
 	struct ip_vs_service *svc;
 
 	/* Check for "full" addressed entries */
-	hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
+	hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport);
 
 	hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
 		if ((svc->af == af)
 		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
 		    && (svc->port == vport)
 		    && (svc->protocol == protocol)
-		    && net_eq(svc->net, net)) {
+		    && (svc->ipvs == ipvs)) {
 			/* HIT */
 			return svc;
 		}
@@ -385,17 +385,17 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol,
 *	Get service by {fwmark} in the service table.
 */
 static inline struct ip_vs_service *
-__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
 {
 	unsigned int hash;
 	struct ip_vs_service *svc;
 
 	/* Check for fwmark addressed entries */
-	hash = ip_vs_svc_fwm_hashkey(net, fwmark);
+	hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
 
 	hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
 		if (svc->fwmark == fwmark && svc->af == af
-		    && net_eq(svc->net, net)) {
+		    && (svc->ipvs == ipvs)) {
 			/* HIT */
 			return svc;
 		}
@@ -406,17 +406,16 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
 
 /* Find service, called under RCU lock */
 struct ip_vs_service *
-ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
 		   const union nf_inet_addr *vaddr, __be16 vport)
 {
 	struct ip_vs_service *svc;
-	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	/*
 	 * Check the table hashed by fwmark first
 	 */
 	if (fwmark) {
-		svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+		svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
 		if (svc)
 			goto out;
 	}
@@ -425,7 +424,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
 	 * Check the table hashed by <protocol,addr,port>
 	 * for "full" addressed entries
 	 */
-	svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
+	svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
 
 	if (svc == NULL
 	    && protocol == IPPROTO_TCP
@@ -435,7 +434,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
 		 * Check if ftp service entry exists, the packet
 		 * might belong to FTP data connections.
 		 */
-		svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
+		svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
 	}
 
 	if (svc == NULL
@@ -443,7 +442,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
 		/*
 		 * Check if the catch-all port (port zero) exists
 		 */
-		svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
+		svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
 	}
 
 out:
@@ -543,10 +542,9 @@ static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
 }
 
 /* Check if real service by <proto,addr,port> is present */
-bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
+bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
 			    const union nf_inet_addr *daddr, __be16 dport)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	unsigned int hash;
 	struct ip_vs_dest *dest;
 
@@ -601,7 +599,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
 * on the backup.
 * Called under RCU lock, no refcnt is returned.
 */
-struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
+struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
 				   const union nf_inet_addr *daddr,
 				   __be16 dport,
 				   const union nf_inet_addr *vaddr,
@@ -612,7 +610,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
 	struct ip_vs_service *svc;
 	__be16 port = dport;
 
-	svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport);
+	svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
 	if (!svc)
 		return NULL;
 	if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
@@ -660,7 +658,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
 		     const union nf_inet_addr *daddr, __be16 dport)
 {
 	struct ip_vs_dest *dest;
-	struct netns_ipvs *ipvs = net_ipvs(svc->net);
+	struct netns_ipvs *ipvs = svc->ipvs;
 
 	/*
 	 * Find the destination in trash
@@ -715,10 +713,9 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest)
 * are expired, and the refcnt of each destination in the trash must
 * be 0, so we simply release them here.
 */
-static void ip_vs_trash_cleanup(struct net *net)
+static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
 {
 	struct ip_vs_dest *dest, *nxt;
-	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	del_timer_sync(&ipvs->dest_trash_timer);
 	/* No need to use dest_trash_lock */
@@ -788,7 +785,7 @@ static void
 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 		    struct ip_vs_dest_user_kern *udest, int add)
 {
-	struct netns_ipvs *ipvs = net_ipvs(svc->net);
+	struct netns_ipvs *ipvs = svc->ipvs;
 	struct ip_vs_service *old_svc;
 	struct ip_vs_scheduler *sched;
 	int conn_flags;
@@ -843,7 +840,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 	spin_unlock_bh(&dest->dst_lock);
 
 	if (add) {
-		ip_vs_start_estimator(svc->net, &dest->stats);
+		ip_vs_start_estimator(svc->ipvs, &dest->stats);
 		list_add_rcu(&dest->n_list, &svc->destinations);
 		svc->num_dests++;
 		sched = rcu_dereference_protected(svc->scheduler, 1);
@@ -874,12 +871,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 		atype = ipv6_addr_type(&udest->addr.in6);
 		if ((!(atype & IPV6_ADDR_UNICAST) ||
 		     atype & IPV6_ADDR_LINKLOCAL) &&
-		    !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
+		    !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
 			return -EINVAL;
 	} else
 #endif
 	{
-		atype = inet_addr_type(svc->net, udest->addr.ip);
+		atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
 		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 			return -EINVAL;
 	}
@@ -1036,12 +1033,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 /*
 *	Delete a destination (must be already unlinked from the service)
 */
-static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
+static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
 			     bool cleanup)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
-
-	ip_vs_stop_estimator(net, &dest->stats);
+	ip_vs_stop_estimator(ipvs, &dest->stats);
 
 	/*
 	 * Remove it from the d-linked list with the real services.
@@ -1079,7 +1074,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
 	svc->num_dests--;
 
 	if (dest->af != svc->af)
-		net_ipvs(svc->net)->mixed_address_family_dests--;
+		svc->ipvs->mixed_address_family_dests--;
 
 	if (svcupd) {
 		struct ip_vs_scheduler *sched;
@@ -1120,7 +1115,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	/*
 	 *	Delete the destination
 	 */
-	__ip_vs_del_dest(svc->net, dest, false);
+	__ip_vs_del_dest(svc->ipvs, dest, false);
 
 	LeaveFunction(2);
 
@@ -1129,8 +1124,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 
 static void ip_vs_dest_trash_expire(unsigned long data)
 {
-	struct net *net = (struct net *) data;
-	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
 	struct ip_vs_dest *dest, *next;
 	unsigned long now = jiffies;
 
@@ -1163,14 +1157,13 @@ static void ip_vs_dest_trash_expire(unsigned long data)
 *	Add a service into the service hash table
 */
 static int
-ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
 		  struct ip_vs_service **svc_p)
 {
 	int ret = 0, i;
 	struct ip_vs_scheduler *sched = NULL;
 	struct ip_vs_pe *pe = NULL;
 	struct ip_vs_service *svc = NULL;
-	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	/* increase the module use count */
 	ip_vs_use_count_inc();
@@ -1237,7 +1230,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 	svc->flags = u->flags;
 	svc->timeout = u->timeout * HZ;
 	svc->netmask = u->netmask;
-	svc->net = net;
+	svc->ipvs = ipvs;
 
 	INIT_LIST_HEAD(&svc->destinations);
 	spin_lock_init(&svc->sched_lock);
@@ -1261,7 +1254,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 	else if (svc->port == 0)
 		atomic_inc(&ipvs->nullsvc_counter);
 
-	ip_vs_start_estimator(net, &svc->stats);
+	ip_vs_start_estimator(ipvs, &svc->stats);
 
 	/* Count only IPv4 services for old get/setsockopt interface */
 	if (svc->af == AF_INET)
@@ -1381,7 +1374,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
 	struct ip_vs_dest *dest, *nxt;
 	struct ip_vs_scheduler *old_sched;
 	struct ip_vs_pe *old_pe;
-	struct netns_ipvs *ipvs = net_ipvs(svc->net);
+	struct netns_ipvs *ipvs = svc->ipvs;
 
 	pr_info("%s: enter\n", __func__);
 
@@ -1389,7 +1382,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
 	if (svc->af == AF_INET)
 		ipvs->num_services--;
 
-	ip_vs_stop_estimator(svc->net, &svc->stats);
+	ip_vs_stop_estimator(svc->ipvs, &svc->stats);
 
 	/* Unbind scheduler */
 	old_sched = rcu_dereference_protected(svc->scheduler, 1);
@@ -1405,7 +1398,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
 	 */
 	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
 		__ip_vs_unlink_dest(svc, dest, 0);
-		__ip_vs_del_dest(svc->net, dest, cleanup);
+		__ip_vs_del_dest(svc->ipvs, dest, cleanup);
 	}
 
 	/*
@@ -1456,7 +1449,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
 /*
 *	Flush all the virtual services
 */
-static int ip_vs_flush(struct net *net, bool cleanup)
+static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
 {
 	int idx;
 	struct ip_vs_service *svc;
@@ -1468,7 +1461,7 @@ static int ip_vs_flush(struct net *net, bool cleanup)
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
 					  s_list) {
-			if (net_eq(svc->net, net))
+			if (svc->ipvs == ipvs)
 				ip_vs_unlink_service(svc, cleanup);
 		}
 	}
@@ -1479,7 +1472,7 @@ static int ip_vs_flush(struct net *net, bool cleanup)
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
 					  f_list) {
-			if (net_eq(svc->net, net))
+			if (svc->ipvs == ipvs)
 				ip_vs_unlink_service(svc, cleanup);
 		}
 	}
@@ -1491,12 +1484,12 @@ static int ip_vs_flush(struct net *net, bool cleanup)
 *	Delete service by {netns} in the service table.
 *	Called by __ip_vs_cleanup()
 */
-void ip_vs_service_net_cleanup(struct net *net)
+void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
 {
 	EnterFunction(2);
 	/* Check for "full" addressed entries */
 	mutex_lock(&__ip_vs_mutex);
-	ip_vs_flush(net, true);
+	ip_vs_flush(ipvs, true);
 	mutex_unlock(&__ip_vs_mutex);
 	LeaveFunction(2);
 }
@@ -1540,7 +1533,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
 	mutex_lock(&__ip_vs_mutex);
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-			if (net_eq(svc->net, net)) {
+			if (svc->ipvs == ipvs) {
 				list_for_each_entry(dest, &svc->destinations,
 						    n_list) {
 					ip_vs_forget_dev(dest, dev);
@@ -1549,7 +1542,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
 		}
 
 		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-			if (net_eq(svc->net, net)) {
+			if (svc->ipvs == ipvs) {
 				list_for_each_entry(dest, &svc->destinations,
 						    n_list) {
 					ip_vs_forget_dev(dest, dev);
@@ -1583,26 +1576,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
 	return 0;
 }
 
-static int ip_vs_zero_all(struct net *net)
+static int ip_vs_zero_all(struct netns_ipvs *ipvs)
 {
 	int idx;
 	struct ip_vs_service *svc;
 
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-			if (net_eq(svc->net, net))
+			if (svc->ipvs == ipvs)
 				ip_vs_zero_service(svc);
 		}
 	}
 
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-			if (net_eq(svc->net, net))
+			if (svc->ipvs == ipvs)
 				ip_vs_zero_service(svc);
 		}
 	}
 
-	ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
+	ip_vs_zero_stats(&ipvs->tot_stats);
 	return 0;
 }
 
@@ -1615,7 +1608,7 @@ static int
 proc_do_defense_mode(struct ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	struct net *net = current->nsproxy->net_ns;
+	struct netns_ipvs *ipvs = table->extra2;
 	int *valp = table->data;
 	int val = *valp;
 	int rc;
@@ -1626,7 +1619,7 @@ proc_do_defense_mode(struct ctl_table *table, int write,
 			/* Restore the correct value */
 			*valp = val;
 		} else {
-			update_defense_level(net_ipvs(net));
+			update_defense_level(ipvs);
 		}
 	}
 	return rc;
@@ -1844,6 +1837,18 @@ static struct ctl_table vs_vars[] = {
 		.mode = 0644,
 		.proc_handler = proc_dointvec,
 	},
+	{
+		.procname = "schedule_icmp",
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname = "ignore_tunneled",
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
 #ifdef CONFIG_IP_VS_DEBUG
 	{
 		.procname = "debug_level",
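The two tunables added to vs_vars above are plain integer knobs with no .data pointer in the template. A hypothetical equivalent entry, assuming the usual IPVS scheme where the template table is duplicated per namespace and .data is pointed at the per-netns field at that time:

static struct ctl_table example_vars[] = {
	{
		.procname     = "ignore_tunneled",
		.maxlen       = sizeof(int),
		.mode         = 0644,
		.proc_handler = proc_dointvec,
	},
	{ }
};
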
@@ -1889,6 +1894,7 @@ static inline const char *ip_vs_fwd_name(unsigned int flags)
 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 {
 	struct net *net = seq_file_net(seq);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_iter *iter = seq->private;
 	int idx;
 	struct ip_vs_service *svc;
@@ -1896,7 +1902,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 	/* look in hash by protocol */
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
-			if (net_eq(svc->net, net) && pos-- == 0) {
+			if ((svc->ipvs == ipvs) && pos-- == 0) {
 				iter->table = ip_vs_svc_table;
 				iter->bucket = idx;
 				return svc;
@@ -1908,7 +1914,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
 					 f_list) {
-			if (net_eq(svc->net, net) && pos-- == 0) {
+			if ((svc->ipvs == ipvs) && pos-- == 0) {
 				iter->table = ip_vs_svc_fwm_table;
 				iter->bucket = idx;
 				return svc;
@@ -2196,7 +2202,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = {
 /*
 *	Set timeout values for tcp tcpfin udp in the timeout_table.
 */
-static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
 {
 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
 	struct ip_vs_proto_data *pd;
@@ -2209,13 +2215,13 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
 
 #ifdef CONFIG_IP_VS_PROTO_TCP
 	if (u->tcp_timeout) {
-		pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+		pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
 		pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
 			= u->tcp_timeout * HZ;
 	}
 
 	if (u->tcp_fin_timeout) {
-		pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+		pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
 		pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
 			= u->tcp_fin_timeout * HZ;
 	}
@@ -2223,7 +2229,7 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
 
 #ifdef CONFIG_IP_VS_PROTO_UDP
 	if (u->udp_timeout) {
-		pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+		pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
 		pd->timeout_table[IP_VS_UDP_S_NORMAL]
 			= u->udp_timeout * HZ;
 	}
@@ -2344,12 +2350,12 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		cfg.syncid = dm->syncid;
 		rtnl_lock();
 		mutex_lock(&ipvs->sync_mutex);
-		ret = start_sync_thread(net, &cfg, dm->state);
+		ret = start_sync_thread(ipvs, &cfg, dm->state);
 		mutex_unlock(&ipvs->sync_mutex);
 		rtnl_unlock();
 	} else {
 		mutex_lock(&ipvs->sync_mutex);
-		ret = stop_sync_thread(net, dm->state);
+		ret = stop_sync_thread(ipvs, dm->state);
 		mutex_unlock(&ipvs->sync_mutex);
 	}
 	goto out_dec;
@@ -2358,11 +2364,11 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 	mutex_lock(&__ip_vs_mutex);
 	if (cmd == IP_VS_SO_SET_FLUSH) {
 		/* Flush the virtual service */
-		ret = ip_vs_flush(net, false);
+		ret = ip_vs_flush(ipvs, false);
 		goto out_unlock;
 	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
 		/* Set timeout values for (tcp tcpfin udp) */
-		ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
+		ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
 		goto out_unlock;
 	}
 
@@ -2377,7 +2383,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 	if (cmd == IP_VS_SO_SET_ZERO) {
 		/* if no service address is set, zero counters in all */
 		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
-			ret = ip_vs_zero_all(net);
+			ret = ip_vs_zero_all(ipvs);
 			goto out_unlock;
 		}
 	}
@@ -2395,10 +2401,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 	/* Lookup the exact service by <protocol, addr, port> or fwmark */
 	rcu_read_lock();
 	if (usvc.fwmark == 0)
-		svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
+		svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
 					   &usvc.addr, usvc.port);
 	else
-		svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
+		svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
 	rcu_read_unlock();
 
 	if (cmd != IP_VS_SO_SET_ADD
@@ -2412,7 +2418,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		if (svc != NULL)
 			ret = -EEXIST;
 		else
-			ret = ip_vs_add_service(net, &usvc, &svc);
+			ret = ip_vs_add_service(ipvs, &usvc, &svc);
 		break;
 	case IP_VS_SO_SET_EDIT:
 		ret = ip_vs_edit_service(svc, &usvc);
@@ -2471,7 +2477,7 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 }
 
 static inline int
-__ip_vs_get_service_entries(struct net *net,
+__ip_vs_get_service_entries(struct netns_ipvs *ipvs,
 			    const struct ip_vs_get_services *get,
 			    struct ip_vs_get_services __user *uptr)
 {
@@ -2483,7 +2489,7 @@ __ip_vs_get_service_entries(struct net *net,
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
 			/* Only expose IPv4 entries to old interface */
-			if (svc->af != AF_INET || !net_eq(svc->net, net))
+			if (svc->af != AF_INET || (svc->ipvs != ipvs))
 				continue;
 
 			if (count >= get->num_services)
@@ -2502,7 +2508,7 @@ __ip_vs_get_service_entries(struct net *net,
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
 			/* Only expose IPv4 entries to old interface */
-			if (svc->af != AF_INET || !net_eq(svc->net, net))
+			if (svc->af != AF_INET || (svc->ipvs != ipvs))
 				continue;
 
 			if (count >= get->num_services)
@@ -2522,7 +2528,7 @@ out:
 }
 
 static inline int
-__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
 			 struct ip_vs_get_dests __user *uptr)
 {
 	struct ip_vs_service *svc;
@@ -2531,9 +2537,9 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
 
 	rcu_read_lock();
 	if (get->fwmark)
-		svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
+		svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
 	else
-		svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
+		svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
 					   get->port);
 	rcu_read_unlock();
 
@@ -2578,7 +2584,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
 }
 
 static inline void
-__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
 {
 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
 	struct ip_vs_proto_data *pd;
@@ -2587,12 +2593,12 @@ __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
 	memset(u, 0, sizeof (*u));
 
 #ifdef CONFIG_IP_VS_PROTO_TCP
-	pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+	pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
 	u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
 	u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
 #endif
 #ifdef CONFIG_IP_VS_PROTO_UDP
-	pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+	pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
 	u->udp_timeout =
 			pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
 #endif
@@ -2711,7 +2717,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 			ret = -EINVAL;
 			goto out;
 		}
-		ret = __ip_vs_get_service_entries(net, get, user);
+		ret = __ip_vs_get_service_entries(ipvs, get, user);
 	}
 	break;
 
@@ -2725,9 +2731,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		addr.ip = entry->addr;
 		rcu_read_lock();
 		if (entry->fwmark)
-			svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
+			svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
 		else
-			svc = __ip_vs_service_find(net, AF_INET,
+			svc = __ip_vs_service_find(ipvs, AF_INET,
 						   entry->protocol, &addr,
 						   entry->port);
 		rcu_read_unlock();
@@ -2753,7 +2759,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 			ret = -EINVAL;
 			goto out;
 		}
-		ret = __ip_vs_get_dest_entries(net, get, user);
+		ret = __ip_vs_get_dest_entries(ipvs, get, user);
 	}
 	break;
 
@@ -2761,7 +2767,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	{
 		struct ip_vs_timeout_user t;
 
-		__ip_vs_get_timeouts(net, &t);
+		__ip_vs_get_timeouts(ipvs, &t);
 		if (copy_to_user(user, &t, sizeof(t)) != 0)
 			ret = -EFAULT;
 	}
@@ -2996,12 +3002,13 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
 	int idx = 0, i;
 	int start = cb->args[0];
 	struct ip_vs_service *svc;
-	struct net *net = skb_sknet(skb);
+	struct net *net = sock_net(skb->sk);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	mutex_lock(&__ip_vs_mutex);
 	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
 		hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
-			if (++idx <= start || !net_eq(svc->net, net))
+			if (++idx <= start || (svc->ipvs != ipvs))
 				continue;
 			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
 				idx--;
@@ -3012,7 +3019,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
 
 	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
 		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
-			if (++idx <= start || !net_eq(svc->net, net))
+			if (++idx <= start || (svc->ipvs != ipvs))
 				continue;
 			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
 				idx--;
@@ -3028,7 +3035,7 @@ nla_put_failure:
 	return skb->len;
 }
 
-static int ip_vs_genl_parse_service(struct net *net,
+static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
 				    struct ip_vs_service_user_kern *usvc,
 				    struct nlattr *nla, int full_entry,
 				    struct ip_vs_service **ret_svc)
@@ -3073,9 +3080,9 @@ static int ip_vs_genl_parse_service(struct net *net,
 
 	rcu_read_lock();
 	if (usvc->fwmark)
-		svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
+		svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
 	else
-		svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
+		svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
 					   &usvc->addr, usvc->port);
 	rcu_read_unlock();
 	*ret_svc = svc;
@@ -3113,14 +3120,14 @@ static int ip_vs_genl_parse_service(struct net *net,
 	return 0;
 }
 
-static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
 						     struct nlattr *nla)
 {
 	struct ip_vs_service_user_kern usvc;
 	struct ip_vs_service *svc;
 	int ret;
 
-	ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
+	ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc);
 	return ret ? ERR_PTR(ret) : svc;
 }
 
@@ -3195,7 +3202,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
 	struct ip_vs_service *svc;
 	struct ip_vs_dest *dest;
 	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
-	struct net *net = skb_sknet(skb);
+	struct net *net = sock_net(skb->sk);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	mutex_lock(&__ip_vs_mutex);
 
@@ -3205,7 +3213,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
 		goto out_err;
 
 
-	svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
+	svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
 	if (IS_ERR(svc) || svc == NULL)
 		goto out_err;
 
@@ -3341,7 +3349,7 @@ nla_put_failure:
 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
 				   struct netlink_callback *cb)
 {
-	struct net *net = skb_sknet(skb);
+	struct net *net = sock_net(skb->sk);
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	mutex_lock(&ipvs->sync_mutex);
@@ -3367,9 +3375,8 @@ nla_put_failure:
 	return skb->len;
 }
 
-static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ipvs_sync_daemon_cfg c;
 	struct nlattr *a;
 	int ret;
@@ -3426,33 +3433,32 @@ static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
 
 	rtnl_lock();
 	mutex_lock(&ipvs->sync_mutex);
-	ret = start_sync_thread(net, &c,
+	ret = start_sync_thread(ipvs, &c,
 				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
 	mutex_unlock(&ipvs->sync_mutex);
 	rtnl_unlock();
 	return ret;
 }
 
-static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	int ret;
 
 	if (!attrs[IPVS_DAEMON_ATTR_STATE])
 		return -EINVAL;
 
 	mutex_lock(&ipvs->sync_mutex);
-	ret = stop_sync_thread(net,
+	ret = stop_sync_thread(ipvs,
 			       nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
 	mutex_unlock(&ipvs->sync_mutex);
 	return ret;
 }
 
-static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
 {
 	struct ip_vs_timeout_user t;
 
-	__ip_vs_get_timeouts(net, &t);
+	__ip_vs_get_timeouts(ipvs, &t);
 
 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
 		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3464,17 +3470,15 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
 		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
 
-	return ip_vs_set_timeout(net, &t);
+	return ip_vs_set_timeout(ipvs, &t);
 }
 
 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
 {
 	int ret = -EINVAL, cmd;
-	struct net *net;
-	struct netns_ipvs *ipvs;
+	struct net *net = sock_net(skb->sk);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	net = skb_sknet(skb);
-	ipvs = net_ipvs(net);
 	cmd = info->genlhdr->cmd;
 
 	if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3487,9 +3491,9 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3487 goto out; 3491 goto out;
3488 3492
3489 if (cmd == IPVS_CMD_NEW_DAEMON) 3493 if (cmd == IPVS_CMD_NEW_DAEMON)
3490 ret = ip_vs_genl_new_daemon(net, daemon_attrs); 3494 ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
3491 else 3495 else
3492 ret = ip_vs_genl_del_daemon(net, daemon_attrs); 3496 ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
3493 } 3497 }
3494 3498
3495out: 3499out:
@@ -3503,22 +3507,22 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3503 struct ip_vs_dest_user_kern udest; 3507 struct ip_vs_dest_user_kern udest;
3504 int ret = 0, cmd; 3508 int ret = 0, cmd;
3505 int need_full_svc = 0, need_full_dest = 0; 3509 int need_full_svc = 0, need_full_dest = 0;
3506 struct net *net; 3510 struct net *net = sock_net(skb->sk);
3511 struct netns_ipvs *ipvs = net_ipvs(net);
3507 3512
3508 net = skb_sknet(skb);
3509 cmd = info->genlhdr->cmd; 3513 cmd = info->genlhdr->cmd;
3510 3514
3511 mutex_lock(&__ip_vs_mutex); 3515 mutex_lock(&__ip_vs_mutex);
3512 3516
3513 if (cmd == IPVS_CMD_FLUSH) { 3517 if (cmd == IPVS_CMD_FLUSH) {
3514 ret = ip_vs_flush(net, false); 3518 ret = ip_vs_flush(ipvs, false);
3515 goto out; 3519 goto out;
3516 } else if (cmd == IPVS_CMD_SET_CONFIG) { 3520 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3517 ret = ip_vs_genl_set_config(net, info->attrs); 3521 ret = ip_vs_genl_set_config(ipvs, info->attrs);
3518 goto out; 3522 goto out;
3519 } else if (cmd == IPVS_CMD_ZERO && 3523 } else if (cmd == IPVS_CMD_ZERO &&
3520 !info->attrs[IPVS_CMD_ATTR_SERVICE]) { 3524 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3521 ret = ip_vs_zero_all(net); 3525 ret = ip_vs_zero_all(ipvs);
3522 goto out; 3526 goto out;
3523 } 3527 }
3524 3528
@@ -3528,7 +3532,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3528 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) 3532 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3529 need_full_svc = 1; 3533 need_full_svc = 1;
3530 3534
3531 ret = ip_vs_genl_parse_service(net, &usvc, 3535 ret = ip_vs_genl_parse_service(ipvs, &usvc,
3532 info->attrs[IPVS_CMD_ATTR_SERVICE], 3536 info->attrs[IPVS_CMD_ATTR_SERVICE],
3533 need_full_svc, &svc); 3537 need_full_svc, &svc);
3534 if (ret) 3538 if (ret)
@@ -3567,7 +3571,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3567 /* The synchronization protocol is incompatible 3571 /* The synchronization protocol is incompatible
3568 * with mixed family services 3572 * with mixed family services
3569 */ 3573 */
3570 if (net_ipvs(net)->sync_state) { 3574 if (ipvs->sync_state) {
3571 ret = -EINVAL; 3575 ret = -EINVAL;
3572 goto out; 3576 goto out;
3573 } 3577 }
@@ -3587,7 +3591,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3587 switch (cmd) { 3591 switch (cmd) {
3588 case IPVS_CMD_NEW_SERVICE: 3592 case IPVS_CMD_NEW_SERVICE:
3589 if (svc == NULL) 3593 if (svc == NULL)
3590 ret = ip_vs_add_service(net, &usvc, &svc); 3594 ret = ip_vs_add_service(ipvs, &usvc, &svc);
3591 else 3595 else
3592 ret = -EEXIST; 3596 ret = -EEXIST;
3593 break; 3597 break;
@@ -3625,9 +3629,9 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3625 struct sk_buff *msg; 3629 struct sk_buff *msg;
3626 void *reply; 3630 void *reply;
3627 int ret, cmd, reply_cmd; 3631 int ret, cmd, reply_cmd;
3628 struct net *net; 3632 struct net *net = sock_net(skb->sk);
3633 struct netns_ipvs *ipvs = net_ipvs(net);
3629 3634
3630 net = skb_sknet(skb);
3631 cmd = info->genlhdr->cmd; 3635 cmd = info->genlhdr->cmd;
3632 3636
3633 if (cmd == IPVS_CMD_GET_SERVICE) 3637 if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3656,7 +3660,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3656 { 3660 {
3657 struct ip_vs_service *svc; 3661 struct ip_vs_service *svc;
3658 3662
3659 svc = ip_vs_genl_find_service(net, 3663 svc = ip_vs_genl_find_service(ipvs,
3660 info->attrs[IPVS_CMD_ATTR_SERVICE]); 3664 info->attrs[IPVS_CMD_ATTR_SERVICE]);
3661 if (IS_ERR(svc)) { 3665 if (IS_ERR(svc)) {
3662 ret = PTR_ERR(svc); 3666 ret = PTR_ERR(svc);
@@ -3677,7 +3681,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3677 { 3681 {
3678 struct ip_vs_timeout_user t; 3682 struct ip_vs_timeout_user t;
3679 3683
3680 __ip_vs_get_timeouts(net, &t); 3684 __ip_vs_get_timeouts(ipvs, &t);
3681#ifdef CONFIG_IP_VS_PROTO_TCP 3685#ifdef CONFIG_IP_VS_PROTO_TCP
3682 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, 3686 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3683 t.tcp_timeout) || 3687 t.tcp_timeout) ||
@@ -3832,10 +3836,10 @@ static void ip_vs_genl_unregister(void)
3832 * per netns intit/exit func. 3836 * per netns intit/exit func.
3833 */ 3837 */
3834#ifdef CONFIG_SYSCTL 3838#ifdef CONFIG_SYSCTL
3835static int __net_init ip_vs_control_net_init_sysctl(struct net *net) 3839static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
3836{ 3840{
3841 struct net *net = ipvs->net;
3837 int idx; 3842 int idx;
3838 struct netns_ipvs *ipvs = net_ipvs(net);
3839 struct ctl_table *tbl; 3843 struct ctl_table *tbl;
3840 3844
3841 atomic_set(&ipvs->dropentry, 0); 3845 atomic_set(&ipvs->dropentry, 0);
@@ -3854,6 +3858,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3854 } else 3858 } else
3855 tbl = vs_vars; 3859 tbl = vs_vars;
3856 /* Initialize sysctl defaults */ 3860 /* Initialize sysctl defaults */
3861 for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
3862 if (tbl[idx].proc_handler == proc_do_defense_mode)
3863 tbl[idx].extra2 = ipvs;
3864 }
3857 idx = 0; 3865 idx = 0;
3858 ipvs->sysctl_amemthresh = 1024; 3866 ipvs->sysctl_amemthresh = 1024;
3859 tbl[idx++].data = &ipvs->sysctl_amemthresh; 3867 tbl[idx++].data = &ipvs->sysctl_amemthresh;
@@ -3895,7 +3903,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3895 tbl[idx++].data = &ipvs->sysctl_backup_only; 3903 tbl[idx++].data = &ipvs->sysctl_backup_only;
3896 ipvs->sysctl_conn_reuse_mode = 1; 3904 ipvs->sysctl_conn_reuse_mode = 1;
3897 tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; 3905 tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
3898 3906 tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
3907 tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
3899 3908
3900 ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); 3909 ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
3901 if (ipvs->sysctl_hdr == NULL) { 3910 if (ipvs->sysctl_hdr == NULL) {
@@ -3903,7 +3912,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3903 kfree(tbl); 3912 kfree(tbl);
3904 return -ENOMEM; 3913 return -ENOMEM;
3905 } 3914 }
3906 ip_vs_start_estimator(net, &ipvs->tot_stats); 3915 ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
3907 ipvs->sysctl_tbl = tbl; 3916 ipvs->sysctl_tbl = tbl;
3908 /* Schedule defense work */ 3917 /* Schedule defense work */
3909 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); 3918 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
@@ -3912,14 +3921,14 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3912 return 0; 3921 return 0;
3913} 3922}
3914 3923
3915static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) 3924static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
3916{ 3925{
3917 struct netns_ipvs *ipvs = net_ipvs(net); 3926 struct net *net = ipvs->net;
3918 3927
3919 cancel_delayed_work_sync(&ipvs->defense_work); 3928 cancel_delayed_work_sync(&ipvs->defense_work);
3920 cancel_work_sync(&ipvs->defense_work.work); 3929 cancel_work_sync(&ipvs->defense_work.work);
3921 unregister_net_sysctl_table(ipvs->sysctl_hdr); 3930 unregister_net_sysctl_table(ipvs->sysctl_hdr);
3922 ip_vs_stop_estimator(net, &ipvs->tot_stats); 3931 ip_vs_stop_estimator(ipvs, &ipvs->tot_stats);
3923 3932
3924 if (!net_eq(net, &init_net)) 3933 if (!net_eq(net, &init_net))
3925 kfree(ipvs->sysctl_tbl); 3934 kfree(ipvs->sysctl_tbl);
@@ -3927,8 +3936,8 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
3927 3936
3928#else 3937#else
3929 3938
3930static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } 3939static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
3931static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } 3940static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }
3932 3941
3933#endif 3942#endif
3934 3943
@@ -3936,10 +3945,10 @@ static struct notifier_block ip_vs_dst_notifier = {
3936 .notifier_call = ip_vs_dst_event, 3945 .notifier_call = ip_vs_dst_event,
3937}; 3946};
3938 3947
3939int __net_init ip_vs_control_net_init(struct net *net) 3948int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
3940{ 3949{
3950 struct net *net = ipvs->net;
3941 int i, idx; 3951 int i, idx;
3942 struct netns_ipvs *ipvs = net_ipvs(net);
3943 3952
3944 /* Initialize rs_table */ 3953 /* Initialize rs_table */
3945 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 3954 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
@@ -3948,7 +3957,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
3948 INIT_LIST_HEAD(&ipvs->dest_trash); 3957 INIT_LIST_HEAD(&ipvs->dest_trash);
3949 spin_lock_init(&ipvs->dest_trash_lock); 3958 spin_lock_init(&ipvs->dest_trash_lock);
3950 setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 3959 setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
3951 (unsigned long) net); 3960 (unsigned long) ipvs);
3952 atomic_set(&ipvs->ftpsvc_counter, 0); 3961 atomic_set(&ipvs->ftpsvc_counter, 0);
3953 atomic_set(&ipvs->nullsvc_counter, 0); 3962 atomic_set(&ipvs->nullsvc_counter, 0);
3954 3963
@@ -3970,7 +3979,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
3970 proc_create("ip_vs_stats_percpu", 0, net->proc_net, 3979 proc_create("ip_vs_stats_percpu", 0, net->proc_net,
3971 &ip_vs_stats_percpu_fops); 3980 &ip_vs_stats_percpu_fops);
3972 3981
3973 if (ip_vs_control_net_init_sysctl(net)) 3982 if (ip_vs_control_net_init_sysctl(ipvs))
3974 goto err; 3983 goto err;
3975 3984
3976 return 0; 3985 return 0;
@@ -3980,12 +3989,12 @@ err:
3980 return -ENOMEM; 3989 return -ENOMEM;
3981} 3990}
3982 3991
3983void __net_exit ip_vs_control_net_cleanup(struct net *net) 3992void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
3984{ 3993{
3985 struct netns_ipvs *ipvs = net_ipvs(net); 3994 struct net *net = ipvs->net;
3986 3995
3987 ip_vs_trash_cleanup(net); 3996 ip_vs_trash_cleanup(ipvs);
3988 ip_vs_control_net_cleanup_sysctl(net); 3997 ip_vs_control_net_cleanup_sysctl(ipvs);
3989 remove_proc_entry("ip_vs_stats_percpu", net->proc_net); 3998 remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
3990 remove_proc_entry("ip_vs_stats", net->proc_net); 3999 remove_proc_entry("ip_vs_stats", net->proc_net);
3991 remove_proc_entry("ip_vs", net->proc_net); 4000 remove_proc_entry("ip_vs", net->proc_net);
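
Editor's note: the ip_vs_ctl.c hunks above are mechanical in theme. Every handler that used to take a struct net and re-derive the per-namespace state via net_ipvs() now takes the struct netns_ipvs directly, and the state keeps a back-pointer (ipvs->net) for the few callers that still need the namespace. A minimal userspace sketch of that calling convention; every type and function here is an illustrative stand-in, not the kernel's:

#include <stdio.h>

struct net { int id; };                 /* stand-in for struct net */
struct netns_ipvs {
	struct net *net;                /* back-pointer, as ipvs->net */
	int tcp_timeout;
};

/* Before: a handler took struct net * and had to call net_ipvs() on
 * every entry.  After: it receives the per-namespace state directly.
 */
static int set_timeout(struct netns_ipvs *ipvs, int t)
{
	ipvs->tcp_timeout = t;
	return 0;
}

int main(void)
{
	struct net net = { .id = 1 };
	struct netns_ipvs ipvs = { .net = &net };

	set_timeout(&ipvs, 900);
	printf("netns %d tcp timeout %d\n", ipvs.net->id, ipvs.tcp_timeout);
	return 0;
}

The design gain is the same in both directions: callers stop repeating the lookup, and functions that need the namespace reach it through one pointer dereference.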
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index ef0eb0a8d552..457c6c193e13 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -102,10 +102,8 @@ static void estimation_timer(unsigned long arg)
 	struct ip_vs_estimator *e;
 	struct ip_vs_stats *s;
 	u64 rate;
-	struct net *net = (struct net *)arg;
-	struct netns_ipvs *ipvs;
+	struct netns_ipvs *ipvs = (struct netns_ipvs *)arg;
 
-	ipvs = net_ipvs(net);
 	spin_lock(&ipvs->est_lock);
 	list_for_each_entry(e, &ipvs->est_list, list) {
 		s = container_of(e, struct ip_vs_stats, est);
@@ -140,9 +138,8 @@ static void estimation_timer(unsigned long arg)
 	mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
 }
 
-void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
+void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_estimator *est = &stats->est;
 
 	INIT_LIST_HEAD(&est->list);
@@ -152,9 +149,8 @@ void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
 	spin_unlock_bh(&ipvs->est_lock);
 }
 
-void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
+void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_estimator *est = &stats->est;
 
 	spin_lock_bh(&ipvs->est_lock);
@@ -192,18 +188,16 @@ void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
 	dst->outbps = (e->outbps + 0xF) >> 5;
 }
 
-int __net_init ip_vs_estimator_net_init(struct net *net)
+int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
-
 	INIT_LIST_HEAD(&ipvs->est_list);
 	spin_lock_init(&ipvs->est_lock);
-	setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
+	setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs);
 	mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
 	return 0;
 }
 
-void __net_exit ip_vs_estimator_net_cleanup(struct net *net)
+void __net_exit ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs)
 {
-	del_timer_sync(&ipvs->est_timer);
+	del_timer_sync(&ipvs->est_timer);
 }
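
Editor's note: ip_vs_est.c shows the same conversion applied to a timer. The cookie handed to setup_timer() is now the netns_ipvs pointer itself, so estimation_timer() casts its argument straight back instead of going through net_ipvs(). A compilable sketch of that pattern, with the timer expiry simulated by a direct call; the pointer-in-unsigned-long cast assumes the pointer fits, as it does on the kernel's supported targets:

#include <stdio.h>

struct netns_ipvs { unsigned long est_count; };

static void estimation_timer(unsigned long arg)
{
	/* Cast the cookie straight back to the per-netns state */
	struct netns_ipvs *ipvs = (struct netns_ipvs *)arg;

	ipvs->est_count++;
}

int main(void)
{
	struct netns_ipvs ipvs = { 0 };

	/* Arm the "timer" with the ipvs pointer as its cookie */
	unsigned long cookie = (unsigned long)&ipvs;

	estimation_timer(cookie);   /* simulated expiry */
	printf("estimator ran %lu time(s)\n", ipvs.est_count);
	return 0;
}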
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 5d3daae98bf0..d30c327bb578 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -181,7 +181,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	int ret = 0;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
-	struct net *net;
 
 	*diff = 0;
 
@@ -223,14 +222,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		 */
 		{
 			struct ip_vs_conn_param p;
-			ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+			ip_vs_conn_fill_param(cp->ipvs, AF_INET,
 					      iph->protocol, &from, port,
 					      &cp->caddr, 0, &p);
 			n_cp = ip_vs_conn_out_get(&p);
 		}
 		if (!n_cp) {
 			struct ip_vs_conn_param p;
-			ip_vs_conn_fill_param(ip_vs_conn_net(cp),
+			ip_vs_conn_fill_param(cp->ipvs,
 					      AF_INET, IPPROTO_TCP, &cp->caddr,
 					      0, &cp->vaddr, port, &p);
 			/* As above, this is ipv4 only */
@@ -289,9 +288,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		 * would be adjusted twice.
 		 */
 
-		net = skb_net(skb);
 		cp->app_data = NULL;
-		ip_vs_tcp_conn_listen(net, n_cp);
+		ip_vs_tcp_conn_listen(n_cp);
 		ip_vs_conn_put(n_cp);
 		return ret;
 	}
@@ -320,7 +318,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	union nf_inet_addr to;
 	__be16 port;
 	struct ip_vs_conn *n_cp;
-	struct net *net;
 
 	/* no diff required for incoming packets */
 	*diff = 0;
@@ -392,7 +389,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 
 	{
 		struct ip_vs_conn_param p;
-		ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+		ip_vs_conn_fill_param(cp->ipvs, AF_INET,
 				      iph->protocol, &to, port, &cp->vaddr,
 				      htons(ntohs(cp->vport)-1), &p);
 		n_cp = ip_vs_conn_in_get(&p);
@@ -413,8 +410,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	/*
 	 * Move tunnel to listen state
 	 */
-	net = skb_net(skb);
-	ip_vs_tcp_conn_listen(net, n_cp);
+	ip_vs_tcp_conn_listen(n_cp);
 	ip_vs_conn_put(n_cp);
 
 	return 1;
@@ -447,14 +443,14 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
 	if (!ipvs)
 		return -ENOENT;
 
-	app = register_ip_vs_app(net, &ip_vs_ftp);
+	app = register_ip_vs_app(ipvs, &ip_vs_ftp);
 	if (IS_ERR(app))
 		return PTR_ERR(app);
 
 	for (i = 0; i < ports_count; i++) {
 		if (!ports[i])
 			continue;
-		ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
+		ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]);
 		if (ret)
 			goto err_unreg;
 		pr_info("%s: loaded support on port[%d] = %d\n",
@@ -463,7 +459,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
 	return 0;
 
 err_unreg:
-	unregister_ip_vs_app(net, &ip_vs_ftp);
+	unregister_ip_vs_app(ipvs, &ip_vs_ftp);
 	return ret;
 }
 /*
@@ -471,7 +467,12 @@ err_unreg:
  */
 static void __ip_vs_ftp_exit(struct net *net)
 {
-	unregister_ip_vs_app(net, &ip_vs_ftp);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	if (!ipvs)
+		return;
+
+	unregister_ip_vs_app(ipvs, &ip_vs_ftp);
 }
 
 static struct pernet_operations ip_vs_ftp_ops = {
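
Editor's note: ip_vs_ftp.c can drop skb_net() entirely because the connection object now carries its owning context, so ip_vs_tcp_conn_listen() needs only the connection itself. A sketch of that back-pointer idea; the field name cp->ipvs mirrors the patch, but the types and the state constant are stand-ins:

#include <stdio.h>

struct netns_ipvs { const char *name; };

struct ip_vs_conn {
	struct netns_ipvs *ipvs;    /* set once when the conn is created */
	unsigned short state;
};

enum { IP_VS_TCP_S_LISTEN = 1 };    /* arbitrary value for the sketch */

/* After the patch the listener helper needs only the connection */
static void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
{
	cp->state = IP_VS_TCP_S_LISTEN;
	printf("conn in netns '%s' moved to LISTEN\n", cp->ipvs->name);
}

int main(void)
{
	struct netns_ipvs ipvs = { .name = "init_net" };
	struct ip_vs_conn cp = { .ipvs = &ipvs };

	ip_vs_tcp_conn_listen(&cp);
	return 0;
}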
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 127f14046c51..cccf4d637412 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -250,8 +250,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
 static int sysctl_lblc_expiration(struct ip_vs_service *svc)
 {
 #ifdef CONFIG_SYSCTL
-	struct netns_ipvs *ipvs = net_ipvs(svc->net);
-	return ipvs->sysctl_lblc_expiration;
+	return svc->ipvs->sysctl_lblc_expiration;
 #else
 	return DEFAULT_EXPIRATION;
 #endif
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 2229d2d8bbe0..796d70e47ddd 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -415,8 +415,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
 static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
 {
 #ifdef CONFIG_SYSCTL
-	struct netns_ipvs *ipvs = net_ipvs(svc->net);
-	return ipvs->sysctl_lblcr_expiration;
+	return svc->ipvs->sysctl_lblcr_expiration;
 #else
 	return DEFAULT_EXPIRATION;
 #endif
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 136184572fc9..30434fb133df 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -161,7 +161,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
 
 	/* RS->CLIENT */
 	orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-	ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
+	ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum,
 			      &orig->src.u3, orig->src.u.tcp.port,
 			      &orig->dst.u3, orig->dst.u.tcp.port, &p);
 	cp = ip_vs_conn_out_get(&p);
@@ -274,8 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
 		      " for conn " FMT_CONN "\n",
 		      __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
 
-	h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt,
-				  &tuple);
+	h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
 	if (h) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		/* Show what happens instead of calling nf_ct_kill() */
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index bed5f7042529..1b8d594e493a 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -70,7 +70,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	const char *dptr;
 	int retc;
 
-	ip_vs_fill_iph_skb(p->af, skb, &iph);
+	ip_vs_fill_iph_skb(p->af, skb, false, &iph);
 
 	/* Only useful with UDP */
 	if (iph.protocol != IPPROTO_UDP)
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 939f7fbe9b46..8ae480715cea 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -63,9 +63,8 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
  * register an ipvs protocols netns related data
  */
 static int
-register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
+register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
 	struct ip_vs_proto_data *pd =
 			kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);
@@ -79,7 +78,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
 	atomic_set(&pd->appcnt, 0);	/* Init app counter */
 
 	if (pp->init_netns != NULL) {
-		int ret = pp->init_netns(net, pd);
+		int ret = pp->init_netns(ipvs, pd);
 		if (ret) {
 			/* unlink an free proto data */
 			ipvs->proto_data_table[hash] = pd->next;
@@ -116,9 +115,8 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
  * unregister an ipvs protocols netns data
  */
 static int
-unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
+unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_proto_data **pd_p;
 	unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol);
 
@@ -127,7 +125,7 @@ unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
 	if (*pd_p == pd) {
 		*pd_p = pd->next;
 		if (pd->pp->exit_netns != NULL)
-			pd->pp->exit_netns(net, pd);
+			pd->pp->exit_netns(ipvs, pd);
 		kfree(pd);
 		return 0;
 	}
@@ -156,8 +154,8 @@ EXPORT_SYMBOL(ip_vs_proto_get);
 /*
  *	get ip_vs_protocol object data by netns and proto
  */
-static struct ip_vs_proto_data *
-__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
 {
 	struct ip_vs_proto_data *pd;
 	unsigned int hash = IP_VS_PROTO_HASH(proto);
@@ -169,14 +167,6 @@ __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
 
 	return NULL;
 }
-
-struct ip_vs_proto_data *
-ip_vs_proto_data_get(struct net *net, unsigned short proto)
-{
-	struct netns_ipvs *ipvs = net_ipvs(net);
-
-	return __ipvs_proto_data_get(ipvs, proto);
-}
 EXPORT_SYMBOL(ip_vs_proto_data_get);
 
 /*
@@ -317,7 +307,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
 /*
  *	per network name-space init
  */
-int __net_init ip_vs_protocol_net_init(struct net *net)
+int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs)
 {
 	int i, ret;
 	static struct ip_vs_protocol *protos[] = {
@@ -339,27 +329,26 @@ int __net_init ip_vs_protocol_net_init(struct net *net)
 	};
 
 	for (i = 0; i < ARRAY_SIZE(protos); i++) {
-		ret = register_ip_vs_proto_netns(net, protos[i]);
+		ret = register_ip_vs_proto_netns(ipvs, protos[i]);
 		if (ret < 0)
 			goto cleanup;
 	}
 	return 0;
 
 cleanup:
-	ip_vs_protocol_net_cleanup(net);
+	ip_vs_protocol_net_cleanup(ipvs);
 	return ret;
 }
 
-void __net_exit ip_vs_protocol_net_cleanup(struct net *net)
+void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_proto_data *pd;
 	int i;
 
 	/* unregister all the ipvs proto data for this netns */
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
 		while ((pd = ipvs->proto_data_table[i]) != NULL)
-			unregister_ip_vs_proto_netns(net, pd);
+			unregister_ip_vs_proto_netns(ipvs, pd);
 	}
 }
 
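
Editor's note: with the wrapper gone, ip_vs_proto_data_get() walks the per-namespace proto_data_table directly. That table is a small chained hash keyed by protocol number; a self-contained sketch of the same structure, with sizes, names, and types that are illustrative rather than the kernel's:

#include <stdio.h>

#define PROTO_TAB_SIZE 32
#define PROTO_HASH(p) ((p) & (PROTO_TAB_SIZE - 1))

struct proto_data {
	unsigned short protocol;
	struct proto_data *next;
};

struct netns_ipvs {
	struct proto_data *proto_data_table[PROTO_TAB_SIZE];
};

/* Walk the chain in the bucket selected by the protocol number */
static struct proto_data *proto_data_get(struct netns_ipvs *ipvs,
					 unsigned short proto)
{
	struct proto_data *pd;

	for (pd = ipvs->proto_data_table[PROTO_HASH(proto)]; pd; pd = pd->next)
		if (pd->protocol == proto)
			return pd;
	return NULL;
}

/* Push new per-protocol data onto the head of its bucket's chain */
static void proto_data_register(struct netns_ipvs *ipvs, struct proto_data *pd)
{
	unsigned int hash = PROTO_HASH(pd->protocol);

	pd->next = ipvs->proto_data_table[hash];
	ipvs->proto_data_table[hash] = pd;
}

int main(void)
{
	struct netns_ipvs ipvs = { { NULL } };
	struct proto_data tcp = { .protocol = 6 };

	proto_data_register(&ipvs, &tcp);
	printf("proto 6 %sfound\n", proto_data_get(&ipvs, 6) ? "" : "not ");
	return 0;
}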
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 5de3dd312c0f..5320d39976e1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,30 +41,28 @@ struct isakmp_hdr {
 #define PORT_ISAKMP	500
 
 static void
-ah_esp_conn_fill_param_proto(struct net *net, int af,
-			     const struct ip_vs_iphdr *iph, int inverse,
+ah_esp_conn_fill_param_proto(struct netns_ipvs *ipvs, int af,
+			     const struct ip_vs_iphdr *iph,
 			     struct ip_vs_conn_param *p)
 {
-	if (likely(!inverse))
-		ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
+	if (likely(!ip_vs_iph_inverse(iph)))
+		ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP,
 				      &iph->saddr, htons(PORT_ISAKMP),
 				      &iph->daddr, htons(PORT_ISAKMP), p);
 	else
-		ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
+		ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP,
 				      &iph->daddr, htons(PORT_ISAKMP),
 				      &iph->saddr, htons(PORT_ISAKMP), p);
 }
 
 static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb,
-		   const struct ip_vs_iphdr *iph,
-		   int inverse)
+ah_esp_conn_in_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb,
+		   const struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_conn *cp;
 	struct ip_vs_conn_param p;
-	struct net *net = skb_net(skb);
 
-	ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
+	ah_esp_conn_fill_param_proto(ipvs, af, iph, &p);
 	cp = ip_vs_conn_in_get(&p);
 	if (!cp) {
 		/*
@@ -73,7 +71,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
 		 */
 		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
 			      "%s%s %s->%s\n",
-			      inverse ? "ICMP+" : "",
+			      ip_vs_iph_icmp(iph) ? "ICMP+" : "",
 			      ip_vs_proto_get(iph->protocol)->name,
 			      IP_VS_DBG_ADDR(af, &iph->saddr),
 			      IP_VS_DBG_ADDR(af, &iph->daddr));
@@ -84,19 +82,18 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
 
 
 static struct ip_vs_conn *
-ah_esp_conn_out_get(int af, const struct sk_buff *skb,
-		    const struct ip_vs_iphdr *iph, int inverse)
+ah_esp_conn_out_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb,
+		    const struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_conn *cp;
 	struct ip_vs_conn_param p;
-	struct net *net = skb_net(skb);
 
-	ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
+	ah_esp_conn_fill_param_proto(ipvs, af, iph, &p);
 	cp = ip_vs_conn_out_get(&p);
 	if (!cp) {
 		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
 			      "%s%s %s->%s\n",
-			      inverse ? "ICMP+" : "",
+			      ip_vs_iph_icmp(iph) ? "ICMP+" : "",
 			      ip_vs_proto_get(iph->protocol)->name,
 			      IP_VS_DBG_ADDR(af, &iph->saddr),
 			      IP_VS_DBG_ADDR(af, &iph->daddr));
@@ -107,7 +104,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
 
 
 static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+ah_esp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+		     struct ip_vs_proto_data *pd,
 		     int *verdict, struct ip_vs_conn **cpp,
 		     struct ip_vs_iphdr *iph)
 {
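
Editor's note: the ah_esp hunks stop threading an 'inverse' flag through every helper; the parsed ip_vs_iphdr now records how it was obtained and callers query ip_vs_iph_inverse()/ip_vs_iph_icmp(). A sketch of that flags-in-descriptor idea; only the accessor names come from the patch, the struct layout and sched_addr() are invented for illustration:

#include <stdbool.h>
#include <stdio.h>

struct ip_vs_iphdr {
	bool icmp;      /* header came from an embedded ICMP payload */
	bool inverse;   /* tuple is reply-direction: swap src/dst use */
	unsigned int saddr, daddr;
};

static inline bool ip_vs_iph_inverse(const struct ip_vs_iphdr *iph)
{
	return iph->inverse;
}

/* Pick the client-side address regardless of packet direction */
static unsigned int sched_addr(const struct ip_vs_iphdr *iph)
{
	return ip_vs_iph_inverse(iph) ? iph->daddr : iph->saddr;
}

int main(void)
{
	struct ip_vs_iphdr fwd = { .saddr = 1, .daddr = 2 };
	struct ip_vs_iphdr rev = { .inverse = true, .saddr = 2, .daddr = 1 };

	printf("fwd hashes on %u, rev on %u\n",
	       sched_addr(&fwd), sched_addr(&rev));
	return 0;
}

The payoff is the same as dropping the struct net arguments: one fewer parameter on every call path, and no way for a caller to pass a flag that disagrees with how the header was actually parsed.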
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 5b84c0b56642..010ddeec135f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,35 +9,44 @@
 #include <net/ip_vs.h>
 
 static int
-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+		   struct ip_vs_proto_data *pd,
 		   int *verdict, struct ip_vs_conn **cpp,
 		   struct ip_vs_iphdr *iph)
 {
-	struct net *net;
 	struct ip_vs_service *svc;
-	struct netns_ipvs *ipvs;
 	sctp_chunkhdr_t _schunkh, *sch;
 	sctp_sctphdr_t *sh, _sctph;
-
-	sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
-	if (sh == NULL) {
-		*verdict = NF_DROP;
-		return 0;
+	__be16 _ports[2], *ports = NULL;
+
+	if (likely(!ip_vs_iph_icmp(iph))) {
+		sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
+		if (sh) {
+			sch = skb_header_pointer(
+				skb, iph->len + sizeof(sctp_sctphdr_t),
+				sizeof(_schunkh), &_schunkh);
+			if (sch && (sch->type == SCTP_CID_INIT ||
+				    sysctl_sloppy_sctp(ipvs)))
+				ports = &sh->source;
+		}
+	} else {
+		ports = skb_header_pointer(
+			skb, iph->len, sizeof(_ports), &_ports);
 	}
 
-	sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
-				 sizeof(_schunkh), &_schunkh);
-	if (sch == NULL) {
+	if (!ports) {
 		*verdict = NF_DROP;
 		return 0;
 	}
 
-	net = skb_net(skb);
-	ipvs = net_ipvs(net);
 	rcu_read_lock();
-	if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) &&
-	    (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
-				      &iph->daddr, sh->dest))) {
+	if (likely(!ip_vs_iph_inverse(iph)))
+		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+					 &iph->daddr, ports[1]);
+	else
+		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+					 &iph->saddr, ports[0]);
+	if (svc) {
 		int ignored;
 
 		if (ip_vs_todrop(ipvs)) {
@@ -474,14 +483,13 @@ static inline __u16 sctp_app_hashkey(__be16 port)
 		& SCTP_APP_TAB_MASK;
 }
 
-static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
+static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
-	struct netns_ipvs *ipvs = net_ipvs(net);
-	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
 
 	hash = sctp_app_hashkey(port);
 
@@ -498,9 +506,9 @@ out:
 	return ret;
 }
 
-static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
+static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 {
-	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
 
 	atomic_dec(&pd->appcnt);
 	list_del_rcu(&inc->p_list);
@@ -508,7 +516,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
 
 static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 {
-	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+	struct netns_ipvs *ipvs = cp->ipvs;
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
@@ -549,10 +557,8 @@ out:
  *   timeouts is netns related now.
  * ---------------------------------------------
  */
-static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
-
 	ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
 	pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
 							sizeof(sctp_timeouts));
@@ -561,7 +567,7 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
 	return 0;
 }
 
-static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
+static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 {
 	kfree(pd->timeout_table);
 }
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 8e92beb0cca9..d7024b2ed769 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -32,27 +32,47 @@
 #include <net/ip_vs.h>
 
 static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+		  struct ip_vs_proto_data *pd,
 		  int *verdict, struct ip_vs_conn **cpp,
 		  struct ip_vs_iphdr *iph)
 {
-	struct net *net;
 	struct ip_vs_service *svc;
 	struct tcphdr _tcph, *th;
-	struct netns_ipvs *ipvs;
+	__be16 _ports[2], *ports = NULL;
 
-	th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
-	if (th == NULL) {
+	/* In the event of icmp, we're only guaranteed to have the first 8
+	 * bytes of the transport header, so we only check the rest of the
+	 * TCP packet for non-ICMP packets
+	 */
+	if (likely(!ip_vs_iph_icmp(iph))) {
+		th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+		if (th) {
+			if (th->rst || !(sysctl_sloppy_tcp(ipvs) || th->syn))
+				return 1;
+			ports = &th->source;
+		}
+	} else {
+		ports = skb_header_pointer(
+			skb, iph->len, sizeof(_ports), &_ports);
+	}
+
+	if (!ports) {
 		*verdict = NF_DROP;
 		return 0;
 	}
-	net = skb_net(skb);
-	ipvs = net_ipvs(net);
+
 	/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
 	rcu_read_lock();
-	if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst &&
-	    (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
-				      &iph->daddr, th->dest))) {
+
+	if (likely(!ip_vs_iph_inverse(iph)))
+		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+					 &iph->daddr, ports[1]);
+	else
+		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+					 &iph->saddr, ports[0]);
+
+	if (svc) {
 		int ignored;
 
 		if (ip_vs_todrop(ipvs)) {
@@ -571,14 +591,13 @@ static inline __u16 tcp_app_hashkey(__be16 port)
 }
 
 
-static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
+static int tcp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
-	struct netns_ipvs *ipvs = net_ipvs(net);
-	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
 
 	hash = tcp_app_hashkey(port);
 
@@ -597,9 +616,9 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
 
 
 static void
-tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
+tcp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 {
-	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
 
 	atomic_dec(&pd->appcnt);
 	list_del_rcu(&inc->p_list);
@@ -609,7 +628,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
 static int
 tcp_app_conn_bind(struct ip_vs_conn *cp)
 {
-	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+	struct netns_ipvs *ipvs = cp->ipvs;
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
@@ -653,9 +672,9 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 /*
  *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
  */
-void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
 {
-	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(cp->ipvs, IPPROTO_TCP);
 
 	spin_lock_bh(&cp->lock);
 	cp->state = IP_VS_TCP_S_LISTEN;
@@ -668,10 +687,8 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
  *   timeouts is netns related now.
  * ---------------------------------------------
  */
-static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
-
 	ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
 	pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
 							sizeof(tcp_timeouts));
@@ -681,7 +698,7 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
 	return 0;
 }
 
-static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
+static void __ip_vs_tcp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 {
 	kfree(pd->timeout_table);
 }
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index b62a3c0ff9bf..e494e9a88c7f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -29,28 +29,42 @@
 #include <net/ip6_checksum.h>
 
 static int
-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+		  struct ip_vs_proto_data *pd,
 		  int *verdict, struct ip_vs_conn **cpp,
 		  struct ip_vs_iphdr *iph)
 {
-	struct net *net;
 	struct ip_vs_service *svc;
 	struct udphdr _udph, *uh;
+	__be16 _ports[2], *ports = NULL;
 
-	/* IPv6 fragments, only first fragment will hit this */
-	uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
-	if (uh == NULL) {
+	if (likely(!ip_vs_iph_icmp(iph))) {
+		/* IPv6 fragments, only first fragment will hit this */
+		uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
+		if (uh)
+			ports = &uh->source;
+	} else {
+		ports = skb_header_pointer(
+			skb, iph->len, sizeof(_ports), &_ports);
+	}
+
+	if (!ports) {
 		*verdict = NF_DROP;
 		return 0;
 	}
-	net = skb_net(skb);
+
 	rcu_read_lock();
-	svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
-				 &iph->daddr, uh->dest);
+	if (likely(!ip_vs_iph_inverse(iph)))
+		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+					 &iph->daddr, ports[1]);
+	else
+		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+					 &iph->saddr, ports[0]);
+
 	if (svc) {
 		int ignored;
 
-		if (ip_vs_todrop(net_ipvs(net))) {
+		if (ip_vs_todrop(ipvs)) {
 			/*
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
@@ -348,14 +362,13 @@ static inline __u16 udp_app_hashkey(__be16 port)
 }
 
 
-static int udp_register_app(struct net *net, struct ip_vs_app *inc)
+static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
-	struct netns_ipvs *ipvs = net_ipvs(net);
-	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
 
 	hash = udp_app_hashkey(port);
 
@@ -374,9 +387,9 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc)
 
 
 static void
-udp_unregister_app(struct net *net, struct ip_vs_app *inc)
+udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 {
-	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
 
 	atomic_dec(&pd->appcnt);
 	list_del_rcu(&inc->p_list);
@@ -385,7 +398,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc)
 
 static int udp_app_conn_bind(struct ip_vs_conn *cp)
 {
-	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+	struct netns_ipvs *ipvs = cp->ipvs;
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
@@ -456,10 +469,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
 	cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
 }
 
-static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
-
 	ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
 	pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
 							sizeof(udp_timeouts));
@@ -468,7 +479,7 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
 	return 0;
 }
 
-static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
+static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
 {
 	kfree(pd->timeout_table);
 }
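
Editor's note: the rewritten TCP, UDP, and SCTP conn_schedule functions above all rest on the same observation, spelled out in the patch's own comment: source and destination ports occupy the first four bytes of all three transport headers, and an ICMP error quotes at least the first eight bytes of the offending datagram, so reading a two-element __be16 array is always safe even for truncated ICMP-embedded headers. A userspace sketch of that bounds-checked read; header_pointer() is a stand-in for skb_header_pointer(), and the packet bytes are invented:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy len bytes at off into buf, or fail if the packet is too short;
 * this mimics the contract of the kernel's skb_header_pointer().
 */
static void *header_pointer(const uint8_t *pkt, size_t pktlen,
			    size_t off, size_t len, void *buf)
{
	if (off + len > pktlen)
		return NULL;            /* truncated packet */
	memcpy(buf, pkt + off, len);
	return buf;
}

int main(void)
{
	/* First 8 bytes of a UDP header: sport 53, dport 4096, len, csum */
	const uint8_t pkt[8] = { 0x00, 0x35, 0x10, 0x00, 0, 0, 0, 0 };
	uint16_t _ports[2], *ports;

	ports = header_pointer(pkt, sizeof(pkt), 0, sizeof(_ports), _ports);
	if (!ports)
		return 1;               /* the kernel would NF_DROP here */

	/* ports[] holds big-endian wire values, like __be16 in the patch */
	printf("sport %u dport %u\n", ntohs(ports[0]), ntohs(ports[1]));
	return 0;
}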
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 98a13433b68c..1e373a5e44e3 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -280,35 +280,29 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
 static inline __be16
 ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
 {
-	__be16 port;
-	struct tcphdr _tcph, *th;
-	struct udphdr _udph, *uh;
-	sctp_sctphdr_t _sctph, *sh;
+	__be16 _ports[2], *ports;
 
+	/* At this point we know that we have a valid packet of some kind.
+	 * Because ICMP packets are only guaranteed to have the first 8
+	 * bytes, let's just grab the ports. Fortunately they're in the
+	 * same position for all three of the protocols we care about.
+	 */
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
-		th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
-		if (unlikely(th == NULL))
-			return 0;
-		port = th->source;
-		break;
 	case IPPROTO_UDP:
-		uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
-		if (unlikely(uh == NULL))
-			return 0;
-		port = uh->source;
-		break;
 	case IPPROTO_SCTP:
-		sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
-		if (unlikely(sh == NULL))
+		ports = skb_header_pointer(skb, iph->len, sizeof(_ports),
+					   &_ports);
+		if (unlikely(!ports))
 			return 0;
-		port = sh->source;
-		break;
+
+		if (likely(!ip_vs_iph_inverse(iph)))
+			return ports[0];
+		else
+			return ports[1];
 	default:
-		port = 0;
+		return 0;
 	}
-
-	return port;
 }
 
 
@@ -322,6 +316,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
 	struct ip_vs_dest *dest;
 	struct ip_vs_sh_state *s;
 	__be16 port = 0;
+	const union nf_inet_addr *hash_addr;
+
+	hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr;
 
 	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
 
@@ -331,9 +328,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
 	s = (struct ip_vs_sh_state *) svc->sched_data;
 
 	if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
-		dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port);
+		dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port);
 	else
-		dest = ip_vs_sh_get(svc, s, &iph->saddr, port);
+		dest = ip_vs_sh_get(svc, s, hash_addr, port);
 
 	if (!dest) {
 		ip_vs_scheduler_err(svc, "no destination available");
@@ -341,7 +338,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
 	}
 
 	IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
-		      IP_VS_DBG_ADDR(svc->af, &iph->saddr),
+		      IP_VS_DBG_ADDR(svc->af, hash_addr),
 		      IP_VS_DBG_ADDR(dest->af, &dest->addr),
 		      ntohs(dest->port));
 
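
Editor's note, for context on what ip_vs_sh.c is tuning: source hashing maps a client address (optionally mixed with a port) onto a fixed bucket table of destinations, so the same client keeps reaching the same real server. A toy sketch of the idea only; the kernel's table size, hash function, weighting, and fallback handling all differ:

#include <stdint.h>
#include <stdio.h>

#define SH_TAB_SIZE 8   /* deliberately tiny; the kernel table is larger */

/* Stand-in mix; not the hash ip_vs_sh actually uses */
static unsigned int sh_hashkey(uint32_t addr, uint16_t port)
{
	return (addr ^ (addr >> 16) ^ port) % SH_TAB_SIZE;
}

int main(void)
{
	const char *dests[SH_TAB_SIZE] = {
		"rs0", "rs1", "rs2", "rs3", "rs4", "rs5", "rs6", "rs7",
	};
	uint32_t client = 0xc0a80042;   /* 192.168.0.66 */

	/* Same client (and port, if ports are hashed) -> same server */
	printf("client -> %s\n", dests[sh_hashkey(client, 0)]);
	printf("client -> %s\n", dests[sh_hashkey(client, 0)]);
	return 0;
}

This is also why the hunks above switch the hash input to hash_addr: for reply-direction (inverse) packets the client sits in daddr, and hashing the wrong end would send the two directions of one flow to different buckets.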
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 43f140950075..803001a45aa1 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -193,7 +193,7 @@ union ip_vs_sync_conn {
 #define IPVS_OPT_F_PARAM	(1 << (IPVS_OPT_PARAM-1))
 
 struct ip_vs_sync_thread_data {
-	struct net *net;
+	struct netns_ipvs *ipvs;
 	struct socket *sock;
 	char *buf;
 	int id;
@@ -533,10 +533,9 @@ set:
  *      Version 0 , could be switched in by sys_ctl.
  *      Add an ip_vs_conn information into the current sync_buff.
  */
-static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
+static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
 			       int pkts)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_sync_mesg_v0 *m;
 	struct ip_vs_sync_conn_v0 *s;
 	struct ip_vs_sync_buff *buff;
@@ -615,7 +614,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
 			pkts = atomic_add_return(1, &cp->in_pkts);
 		else
 			pkts = sysctl_sync_threshold(ipvs);
-		ip_vs_sync_conn(net, cp, pkts);
+		ip_vs_sync_conn(ipvs, cp, pkts);
 	}
 }
 
@@ -624,9 +623,8 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
  *      Called by ip_vs_in.
  *      Sending Version 1 messages
  */
-void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
+void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_sync_mesg *m;
 	union ip_vs_sync_conn *s;
 	struct ip_vs_sync_buff *buff;
@@ -637,7 +635,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
 
 	/* Handle old version of the protocol */
 	if (sysctl_sync_ver(ipvs) == 0) {
-		ip_vs_sync_conn_v0(net, cp, pkts);
+		ip_vs_sync_conn_v0(ipvs, cp, pkts);
 		return;
 	}
 	/* Do not sync ONE PACKET */
@@ -784,21 +782,21 @@ control:
  *  fill_param used by version 1
  */
 static inline int
-ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
+ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc,
 			   struct ip_vs_conn_param *p,
 			   __u8 *pe_data, unsigned int pe_data_len,
 			   __u8 *pe_name, unsigned int pe_name_len)
 {
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
-		ip_vs_conn_fill_param(net, af, sc->v6.protocol,
+		ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol,
 				      (const union nf_inet_addr *)&sc->v6.caddr,
 				      sc->v6.cport,
 				      (const union nf_inet_addr *)&sc->v6.vaddr,
 				      sc->v6.vport, p);
 	else
 #endif
-		ip_vs_conn_fill_param(net, af, sc->v4.protocol,
+		ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol,
 				      (const union nf_inet_addr *)&sc->v4.caddr,
 				      sc->v4.cport,
 				      (const union nf_inet_addr *)&sc->v4.vaddr,
@@ -837,7 +835,7 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
  *  Param: ...
  *         timeout is in sec.
  */
-static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
+static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param,
 			    unsigned int flags, unsigned int state,
 			    unsigned int protocol, unsigned int type,
 			    const union nf_inet_addr *daddr, __be16 dport,
@@ -846,7 +844,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_conn *cp;
-	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
 		cp = ip_vs_conn_in_get(param);
@@ -904,7 +901,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
 		 * with synchronization, so we can make the assumption that
 		 * the svc_af is the same as the dest_af
 		 */
-		dest = ip_vs_find_dest(net, type, type, daddr, dport,
+		dest = ip_vs_find_dest(ipvs, type, type, daddr, dport,
 				       param->vaddr, param->vport, protocol,
 				       fwmark, flags);
 
@@ -941,7 +938,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
 	} else {
 		struct ip_vs_proto_data *pd;
 
-		pd = ip_vs_proto_data_get(net, protocol);
+		pd = ip_vs_proto_data_get(ipvs, protocol);
 		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
 			cp->timeout = pd->timeout_table[state];
 		else
@@ -953,7 +950,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
 /*
  *  Process received multicast message for Version 0
  */
-static void ip_vs_process_message_v0(struct net *net, const char *buffer,
+static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer,
 				     const size_t buflen)
 {
 	struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
@@ -1009,14 +1006,14 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer,
 			}
 		}
 
-		ip_vs_conn_fill_param(net, AF_INET, s->protocol,
+		ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
 				      (const union nf_inet_addr *)&s->caddr,
 				      s->cport,
 				      (const union nf_inet_addr *)&s->vaddr,
 				      s->vport, &param);
 
 		/* Send timeout as Zero */
1019 ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET, 1016 ip_vs_proc_conn(ipvs, &param, flags, state, s->protocol, AF_INET,
1020 (union nf_inet_addr *)&s->daddr, s->dport, 1017 (union nf_inet_addr *)&s->daddr, s->dport,
1021 0, 0, opt); 1018 0, 0, opt);
1022 } 1019 }
@@ -1067,7 +1064,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
1067/* 1064/*
1068 * Process a Version 1 sync connection 1065 * Process a Version 1 sync connection
1069 */ 1066 */
1070static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) 1067static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end)
1071{ 1068{
1072 struct ip_vs_sync_conn_options opt; 1069 struct ip_vs_sync_conn_options opt;
1073 union ip_vs_sync_conn *s; 1070 union ip_vs_sync_conn *s;
@@ -1171,21 +1168,21 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
1171 state = 0; 1168 state = 0;
1172 } 1169 }
1173 } 1170 }
1174 if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data, 1171 if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data,
1175 pe_data_len, pe_name, pe_name_len)) { 1172 pe_data_len, pe_name, pe_name_len)) {
1176 retc = 50; 1173 retc = 50;
1177 goto out; 1174 goto out;
1178 } 1175 }
1179 /* If only IPv4, just silently skip IPv6 */ 1176 /* If only IPv4, just silently skip IPv6 */
1180 if (af == AF_INET) 1177 if (af == AF_INET)
1181 ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af, 1178 ip_vs_proc_conn(ipvs, &param, flags, state, s->v4.protocol, af,
1182 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, 1179 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1183 ntohl(s->v4.timeout), ntohl(s->v4.fwmark), 1180 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1184 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) 1181 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1185 ); 1182 );
1186#ifdef CONFIG_IP_VS_IPV6 1183#ifdef CONFIG_IP_VS_IPV6
1187 else 1184 else
1188 ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af, 1185 ip_vs_proc_conn(ipvs, &param, flags, state, s->v6.protocol, af,
1189 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, 1186 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1190 ntohl(s->v6.timeout), ntohl(s->v6.fwmark), 1187 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1191 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) 1188 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
@@ -1204,10 +1201,9 @@ out:
1204 * ip_vs_conn entries. 1201 * ip_vs_conn entries.
1205 * Handles Version 0 & 1 1202 * Handles Version 0 & 1
1206 */ 1203 */
1207static void ip_vs_process_message(struct net *net, __u8 *buffer, 1204static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer,
1208 const size_t buflen) 1205 const size_t buflen)
1209{ 1206{
1210 struct netns_ipvs *ipvs = net_ipvs(net);
1211 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; 1207 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
1212 __u8 *p, *msg_end; 1208 __u8 *p, *msg_end;
1213 int i, nr_conns; 1209 int i, nr_conns;
@@ -1257,7 +1253,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
1257 return; 1253 return;
1258 } 1254 }
1259 /* Process a single sync_conn */ 1255 /* Process a single sync_conn */
1260 retc = ip_vs_proc_sync_conn(net, p, msg_end); 1256 retc = ip_vs_proc_sync_conn(ipvs, p, msg_end);
1261 if (retc < 0) { 1257 if (retc < 0) {
1262 IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", 1258 IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
1263 retc); 1259 retc);
@@ -1268,7 +1264,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
1268 } 1264 }
1269 } else { 1265 } else {
1270 /* Old type of message */ 1266 /* Old type of message */
1271 ip_vs_process_message_v0(net, buffer, buflen); 1267 ip_vs_process_message_v0(ipvs, buffer, buflen);
1272 return; 1268 return;
1273 } 1269 }
1274} 1270}
@@ -1493,16 +1489,15 @@ static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
1493/* 1489/*
1494 * Set up sending multicast socket over UDP 1490 * Set up sending multicast socket over UDP
1495 */ 1491 */
1496static struct socket *make_send_sock(struct net *net, int id) 1492static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
1497{ 1493{
1498 struct netns_ipvs *ipvs = net_ipvs(net);
1499 /* multicast addr */ 1494 /* multicast addr */
1500 union ipvs_sockaddr mcast_addr; 1495 union ipvs_sockaddr mcast_addr;
1501 struct socket *sock; 1496 struct socket *sock;
1502 int result, salen; 1497 int result, salen;
1503 1498
1504 /* First create a socket */ 1499 /* First create a socket */
1505 result = sock_create_kern(net, ipvs->mcfg.mcast_af, SOCK_DGRAM, 1500 result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
1506 IPPROTO_UDP, &sock); 1501 IPPROTO_UDP, &sock);
1507 if (result < 0) { 1502 if (result < 0) {
1508 pr_err("Error during creation of socket; terminating\n"); 1503 pr_err("Error during creation of socket; terminating\n");
@@ -1550,16 +1545,15 @@ error:
1550/* 1545/*
1551 * Set up receiving multicast socket over UDP 1546 * Set up receiving multicast socket over UDP
1552 */ 1547 */
1553static struct socket *make_receive_sock(struct net *net, int id) 1548static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id)
1554{ 1549{
1555 struct netns_ipvs *ipvs = net_ipvs(net);
1556 /* multicast addr */ 1550 /* multicast addr */
1557 union ipvs_sockaddr mcast_addr; 1551 union ipvs_sockaddr mcast_addr;
1558 struct socket *sock; 1552 struct socket *sock;
1559 int result, salen; 1553 int result, salen;
1560 1554
1561 /* First create a socket */ 1555 /* First create a socket */
1562 result = sock_create_kern(net, ipvs->bcfg.mcast_af, SOCK_DGRAM, 1556 result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
1563 IPPROTO_UDP, &sock); 1557 IPPROTO_UDP, &sock);
1564 if (result < 0) { 1558 if (result < 0) {
1565 pr_err("Error during creation of socket; terminating\n"); 1559 pr_err("Error during creation of socket; terminating\n");
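
For reference, sock_create_kern() in this kernel cycle takes the owning netns as its first argument, which is why both multicast helpers above now pass ipvs->net. A hedged kernel-context sketch of the call, with error handling trimmed and a hypothetical function name:

#include <linux/err.h>
#include <linux/in.h>
#include <linux/net.h>

/* Sketch: create a kernel-owned UDP socket inside a given netns, as
 * make_send_sock()/make_receive_sock() do above with ipvs->net. */
static struct socket *sketch_mcast_sock(struct net *net, int family)
{
        struct socket *sock;
        int err;

        err = sock_create_kern(net, family, SOCK_DGRAM, IPPROTO_UDP, &sock);
        if (err < 0)
                return ERR_PTR(err);
        return sock;
}
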
@@ -1687,7 +1681,7 @@ next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
1687static int sync_thread_master(void *data) 1681static int sync_thread_master(void *data)
1688{ 1682{
1689 struct ip_vs_sync_thread_data *tinfo = data; 1683 struct ip_vs_sync_thread_data *tinfo = data;
1690 struct netns_ipvs *ipvs = net_ipvs(tinfo->net); 1684 struct netns_ipvs *ipvs = tinfo->ipvs;
1691 struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id]; 1685 struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
1692 struct sock *sk = tinfo->sock->sk; 1686 struct sock *sk = tinfo->sock->sk;
1693 struct ip_vs_sync_buff *sb; 1687 struct ip_vs_sync_buff *sb;
@@ -1743,7 +1737,7 @@ done:
1743static int sync_thread_backup(void *data) 1737static int sync_thread_backup(void *data)
1744{ 1738{
1745 struct ip_vs_sync_thread_data *tinfo = data; 1739 struct ip_vs_sync_thread_data *tinfo = data;
1746 struct netns_ipvs *ipvs = net_ipvs(tinfo->net); 1740 struct netns_ipvs *ipvs = tinfo->ipvs;
1747 int len; 1741 int len;
1748 1742
1749 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " 1743 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
@@ -1765,7 +1759,7 @@ static int sync_thread_backup(void *data)
1765 break; 1759 break;
1766 } 1760 }
1767 1761
1768 ip_vs_process_message(tinfo->net, tinfo->buf, len); 1762 ip_vs_process_message(ipvs, tinfo->buf, len);
1769 } 1763 }
1770 } 1764 }
1771 1765
@@ -1778,13 +1772,12 @@ static int sync_thread_backup(void *data)
1778} 1772}
1779 1773
1780 1774
1781int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, 1775int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
1782 int state) 1776 int state)
1783{ 1777{
1784 struct ip_vs_sync_thread_data *tinfo; 1778 struct ip_vs_sync_thread_data *tinfo;
1785 struct task_struct **array = NULL, *task; 1779 struct task_struct **array = NULL, *task;
1786 struct socket *sock; 1780 struct socket *sock;
1787 struct netns_ipvs *ipvs = net_ipvs(net);
1788 struct net_device *dev; 1781 struct net_device *dev;
1789 char *name; 1782 char *name;
1790 int (*threadfn)(void *data); 1783 int (*threadfn)(void *data);
@@ -1811,7 +1804,7 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
1811 if (!c->mcast_ttl) 1804 if (!c->mcast_ttl)
1812 c->mcast_ttl = 1; 1805 c->mcast_ttl = 1;
1813 1806
1814 dev = __dev_get_by_name(net, c->mcast_ifn); 1807 dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
1815 if (!dev) { 1808 if (!dev) {
1816 pr_err("Unknown mcast interface: %s\n", c->mcast_ifn); 1809 pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
1817 return -ENODEV; 1810 return -ENODEV;
@@ -1873,9 +1866,9 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
1873 tinfo = NULL; 1866 tinfo = NULL;
1874 for (id = 0; id < count; id++) { 1867 for (id = 0; id < count; id++) {
1875 if (state == IP_VS_STATE_MASTER) 1868 if (state == IP_VS_STATE_MASTER)
1876 sock = make_send_sock(net, id); 1869 sock = make_send_sock(ipvs, id);
1877 else 1870 else
1878 sock = make_receive_sock(net, id); 1871 sock = make_receive_sock(ipvs, id);
1879 if (IS_ERR(sock)) { 1872 if (IS_ERR(sock)) {
1880 result = PTR_ERR(sock); 1873 result = PTR_ERR(sock);
1881 goto outtinfo; 1874 goto outtinfo;
@@ -1883,7 +1876,7 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
1883 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); 1876 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
1884 if (!tinfo) 1877 if (!tinfo)
1885 goto outsocket; 1878 goto outsocket;
1886 tinfo->net = net; 1879 tinfo->ipvs = ipvs;
1887 tinfo->sock = sock; 1880 tinfo->sock = sock;
1888 if (state == IP_VS_STATE_BACKUP) { 1881 if (state == IP_VS_STATE_BACKUP) {
1889 tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, 1882 tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
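
Each sync kthread receives its context through this tinfo structure, so after the conversion the thread functions pick up the per-netns state from tinfo->ipvs instead of calling net_ipvs() on a stashed struct net. A sketch of the shape, with hypothetical names mirroring the patch's fields:

#include <net/ip_vs.h>

/* Sketch: per-thread context handed to the sync kthreads by
 * start_sync_thread(); the ipvs pointer replaces the old net pointer. */
struct sketch_sync_tinfo {
        struct netns_ipvs *ipvs;  /* per-netns IPVS state */
        struct socket *sock;      /* mcast send or receive socket */
        char *buf;                /* receive buffer (backup threads only) */
        int id;                   /* sync channel id */
};

static int sketch_sync_thread(void *data)
{
        struct sketch_sync_tinfo *tinfo = data;
        struct netns_ipvs *ipvs = tinfo->ipvs;

        (void)ipvs;               /* ... run the master/backup loop ... */
        return 0;
}
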
@@ -1947,9 +1940,8 @@ out:
1947} 1940}
1948 1941
1949 1942
1950int stop_sync_thread(struct net *net, int state) 1943int stop_sync_thread(struct netns_ipvs *ipvs, int state)
1951{ 1944{
1952 struct netns_ipvs *ipvs = net_ipvs(net);
1953 struct task_struct **array; 1945 struct task_struct **array;
1954 int id; 1946 int id;
1955 int retc = -EINVAL; 1947 int retc = -EINVAL;
@@ -2015,27 +2007,24 @@ int stop_sync_thread(struct net *net, int state)
2015/* 2007/*
2016 * Initialize data struct for each netns 2008 * Initialize data struct for each netns
2017 */ 2009 */
2018int __net_init ip_vs_sync_net_init(struct net *net) 2010int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs)
2019{ 2011{
2020 struct netns_ipvs *ipvs = net_ipvs(net);
2021
2022 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); 2012 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
2023 spin_lock_init(&ipvs->sync_lock); 2013 spin_lock_init(&ipvs->sync_lock);
2024 spin_lock_init(&ipvs->sync_buff_lock); 2014 spin_lock_init(&ipvs->sync_buff_lock);
2025 return 0; 2015 return 0;
2026} 2016}
2027 2017
2028void ip_vs_sync_net_cleanup(struct net *net) 2018void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
2029{ 2019{
2030 int retc; 2020 int retc;
2031 struct netns_ipvs *ipvs = net_ipvs(net);
2032 2021
2033 mutex_lock(&ipvs->sync_mutex); 2022 mutex_lock(&ipvs->sync_mutex);
2034 retc = stop_sync_thread(net, IP_VS_STATE_MASTER); 2023 retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER);
2035 if (retc && retc != -ESRCH) 2024 if (retc && retc != -ESRCH)
2036 pr_err("Failed to stop Master Daemon\n"); 2025 pr_err("Failed to stop Master Daemon\n");
2037 2026
2038 retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); 2027 retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP);
2039 if (retc && retc != -ESRCH) 2028 if (retc && retc != -ESRCH)
2040 pr_err("Failed to stop Backup Daemon\n"); 2029 pr_err("Failed to stop Backup Daemon\n");
2041 mutex_unlock(&ipvs->sync_mutex); 2030 mutex_unlock(&ipvs->sync_mutex);
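
One detail worth noting in the per-netns init above: sync_mutex is set up with __mutex_init() and a shared static lock_class_key, so lockdep treats every namespace's instance of the mutex as one class rather than allocating a key per netns. A hedged sketch with a hypothetical key name:

#include <linux/lockdep.h>
#include <linux/mutex.h>

/* Sketch: one static key shared by every netns instance of the mutex. */
static struct lock_class_key sketch_sync_key;

static void sketch_sync_init(struct mutex *sync_mutex)
{
        __mutex_init(sync_mutex, "ipvs->sync_mutex", &sketch_sync_key);
}
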
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 258a0b0e82a2..3264cb49b333 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -212,19 +212,20 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
212 ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); 212 ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
213} 213}
214 214
215static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, 215static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
216 int rt_mode,
216 struct ip_vs_iphdr *ipvsh, 217 struct ip_vs_iphdr *ipvsh,
217 struct sk_buff *skb, int mtu) 218 struct sk_buff *skb, int mtu)
218{ 219{
219#ifdef CONFIG_IP_VS_IPV6 220#ifdef CONFIG_IP_VS_IPV6
220 if (skb_af == AF_INET6) { 221 if (skb_af == AF_INET6) {
221 struct net *net = dev_net(skb_dst(skb)->dev); 222 struct net *net = ipvs->net;
222 223
223 if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { 224 if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
224 if (!skb->dev) 225 if (!skb->dev)
225 skb->dev = net->loopback_dev; 226 skb->dev = net->loopback_dev;
226 /* only send ICMP too big on first fragment */ 227 /* only send ICMP too big on first fragment */
227 if (!ipvsh->fragoffs) 228 if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh))
228 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 229 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
229 IP_VS_DBG(1, "frag needed for %pI6c\n", 230 IP_VS_DBG(1, "frag needed for %pI6c\n",
230 &ipv6_hdr(skb)->saddr); 231 &ipv6_hdr(skb)->saddr);
@@ -233,8 +234,6 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
233 } else 234 } else
234#endif 235#endif
235 { 236 {
236 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
237
238 /* If we're going to tunnel the packet and pmtu discovery 237 /* If we're going to tunnel the packet and pmtu discovery
239 * is disabled, we'll just fragment it anyway 238 * is disabled, we'll just fragment it anyway
240 */ 239 */
@@ -242,7 +241,8 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
242 return true; 241 return true;
243 242
244 if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && 243 if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) &&
245 skb->len > mtu && !skb_is_gso(skb))) { 244 skb->len > mtu && !skb_is_gso(skb) &&
245 !ip_vs_iph_icmp(ipvsh))) {
246 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 246 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
247 htonl(mtu)); 247 htonl(mtu));
248 IP_VS_DBG(1, "frag needed for %pI4\n", 248 IP_VS_DBG(1, "frag needed for %pI4\n",
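
The two ip_vs_iph_icmp() checks added above serve the same purpose in both address families: when the packet being forwarded is itself an embedded ICMP error, no PKT_TOOBIG/FRAG_NEEDED error may be generated about it, since ICMP errors must not be sent in response to ICMP errors. Reduced to a predicate, the IPv4 branch now looks roughly like this (flag names are hypothetical stand-ins for the checks above):

#include <stdbool.h>

/* Sketch: mirrors the IPv4 condition above; the ICMP case is the new
 * part of this hunk. */
static bool should_send_frag_needed(bool df_set, bool too_big,
                                    bool is_gso, bool inner_is_icmp_err)
{
        return df_set && too_big && !is_gso && !inner_is_icmp_err;
}
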
@@ -256,11 +256,12 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
256 256
257/* Get route to destination or remote server */ 257/* Get route to destination or remote server */
258static int 258static int
259__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, 259__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
260 struct ip_vs_dest *dest,
260 __be32 daddr, int rt_mode, __be32 *ret_saddr, 261 __be32 daddr, int rt_mode, __be32 *ret_saddr,
261 struct ip_vs_iphdr *ipvsh) 262 struct ip_vs_iphdr *ipvsh)
262{ 263{
263 struct net *net = dev_net(skb_dst(skb)->dev); 264 struct net *net = ipvs->net;
264 struct ip_vs_dest_dst *dest_dst; 265 struct ip_vs_dest_dst *dest_dst;
265 struct rtable *rt; /* Route to the other host */ 266 struct rtable *rt; /* Route to the other host */
266 int mtu; 267 int mtu;
@@ -336,7 +337,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
336 maybe_update_pmtu(skb_af, skb, mtu); 337 maybe_update_pmtu(skb_af, skb, mtu);
337 } 338 }
338 339
339 if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) 340 if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu))
340 goto err_put; 341 goto err_put;
341 342
342 skb_dst_drop(skb); 343 skb_dst_drop(skb);
@@ -402,11 +403,12 @@ out_err:
402 * Get route to destination or remote server 403 * Get route to destination or remote server
403 */ 404 */
404static int 405static int
405__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, 406__ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
407 struct ip_vs_dest *dest,
406 struct in6_addr *daddr, struct in6_addr *ret_saddr, 408 struct in6_addr *daddr, struct in6_addr *ret_saddr,
407 struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) 409 struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
408{ 410{
409 struct net *net = dev_net(skb_dst(skb)->dev); 411 struct net *net = ipvs->net;
410 struct ip_vs_dest_dst *dest_dst; 412 struct ip_vs_dest_dst *dest_dst;
411 struct rt6_info *rt; /* Route to the other host */ 413 struct rt6_info *rt; /* Route to the other host */
412 struct dst_entry *dst; 414 struct dst_entry *dst;
@@ -484,7 +486,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
484 maybe_update_pmtu(skb_af, skb, mtu); 486 maybe_update_pmtu(skb_af, skb, mtu);
485 } 487 }
486 488
487 if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) 489 if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu))
488 goto err_put; 490 goto err_put;
489 491
490 skb_dst_drop(skb); 492 skb_dst_drop(skb);
@@ -573,8 +575,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
573 skb_forward_csum(skb); 575 skb_forward_csum(skb);
574 if (!skb->sk) 576 if (!skb->sk)
575 skb_sender_cpu_clear(skb); 577 skb_sender_cpu_clear(skb);
576 NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, 578 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
577 NULL, skb_dst(skb)->dev, dst_output_sk); 579 NULL, skb_dst(skb)->dev, dst_output);
578 } else 580 } else
579 ret = NF_ACCEPT; 581 ret = NF_ACCEPT;
580 582
@@ -595,8 +597,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
595 skb_forward_csum(skb); 597 skb_forward_csum(skb);
596 if (!skb->sk) 598 if (!skb->sk)
597 skb_sender_cpu_clear(skb); 599 skb_sender_cpu_clear(skb);
598 NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, 600 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
599 NULL, skb_dst(skb)->dev, dst_output_sk); 601 NULL, skb_dst(skb)->dev, dst_output);
600 } else 602 } else
601 ret = NF_ACCEPT; 603 ret = NF_ACCEPT;
602 return ret; 604 return ret;
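
Both hunks above reflect the netfilter core change in this cycle: NF_HOOK() gained an explicit struct net * argument (supplied here from cp->ipvs->net), and the okfn continuation now has the three-argument (net, sk, skb) signature, which lets plain dst_output() replace the dst_output_sk() wrapper. A hedged sketch of the new call shape:

#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <net/dst.h>

/* Sketch: LOCAL_OUT invocation with an explicit netns, as used above. */
static int sketch_local_out(struct net *net, struct sk_buff *skb)
{
        return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, NULL, skb,
                       NULL, skb_dst(skb)->dev, dst_output);
}
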
@@ -629,7 +631,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
629 EnterFunction(10); 631 EnterFunction(10);
630 632
631 rcu_read_lock(); 633 rcu_read_lock();
632 if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr, 634 if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
633 IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0) 635 IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
634 goto tx_error; 636 goto tx_error;
635 637
@@ -656,10 +658,13 @@ int
656ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 658ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
657 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 659 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
658{ 660{
661 struct ipv6hdr *iph = ipv6_hdr(skb);
662
659 EnterFunction(10); 663 EnterFunction(10);
660 664
661 rcu_read_lock(); 665 rcu_read_lock();
662 if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL, 666 if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
667 &iph->daddr, NULL,
663 ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) 668 ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
664 goto tx_error; 669 goto tx_error;
665 670
@@ -706,7 +711,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
706 } 711 }
707 712
708 was_input = rt_is_input_route(skb_rtable(skb)); 713 was_input = rt_is_input_route(skb_rtable(skb));
709 local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, 714 local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
710 IP_VS_RT_MODE_LOCAL | 715 IP_VS_RT_MODE_LOCAL |
711 IP_VS_RT_MODE_NON_LOCAL | 716 IP_VS_RT_MODE_NON_LOCAL |
712 IP_VS_RT_MODE_RDR, NULL, ipvsh); 717 IP_VS_RT_MODE_RDR, NULL, ipvsh);
@@ -723,7 +728,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
723 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 728 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
724 729
725 if (ct && !nf_ct_is_untracked(ct)) { 730 if (ct && !nf_ct_is_untracked(ct)) {
726 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, 731 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off,
727 "ip_vs_nat_xmit(): " 732 "ip_vs_nat_xmit(): "
728 "stopping DNAT to local address"); 733 "stopping DNAT to local address");
729 goto tx_error; 734 goto tx_error;
@@ -733,8 +738,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
733 738
734 /* From world but DNAT to loopback address? */ 739 /* From world but DNAT to loopback address? */
735 if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { 740 if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
736 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " 741 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off,
737 "stopping DNAT to loopback address"); 742 "ip_vs_nat_xmit(): stopping DNAT to loopback "
743 "address");
738 goto tx_error; 744 goto tx_error;
739 } 745 }
740 746
@@ -751,7 +757,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
751 ip_hdr(skb)->daddr = cp->daddr.ip; 757 ip_hdr(skb)->daddr = cp->daddr.ip;
752 ip_send_check(ip_hdr(skb)); 758 ip_send_check(ip_hdr(skb));
753 759
754 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); 760 IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT");
755 761
756 /* FIXME: when application helper enlarges the packet and the length 762 /* FIXME: when application helper enlarges the packet and the length
757 is larger than the MTU of outgoing device, there will be still 763 is larger than the MTU of outgoing device, there will be still
@@ -794,7 +800,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
794 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 800 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
795 } 801 }
796 802
797 local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, 803 local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
804 &cp->daddr.in6,
798 NULL, ipvsh, 0, 805 NULL, ipvsh, 0,
799 IP_VS_RT_MODE_LOCAL | 806 IP_VS_RT_MODE_LOCAL |
800 IP_VS_RT_MODE_NON_LOCAL | 807 IP_VS_RT_MODE_NON_LOCAL |
@@ -812,7 +819,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
812 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 819 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
813 820
814 if (ct && !nf_ct_is_untracked(ct)) { 821 if (ct && !nf_ct_is_untracked(ct)) {
815 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, 822 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off,
816 "ip_vs_nat_xmit_v6(): " 823 "ip_vs_nat_xmit_v6(): "
817 "stopping DNAT to local address"); 824 "stopping DNAT to local address");
818 goto tx_error; 825 goto tx_error;
@@ -823,7 +830,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
823 /* From world but DNAT to loopback address? */ 830 /* From world but DNAT to loopback address? */
824 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && 831 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
825 ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { 832 ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
826 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, 833 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off,
827 "ip_vs_nat_xmit_v6(): " 834 "ip_vs_nat_xmit_v6(): "
828 "stopping DNAT to loopback address"); 835 "stopping DNAT to loopback address");
829 goto tx_error; 836 goto tx_error;
@@ -841,7 +848,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
841 goto tx_error; 848 goto tx_error;
842 ipv6_hdr(skb)->daddr = cp->daddr.in6; 849 ipv6_hdr(skb)->daddr = cp->daddr.in6;
843 850
844 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); 851 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT");
845 852
846 /* FIXME: when application helper enlarges the packet and the length 853 /* FIXME: when application helper enlarges the packet and the length
847 is larger than the MTU of outgoing device, there will be still 854 is larger than the MTU of outgoing device, there will be still
@@ -967,8 +974,8 @@ int
967ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 974ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
968 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 975 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
969{ 976{
970 struct net *net = skb_net(skb); 977 struct netns_ipvs *ipvs = cp->ipvs;
971 struct netns_ipvs *ipvs = net_ipvs(net); 978 struct net *net = ipvs->net;
972 struct rtable *rt; /* Route to the other host */ 979 struct rtable *rt; /* Route to the other host */
973 __be32 saddr; /* Source for tunnel */ 980 __be32 saddr; /* Source for tunnel */
974 struct net_device *tdev; /* Device to other host */ 981 struct net_device *tdev; /* Device to other host */
@@ -984,7 +991,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
984 EnterFunction(10); 991 EnterFunction(10);
985 992
986 rcu_read_lock(); 993 rcu_read_lock();
987 local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, 994 local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
988 IP_VS_RT_MODE_LOCAL | 995 IP_VS_RT_MODE_LOCAL |
989 IP_VS_RT_MODE_NON_LOCAL | 996 IP_VS_RT_MODE_NON_LOCAL |
990 IP_VS_RT_MODE_CONNECT | 997 IP_VS_RT_MODE_CONNECT |
@@ -1042,7 +1049,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1042 1049
1043 ret = ip_vs_tunnel_xmit_prepare(skb, cp); 1050 ret = ip_vs_tunnel_xmit_prepare(skb, cp);
1044 if (ret == NF_ACCEPT) 1051 if (ret == NF_ACCEPT)
1045 ip_local_out(skb); 1052 ip_local_out(net, skb->sk, skb);
1046 else if (ret == NF_DROP) 1053 else if (ret == NF_DROP)
1047 kfree_skb(skb); 1054 kfree_skb(skb);
1048 rcu_read_unlock(); 1055 rcu_read_unlock();
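
The same signature migration reaches ip_local_out()/ip6_local_out(), which now take (net, sk, skb) explicitly; the tunnel transmitters above supply the netns from the IPVS back-pointer rather than deriving it from the skb. A minimal sketch:

#include <net/ip.h>

/* Sketch: sending a locally built tunnel packet with an explicit netns. */
static int sketch_tunnel_send(struct net *net, struct sk_buff *skb)
{
        return ip_local_out(net, skb->sk, skb);
}
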
@@ -1078,7 +1085,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1078 EnterFunction(10); 1085 EnterFunction(10);
1079 1086
1080 rcu_read_lock(); 1087 rcu_read_lock();
1081 local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, 1088 local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
1089 &cp->daddr.in6,
1082 &saddr, ipvsh, 1, 1090 &saddr, ipvsh, 1,
1083 IP_VS_RT_MODE_LOCAL | 1091 IP_VS_RT_MODE_LOCAL |
1084 IP_VS_RT_MODE_NON_LOCAL | 1092 IP_VS_RT_MODE_NON_LOCAL |
@@ -1133,7 +1141,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1133 1141
1134 ret = ip_vs_tunnel_xmit_prepare(skb, cp); 1142 ret = ip_vs_tunnel_xmit_prepare(skb, cp);
1135 if (ret == NF_ACCEPT) 1143 if (ret == NF_ACCEPT)
1136 ip6_local_out(skb); 1144 ip6_local_out(cp->ipvs->net, skb->sk, skb);
1137 else if (ret == NF_DROP) 1145 else if (ret == NF_DROP)
1138 kfree_skb(skb); 1146 kfree_skb(skb);
1139 rcu_read_unlock(); 1147 rcu_read_unlock();
@@ -1165,7 +1173,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1165 EnterFunction(10); 1173 EnterFunction(10);
1166 1174
1167 rcu_read_lock(); 1175 rcu_read_lock();
1168 local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, 1176 local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
1169 IP_VS_RT_MODE_LOCAL | 1177 IP_VS_RT_MODE_LOCAL |
1170 IP_VS_RT_MODE_NON_LOCAL | 1178 IP_VS_RT_MODE_NON_LOCAL |
1171 IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh); 1179 IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
@@ -1204,7 +1212,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1204 EnterFunction(10); 1212 EnterFunction(10);
1205 1213
1206 rcu_read_lock(); 1214 rcu_read_lock();
1207 local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, 1215 local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
1216 &cp->daddr.in6,
1208 NULL, ipvsh, 0, 1217 NULL, ipvsh, 0,
1209 IP_VS_RT_MODE_LOCAL | 1218 IP_VS_RT_MODE_LOCAL |
1210 IP_VS_RT_MODE_NON_LOCAL | 1219 IP_VS_RT_MODE_NON_LOCAL |
@@ -1273,7 +1282,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1273 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1282 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1274 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1283 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1275 rcu_read_lock(); 1284 rcu_read_lock();
1276 local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, 1285 local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
1277 NULL, iph); 1286 NULL, iph);
1278 if (local < 0) 1287 if (local < 0)
1279 goto tx_error; 1288 goto tx_error;
@@ -1365,8 +1374,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1365 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1374 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1366 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1375 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1367 rcu_read_lock(); 1376 rcu_read_lock();
1368 local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, 1377 local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
1369 NULL, ipvsh, 0, rt_mode); 1378 &cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
1370 if (local < 0) 1379 if (local < 0)
1371 goto tx_error; 1380 goto tx_error;
1372 rt = (struct rt6_info *) skb_dst(skb); 1381 rt = (struct rt6_info *) skb_dst(skb);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c09d6c7198f6..3cb3cb831591 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -168,6 +168,7 @@ nf_ct_get_tuple(const struct sk_buff *skb,
168 unsigned int dataoff, 168 unsigned int dataoff,
169 u_int16_t l3num, 169 u_int16_t l3num,
170 u_int8_t protonum, 170 u_int8_t protonum,
171 struct net *net,
171 struct nf_conntrack_tuple *tuple, 172 struct nf_conntrack_tuple *tuple,
172 const struct nf_conntrack_l3proto *l3proto, 173 const struct nf_conntrack_l3proto *l3proto,
173 const struct nf_conntrack_l4proto *l4proto) 174 const struct nf_conntrack_l4proto *l4proto)
@@ -181,12 +182,13 @@ nf_ct_get_tuple(const struct sk_buff *skb,
181 tuple->dst.protonum = protonum; 182 tuple->dst.protonum = protonum;
182 tuple->dst.dir = IP_CT_DIR_ORIGINAL; 183 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
183 184
184 return l4proto->pkt_to_tuple(skb, dataoff, tuple); 185 return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
185} 186}
186EXPORT_SYMBOL_GPL(nf_ct_get_tuple); 187EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
187 188
188bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, 189bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
189 u_int16_t l3num, struct nf_conntrack_tuple *tuple) 190 u_int16_t l3num,
191 struct net *net, struct nf_conntrack_tuple *tuple)
190{ 192{
191 struct nf_conntrack_l3proto *l3proto; 193 struct nf_conntrack_l3proto *l3proto;
192 struct nf_conntrack_l4proto *l4proto; 194 struct nf_conntrack_l4proto *l4proto;
@@ -205,7 +207,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
205 207
206 l4proto = __nf_ct_l4proto_find(l3num, protonum); 208 l4proto = __nf_ct_l4proto_find(l3num, protonum);
207 209
208 ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple, 210 ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
209 l3proto, l4proto); 211 l3proto, l4proto);
210 212
211 rcu_read_unlock(); 213 rcu_read_unlock();
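
The net parameter threaded through nf_ct_get_tuple()/nf_ct_get_tuplepr() above lands in every l4proto ->pkt_to_tuple() callback (the per-protocol hunks below). The practical win shows up in the GRE tracker, which previously had to guess the netns from skb->dev or the dst. A hedged sketch of the new callback shape, with a placeholder body:

#include <linux/skbuff.h>
#include <net/netfilter/nf_conntrack_tuple.h>

/* Sketch: an l4proto ->pkt_to_tuple() implementation after this series;
 * net is passed in, so per-netns protocol tables can be consulted
 * without touching skb->dev. */
static bool sketch_pkt_to_tuple(const struct sk_buff *skb,
                                unsigned int dataoff, struct net *net,
                                struct nf_conntrack_tuple *tuple)
{
        (void)skb; (void)dataoff; (void)net; (void)tuple;
        return true;
}
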
@@ -938,10 +940,13 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
938 } 940 }
939 941
940 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; 942 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
941 if (timeout_ext) 943 if (timeout_ext) {
942 timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); 944 timeouts = nf_ct_timeout_data(timeout_ext);
943 else 945 if (unlikely(!timeouts))
946 timeouts = l4proto->get_timeouts(net);
947 } else {
944 timeouts = l4proto->get_timeouts(net); 948 timeouts = l4proto->get_timeouts(net);
949 }
945 950
946 if (!l4proto->new(ct, skb, dataoff, timeouts)) { 951 if (!l4proto->new(ct, skb, dataoff, timeouts)) {
947 nf_conntrack_free(ct); 952 nf_conntrack_free(ct);
@@ -950,7 +955,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
950 } 955 }
951 956
952 if (timeout_ext) 957 if (timeout_ext)
953 nf_ct_timeout_ext_add(ct, timeout_ext->timeout, GFP_ATOMIC); 958 nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
959 GFP_ATOMIC);
954 960
955 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 961 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
956 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); 962 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
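
The two init_conntrack() hunks above make the timeout-extension lookup failure-tolerant: the attached timeout object is now RCU-managed (note the rcu_dereference() when re-adding it), so nf_ct_timeout_data() can return NULL, in which case the code falls back to the protocol's default per-netns timeout table. The selection logic, reduced to a stand-alone sketch with hypothetical names:

/* Sketch: prefer the policy attached via the timeout extension, but fall
 * back to the protocol defaults when it has been torn down under RCU. */
static unsigned int *sketch_pick_timeouts(unsigned int *ext_timeouts,
                                          unsigned int *proto_defaults)
{
        return ext_timeouts ? ext_timeouts : proto_defaults;
}
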
@@ -1029,7 +1035,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
1029 u32 hash; 1035 u32 hash;
1030 1036
1031 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), 1037 if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
1032 dataoff, l3num, protonum, &tuple, l3proto, 1038 dataoff, l3num, protonum, net, &tuple, l3proto,
1033 l4proto)) { 1039 l4proto)) {
1034 pr_debug("resolve_normal_ct: Can't get tuple\n"); 1040 pr_debug("resolve_normal_ct: Can't get tuple\n");
1035 return NULL; 1041 return NULL;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 94a66541e0b7..9f5272968abb 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2133,9 +2133,9 @@ ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
2133 struct nf_conntrack_tuple *tuple, 2133 struct nf_conntrack_tuple *tuple,
2134 struct nf_conntrack_tuple *mask); 2134 struct nf_conntrack_tuple *mask);
2135 2135
2136#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT 2136#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
2137static size_t 2137static size_t
2138ctnetlink_nfqueue_build_size(const struct nf_conn *ct) 2138ctnetlink_glue_build_size(const struct nf_conn *ct)
2139{ 2139{
2140 return 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ 2140 return 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
2141 + 3 * nla_total_size(0) /* CTA_TUPLE_IP */ 2141 + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
@@ -2162,8 +2162,19 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
2162 ; 2162 ;
2163} 2163}
2164 2164
2165static int 2165static struct nf_conn *ctnetlink_glue_get_ct(const struct sk_buff *skb,
2166ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) 2166 enum ip_conntrack_info *ctinfo)
2167{
2168 struct nf_conn *ct;
2169
2170 ct = nf_ct_get(skb, ctinfo);
2171 if (ct && nf_ct_is_untracked(ct))
2172 ct = NULL;
2173
2174 return ct;
2175}
2176
2177static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
2167{ 2178{
2168 const struct nf_conntrack_zone *zone; 2179 const struct nf_conntrack_zone *zone;
2169 struct nlattr *nest_parms; 2180 struct nlattr *nest_parms;
@@ -2236,7 +2247,32 @@ nla_put_failure:
2236} 2247}
2237 2248
2238static int 2249static int
2239ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) 2250ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct,
2251 enum ip_conntrack_info ctinfo,
2252 u_int16_t ct_attr, u_int16_t ct_info_attr)
2253{
2254 struct nlattr *nest_parms;
2255
2256 nest_parms = nla_nest_start(skb, ct_attr | NLA_F_NESTED);
2257 if (!nest_parms)
2258 goto nla_put_failure;
2259
2260 if (__ctnetlink_glue_build(skb, ct) < 0)
2261 goto nla_put_failure;
2262
2263 nla_nest_end(skb, nest_parms);
2264
2265 if (nla_put_be32(skb, ct_info_attr, htonl(ctinfo)))
2266 goto nla_put_failure;
2267
2268 return 0;
2269
2270nla_put_failure:
2271 return -ENOSPC;
2272}
2273
2274static int
2275ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
2240{ 2276{
2241 int err; 2277 int err;
2242 2278
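
ctnetlink_glue_build() is new glue shared by the nfqueue and nflog paths: it wraps the conntrack dump in a caller-chosen nested attribute and appends the conntrack-info value as a separate big-endian u32. The netlink nesting pattern it relies on, as a stand-alone sketch (the attribute numbers are the caller's and hypothetical here):

#include <net/netlink.h>

/* Sketch: nest a conntrack dump under ct_attr and emit ctinfo as a
 * separate big-endian u32 attribute, as ctnetlink_glue_build() does. */
static int sketch_put_nested_ct(struct sk_buff *skb, u16 ct_attr,
                                u16 ct_info_attr, u32 ctinfo)
{
        struct nlattr *nest = nla_nest_start(skb, ct_attr | NLA_F_NESTED);

        if (!nest)
                return -ENOSPC;
        /* ... nla_put() the CTA_* attributes describing the conntrack ... */
        nla_nest_end(skb, nest);
        return nla_put_be32(skb, ct_info_attr, htonl(ctinfo));
}
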
@@ -2276,7 +2312,7 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
2276} 2312}
2277 2313
2278static int 2314static int
2279ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) 2315ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct)
2280{ 2316{
2281 struct nlattr *cda[CTA_MAX+1]; 2317 struct nlattr *cda[CTA_MAX+1];
2282 int ret; 2318 int ret;
@@ -2286,16 +2322,16 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
2286 return ret; 2322 return ret;
2287 2323
2288 spin_lock_bh(&nf_conntrack_expect_lock); 2324 spin_lock_bh(&nf_conntrack_expect_lock);
2289 ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); 2325 ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
2290 spin_unlock_bh(&nf_conntrack_expect_lock); 2326 spin_unlock_bh(&nf_conntrack_expect_lock);
2291 2327
2292 return ret; 2328 return ret;
2293} 2329}
2294 2330
2295static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda, 2331static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
2296 const struct nf_conn *ct, 2332 const struct nf_conn *ct,
2297 struct nf_conntrack_tuple *tuple, 2333 struct nf_conntrack_tuple *tuple,
2298 struct nf_conntrack_tuple *mask) 2334 struct nf_conntrack_tuple *mask)
2299{ 2335{
2300 int err; 2336 int err;
2301 2337
@@ -2309,8 +2345,8 @@ static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
2309} 2345}
2310 2346
2311static int 2347static int
2312ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, 2348ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
2313 u32 portid, u32 report) 2349 u32 portid, u32 report)
2314{ 2350{
2315 struct nlattr *cda[CTA_EXPECT_MAX+1]; 2351 struct nlattr *cda[CTA_EXPECT_MAX+1];
2316 struct nf_conntrack_tuple tuple, mask; 2352 struct nf_conntrack_tuple tuple, mask;
@@ -2322,8 +2358,8 @@ ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
2322 if (err < 0) 2358 if (err < 0)
2323 return err; 2359 return err;
2324 2360
2325 err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda, 2361 err = ctnetlink_glue_exp_parse((const struct nlattr * const *)cda,
2326 ct, &tuple, &mask); 2362 ct, &tuple, &mask);
2327 if (err < 0) 2363 if (err < 0)
2328 return err; 2364 return err;
2329 2365
@@ -2350,14 +2386,24 @@ ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
2350 return 0; 2386 return 0;
2351} 2387}
2352 2388
2353static struct nfq_ct_hook ctnetlink_nfqueue_hook = { 2389static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
2354 .build_size = ctnetlink_nfqueue_build_size, 2390 enum ip_conntrack_info ctinfo, int diff)
2355 .build = ctnetlink_nfqueue_build, 2391{
2356 .parse = ctnetlink_nfqueue_parse, 2392 if (!(ct->status & IPS_NAT_MASK))
2357 .attach_expect = ctnetlink_nfqueue_attach_expect, 2393 return;
2358 .seq_adjust = nf_ct_tcp_seqadj_set, 2394
2395 nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff);
2396}
2397
2398static struct nfnl_ct_hook ctnetlink_glue_hook = {
2399 .get_ct = ctnetlink_glue_get_ct,
2400 .build_size = ctnetlink_glue_build_size,
2401 .build = ctnetlink_glue_build,
2402 .parse = ctnetlink_glue_parse,
2403 .attach_expect = ctnetlink_glue_attach_expect,
2404 .seq_adjust = ctnetlink_glue_seqadj,
2359}; 2405};
2360#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */ 2406#endif /* CONFIG_NETFILTER_NETLINK_GLUE_CT */
2361 2407
2362/*********************************************************************** 2408/***********************************************************************
2363 * EXPECT 2409 * EXPECT
@@ -3341,9 +3387,9 @@ static int __init ctnetlink_init(void)
3341 pr_err("ctnetlink_init: cannot register pernet operations\n"); 3387 pr_err("ctnetlink_init: cannot register pernet operations\n");
3342 goto err_unreg_exp_subsys; 3388 goto err_unreg_exp_subsys;
3343 } 3389 }
3344#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT 3390#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
3345 /* setup interaction between nf_queue and nf_conntrack_netlink. */ 3391 /* setup interaction between nf_queue and nf_conntrack_netlink. */
3346 RCU_INIT_POINTER(nfq_ct_hook, &ctnetlink_nfqueue_hook); 3392 RCU_INIT_POINTER(nfnl_ct_hook, &ctnetlink_glue_hook);
3347#endif 3393#endif
3348 return 0; 3394 return 0;
3349 3395
@@ -3362,8 +3408,8 @@ static void __exit ctnetlink_exit(void)
3362 unregister_pernet_subsys(&ctnetlink_net_ops); 3408 unregister_pernet_subsys(&ctnetlink_net_ops);
3363 nfnetlink_subsys_unregister(&ctnl_exp_subsys); 3409 nfnetlink_subsys_unregister(&ctnl_exp_subsys);
3364 nfnetlink_subsys_unregister(&ctnl_subsys); 3410 nfnetlink_subsys_unregister(&ctnl_subsys);
3365#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT 3411#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
3366 RCU_INIT_POINTER(nfq_ct_hook, NULL); 3412 RCU_INIT_POINTER(nfnl_ct_hook, NULL);
3367#endif 3413#endif
3368} 3414}
3369 3415
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 6dd995c7c72b..fce1b1cca32d 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -398,7 +398,7 @@ static inline struct dccp_net *dccp_pernet(struct net *net)
398} 398}
399 399
400static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 400static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
401 struct nf_conntrack_tuple *tuple) 401 struct net *net, struct nf_conntrack_tuple *tuple)
402{ 402{
403 struct dccp_hdr _hdr, *dh; 403 struct dccp_hdr _hdr, *dh;
404 404
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 2281be419a74..86dc752e5349 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -45,7 +45,7 @@ static inline struct nf_generic_net *generic_pernet(struct net *net)
45 45
46static bool generic_pkt_to_tuple(const struct sk_buff *skb, 46static bool generic_pkt_to_tuple(const struct sk_buff *skb,
47 unsigned int dataoff, 47 unsigned int dataoff,
48 struct nf_conntrack_tuple *tuple) 48 struct net *net, struct nf_conntrack_tuple *tuple)
49{ 49{
50 tuple->src.u.all = 0; 50 tuple->src.u.all = 0;
51 tuple->dst.u.all = 0; 51 tuple->dst.u.all = 0;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 7648674f29c3..a96451a7af20 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -190,9 +190,8 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
190 190
191/* gre hdr info to tuple */ 191/* gre hdr info to tuple */
192static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 192static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
193 struct nf_conntrack_tuple *tuple) 193 struct net *net, struct nf_conntrack_tuple *tuple)
194{ 194{
195 struct net *net = dev_net(skb->dev ? skb->dev : skb_dst(skb)->dev);
196 const struct gre_hdr_pptp *pgrehdr; 195 const struct gre_hdr_pptp *pgrehdr;
197 struct gre_hdr_pptp _pgrehdr; 196 struct gre_hdr_pptp _pgrehdr;
198 __be16 srckey; 197 __be16 srckey;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 67197731eb68..9578a7c371ef 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -156,7 +156,7 @@ static inline struct sctp_net *sctp_pernet(struct net *net)
156} 156}
157 157
158static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 158static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
159 struct nf_conntrack_tuple *tuple) 159 struct net *net, struct nf_conntrack_tuple *tuple)
160{ 160{
161 const struct sctphdr *hp; 161 const struct sctphdr *hp;
162 struct sctphdr _hdr; 162 struct sctphdr _hdr;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 70383de72054..278f3b9356ef 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -277,7 +277,7 @@ static inline struct nf_tcp_net *tcp_pernet(struct net *net)
277} 277}
278 278
279static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 279static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
280 struct nf_conntrack_tuple *tuple) 280 struct net *net, struct nf_conntrack_tuple *tuple)
281{ 281{
282 const struct tcphdr *hp; 282 const struct tcphdr *hp;
283 struct tcphdr _hdr; 283 struct tcphdr _hdr;
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 6957281ffee5..478f92f834b6 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -38,6 +38,7 @@ static inline struct nf_udp_net *udp_pernet(struct net *net)
38 38
39static bool udp_pkt_to_tuple(const struct sk_buff *skb, 39static bool udp_pkt_to_tuple(const struct sk_buff *skb,
40 unsigned int dataoff, 40 unsigned int dataoff,
41 struct net *net,
41 struct nf_conntrack_tuple *tuple) 42 struct nf_conntrack_tuple *tuple)
42{ 43{
43 const struct udphdr *hp; 44 const struct udphdr *hp;
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index c5903d1649f9..1ac8ee13a873 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -48,6 +48,7 @@ static inline struct udplite_net *udplite_pernet(struct net *net)
48 48
49static bool udplite_pkt_to_tuple(const struct sk_buff *skb, 49static bool udplite_pkt_to_tuple(const struct sk_buff *skb,
50 unsigned int dataoff, 50 unsigned int dataoff,
51 struct net *net,
51 struct nf_conntrack_tuple *tuple) 52 struct nf_conntrack_tuple *tuple)
52{ 53{
53 const struct udphdr *hp; 54 const struct udphdr *hp;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 5113dfd39df9..06a9f45771ab 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -83,7 +83,7 @@ out:
83 rcu_read_unlock(); 83 rcu_read_unlock();
84} 84}
85 85
86int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) 86int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
87{ 87{
88 struct flowi fl; 88 struct flowi fl;
89 unsigned int hh_len; 89 unsigned int hh_len;
@@ -99,7 +99,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
99 dst = ((struct xfrm_dst *)dst)->route; 99 dst = ((struct xfrm_dst *)dst)->route;
100 dst_hold(dst); 100 dst_hold(dst);
101 101
102 dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); 102 dst = xfrm_lookup(net, dst, &fl, skb->sk, 0);
103 if (IS_ERR(dst)) 103 if (IS_ERR(dst))
104 return PTR_ERR(dst); 104 return PTR_ERR(dst);
105 105
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 97b75f9bfbcd..d43869879fcf 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -55,7 +55,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
55 55
56 rcu_read_lock(); 56 rcu_read_lock();
57 indev = __in_dev_get_rcu(skb->dev); 57 indev = __in_dev_get_rcu(skb->dev);
58 if (indev != NULL) { 58 if (indev && indev->ifa_list) {
59 ifa = indev->ifa_list; 59 ifa = indev->ifa_list;
60 newdst = ifa->ifa_local; 60 newdst = ifa->ifa_local;
61 } 61 }
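
This one-line change is a crash fix rather than refactoring: an interface can be up while its IPv4 address list is still empty, and the old code dereferenced ifa_list unconditionally. With the guard, newdst stays 0 and the packet can be rejected further down instead of oopsing on a NULL pointer. The guarded lookup, as a sketch:

#include <linux/inetdevice.h>

/* Sketch: pick the inbound device's primary IPv4 address for REDIRECT,
 * tolerating a device that has no addresses configured yet. */
static __be32 sketch_redirect_dest(const struct in_device *indev)
{
        if (indev && indev->ifa_list)
                return indev->ifa_list->ifa_local;
        return 0;
}
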
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 96777f9a9350..5baa8e24e6ac 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -69,19 +69,14 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
69 dev_put(physdev); 69 dev_put(physdev);
70 } 70 }
71#endif 71#endif
72 /* Drop reference to owner of hook which queued us. */
73 module_put(entry->elem->owner);
74} 72}
75EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); 73EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
76 74
77/* Bump dev refs so they don't vanish while packet is out */ 75/* Bump dev refs so they don't vanish while packet is out */
78bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) 76void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
79{ 77{
80 struct nf_hook_state *state = &entry->state; 78 struct nf_hook_state *state = &entry->state;
81 79
82 if (!try_module_get(entry->elem->owner))
83 return false;
84
85 if (state->in) 80 if (state->in)
86 dev_hold(state->in); 81 dev_hold(state->in);
87 if (state->out) 82 if (state->out)
@@ -100,8 +95,6 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
100 dev_hold(physdev); 95 dev_hold(physdev);
101 } 96 }
102#endif 97#endif
103
104 return true;
105} 98}
106EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); 99EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
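
The refcounting hunks above drop the try_module_get() on the hook owner entirely; what remains (device references via dev_hold()) cannot fail, so nf_queue_entry_get_refs() becomes void and the -ECANCELED unwind paths disappear from nf_queue() and nf_reinject() below. Reduced to a sketch:

#include <linux/netdevice.h>

/* Sketch: only infallible device refs are taken now; the fallible
 * module reference on the hook owner was removed by this series. */
static void sketch_get_refs(struct net_device *in, struct net_device *out)
{
        if (in)
                dev_hold(in);
        if (out)
                dev_hold(out);
}
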
107 100
@@ -131,22 +124,20 @@ int nf_queue(struct sk_buff *skb,
131 const struct nf_queue_handler *qh; 124 const struct nf_queue_handler *qh;
132 125
133 /* QUEUE == DROP if no one is waiting, to be safe. */ 126 /* QUEUE == DROP if no one is waiting, to be safe. */
134 rcu_read_lock();
135
136 qh = rcu_dereference(queue_handler); 127 qh = rcu_dereference(queue_handler);
137 if (!qh) { 128 if (!qh) {
138 status = -ESRCH; 129 status = -ESRCH;
139 goto err_unlock; 130 goto err;
140 } 131 }
141 132
142 afinfo = nf_get_afinfo(state->pf); 133 afinfo = nf_get_afinfo(state->pf);
143 if (!afinfo) 134 if (!afinfo)
144 goto err_unlock; 135 goto err;
145 136
146 entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); 137 entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
147 if (!entry) { 138 if (!entry) {
148 status = -ENOMEM; 139 status = -ENOMEM;
149 goto err_unlock; 140 goto err;
150 } 141 }
151 142
152 *entry = (struct nf_queue_entry) { 143 *entry = (struct nf_queue_entry) {
@@ -156,16 +147,11 @@ int nf_queue(struct sk_buff *skb,
156 .size = sizeof(*entry) + afinfo->route_key_size, 147 .size = sizeof(*entry) + afinfo->route_key_size,
157 }; 148 };
158 149
159 if (!nf_queue_entry_get_refs(entry)) { 150 nf_queue_entry_get_refs(entry);
160 status = -ECANCELED;
161 goto err_unlock;
162 }
163 skb_dst_force(skb); 151 skb_dst_force(skb);
164 afinfo->saveroute(skb, entry); 152 afinfo->saveroute(skb, entry);
165 status = qh->outfn(entry, queuenum); 153 status = qh->outfn(entry, queuenum);
166 154
167 rcu_read_unlock();
168
169 if (status < 0) { 155 if (status < 0) {
170 nf_queue_entry_release_refs(entry); 156 nf_queue_entry_release_refs(entry);
171 goto err; 157 goto err;
@@ -173,8 +159,6 @@ int nf_queue(struct sk_buff *skb,
173 159
174 return 0; 160 return 0;
175 161
176err_unlock:
177 rcu_read_unlock();
178err: 162err:
179 kfree(entry); 163 kfree(entry);
180 return status; 164 return status;
@@ -187,19 +171,15 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
187 const struct nf_afinfo *afinfo; 171 const struct nf_afinfo *afinfo;
188 int err; 172 int err;
189 173
190 rcu_read_lock();
191
192 nf_queue_entry_release_refs(entry); 174 nf_queue_entry_release_refs(entry);
193 175
194 /* Continue traversal iff userspace said ok... */ 176 /* Continue traversal iff userspace said ok... */
195 if (verdict == NF_REPEAT) { 177 if (verdict == NF_REPEAT)
196 elem = list_entry(elem->list.prev, struct nf_hook_ops, list); 178 verdict = elem->hook(elem->priv, skb, &entry->state);
197 verdict = NF_ACCEPT;
198 }
199 179
200 if (verdict == NF_ACCEPT) { 180 if (verdict == NF_ACCEPT) {
201 afinfo = nf_get_afinfo(entry->state.pf); 181 afinfo = nf_get_afinfo(entry->state.pf);
202 if (!afinfo || afinfo->reroute(skb, entry) < 0) 182 if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0)
203 verdict = NF_DROP; 183 verdict = NF_DROP;
204 } 184 }
205 185
@@ -215,15 +195,13 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
215 case NF_ACCEPT: 195 case NF_ACCEPT:
216 case NF_STOP: 196 case NF_STOP:
217 local_bh_disable(); 197 local_bh_disable();
218 entry->state.okfn(entry->state.sk, skb); 198 entry->state.okfn(entry->state.net, entry->state.sk, skb);
219 local_bh_enable(); 199 local_bh_enable();
220 break; 200 break;
221 case NF_QUEUE: 201 case NF_QUEUE:
222 err = nf_queue(skb, elem, &entry->state, 202 err = nf_queue(skb, elem, &entry->state,
223 verdict >> NF_VERDICT_QBITS); 203 verdict >> NF_VERDICT_QBITS);
224 if (err < 0) { 204 if (err < 0) {
225 if (err == -ECANCELED)
226 goto next_hook;
227 if (err == -ESRCH && 205 if (err == -ESRCH &&
228 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) 206 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
229 goto next_hook; 207 goto next_hook;
@@ -235,7 +213,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
235 default: 213 default:
236 kfree_skb(skb); 214 kfree_skb(skb);
237 } 215 }
238 rcu_read_unlock(); 216
239 kfree(entry); 217 kfree(entry);
240} 218}
241EXPORT_SYMBOL(nf_reinject); 219EXPORT_SYMBOL(nf_reinject);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4a41eb92bcc0..93cc4737018f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1433,7 +1433,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
1433 for (i = 0; i < afi->nops; i++) { 1433 for (i = 0; i < afi->nops; i++) {
1434 ops = &basechain->ops[i]; 1434 ops = &basechain->ops[i];
1435 ops->pf = family; 1435 ops->pf = family;
1436 ops->owner = afi->owner;
1437 ops->hooknum = hooknum; 1436 ops->hooknum = hooknum;
1438 ops->priority = priority; 1437 ops->priority = priority;
1439 ops->priv = chain; 1438 ops->priv = chain;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 05d0b03530f6..f3695a497408 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -48,9 +48,7 @@ static void __nft_trace_packet(const struct nft_pktinfo *pkt,
48 const struct nft_chain *chain, 48 const struct nft_chain *chain,
49 int rulenum, enum nft_trace type) 49 int rulenum, enum nft_trace type)
50{ 50{
51 struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); 51 nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
52
53 nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
54 pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", 52 pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
55 chain->table->name, chain->name, comments[type], 53 chain->table->name, chain->name, comments[type],
56 rulenum); 54 rulenum);
@@ -111,10 +109,10 @@ struct nft_jumpstack {
111}; 109};
112 110
113unsigned int 111unsigned int
114nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) 112nft_do_chain(struct nft_pktinfo *pkt, void *priv)
115{ 113{
116 const struct nft_chain *chain = ops->priv, *basechain = chain; 114 const struct nft_chain *chain = priv, *basechain = chain;
117 const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); 115 const struct net *net = pkt->net;
118 const struct nft_rule *rule; 116 const struct nft_rule *rule;
119 const struct nft_expr *expr, *last; 117 const struct nft_expr *expr, *last;
120 struct nft_regs regs; 118 struct nft_regs regs;
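
Throughout this series the chain entry points drop the nf_hook_ops argument: nft_do_chain() receives the chain as an opaque void *priv, and per-packet metadata (pf, hook number, net) lives in the pktinfo filled from nf_hook_state. A compilable sketch of that calling convention, with stand-in types rather than the kernel's:

    #include <stdio.h>

    /* Stand-ins for the kernel types involved. */
    struct nft_pktinfo { int pf, hook; const char *skb; };
    struct nft_chain { const char *name; };

    /* New-style entry point: the chain arrives as opaque private data. */
    static unsigned int nft_do_chain_model(struct nft_pktinfo *pkt, void *priv)
    {
            const struct nft_chain *chain = priv;

            printf("chain=%s pf=%d hook=%d skb=%s\n",
                   chain->name, pkt->pf, pkt->hook, pkt->skb);
            return 1;       /* NF_ACCEPT */
    }

    int main(void)
    {
            struct nft_chain input = { "input" };
            struct nft_pktinfo pkt = { .pf = 2, .hook = 1, .skb = "pkt" };

            return nft_do_chain_model(&pkt, &input) == 1 ? 0 : 1;
    }
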
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 2cae4d4a03b7..7b9c053ba750 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -17,13 +17,13 @@
17 17
18static inline void 18static inline void
19nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, 19nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
20 const struct nf_hook_ops *ops, struct sk_buff *skb, 20 struct sk_buff *skb,
21 const struct nf_hook_state *state) 21 const struct nf_hook_state *state)
22{ 22{
23 struct iphdr *iph, _iph; 23 struct iphdr *iph, _iph;
24 u32 len, thoff; 24 u32 len, thoff;
25 25
26 nft_set_pktinfo(pkt, ops, skb, state); 26 nft_set_pktinfo(pkt, skb, state);
27 27
28 iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), 28 iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
29 &_iph); 29 &_iph);
@@ -48,7 +48,6 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
48 48
49static inline void 49static inline void
50__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, 50__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
51 const struct nf_hook_ops *ops,
52 struct sk_buff *skb, 51 struct sk_buff *skb,
53 const struct nf_hook_state *state) 52 const struct nf_hook_state *state)
54{ 53{
@@ -82,33 +81,32 @@ __nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
82} 81}
83 82
84static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, 83static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
85 const struct nf_hook_ops *ops,
86 struct sk_buff *skb, 84 struct sk_buff *skb,
87 const struct nf_hook_state *state) 85 const struct nf_hook_state *state)
88{ 86{
89 nft_set_pktinfo(pkt, ops, skb, state); 87 nft_set_pktinfo(pkt, skb, state);
90 __nft_netdev_set_pktinfo_ipv6(pkt, ops, skb, state); 88 __nft_netdev_set_pktinfo_ipv6(pkt, skb, state);
91} 89}
92 90
93static unsigned int 91static unsigned int
94nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb, 92nft_do_chain_netdev(void *priv, struct sk_buff *skb,
95 const struct nf_hook_state *state) 93 const struct nf_hook_state *state)
96{ 94{
97 struct nft_pktinfo pkt; 95 struct nft_pktinfo pkt;
98 96
99 switch (eth_hdr(skb)->h_proto) { 97 switch (eth_hdr(skb)->h_proto) {
100 case htons(ETH_P_IP): 98 case htons(ETH_P_IP):
101 nft_netdev_set_pktinfo_ipv4(&pkt, ops, skb, state); 99 nft_netdev_set_pktinfo_ipv4(&pkt, skb, state);
102 break; 100 break;
103 case htons(ETH_P_IPV6): 101 case htons(ETH_P_IPV6):
104 nft_netdev_set_pktinfo_ipv6(&pkt, ops, skb, state); 102 nft_netdev_set_pktinfo_ipv6(&pkt, skb, state);
105 break; 103 break;
106 default: 104 default:
107 nft_set_pktinfo(&pkt, ops, skb, state); 105 nft_set_pktinfo(&pkt, skb, state);
108 break; 106 break;
109 } 107 }
110 108
111 return nft_do_chain(&pkt, ops); 109 return nft_do_chain(&pkt, priv);
112} 110}
113 111
114static struct nft_af_info nft_af_netdev __read_mostly = { 112static struct nft_af_info nft_af_netdev __read_mostly = {
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 70277b11f742..46453ab318db 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -64,7 +64,7 @@ void nfnl_unlock(__u8 subsys_id)
64EXPORT_SYMBOL_GPL(nfnl_unlock); 64EXPORT_SYMBOL_GPL(nfnl_unlock);
65 65
66#ifdef CONFIG_PROVE_LOCKING 66#ifdef CONFIG_PROVE_LOCKING
67int lockdep_nfnl_is_held(u8 subsys_id) 67bool lockdep_nfnl_is_held(u8 subsys_id)
68{ 68{
69 return lockdep_is_held(&table[subsys_id].mutex); 69 return lockdep_is_held(&table[subsys_id].mutex);
70} 70}
@@ -492,7 +492,7 @@ static int nfnetlink_bind(struct net *net, int group)
492 type = nfnl_group2type[group]; 492 type = nfnl_group2type[group];
493 493
494 rcu_read_lock(); 494 rcu_read_lock();
495 ss = nfnetlink_get_subsys(type); 495 ss = nfnetlink_get_subsys(type << 8);
496 rcu_read_unlock(); 496 rcu_read_unlock();
497 if (!ss) 497 if (!ss)
498 request_module("nfnetlink-subsys-%d", type); 498 request_module("nfnetlink-subsys-%d", type);
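
The `type << 8` fix above follows from how nfnetlink packs message types: the subsystem id sits in the high byte and the per-subsystem message in the low byte, so the bare group-derived subsystem number in nfnetlink_bind() must be shifted up before the lookup helper can decode it again. The two macros below match the uapi header; the rest is a self-contained check:

    #include <assert.h>

    #define NFNL_SUBSYS_ID(x)       (((x) & 0xff00) >> 8)
    #define NFNL_MSG_TYPE(x)        ((x) & 0x00ff)

    int main(void)
    {
            int subsys = 8;         /* a bare subsystem number, as in bind() */
            int type = subsys << 8; /* what nfnetlink_get_subsys() decodes */

            assert(NFNL_SUBSYS_ID(type) == subsys);
            assert(NFNL_MSG_TYPE(type | 0x02) == 0x02);
            return 0;
    }
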
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 476accd17145..c7a2d0e1c462 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -291,6 +291,34 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
291 return ret; 291 return ret;
292} 292}
293 293
294static void untimeout(struct nf_conntrack_tuple_hash *i,
295 struct ctnl_timeout *timeout)
296{
297 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
298 struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
299
300 if (timeout_ext && (!timeout || timeout_ext->timeout == timeout))
301 RCU_INIT_POINTER(timeout_ext->timeout, NULL);
302}
303
304static void ctnl_untimeout(struct ctnl_timeout *timeout)
305{
306 struct nf_conntrack_tuple_hash *h;
307 const struct hlist_nulls_node *nn;
308 int i;
309
310 local_bh_disable();
311 for (i = 0; i < init_net.ct.htable_size; i++) {
312 spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
313 if (i < init_net.ct.htable_size) {
314 hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode)
315 untimeout(h, timeout);
316 }
317 spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
318 }
319 local_bh_enable();
320}
321
294/* try to delete object, fail if it is still in use. */ 322/* try to delete object, fail if it is still in use. */
295static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) 323static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
296{ 324{
@@ -301,6 +329,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
301 /* We are protected by nfnl mutex. */ 329 /* We are protected by nfnl mutex. */
302 list_del_rcu(&timeout->head); 330 list_del_rcu(&timeout->head);
303 nf_ct_l4proto_put(timeout->l4proto); 331 nf_ct_l4proto_put(timeout->l4proto);
332 ctnl_untimeout(timeout);
304 kfree_rcu(timeout, rcu_head); 333 kfree_rcu(timeout, rcu_head);
305 } else { 334 } else {
306 /* still in use, restore reference counter. */ 335 /* still in use, restore reference counter. */
@@ -567,6 +596,10 @@ static void __exit cttimeout_exit(void)
567 pr_info("cttimeout: unregistering from nfnetlink.\n"); 596 pr_info("cttimeout: unregistering from nfnetlink.\n");
568 597
569 nfnetlink_subsys_unregister(&cttimeout_subsys); 598 nfnetlink_subsys_unregister(&cttimeout_subsys);
599
600 /* Make sure no conntrack objects refer to custom timeouts anymore. */
601 ctnl_untimeout(NULL);
602
570 list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) { 603 list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) {
571 list_del_rcu(&cur->head); 604 list_del_rcu(&cur->head);
572 /* We are sure that our objects have no clients at this point, 605 /* We are sure that our objects have no clients at this point,
@@ -579,6 +612,7 @@ static void __exit cttimeout_exit(void)
579 RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); 612 RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
580 RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); 613 RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
581#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ 614#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
615 rcu_barrier();
582} 616}
583 617
584module_init(cttimeout_init); 618module_init(cttimeout_init);
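
The cttimeout hunks close a use-after-free window: a conntrack entry could keep pointing at a custom timeout after the timeout object, or the whole module, went away. The ordering they establish is modelled below in plain userspace C; the RCU machinery is elided, and the comments mark where kfree_rcu() and rcu_barrier() act in the kernel.

    #include <stdio.h>
    #include <stdlib.h>

    #define NCT 3

    struct ctnl_timeout { int id; };

    /* Stand-in for the timeout_ext->timeout back-references held by
     * individual conntrack entries. */
    static struct ctnl_timeout *refs[NCT];

    /* ctnl_untimeout(NULL): drop every reference to custom timeouts. */
    static void untimeout_all(void)
    {
            for (int i = 0; i < NCT; i++)
                    refs[i] = NULL; /* RCU_INIT_POINTER(..., NULL) */
    }

    int main(void)
    {
            struct ctnl_timeout *t = malloc(sizeof(*t));

            t->id = 1;
            for (int i = 0; i < NCT; i++)
                    refs[i] = t;

            untimeout_all();        /* 1. break back-references */
            free(t);                /* 2. kfree_rcu(); rcu_barrier() then
                                     *    waits for the callback before the
                                     *    module text is unloaded */

            for (int i = 0; i < NCT; i++)
                    if (refs[i])
                            return 1;       /* would be a dangling pointer */
            printf("no stale timeout references\n");
            return 0;
    }
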
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 4670821b569d..740cce4685ac 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -27,6 +27,7 @@
27#include <net/netlink.h> 27#include <net/netlink.h>
28#include <linux/netfilter/nfnetlink.h> 28#include <linux/netfilter/nfnetlink.h>
29#include <linux/netfilter/nfnetlink_log.h> 29#include <linux/netfilter/nfnetlink_log.h>
30#include <linux/netfilter/nf_conntrack_common.h>
30#include <linux/spinlock.h> 31#include <linux/spinlock.h>
31#include <linux/sysctl.h> 32#include <linux/sysctl.h>
32#include <linux/proc_fs.h> 33#include <linux/proc_fs.h>
@@ -401,7 +402,9 @@ __build_packet_message(struct nfnl_log_net *log,
401 unsigned int hooknum, 402 unsigned int hooknum,
402 const struct net_device *indev, 403 const struct net_device *indev,
403 const struct net_device *outdev, 404 const struct net_device *outdev,
404 const char *prefix, unsigned int plen) 405 const char *prefix, unsigned int plen,
406 const struct nfnl_ct_hook *nfnl_ct,
407 struct nf_conn *ct, enum ip_conntrack_info ctinfo)
405{ 408{
406 struct nfulnl_msg_packet_hdr pmsg; 409 struct nfulnl_msg_packet_hdr pmsg;
407 struct nlmsghdr *nlh; 410 struct nlmsghdr *nlh;
@@ -538,9 +541,9 @@ __build_packet_message(struct nfnl_log_net *log,
538 541
539 if (skb->tstamp.tv64) { 542 if (skb->tstamp.tv64) {
540 struct nfulnl_msg_packet_timestamp ts; 543 struct nfulnl_msg_packet_timestamp ts;
541 struct timeval tv = ktime_to_timeval(skb->tstamp); 544 struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
542 ts.sec = cpu_to_be64(tv.tv_sec); 545 ts.sec = cpu_to_be64(kts.tv_sec);
543 ts.usec = cpu_to_be64(tv.tv_usec); 546 ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
544 547
545 if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts)) 548 if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts))
546 goto nla_put_failure; 549 goto nla_put_failure;
@@ -575,6 +578,10 @@ __build_packet_message(struct nfnl_log_net *log,
575 htonl(atomic_inc_return(&log->global_seq)))) 578 htonl(atomic_inc_return(&log->global_seq))))
576 goto nla_put_failure; 579 goto nla_put_failure;
577 580
581 if (ct && nfnl_ct->build(inst->skb, ct, ctinfo,
582 NFULA_CT, NFULA_CT_INFO) < 0)
583 goto nla_put_failure;
584
578 if (data_len) { 585 if (data_len) {
579 struct nlattr *nla; 586 struct nlattr *nla;
580 int size = nla_attr_size(data_len); 587 int size = nla_attr_size(data_len);
@@ -620,12 +627,16 @@ nfulnl_log_packet(struct net *net,
620 const struct nf_loginfo *li_user, 627 const struct nf_loginfo *li_user,
621 const char *prefix) 628 const char *prefix)
622{ 629{
623 unsigned int size, data_len; 630 size_t size;
631 unsigned int data_len;
624 struct nfulnl_instance *inst; 632 struct nfulnl_instance *inst;
625 const struct nf_loginfo *li; 633 const struct nf_loginfo *li;
626 unsigned int qthreshold; 634 unsigned int qthreshold;
627 unsigned int plen; 635 unsigned int plen;
628 struct nfnl_log_net *log = nfnl_log_pernet(net); 636 struct nfnl_log_net *log = nfnl_log_pernet(net);
637 const struct nfnl_ct_hook *nfnl_ct = NULL;
638 struct nf_conn *ct = NULL;
639 enum ip_conntrack_info uninitialized_var(ctinfo);
629 640
630 if (li_user && li_user->type == NF_LOG_TYPE_ULOG) 641 if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
631 li = li_user; 642 li = li_user;
@@ -671,6 +682,14 @@ nfulnl_log_packet(struct net *net,
671 size += nla_total_size(sizeof(u_int32_t)); 682 size += nla_total_size(sizeof(u_int32_t));
672 if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) 683 if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
673 size += nla_total_size(sizeof(u_int32_t)); 684 size += nla_total_size(sizeof(u_int32_t));
685 if (inst->flags & NFULNL_CFG_F_CONNTRACK) {
686 nfnl_ct = rcu_dereference(nfnl_ct_hook);
687 if (nfnl_ct != NULL) {
688 ct = nfnl_ct->get_ct(skb, &ctinfo);
689 if (ct != NULL)
690 size += nfnl_ct->build_size(ct);
691 }
692 }
674 693
675 qthreshold = inst->qthreshold; 694 qthreshold = inst->qthreshold;
676 /* per-rule qthreshold overrides per-instance */ 695 /* per-rule qthreshold overrides per-instance */
@@ -715,7 +734,8 @@ nfulnl_log_packet(struct net *net,
715 inst->qlen++; 734 inst->qlen++;
716 735
717 __build_packet_message(log, inst, skb, data_len, pf, 736 __build_packet_message(log, inst, skb, data_len, pf,
718 hooknum, in, out, prefix, plen); 737 hooknum, in, out, prefix, plen,
738 nfnl_ct, ct, ctinfo);
719 739
720 if (inst->qlen >= qthreshold) 740 if (inst->qlen >= qthreshold)
721 __nfulnl_flush(inst); 741 __nfulnl_flush(inst);
@@ -805,6 +825,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
805 struct net *net = sock_net(ctnl); 825 struct net *net = sock_net(ctnl);
806 struct nfnl_log_net *log = nfnl_log_pernet(net); 826 struct nfnl_log_net *log = nfnl_log_pernet(net);
807 int ret = 0; 827 int ret = 0;
828 u16 flags = 0;
808 829
809 if (nfula[NFULA_CFG_CMD]) { 830 if (nfula[NFULA_CFG_CMD]) {
810 u_int8_t pf = nfmsg->nfgen_family; 831 u_int8_t pf = nfmsg->nfgen_family;
@@ -826,6 +847,28 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
826 goto out_put; 847 goto out_put;
827 } 848 }
828 849
850 /* Check whether we support these flags in the first place; the
851 * dependencies must be present too, so the operation stays atomic.
852 */
853 if (nfula[NFULA_CFG_FLAGS]) {
854 flags = ntohs(nla_get_be16(nfula[NFULA_CFG_FLAGS]));
855
856 if ((flags & NFULNL_CFG_F_CONNTRACK) &&
857 !rcu_access_pointer(nfnl_ct_hook)) {
858#ifdef CONFIG_MODULES
859 nfnl_unlock(NFNL_SUBSYS_ULOG);
860 request_module("ip_conntrack_netlink");
861 nfnl_lock(NFNL_SUBSYS_ULOG);
862 if (rcu_access_pointer(nfnl_ct_hook)) {
863 ret = -EAGAIN;
864 goto out_put;
865 }
866#endif
867 ret = -EOPNOTSUPP;
868 goto out_put;
869 }
870 }
871
829 if (cmd != NULL) { 872 if (cmd != NULL) {
830 switch (cmd->command) { 873 switch (cmd->command) {
831 case NFULNL_CFG_CMD_BIND: 874 case NFULNL_CFG_CMD_BIND:
@@ -854,16 +897,15 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
854 ret = -ENOTSUPP; 897 ret = -ENOTSUPP;
855 break; 898 break;
856 } 899 }
900 } else if (!inst) {
901 ret = -ENODEV;
902 goto out;
857 } 903 }
858 904
859 if (nfula[NFULA_CFG_MODE]) { 905 if (nfula[NFULA_CFG_MODE]) {
860 struct nfulnl_msg_config_mode *params; 906 struct nfulnl_msg_config_mode *params =
861 params = nla_data(nfula[NFULA_CFG_MODE]); 907 nla_data(nfula[NFULA_CFG_MODE]);
862 908
863 if (!inst) {
864 ret = -ENODEV;
865 goto out;
866 }
867 nfulnl_set_mode(inst, params->copy_mode, 909 nfulnl_set_mode(inst, params->copy_mode,
868 ntohl(params->copy_range)); 910 ntohl(params->copy_range));
869 } 911 }
@@ -871,42 +913,23 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
871 if (nfula[NFULA_CFG_TIMEOUT]) { 913 if (nfula[NFULA_CFG_TIMEOUT]) {
872 __be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]); 914 __be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]);
873 915
874 if (!inst) {
875 ret = -ENODEV;
876 goto out;
877 }
878 nfulnl_set_timeout(inst, ntohl(timeout)); 916 nfulnl_set_timeout(inst, ntohl(timeout));
879 } 917 }
880 918
881 if (nfula[NFULA_CFG_NLBUFSIZ]) { 919 if (nfula[NFULA_CFG_NLBUFSIZ]) {
882 __be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]); 920 __be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]);
883 921
884 if (!inst) {
885 ret = -ENODEV;
886 goto out;
887 }
888 nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz)); 922 nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
889 } 923 }
890 924
891 if (nfula[NFULA_CFG_QTHRESH]) { 925 if (nfula[NFULA_CFG_QTHRESH]) {
892 __be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]); 926 __be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]);
893 927
894 if (!inst) {
895 ret = -ENODEV;
896 goto out;
897 }
898 nfulnl_set_qthresh(inst, ntohl(qthresh)); 928 nfulnl_set_qthresh(inst, ntohl(qthresh));
899 } 929 }
900 930
901 if (nfula[NFULA_CFG_FLAGS]) { 931 if (nfula[NFULA_CFG_FLAGS])
902 __be16 flags = nla_get_be16(nfula[NFULA_CFG_FLAGS]); 932 nfulnl_set_flags(inst, flags);
903
904 if (!inst) {
905 ret = -ENODEV;
906 goto out;
907 }
908 nfulnl_set_flags(inst, ntohs(flags));
909 }
910 933
911out_put: 934out_put:
912 instance_put(inst); 935 instance_put(inst);
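
Both the log and queue hunks replace struct timeval with struct timespec64 so the timestamp survives 2038 on 32-bit hosts; the netlink attribute still carries seconds plus microseconds, so the nanosecond field is divided down. A self-contained check of that conversion, with a struct that mirrors the kernel's shape:

    #include <assert.h>
    #include <stdint.h>

    #define NSEC_PER_USEC 1000L

    struct timespec64 { int64_t tv_sec; long tv_nsec; };

    int main(void)
    {
            struct timespec64 kts = {
                    .tv_sec  = 2147483648LL,        /* past Jan 2038 */
                    .tv_nsec = 123456789L,
            };
            uint64_t sec  = (uint64_t)kts.tv_sec;
            uint64_t usec = (uint64_t)(kts.tv_nsec / NSEC_PER_USEC);

            assert(sec == 2147483648ULL);   /* overflows a 32-bit time_t */
            assert(usec == 123456);         /* 123456789 ns -> 123456 us */
            return 0;
    }
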
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue.c
index a5cd6d90b78b..7d81d280cb4f 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -28,12 +28,12 @@
28#include <linux/netfilter_bridge.h> 28#include <linux/netfilter_bridge.h>
29#include <linux/netfilter/nfnetlink.h> 29#include <linux/netfilter/nfnetlink.h>
30#include <linux/netfilter/nfnetlink_queue.h> 30#include <linux/netfilter/nfnetlink_queue.h>
31#include <linux/netfilter/nf_conntrack_common.h>
31#include <linux/list.h> 32#include <linux/list.h>
32#include <net/sock.h> 33#include <net/sock.h>
33#include <net/tcp_states.h> 34#include <net/tcp_states.h>
34#include <net/netfilter/nf_queue.h> 35#include <net/netfilter/nf_queue.h>
35#include <net/netns/generic.h> 36#include <net/netns/generic.h>
36#include <net/netfilter/nfnetlink_queue.h>
37 37
38#include <linux/atomic.h> 38#include <linux/atomic.h>
39 39
@@ -313,6 +313,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
313 struct net_device *outdev; 313 struct net_device *outdev;
314 struct nf_conn *ct = NULL; 314 struct nf_conn *ct = NULL;
315 enum ip_conntrack_info uninitialized_var(ctinfo); 315 enum ip_conntrack_info uninitialized_var(ctinfo);
316 struct nfnl_ct_hook *nfnl_ct;
316 bool csum_verify; 317 bool csum_verify;
317 char *secdata = NULL; 318 char *secdata = NULL;
318 u32 seclen = 0; 319 u32 seclen = 0;
@@ -364,8 +365,14 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
364 break; 365 break;
365 } 366 }
366 367
367 if (queue->flags & NFQA_CFG_F_CONNTRACK) 368 if (queue->flags & NFQA_CFG_F_CONNTRACK) {
368 ct = nfqnl_ct_get(entskb, &size, &ctinfo); 369 nfnl_ct = rcu_dereference(nfnl_ct_hook);
370 if (nfnl_ct != NULL) {
371 ct = nfnl_ct->get_ct(entskb, &ctinfo);
372 if (ct != NULL)
373 size += nfnl_ct->build_size(ct);
374 }
375 }
369 376
370 if (queue->flags & NFQA_CFG_F_UID_GID) { 377 if (queue->flags & NFQA_CFG_F_UID_GID) {
371 size += (nla_total_size(sizeof(u_int32_t)) /* uid */ 378 size += (nla_total_size(sizeof(u_int32_t)) /* uid */
@@ -493,9 +500,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
493 500
494 if (entskb->tstamp.tv64) { 501 if (entskb->tstamp.tv64) {
495 struct nfqnl_msg_packet_timestamp ts; 502 struct nfqnl_msg_packet_timestamp ts;
496 struct timeval tv = ktime_to_timeval(entskb->tstamp); 503 struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
497 ts.sec = cpu_to_be64(tv.tv_sec); 504
498 ts.usec = cpu_to_be64(tv.tv_usec); 505 ts.sec = cpu_to_be64(kts.tv_sec);
506 ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
499 507
500 if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts)) 508 if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts))
501 goto nla_put_failure; 509 goto nla_put_failure;
@@ -508,7 +516,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
508 if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata)) 516 if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
509 goto nla_put_failure; 517 goto nla_put_failure;
510 518
511 if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) 519 if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0)
512 goto nla_put_failure; 520 goto nla_put_failure;
513 521
514 if (cap_len > data_len && 522 if (cap_len > data_len &&
@@ -598,12 +606,9 @@ static struct nf_queue_entry *
598nf_queue_entry_dup(struct nf_queue_entry *e) 606nf_queue_entry_dup(struct nf_queue_entry *e)
599{ 607{
600 struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); 608 struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
601 if (entry) { 609 if (entry)
602 if (nf_queue_entry_get_refs(entry)) 610 nf_queue_entry_get_refs(entry);
603 return entry; 611 return entry;
604 kfree(entry);
605 }
606 return NULL;
607} 612}
608 613
609#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 614#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
@@ -670,8 +675,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
670 struct nfqnl_instance *queue; 675 struct nfqnl_instance *queue;
671 struct sk_buff *skb, *segs; 676 struct sk_buff *skb, *segs;
672 int err = -ENOBUFS; 677 int err = -ENOBUFS;
673 struct net *net = dev_net(entry->state.in ? 678 struct net *net = entry->state.net;
674 entry->state.in : entry->state.out);
675 struct nfnl_queue_net *q = nfnl_queue_pernet(net); 679 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
676 680
677 /* rcu_read_lock()ed by nf_hook_slow() */ 681 /* rcu_read_lock()ed by nf_hook_slow() */
@@ -699,7 +703,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
699 nf_bridge_adjust_skb_data(skb); 703 nf_bridge_adjust_skb_data(skb);
700 segs = skb_gso_segment(skb, 0); 704 segs = skb_gso_segment(skb, 0);
701 /* Does not use PTR_ERR to limit the number of error codes that can be 705 /* Does not use PTR_ERR to limit the number of error codes that can be
702 * returned by nf_queue. For instance, callers rely on -ECANCELED to 706 * returned by nf_queue. For instance, callers rely on -ESRCH to
703 * mean 'ignore this hook'. 707 * mean 'ignore this hook'.
704 */ 708 */
705 if (IS_ERR_OR_NULL(segs)) 709 if (IS_ERR_OR_NULL(segs))
@@ -1002,6 +1006,28 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
1002 return 0; 1006 return 0;
1003} 1007}
1004 1008
1009static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct,
1010 const struct nlmsghdr *nlh,
1011 const struct nlattr * const nfqa[],
1012 struct nf_queue_entry *entry,
1013 enum ip_conntrack_info *ctinfo)
1014{
1015 struct nf_conn *ct;
1016
1017 ct = nfnl_ct->get_ct(entry->skb, ctinfo);
1018 if (ct == NULL)
1019 return NULL;
1020
1021 if (nfnl_ct->parse(nfqa[NFQA_CT], ct) < 0)
1022 return NULL;
1023
1024 if (nfqa[NFQA_EXP])
1025 nfnl_ct->attach_expect(nfqa[NFQA_EXP], ct,
1026 NETLINK_CB(entry->skb).portid,
1027 nlmsg_report(nlh));
1028 return ct;
1029}
1030
1005static int 1031static int
1006nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, 1032nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
1007 const struct nlmsghdr *nlh, 1033 const struct nlmsghdr *nlh,
@@ -1015,6 +1041,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
1015 unsigned int verdict; 1041 unsigned int verdict;
1016 struct nf_queue_entry *entry; 1042 struct nf_queue_entry *entry;
1017 enum ip_conntrack_info uninitialized_var(ctinfo); 1043 enum ip_conntrack_info uninitialized_var(ctinfo);
1044 struct nfnl_ct_hook *nfnl_ct;
1018 struct nf_conn *ct = NULL; 1045 struct nf_conn *ct = NULL;
1019 1046
1020 struct net *net = sock_net(ctnl); 1047 struct net *net = sock_net(ctnl);
@@ -1038,12 +1065,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
1038 return -ENOENT; 1065 return -ENOENT;
1039 1066
1040 if (nfqa[NFQA_CT]) { 1067 if (nfqa[NFQA_CT]) {
1041 ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo); 1068 /* rcu lock already held from nfnl->call_rcu. */
1042 if (ct && nfqa[NFQA_EXP]) { 1069 nfnl_ct = rcu_dereference(nfnl_ct_hook);
1043 nfqnl_attach_expect(ct, nfqa[NFQA_EXP], 1070 if (nfnl_ct != NULL)
1044 NETLINK_CB(skb).portid, 1071 ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo);
1045 nlmsg_report(nlh));
1046 }
1047 } 1072 }
1048 1073
1049 if (nfqa[NFQA_PAYLOAD]) { 1074 if (nfqa[NFQA_PAYLOAD]) {
@@ -1054,8 +1079,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
1054 payload_len, entry, diff) < 0) 1079 payload_len, entry, diff) < 0)
1055 verdict = NF_DROP; 1080 verdict = NF_DROP;
1056 1081
1057 if (ct) 1082 if (ct && diff)
1058 nfqnl_ct_seq_adjust(entry->skb, ct, ctinfo, diff); 1083 nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff);
1059 } 1084 }
1060 1085
1061 if (nfqa[NFQA_MARK]) 1086 if (nfqa[NFQA_MARK])
diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c
deleted file mode 100644
index 96cac50e0d12..000000000000
--- a/net/netfilter/nfnetlink_queue_ct.c
+++ /dev/null
@@ -1,113 +0,0 @@
1/*
2 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 */
9
10#include <linux/skbuff.h>
11#include <linux/netfilter.h>
12#include <linux/netfilter/nfnetlink.h>
13#include <linux/netfilter/nfnetlink_queue.h>
14#include <net/netfilter/nf_conntrack.h>
15#include <net/netfilter/nfnetlink_queue.h>
16
17struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size,
18 enum ip_conntrack_info *ctinfo)
19{
20 struct nfq_ct_hook *nfq_ct;
21 struct nf_conn *ct;
22
23 /* rcu_read_lock()ed by __nf_queue already. */
24 nfq_ct = rcu_dereference(nfq_ct_hook);
25 if (nfq_ct == NULL)
26 return NULL;
27
28 ct = nf_ct_get(entskb, ctinfo);
29 if (ct) {
30 if (!nf_ct_is_untracked(ct))
31 *size += nfq_ct->build_size(ct);
32 else
33 ct = NULL;
34 }
35 return ct;
36}
37
38struct nf_conn *
39nfqnl_ct_parse(const struct sk_buff *skb, const struct nlattr *attr,
40 enum ip_conntrack_info *ctinfo)
41{
42 struct nfq_ct_hook *nfq_ct;
43 struct nf_conn *ct;
44
45 /* rcu_read_lock()ed by __nf_queue already. */
46 nfq_ct = rcu_dereference(nfq_ct_hook);
47 if (nfq_ct == NULL)
48 return NULL;
49
50 ct = nf_ct_get(skb, ctinfo);
51 if (ct && !nf_ct_is_untracked(ct))
52 nfq_ct->parse(attr, ct);
53
54 return ct;
55}
56
57int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct,
58 enum ip_conntrack_info ctinfo)
59{
60 struct nfq_ct_hook *nfq_ct;
61 struct nlattr *nest_parms;
62 u_int32_t tmp;
63
64 nfq_ct = rcu_dereference(nfq_ct_hook);
65 if (nfq_ct == NULL)
66 return 0;
67
68 nest_parms = nla_nest_start(skb, NFQA_CT | NLA_F_NESTED);
69 if (!nest_parms)
70 goto nla_put_failure;
71
72 if (nfq_ct->build(skb, ct) < 0)
73 goto nla_put_failure;
74
75 nla_nest_end(skb, nest_parms);
76
77 tmp = ctinfo;
78 if (nla_put_be32(skb, NFQA_CT_INFO, htonl(tmp)))
79 goto nla_put_failure;
80
81 return 0;
82
83nla_put_failure:
84 return -1;
85}
86
87void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
88 enum ip_conntrack_info ctinfo, int diff)
89{
90 struct nfq_ct_hook *nfq_ct;
91
92 nfq_ct = rcu_dereference(nfq_ct_hook);
93 if (nfq_ct == NULL)
94 return;
95
96 if ((ct->status & IPS_NAT_MASK) && diff)
97 nfq_ct->seq_adjust(skb, ct, ctinfo, diff);
98}
99
100int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
101 u32 portid, u32 report)
102{
103 struct nfq_ct_hook *nfq_ct;
104
105 if (nf_ct_is_untracked(ct))
106 return 0;
107
108 nfq_ct = rcu_dereference(nfq_ct_hook);
109 if (nfq_ct == NULL)
110 return -EOPNOTSUPP;
111
112 return nfq_ct->attach_expect(attr, ct, portid, report);
113}
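
With nfnetlink_queue_ct.c removed, both nfnetlink_log and nfnetlink_queue reach conntrack through a single rcu-protected ops table, nfnl_ct_hook. The member names below are exactly the ones the hunks above invoke (get_ct, build_size, build, parse, attach_expect, seq_adjust); the parameter types are reconstructed from those call sites and should be read as a sketch, not the authoritative header.

    #include <stddef.h>
    #include <stdint.h>

    /* Opaque stand-ins so this sketch compiles on its own. */
    struct sk_buff;
    struct nf_conn;
    struct nlattr;
    enum ip_conntrack_info { IP_CT_ESTABLISHED };

    struct nfnl_ct_hook {
            struct nf_conn *(*get_ct)(struct sk_buff *skb,
                                      enum ip_conntrack_info *ctinfo);
            size_t (*build_size)(const struct nf_conn *ct);
            int (*build)(struct sk_buff *skb, struct nf_conn *ct,
                         enum ip_conntrack_info ctinfo,
                         uint16_t ct_attr, uint16_t ct_info_attr);
            int (*parse)(const struct nlattr *attr, struct nf_conn *ct);
            int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct,
                                 uint32_t portid, uint32_t report);
            void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct,
                               enum ip_conntrack_info ctinfo, int32_t off);
    };
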
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 1067fb4c1ffa..c7808fc19719 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -47,27 +47,34 @@ static void nft_counter_eval(const struct nft_expr *expr,
47 local_bh_enable(); 47 local_bh_enable();
48} 48}
49 49
50static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) 50static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
51 struct nft_counter *total)
51{ 52{
52 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); 53 const struct nft_counter_percpu *cpu_stats;
53 struct nft_counter_percpu *cpu_stats;
54 struct nft_counter total;
55 u64 bytes, packets; 54 u64 bytes, packets;
56 unsigned int seq; 55 unsigned int seq;
57 int cpu; 56 int cpu;
58 57
59 memset(&total, 0, sizeof(total)); 58 memset(total, 0, sizeof(*total));
60 for_each_possible_cpu(cpu) { 59 for_each_possible_cpu(cpu) {
61 cpu_stats = per_cpu_ptr(priv->counter, cpu); 60 cpu_stats = per_cpu_ptr(counter, cpu);
62 do { 61 do {
63 seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); 62 seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
64 bytes = cpu_stats->counter.bytes; 63 bytes = cpu_stats->counter.bytes;
65 packets = cpu_stats->counter.packets; 64 packets = cpu_stats->counter.packets;
66 } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); 65 } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
67 66
68 total.packets += packets; 67 total->packets += packets;
69 total.bytes += bytes; 68 total->bytes += bytes;
70 } 69 }
70}
71
72static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
73{
74 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
75 struct nft_counter total;
76
77 nft_counter_fetch(priv->counter, &total);
71 78
72 if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) || 79 if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
73 nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets))) 80 nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
@@ -118,6 +125,31 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
118 free_percpu(priv->counter); 125 free_percpu(priv->counter);
119} 126}
120 127
128static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
129{
130 struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
131 struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
132 struct nft_counter_percpu __percpu *cpu_stats;
133 struct nft_counter_percpu *this_cpu;
134 struct nft_counter total;
135
136 nft_counter_fetch(priv->counter, &total);
137
138 cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
139 GFP_ATOMIC);
140 if (cpu_stats == NULL)
141 return -ENOMEM;
142
143 preempt_disable();
144 this_cpu = this_cpu_ptr(cpu_stats);
145 this_cpu->counter.packets = total.packets;
146 this_cpu->counter.bytes = total.bytes;
147 preempt_enable();
148
149 priv_clone->counter = cpu_stats;
150 return 0;
151}
152
121static struct nft_expr_type nft_counter_type; 153static struct nft_expr_type nft_counter_type;
122static const struct nft_expr_ops nft_counter_ops = { 154static const struct nft_expr_ops nft_counter_ops = {
123 .type = &nft_counter_type, 155 .type = &nft_counter_type,
@@ -126,6 +158,7 @@ static const struct nft_expr_ops nft_counter_ops = {
126 .init = nft_counter_init, 158 .init = nft_counter_init,
127 .destroy = nft_counter_destroy, 159 .destroy = nft_counter_destroy,
128 .dump = nft_counter_dump, 160 .dump = nft_counter_dump,
161 .clone = nft_counter_clone,
129}; 162};
130 163
131static struct nft_expr_type nft_counter_type __read_mostly = { 164static struct nft_expr_type nft_counter_type __read_mostly = {
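
nft_counter_dump() and the new clone path both need a consistent total, so the summation moves into nft_counter_fetch(). The model below shows the aggregation and how a clone folds the snapshot into a single slot of the fresh per-cpu array; the kernel's u64_stats seqcount retry loop is deliberately left out of this sketch.

    #include <stdio.h>
    #include <stdint.h>

    #define NCPU 4

    struct nft_counter { uint64_t packets, bytes; };

    /* nft_counter_fetch(): sum every per-cpu slot into one total. */
    static void fetch(const struct nft_counter *percpu,
                      struct nft_counter *total)
    {
            total->packets = total->bytes = 0;
            for (int cpu = 0; cpu < NCPU; cpu++) {
                    total->packets += percpu[cpu].packets;
                    total->bytes   += percpu[cpu].bytes;
            }
    }

    /* nft_counter_clone(): seed a fresh per-cpu array with the snapshot,
     * folded into one slot. */
    static void clone_counters(struct nft_counter *dst,
                               const struct nft_counter *src)
    {
            struct nft_counter total;

            fetch(src, &total);
            for (int cpu = 0; cpu < NCPU; cpu++)
                    dst[cpu] = (struct nft_counter){ 0, 0 };
            dst[0] = total;
    }

    int main(void)
    {
            struct nft_counter src[NCPU] = {
                    { 1, 100 }, { 2, 200 }, { 3, 300 }, { 4, 400 },
            };
            struct nft_counter dst[NCPU], total;

            clone_counters(dst, src);
            fetch(dst, &total);
            printf("packets=%llu bytes=%llu\n",     /* 10 / 1000 */
                   (unsigned long long)total.packets,
                   (unsigned long long)total.bytes);
            return 0;
    }
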
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 513a8ef60a59..9dec3bd1b63c 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -50,8 +50,9 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
50 } 50 }
51 51
52 ext = nft_set_elem_ext(set, elem); 52 ext = nft_set_elem_ext(set, elem);
53 if (priv->expr != NULL) 53 if (priv->expr != NULL &&
54 nft_expr_clone(nft_set_ext_expr(ext), priv->expr); 54 nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
55 return NULL;
55 56
56 return elem; 57 return elem;
57} 58}
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index a13d6a386d63..319c22b4bca2 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -31,9 +31,8 @@ static void nft_log_eval(const struct nft_expr *expr,
31 const struct nft_pktinfo *pkt) 31 const struct nft_pktinfo *pkt)
32{ 32{
33 const struct nft_log *priv = nft_expr_priv(expr); 33 const struct nft_log *priv = nft_expr_priv(expr);
34 struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
35 34
36 nf_log_packet(net, pkt->ops->pf, pkt->ops->hooknum, pkt->skb, pkt->in, 35 nf_log_packet(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
37 pkt->out, &priv->loginfo, "%s", priv->prefix); 36 pkt->out, &priv->loginfo, "%s", priv->prefix);
38} 37}
39 38
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index cb2f13ebb5a6..9dfaf4d55ee0 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -31,6 +31,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
31 const struct nft_meta *priv = nft_expr_priv(expr); 31 const struct nft_meta *priv = nft_expr_priv(expr);
32 const struct sk_buff *skb = pkt->skb; 32 const struct sk_buff *skb = pkt->skb;
33 const struct net_device *in = pkt->in, *out = pkt->out; 33 const struct net_device *in = pkt->in, *out = pkt->out;
34 struct sock *sk;
34 u32 *dest = &regs->data[priv->dreg]; 35 u32 *dest = &regs->data[priv->dreg];
35 36
36 switch (priv->key) { 37 switch (priv->key) {
@@ -42,7 +43,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
42 *(__be16 *)dest = skb->protocol; 43 *(__be16 *)dest = skb->protocol;
43 break; 44 break;
44 case NFT_META_NFPROTO: 45 case NFT_META_NFPROTO:
45 *dest = pkt->ops->pf; 46 *dest = pkt->pf;
46 break; 47 break;
47 case NFT_META_L4PROTO: 48 case NFT_META_L4PROTO:
48 *dest = pkt->tprot; 49 *dest = pkt->tprot;
@@ -86,33 +87,35 @@ void nft_meta_get_eval(const struct nft_expr *expr,
86 *(u16 *)dest = out->type; 87 *(u16 *)dest = out->type;
87 break; 88 break;
88 case NFT_META_SKUID: 89 case NFT_META_SKUID:
89 if (skb->sk == NULL || !sk_fullsock(skb->sk)) 90 sk = skb_to_full_sk(skb);
91 if (!sk || !sk_fullsock(sk))
90 goto err; 92 goto err;
91 93
92 read_lock_bh(&skb->sk->sk_callback_lock); 94 read_lock_bh(&sk->sk_callback_lock);
93 if (skb->sk->sk_socket == NULL || 95 if (sk->sk_socket == NULL ||
94 skb->sk->sk_socket->file == NULL) { 96 sk->sk_socket->file == NULL) {
95 read_unlock_bh(&skb->sk->sk_callback_lock); 97 read_unlock_bh(&sk->sk_callback_lock);
96 goto err; 98 goto err;
97 } 99 }
98 100
99 *dest = from_kuid_munged(&init_user_ns, 101 *dest = from_kuid_munged(&init_user_ns,
100 skb->sk->sk_socket->file->f_cred->fsuid); 102 sk->sk_socket->file->f_cred->fsuid);
101 read_unlock_bh(&skb->sk->sk_callback_lock); 103 read_unlock_bh(&sk->sk_callback_lock);
102 break; 104 break;
103 case NFT_META_SKGID: 105 case NFT_META_SKGID:
104 if (skb->sk == NULL || !sk_fullsock(skb->sk)) 106 sk = skb_to_full_sk(skb);
107 if (!sk || !sk_fullsock(sk))
105 goto err; 108 goto err;
106 109
107 read_lock_bh(&skb->sk->sk_callback_lock); 110 read_lock_bh(&sk->sk_callback_lock);
108 if (skb->sk->sk_socket == NULL || 111 if (sk->sk_socket == NULL ||
109 skb->sk->sk_socket->file == NULL) { 112 sk->sk_socket->file == NULL) {
110 read_unlock_bh(&skb->sk->sk_callback_lock); 113 read_unlock_bh(&sk->sk_callback_lock);
111 goto err; 114 goto err;
112 } 115 }
113 *dest = from_kgid_munged(&init_user_ns, 116 *dest = from_kgid_munged(&init_user_ns,
114 skb->sk->sk_socket->file->f_cred->fsgid); 117 sk->sk_socket->file->f_cred->fsgid);
115 read_unlock_bh(&skb->sk->sk_callback_lock); 118 read_unlock_bh(&sk->sk_callback_lock);
116 break; 119 break;
117#ifdef CONFIG_IP_ROUTE_CLASSID 120#ifdef CONFIG_IP_ROUTE_CLASSID
118 case NFT_META_RTCLASSID: { 121 case NFT_META_RTCLASSID: {
@@ -135,7 +138,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
135 break; 138 break;
136 } 139 }
137 140
138 switch (pkt->ops->pf) { 141 switch (pkt->pf) {
139 case NFPROTO_IPV4: 142 case NFPROTO_IPV4:
140 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) 143 if (ipv4_is_multicast(ip_hdr(skb)->daddr))
141 *dest = PACKET_MULTICAST; 144 *dest = PACKET_MULTICAST;
@@ -168,9 +171,10 @@ void nft_meta_get_eval(const struct nft_expr *expr,
168 break; 171 break;
169#ifdef CONFIG_CGROUP_NET_CLASSID 172#ifdef CONFIG_CGROUP_NET_CLASSID
170 case NFT_META_CGROUP: 173 case NFT_META_CGROUP:
171 if (skb->sk == NULL || !sk_fullsock(skb->sk)) 174 sk = skb_to_full_sk(skb);
175 if (!sk || !sk_fullsock(sk))
172 goto err; 176 goto err;
173 *dest = skb->sk->sk_classid; 177 *dest = sk->sk_classid;
174 break; 178 break;
175#endif 179#endif
176 default: 180 default:
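
The SKUID/SKGID/CGROUP cases switch from skb->sk to skb_to_full_sk(skb). The point, as these hunks read, is that an early-demuxed packet may carry a request socket, which has no sk_socket or credentials of its own; the helper maps it back to the full listener before anything is dereferenced. A toy model of that mapping (semantics assumed, names hypothetical):

    #include <stdio.h>
    #include <stddef.h>

    enum sk_state { FULL_SOCK, REQUEST_SOCK };

    struct sock {
            enum sk_state state;
            struct sock *listener;  /* set for request socks */
            const char *cred;       /* stand-in for sk_socket->file->f_cred */
    };

    static struct sock *skb_to_full_sk_model(struct sock *sk)
    {
            if (sk && sk->state == REQUEST_SOCK)
                    return sk->listener;
            return sk;
    }

    int main(void)
    {
            struct sock listener = { FULL_SOCK, NULL, "uid=0" };
            struct sock req = { REQUEST_SOCK, &listener, NULL };
            struct sock *sk = skb_to_full_sk_model(&req);

            printf("%s\n", sk && sk->cred ? sk->cred : "no credentials");
            return 0;
    }
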
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 96805d21d618..61d216eb7917 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -42,7 +42,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
42 queue = priv->queuenum + cpu % priv->queues_total; 42 queue = priv->queuenum + cpu % priv->queues_total;
43 } else { 43 } else {
44 queue = nfqueue_hash(pkt->skb, queue, 44 queue = nfqueue_hash(pkt->skb, queue,
45 priv->queues_total, pkt->ops->pf, 45 priv->queues_total, pkt->pf,
46 jhash_initval); 46 jhash_initval);
47 } 47 }
48 } 48 }
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 635dbba93d01..759ca5248a3d 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -22,38 +22,37 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
22 const struct nft_pktinfo *pkt) 22 const struct nft_pktinfo *pkt)
23{ 23{
24 struct nft_reject *priv = nft_expr_priv(expr); 24 struct nft_reject *priv = nft_expr_priv(expr);
25 struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
26 25
27 switch (pkt->ops->pf) { 26 switch (pkt->pf) {
28 case NFPROTO_IPV4: 27 case NFPROTO_IPV4:
29 switch (priv->type) { 28 switch (priv->type) {
30 case NFT_REJECT_ICMP_UNREACH: 29 case NFT_REJECT_ICMP_UNREACH:
31 nf_send_unreach(pkt->skb, priv->icmp_code, 30 nf_send_unreach(pkt->skb, priv->icmp_code,
32 pkt->ops->hooknum); 31 pkt->hook);
33 break; 32 break;
34 case NFT_REJECT_TCP_RST: 33 case NFT_REJECT_TCP_RST:
35 nf_send_reset(pkt->skb, pkt->ops->hooknum); 34 nf_send_reset(pkt->net, pkt->skb, pkt->hook);
36 break; 35 break;
37 case NFT_REJECT_ICMPX_UNREACH: 36 case NFT_REJECT_ICMPX_UNREACH:
38 nf_send_unreach(pkt->skb, 37 nf_send_unreach(pkt->skb,
39 nft_reject_icmp_code(priv->icmp_code), 38 nft_reject_icmp_code(priv->icmp_code),
40 pkt->ops->hooknum); 39 pkt->hook);
41 break; 40 break;
42 } 41 }
43 break; 42 break;
44 case NFPROTO_IPV6: 43 case NFPROTO_IPV6:
45 switch (priv->type) { 44 switch (priv->type) {
46 case NFT_REJECT_ICMP_UNREACH: 45 case NFT_REJECT_ICMP_UNREACH:
47 nf_send_unreach6(net, pkt->skb, priv->icmp_code, 46 nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code,
48 pkt->ops->hooknum); 47 pkt->hook);
49 break; 48 break;
50 case NFT_REJECT_TCP_RST: 49 case NFT_REJECT_TCP_RST:
51 nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); 50 nf_send_reset6(pkt->net, pkt->skb, pkt->hook);
52 break; 51 break;
53 case NFT_REJECT_ICMPX_UNREACH: 52 case NFT_REJECT_ICMPX_UNREACH:
54 nf_send_unreach6(net, pkt->skb, 53 nf_send_unreach6(pkt->net, pkt->skb,
55 nft_reject_icmpv6_code(priv->icmp_code), 54 nft_reject_icmpv6_code(priv->icmp_code),
56 pkt->ops->hooknum); 55 pkt->hook);
57 break; 56 break;
58 } 57 }
59 break; 58 break;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 9b42b5ea6dcd..d4aaad747ea9 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1193,7 +1193,6 @@ struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
1193 if (!(hook_mask & 1)) 1193 if (!(hook_mask & 1))
1194 continue; 1194 continue;
1195 ops[i].hook = fn; 1195 ops[i].hook = fn;
1196 ops[i].owner = table->me;
1197 ops[i].pf = table->af; 1196 ops[i].pf = table->af;
1198 ops[i].hooknum = hooknum; 1197 ops[i].hooknum = hooknum;
1199 ops[i].priority = table->priority; 1198 ops[i].priority = table->priority;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index faf32d888198..e7ac07e53b59 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -171,6 +171,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
171 if (timeout_ext == NULL) 171 if (timeout_ext == NULL)
172 ret = -ENOMEM; 172 ret = -ENOMEM;
173 173
174 rcu_read_unlock();
175 return ret;
176
174err_put_timeout: 177err_put_timeout:
175 __xt_ct_tg_timeout_put(timeout); 178 __xt_ct_tg_timeout_put(timeout);
176out: 179out:
@@ -318,8 +321,10 @@ static void xt_ct_destroy_timeout(struct nf_conn *ct)
318 321
319 if (timeout_put) { 322 if (timeout_put) {
320 timeout_ext = nf_ct_timeout_find(ct); 323 timeout_ext = nf_ct_timeout_find(ct);
321 if (timeout_ext) 324 if (timeout_ext) {
322 timeout_put(timeout_ext->timeout); 325 timeout_put(timeout_ext->timeout);
326 RCU_INIT_POINTER(timeout_ext->timeout, NULL);
327 }
323 } 328 }
324 rcu_read_unlock(); 329 rcu_read_unlock();
325#endif 330#endif
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index c13b79440ede..1763ab82bcd7 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -33,7 +33,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
33{ 33{
34 const struct xt_log_info *loginfo = par->targinfo; 34 const struct xt_log_info *loginfo = par->targinfo;
35 struct nf_loginfo li; 35 struct nf_loginfo li;
36 struct net *net = dev_net(par->in ? par->in : par->out); 36 struct net *net = par->net;
37 37
38 li.type = NF_LOG_TYPE_LOG; 38 li.type = NF_LOG_TYPE_LOG;
39 li.u.log.level = loginfo->level; 39 li.u.log.level = loginfo->level;
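
This is the first of many identical conversions in the xt_* code: every dev_net(par->in ? par->in : par->out) becomes a plain par->net, resolved once by the hook core from nf_hook_state. A small before/after model, with stand-in types:

    #include <assert.h>
    #include <stddef.h>

    struct net { int id; };
    struct net_device { struct net *nd_net; };

    static struct net *dev_net(const struct net_device *dev)
    {
            return dev->nd_net;
    }

    int main(void)
    {
            struct net ns = { 1 };
            struct net_device dev = { &ns };
            const struct net_device *in = &dev, *out = NULL;

            /* old style: derive the namespace from whichever device exists */
            struct net *old_way = dev_net(in ? in : out);

            /* new style: the core stores state->net once; matches read it */
            struct net *par_net = &ns;

            assert(old_way == par_net);
            return 0;
    }
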
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index fb7497c928a0..a1fa2c800cb9 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -26,7 +26,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
26{ 26{
27 const struct xt_nflog_info *info = par->targinfo; 27 const struct xt_nflog_info *info = par->targinfo;
28 struct nf_loginfo li; 28 struct nf_loginfo li;
29 struct net *net = dev_net(par->in ? par->in : par->out); 29 struct net *net = par->net;
30 30
31 li.type = NF_LOG_TYPE_ULOG; 31 li.type = NF_LOG_TYPE_ULOG;
32 li.u.ulog.copy_len = info->len; 32 li.u.ulog.copy_len = info->len;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 8c02501a530f..b7c43def0dc6 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
108 return -1; 108 return -1;
109 109
110 if (info->mss == XT_TCPMSS_CLAMP_PMTU) { 110 if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
111 struct net *net = dev_net(par->in ? par->in : par->out); 111 struct net *net = par->net;
112 unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); 112 unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
113 113
114 if (dst_mtu(skb_dst(skb)) <= minlen) { 114 if (dst_mtu(skb_dst(skb)) <= minlen) {
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index fd980aa7715d..3eff7b67cdf2 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -31,8 +31,9 @@ static unsigned int
31tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) 31tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
32{ 32{
33 const struct xt_tee_tginfo *info = par->targinfo; 33 const struct xt_tee_tginfo *info = par->targinfo;
34 int oif = info->priv ? info->priv->oif : 0;
34 35
35 nf_dup_ipv4(skb, par->hooknum, &info->gw.in, info->priv->oif); 36 nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif);
36 37
37 return XT_CONTINUE; 38 return XT_CONTINUE;
38} 39}
@@ -42,8 +43,9 @@ static unsigned int
42tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) 43tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
43{ 44{
44 const struct xt_tee_tginfo *info = par->targinfo; 45 const struct xt_tee_tginfo *info = par->targinfo;
46 int oif = info->priv ? info->priv->oif : 0;
45 47
46 nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, info->priv->oif); 48 nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif);
47 49
48 return XT_CONTINUE; 50 return XT_CONTINUE;
49} 51}
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d0c96c5ae29a..3ab591e73ec0 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -250,8 +250,8 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
250 * no such listener is found, or NULL if the TCP header is incomplete. 250 * no such listener is found, or NULL if the TCP header is incomplete.
251 */ 251 */
252static struct sock * 252static struct sock *
253tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, 253tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
254 struct sock *sk) 254 __be32 laddr, __be16 lport, struct sock *sk)
255{ 255{
256 const struct iphdr *iph = ip_hdr(skb); 256 const struct iphdr *iph = ip_hdr(skb);
257 struct tcphdr _hdr, *hp; 257 struct tcphdr _hdr, *hp;
@@ -267,7 +267,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
267 * to a listener socket if there's one */ 267 * to a listener socket if there's one */
268 struct sock *sk2; 268 struct sock *sk2;
269 269
270 sk2 = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, 270 sk2 = nf_tproxy_get_sock_v4(net, iph->protocol,
271 iph->saddr, laddr ? laddr : iph->daddr, 271 iph->saddr, laddr ? laddr : iph->daddr,
272 hp->source, lport ? lport : hp->dest, 272 hp->source, lport ? lport : hp->dest,
273 skb->dev, NFT_LOOKUP_LISTENER); 273 skb->dev, NFT_LOOKUP_LISTENER);
@@ -290,7 +290,7 @@ nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
290} 290}
291 291
292static unsigned int 292static unsigned int
293tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, 293tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
294 u_int32_t mark_mask, u_int32_t mark_value) 294 u_int32_t mark_mask, u_int32_t mark_value)
295{ 295{
296 const struct iphdr *iph = ip_hdr(skb); 296 const struct iphdr *iph = ip_hdr(skb);
@@ -305,7 +305,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
305 * addresses, this happens if the redirect already happened 305 * addresses, this happens if the redirect already happened
306 * and the current packet belongs to an already established 306 * and the current packet belongs to an already established
307 * connection */ 307 * connection */
308 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, 308 sk = nf_tproxy_get_sock_v4(net, iph->protocol,
309 iph->saddr, iph->daddr, 309 iph->saddr, iph->daddr,
310 hp->source, hp->dest, 310 hp->source, hp->dest,
311 skb->dev, NFT_LOOKUP_ESTABLISHED); 311 skb->dev, NFT_LOOKUP_ESTABLISHED);
@@ -317,11 +317,11 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
317 /* UDP has no TCP_TIME_WAIT state, so we never enter here */ 317 /* UDP has no TCP_TIME_WAIT state, so we never enter here */
318 if (sk && sk->sk_state == TCP_TIME_WAIT) 318 if (sk && sk->sk_state == TCP_TIME_WAIT)
319 /* reopening a TIME_WAIT connection needs special handling */ 319 /* reopening a TIME_WAIT connection needs special handling */
320 sk = tproxy_handle_time_wait4(skb, laddr, lport, sk); 320 sk = tproxy_handle_time_wait4(net, skb, laddr, lport, sk);
321 else if (!sk) 321 else if (!sk)
322 /* no, there's no established connection, check if 322 /* no, there's no established connection, check if
323 * there's a listener on the redirected addr/port */ 323 * there's a listener on the redirected addr/port */
324 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, 324 sk = nf_tproxy_get_sock_v4(net, iph->protocol,
325 iph->saddr, laddr, 325 iph->saddr, laddr,
326 hp->source, lport, 326 hp->source, lport,
327 skb->dev, NFT_LOOKUP_LISTENER); 327 skb->dev, NFT_LOOKUP_LISTENER);
@@ -351,7 +351,7 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
351{ 351{
352 const struct xt_tproxy_target_info *tgi = par->targinfo; 352 const struct xt_tproxy_target_info *tgi = par->targinfo;
353 353
354 return tproxy_tg4(skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); 354 return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value);
355} 355}
356 356
357static unsigned int 357static unsigned int
@@ -359,7 +359,7 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
359{ 359{
360 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; 360 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
361 361
362 return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); 362 return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
363} 363}
364 364
365#ifdef XT_TPROXY_HAVE_IPV6 365#ifdef XT_TPROXY_HAVE_IPV6
@@ -429,7 +429,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
429 * to a listener socket if there's one */ 429 * to a listener socket if there's one */
430 struct sock *sk2; 430 struct sock *sk2;
431 431
432 sk2 = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, 432 sk2 = nf_tproxy_get_sock_v6(par->net, tproto,
433 &iph->saddr, 433 &iph->saddr,
434 tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), 434 tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
435 hp->source, 435 hp->source,
@@ -472,7 +472,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
472 * addresses, this happens if the redirect already happened 472 * addresses, this happens if the redirect already happened
473 * and the current packet belongs to an already established 473 * and the current packet belongs to an already established
474 * connection */ 474 * connection */
475 sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, 475 sk = nf_tproxy_get_sock_v6(par->net, tproto,
476 &iph->saddr, &iph->daddr, 476 &iph->saddr, &iph->daddr,
477 hp->source, hp->dest, 477 hp->source, hp->dest,
478 par->in, NFT_LOOKUP_ESTABLISHED); 478 par->in, NFT_LOOKUP_ESTABLISHED);
@@ -487,7 +487,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
487 else if (!sk) 487 else if (!sk)
488 /* no, there's no established connection, check if 488 /* no, there's no established connection, check if
489 * there's a listener on the redirected addr/port */ 489 * there's a listener on the redirected addr/port */
490 sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, 490 sk = nf_tproxy_get_sock_v6(par->net, tproto,
491 &iph->saddr, laddr, 491 &iph->saddr, laddr,
492 hp->source, lport, 492 hp->source, lport,
493 par->in, NFT_LOOKUP_LISTENER); 493 par->in, NFT_LOOKUP_LISTENER);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 5b4743cc0436..11d6091991a4 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev,
125static bool 125static bool
126addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) 126addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
127{ 127{
128 struct net *net = dev_net(par->in ? par->in : par->out); 128 struct net *net = par->net;
129 const struct xt_addrtype_info *info = par->matchinfo; 129 const struct xt_addrtype_info *info = par->matchinfo;
130 const struct iphdr *iph = ip_hdr(skb); 130 const struct iphdr *iph = ip_hdr(skb);
131 bool ret = true; 131 bool ret = true;
@@ -143,7 +143,7 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
143static bool 143static bool
144addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) 144addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
145{ 145{
146 struct net *net = dev_net(par->in ? par->in : par->out); 146 struct net *net = par->net;
147 const struct xt_addrtype_info_v1 *info = par->matchinfo; 147 const struct xt_addrtype_info_v1 *info = par->matchinfo;
148 const struct iphdr *iph; 148 const struct iphdr *iph;
149 const struct net_device *dev = NULL; 149 const struct net_device *dev = NULL;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 075d89d94d28..99bbc829868d 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -317,7 +317,7 @@ static int count_them(struct net *net,
317static bool 317static bool
318connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) 318connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
319{ 319{
320 struct net *net = dev_net(par->in ? par->in : par->out); 320 struct net *net = par->net;
321 const struct xt_connlimit_info *info = par->matchinfo; 321 const struct xt_connlimit_info *info = par->matchinfo;
322 union nf_inet_addr addr; 322 union nf_inet_addr addr;
323 struct nf_conntrack_tuple tuple; 323 struct nf_conntrack_tuple tuple;
@@ -332,7 +332,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
332 tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 332 tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
333 zone = nf_ct_zone(ct); 333 zone = nf_ct_zone(ct);
334 } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), 334 } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
335 par->family, &tuple)) { 335 par->family, net, &tuple)) {
336 goto hotdrop; 336 goto hotdrop;
337 } 337 }
338 338
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 8d47c3780fda..71a9d95e0a81 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -48,6 +48,7 @@ static bool
48ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) 48ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
49{ 49{
50 const struct xt_ipvs_mtinfo *data = par->matchinfo; 50 const struct xt_ipvs_mtinfo *data = par->matchinfo;
51 struct netns_ipvs *ipvs = net_ipvs(par->net);
51 /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */ 52 /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
52 const u_int8_t family = par->family; 53 const u_int8_t family = par->family;
53 struct ip_vs_iphdr iph; 54 struct ip_vs_iphdr iph;
@@ -67,7 +68,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
67 goto out; 68 goto out;
68 } 69 }
69 70
70 ip_vs_fill_iph_skb(family, skb, &iph); 71 ip_vs_fill_iph_skb(family, skb, true, &iph);
71 72
72 if (data->bitmask & XT_IPVS_PROTO) 73 if (data->bitmask & XT_IPVS_PROTO)
73 if ((iph.protocol == data->l4proto) ^ 74 if ((iph.protocol == data->l4proto) ^
@@ -85,7 +86,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
85 /* 86 /*
86 * Check if the packet belongs to an existing entry 87 * Check if the packet belongs to an existing entry
87 */ 88 */
88 cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */); 89 cp = pp->conn_out_get(ipvs, family, skb, &iph);
89 if (unlikely(cp == NULL)) { 90 if (unlikely(cp == NULL)) {
90 match = false; 91 match = false;
91 goto out; 92 goto out;
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 0778855ea5e7..df8801e02a32 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -200,7 +200,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
200 unsigned char opts[MAX_IPOPTLEN]; 200 unsigned char opts[MAX_IPOPTLEN];
201 const struct xt_osf_finger *kf; 201 const struct xt_osf_finger *kf;
202 const struct xt_osf_user_finger *f; 202 const struct xt_osf_user_finger *f;
203 struct net *net = dev_net(p->in ? p->in : p->out); 203 struct net *net = p->net;
204 204
205 if (!info) 205 if (!info)
206 return false; 206 return false;
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index ca2e577ed8ac..1302b475abcb 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -14,6 +14,7 @@
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/file.h> 15#include <linux/file.h>
16#include <net/sock.h> 16#include <net/sock.h>
17#include <net/inet_sock.h>
17#include <linux/netfilter/x_tables.h> 18#include <linux/netfilter/x_tables.h>
18#include <linux/netfilter/xt_owner.h> 19#include <linux/netfilter/xt_owner.h>
19 20
@@ -33,8 +34,9 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
33{ 34{
34 const struct xt_owner_match_info *info = par->matchinfo; 35 const struct xt_owner_match_info *info = par->matchinfo;
35 const struct file *filp; 36 const struct file *filp;
37 struct sock *sk = skb_to_full_sk(skb);
36 38
37 if (skb->sk == NULL || skb->sk->sk_socket == NULL) 39 if (sk == NULL || sk->sk_socket == NULL)
38 return (info->match ^ info->invert) == 0; 40 return (info->match ^ info->invert) == 0;
39 else if (info->match & info->invert & XT_OWNER_SOCKET) 41 else if (info->match & info->invert & XT_OWNER_SOCKET)
40 /* 42 /*
@@ -43,7 +45,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
43 */ 45 */
44 return false; 46 return false;
45 47
46 filp = skb->sk->sk_socket->file; 48 filp = sk->sk_socket->file;
47 if (filp == NULL) 49 if (filp == NULL)
48 return ((info->match ^ info->invert) & 50 return ((info->match ^ info->invert) &
49 (XT_OWNER_UID | XT_OWNER_GID)) == 0; 51 (XT_OWNER_UID | XT_OWNER_GID)) == 0;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 45e1b30e4fb2..d725a27743a1 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -237,7 +237,7 @@ static void recent_table_flush(struct recent_table *t)
237static bool 237static bool
238recent_mt(const struct sk_buff *skb, struct xt_action_param *par) 238recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
239{ 239{
240 struct net *net = dev_net(par->in ? par->in : par->out); 240 struct net *net = par->net;
241 struct recent_net *recent_net = recent_pernet(net); 241 struct recent_net *recent_net = recent_pernet(net);
242 const struct xt_recent_mtinfo_v1 *info = par->matchinfo; 242 const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
243 struct recent_table *t; 243 struct recent_table *t;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 43e26c881100..2ec08f04b816 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -143,7 +143,8 @@ static bool xt_socket_sk_is_transparent(struct sock *sk)
143 } 143 }
144} 144}
145 145
146static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, 146static struct sock *xt_socket_lookup_slow_v4(struct net *net,
147 const struct sk_buff *skb,
147 const struct net_device *indev) 148 const struct net_device *indev)
148{ 149{
149 const struct iphdr *iph = ip_hdr(skb); 150 const struct iphdr *iph = ip_hdr(skb);
@@ -197,7 +198,7 @@ static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb,
197 } 198 }
198#endif 199#endif
199 200
200 return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, 201 return xt_socket_get_sock_v4(net, protocol, saddr, daddr,
201 sport, dport, indev); 202 sport, dport, indev);
202} 203}
203 204
@@ -209,7 +210,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
209 struct sock *sk = skb->sk; 210 struct sock *sk = skb->sk;
210 211
211 if (!sk) 212 if (!sk)
212 sk = xt_socket_lookup_slow_v4(skb, par->in); 213 sk = xt_socket_lookup_slow_v4(par->net, skb, par->in);
213 if (sk) { 214 if (sk) {
214 bool wildcard; 215 bool wildcard;
215 bool transparent = true; 216 bool transparent = true;
@@ -335,7 +336,8 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol,
335 return NULL; 336 return NULL;
336} 337}
337 338
338static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, 339static struct sock *xt_socket_lookup_slow_v6(struct net *net,
340 const struct sk_buff *skb,
339 const struct net_device *indev) 341 const struct net_device *indev)
340{ 342{
341 __be16 uninitialized_var(dport), uninitialized_var(sport); 343 __be16 uninitialized_var(dport), uninitialized_var(sport);
@@ -371,7 +373,7 @@ static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
371 return NULL; 373 return NULL;
372 } 374 }
373 375
374 return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr, 376 return xt_socket_get_sock_v6(net, tproto, saddr, daddr,
375 sport, dport, indev); 377 sport, dport, indev);
376} 378}
377 379
@@ -383,7 +385,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
383 struct sock *sk = skb->sk; 385 struct sock *sk = skb->sk;
384 386
385 if (!sk) 387 if (!sk)
386 sk = xt_socket_lookup_slow_v6(skb, par->in); 388 sk = xt_socket_lookup_slow_v6(par->net, skb, par->in);
387 if (sk) { 389 if (sk) {
388 bool wildcard; 390 bool wildcard;
389 bool transparent = true; 391 bool transparent = true;
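
Pattern note for the three xt matches above (osf, recent, socket): each used to derive the namespace from a device pointer, via dev_net(par->in ? par->in : par->out) or dev_net(skb->dev), which presumes such a pointer is always set. x_tables now records the namespace on the action param itself, so the matches read it directly. A sketch with a hypothetical match body:

    static bool example_mt(const struct sk_buff *skb,
                           struct xt_action_param *par)
    {
        /* previously dev_net(par->in ? par->in : par->out); fragile
         * on hooks where a device pointer may be absent */
        struct net *net = par->net;

        return net != NULL;    /* placeholder body for the sketch */
    }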
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 8f060d7f9a0e..59651af8cc27 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2116,7 +2116,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
2116 consume_skb(info.skb2); 2116 consume_skb(info.skb2);
2117 2117
2118 if (info.delivered) { 2118 if (info.delivered) {
2119 if (info.congested && (allocation & __GFP_WAIT)) 2119 if (info.congested && gfpflags_allow_blocking(allocation))
2120 yield(); 2120 yield();
2121 return 0; 2121 return 0;
2122 } 2122 }
@@ -2371,7 +2371,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
2371 int pos, idx, shift; 2371 int pos, idx, shift;
2372 2372
2373 err = 0; 2373 err = 0;
2374 netlink_table_grab(); 2374 netlink_lock_table();
2375 for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { 2375 for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
2376 if (len - pos < sizeof(u32)) 2376 if (len - pos < sizeof(u32))
2377 break; 2377 break;
@@ -2386,7 +2386,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
2386 } 2386 }
2387 if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen)) 2387 if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
2388 err = -EFAULT; 2388 err = -EFAULT;
2389 netlink_table_ungrab(); 2389 netlink_unlock_table();
2390 break; 2390 break;
2391 } 2391 }
2392 case NETLINK_CAP_ACK: 2392 case NETLINK_CAP_ACK:
@@ -2785,6 +2785,7 @@ static int netlink_dump(struct sock *sk)
2785 struct sk_buff *skb = NULL; 2785 struct sk_buff *skb = NULL;
2786 struct nlmsghdr *nlh; 2786 struct nlmsghdr *nlh;
2787 int len, err = -ENOBUFS; 2787 int len, err = -ENOBUFS;
2788 int alloc_min_size;
2788 int alloc_size; 2789 int alloc_size;
2789 2790
2790 mutex_lock(nlk->cb_mutex); 2791 mutex_lock(nlk->cb_mutex);
@@ -2793,9 +2794,6 @@ static int netlink_dump(struct sock *sk)
2793 goto errout_skb; 2794 goto errout_skb;
2794 } 2795 }
2795 2796
2796 cb = &nlk->cb;
2797 alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2798
2799 if (!netlink_rx_is_mmaped(sk) && 2797 if (!netlink_rx_is_mmaped(sk) &&
2800 atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 2798 atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2801 goto errout_skb; 2799 goto errout_skb;
@@ -2805,23 +2803,35 @@ static int netlink_dump(struct sock *sk)
2805 * to reduce number of system calls on dump operations, if user 2803 * to reduce number of system calls on dump operations, if user
2806 * ever provided a big enough buffer. 2804 * ever provided a big enough buffer.
2807 */ 2805 */
2808 if (alloc_size < nlk->max_recvmsg_len) { 2806 cb = &nlk->cb;
2809 skb = netlink_alloc_skb(sk, 2807 alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2810 nlk->max_recvmsg_len, 2808
2811 nlk->portid, 2809 if (alloc_min_size < nlk->max_recvmsg_len) {
2810 alloc_size = nlk->max_recvmsg_len;
2811 skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
2812 GFP_KERNEL | 2812 GFP_KERNEL |
2813 __GFP_NOWARN | 2813 __GFP_NOWARN |
2814 __GFP_NORETRY); 2814 __GFP_NORETRY);
2815 /* available room should be exact amount to avoid MSG_TRUNC */
2816 if (skb)
2817 skb_reserve(skb, skb_tailroom(skb) -
2818 nlk->max_recvmsg_len);
2819 } 2815 }
2820 if (!skb) 2816 if (!skb) {
2817 alloc_size = alloc_min_size;
2821 skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, 2818 skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
2822 GFP_KERNEL); 2819 GFP_KERNEL);
2820 }
2823 if (!skb) 2821 if (!skb)
2824 goto errout_skb; 2822 goto errout_skb;
2823
2824 /* Trim skb to allocated size. User is expected to provide buffer as
2825 * large as max(min_dump_alloc, 16KiB (max_recvmsg_len capped at
2826 * netlink_recvmsg())). dump will pack as many smaller messages as
2827 * could fit within the allocated skb. skb is typically allocated
2828 * with larger space than required (could be as much as near 2x the
2829 * requested size with align to next power of 2 approach). Allowing
2830 * dump to use the excess space makes it difficult for a user to have a
2831 * reasonable static buffer based on the expected largest dump of a
2832 * single netdev. The outcome is MSG_TRUNC error.
2833 */
2834 skb_reserve(skb, skb_tailroom(skb) - alloc_size);
2825 netlink_skb_set_owner_r(skb, sk); 2835 netlink_skb_set_owner_r(skb, sk);
2826 2836
2827 len = cb->dump(skb, cb); 2837 len = cb->dump(skb, cb);
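
The netlink_dump() rework above separates the minimum size the dumper needs (alloc_min_size) from the size actually obtained (alloc_size), then trims the skb's tailroom to alloc_size. Without the trim, the allocator's round-up (possibly near 2x the request) lets a dump pack more than the user-visible buffer holds, and recvmsg() reports MSG_TRUNC even for a correctly sized static buffer. Condensed from the hunk above, with error handling elided:

    alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);

    if (alloc_min_size < nlk->max_recvmsg_len) {
        /* opportunistic large buffer; may fail quietly */
        alloc_size = nlk->max_recvmsg_len;
        skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
                                GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
    }
    if (!skb) {
        alloc_size = alloc_min_size;    /* guaranteed-size fallback */
        skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
    }
    /* cap usable room at the requested size, not the rounded-up one */
    if (skb)
        skb_reserve(skb, skb_tailroom(skb) - alloc_size);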
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 2ed5f964772e..bc0e504f33a6 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -39,7 +39,7 @@ void genl_unlock(void)
39EXPORT_SYMBOL(genl_unlock); 39EXPORT_SYMBOL(genl_unlock);
40 40
41#ifdef CONFIG_LOCKDEP 41#ifdef CONFIG_LOCKDEP
42int lockdep_genl_is_held(void) 42bool lockdep_genl_is_held(void)
43{ 43{
44 return lockdep_is_held(&genl_mutex); 44 return lockdep_is_held(&genl_mutex);
45} 45}
@@ -1136,19 +1136,19 @@ int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb,
1136} 1136}
1137EXPORT_SYMBOL(genlmsg_multicast_allns); 1137EXPORT_SYMBOL(genlmsg_multicast_allns);
1138 1138
1139void genl_notify(struct genl_family *family, 1139void genl_notify(struct genl_family *family, struct sk_buff *skb,
1140 struct sk_buff *skb, struct net *net, u32 portid, u32 group, 1140 struct genl_info *info, u32 group, gfp_t flags)
1141 struct nlmsghdr *nlh, gfp_t flags)
1142{ 1141{
1142 struct net *net = genl_info_net(info);
1143 struct sock *sk = net->genl_sock; 1143 struct sock *sk = net->genl_sock;
1144 int report = 0; 1144 int report = 0;
1145 1145
1146 if (nlh) 1146 if (info->nlhdr)
1147 report = nlmsg_report(nlh); 1147 report = nlmsg_report(info->nlhdr);
1148 1148
1149 if (WARN_ON_ONCE(group >= family->n_mcgrps)) 1149 if (WARN_ON_ONCE(group >= family->n_mcgrps))
1150 return; 1150 return;
1151 group = family->mcgrp_offset + group; 1151 group = family->mcgrp_offset + group;
1152 nlmsg_notify(sk, skb, portid, group, report, flags); 1152 nlmsg_notify(sk, skb, info->snd_portid, group, report, flags);
1153} 1153}
1154EXPORT_SYMBOL(genl_notify); 1154EXPORT_SYMBOL(genl_notify);
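
API note: genl_notify() now takes the genl_info of the request being answered and derives the namespace, portid and report flag from it, dropping three parameters. A migration sketch for a hypothetical family (example_family, EXAMPLE_MCGRP and msg are illustrative names):

    /* before */
    genl_notify(&example_family, msg, genl_info_net(info),
                info->snd_portid, EXAMPLE_MCGRP, info->nlhdr, GFP_KERNEL);

    /* after */
    genl_notify(&example_family, msg, info, EXAMPLE_MCGRP, GFP_KERNEL);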
diff --git a/net/nfc/core.c b/net/nfc/core.c
index cff3f1614ad4..1fe3d3b362c0 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -449,7 +449,7 @@ error:
449 * @dev: The nfc device that found the target 449 * @dev: The nfc device that found the target
450 * @target_idx: index of the target that must be deactivated 450 * @target_idx: index of the target that must be deactivated
451 */ 451 */
452int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx) 452int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode)
453{ 453{
454 int rc = 0; 454 int rc = 0;
455 455
@@ -476,7 +476,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx)
476 if (dev->ops->check_presence) 476 if (dev->ops->check_presence)
477 del_timer_sync(&dev->check_pres_timer); 477 del_timer_sync(&dev->check_pres_timer);
478 478
479 dev->ops->deactivate_target(dev, dev->active_target); 479 dev->ops->deactivate_target(dev, dev->active_target, mode);
480 dev->active_target = NULL; 480 dev->active_target = NULL;
481 481
482error: 482error:
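
nfc_deactivate_target() grows a mode argument so callers can choose between putting a target to sleep (still re-activatable) and dropping it to idle. The constants are the NFC_TARGET_MODE_* values this series adds to net/nfc/nfc.h (see the nfc.h hunk below); a usage sketch matching the two call sites in this patch:

    /* keep the tag reachable across a protocol switch (netlink.c) */
    nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP);

    /* final teardown when the owning socket dies (rawsock.c) */
    nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_IDLE);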
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index 009bcf317101..23c2a118ac9f 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -631,7 +631,8 @@ static int digital_activate_target(struct nfc_dev *nfc_dev,
631} 631}
632 632
633static void digital_deactivate_target(struct nfc_dev *nfc_dev, 633static void digital_deactivate_target(struct nfc_dev *nfc_dev,
634 struct nfc_target *target) 634 struct nfc_target *target,
635 u8 mode)
635{ 636{
636 struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); 637 struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev);
637 638
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 6e061da2258a..2b0f0ac498d2 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -678,7 +678,8 @@ static int hci_activate_target(struct nfc_dev *nfc_dev,
678} 678}
679 679
680static void hci_deactivate_target(struct nfc_dev *nfc_dev, 680static void hci_deactivate_target(struct nfc_dev *nfc_dev,
681 struct nfc_target *target) 681 struct nfc_target *target,
682 u8 mode)
682{ 683{
683} 684}
684 685
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
index 1b90c0531852..1399a03fa6e6 100644
--- a/net/nfc/hci/llc.c
+++ b/net/nfc/hci/llc.c
@@ -144,11 +144,13 @@ inline int nfc_llc_start(struct nfc_llc *llc)
144{ 144{
145 return llc->ops->start(llc); 145 return llc->ops->start(llc);
146} 146}
147EXPORT_SYMBOL(nfc_llc_start);
147 148
148inline int nfc_llc_stop(struct nfc_llc *llc) 149inline int nfc_llc_stop(struct nfc_llc *llc)
149{ 150{
150 return llc->ops->stop(llc); 151 return llc->ops->stop(llc);
151} 152}
153EXPORT_SYMBOL(nfc_llc_stop);
152 154
153inline void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb) 155inline void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb)
154{ 156{
diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig
index 901c1ddba841..85d4819ab657 100644
--- a/net/nfc/nci/Kconfig
+++ b/net/nfc/nci/Kconfig
@@ -12,7 +12,7 @@ config NFC_NCI
12config NFC_NCI_SPI 12config NFC_NCI_SPI
13 depends on NFC_NCI && SPI 13 depends on NFC_NCI && SPI
14 select CRC_CCITT 14 select CRC_CCITT
15 bool "NCI over SPI protocol support" 15 tristate "NCI over SPI protocol support"
16 default n 16 default n
17 help 17 help
18 NCI (NFC Controller Interface) is a communication protocol between 18 NCI (NFC Controller Interface) is a communication protocol between
diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile
index b4b85b82e988..0ca31d9bf741 100644
--- a/net/nfc/nci/Makefile
+++ b/net/nfc/nci/Makefile
@@ -6,7 +6,8 @@ obj-$(CONFIG_NFC_NCI) += nci.o
6 6
7nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o 7nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o
8 8
9nci-$(CONFIG_NFC_NCI_SPI) += spi.o 9nci_spi-y += spi.o
10obj-$(CONFIG_NFC_NCI_SPI) += nci_spi.o
10 11
11nci_uart-y += uart.o 12nci_uart-y += uart.o
12obj-$(CONFIG_NFC_NCI_UART) += nci_uart.o 13obj-$(CONFIG_NFC_NCI_UART) += nci_uart.o
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 943889b87a34..10c99a578421 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -64,6 +64,19 @@ struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev,
64 return NULL; 64 return NULL;
65} 65}
66 66
67int nci_get_conn_info_by_id(struct nci_dev *ndev, u8 id)
68{
69 struct nci_conn_info *conn_info;
70
71 list_for_each_entry(conn_info, &ndev->conn_info_list, list) {
72 if (conn_info->id == id)
73 return conn_info->conn_id;
74 }
75
76 return -EINVAL;
77}
78EXPORT_SYMBOL(nci_get_conn_info_by_id);
79
67/* ---- NCI requests ---- */ 80/* ---- NCI requests ---- */
68 81
69void nci_req_complete(struct nci_dev *ndev, int result) 82void nci_req_complete(struct nci_dev *ndev, int result)
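
The new nci_get_conn_info_by_id() maps a caller-visible id (an NFCEE id, for instance) to the NCI connection id, returning -EINVAL when nothing matches. A usage sketch, assuming the caller already holds an id and a prepared skb:

    int conn_id = nci_get_conn_info_by_id(ndev, nfcee_id);

    if (conn_id < 0)
        return conn_id;    /* -EINVAL: no such connection */

    return nci_send_data(ndev, conn_id, skb);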
@@ -325,32 +338,46 @@ static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
325 sizeof(struct nci_rf_deactivate_cmd), &cmd); 338 sizeof(struct nci_rf_deactivate_cmd), &cmd);
326} 339}
327 340
328struct nci_prop_cmd_param { 341struct nci_cmd_param {
329 __u16 opcode; 342 __u16 opcode;
330 size_t len; 343 size_t len;
331 __u8 *payload; 344 __u8 *payload;
332}; 345};
333 346
334static void nci_prop_cmd_req(struct nci_dev *ndev, unsigned long opt) 347static void nci_generic_req(struct nci_dev *ndev, unsigned long opt)
335{ 348{
336 struct nci_prop_cmd_param *param = (struct nci_prop_cmd_param *)opt; 349 struct nci_cmd_param *param =
350 (struct nci_cmd_param *)opt;
337 351
338 nci_send_cmd(ndev, param->opcode, param->len, param->payload); 352 nci_send_cmd(ndev, param->opcode, param->len, param->payload);
339} 353}
340 354
341int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload) 355int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload)
342{ 356{
343 struct nci_prop_cmd_param param; 357 struct nci_cmd_param param;
344 358
345 param.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, oid); 359 param.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, oid);
346 param.len = len; 360 param.len = len;
347 param.payload = payload; 361 param.payload = payload;
348 362
349 return __nci_request(ndev, nci_prop_cmd_req, (unsigned long)&param, 363 return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
350 msecs_to_jiffies(NCI_CMD_TIMEOUT)); 364 msecs_to_jiffies(NCI_CMD_TIMEOUT));
351} 365}
352EXPORT_SYMBOL(nci_prop_cmd); 366EXPORT_SYMBOL(nci_prop_cmd);
353 367
368int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload)
369{
370 struct nci_cmd_param param;
371
372 param.opcode = opcode;
373 param.len = len;
374 param.payload = payload;
375
376 return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
377 msecs_to_jiffies(NCI_CMD_TIMEOUT));
378}
379EXPORT_SYMBOL(nci_core_cmd);
380
354int nci_core_reset(struct nci_dev *ndev) 381int nci_core_reset(struct nci_dev *ndev)
355{ 382{
356 return __nci_request(ndev, nci_reset_req, 0, 383 return __nci_request(ndev, nci_reset_req, 0,
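
With the request helper generalized from nci_prop_cmd_req to nci_generic_req, the same machinery now serves both proprietary and core opcodes, and nci_core_cmd() is the exported entry point for the latter. An illustrative call (the opcode choice and payload bytes are made up for the sketch):

    __u8 cfg[] = { 0x01, 0x00 };    /* illustrative payload */
    int r;

    r = nci_core_cmd(ndev, NCI_OP_CORE_SET_CONFIG_CMD, sizeof(cfg), cfg);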
@@ -402,9 +429,8 @@ static int nci_open_device(struct nci_dev *ndev)
402 msecs_to_jiffies(NCI_INIT_TIMEOUT)); 429 msecs_to_jiffies(NCI_INIT_TIMEOUT));
403 } 430 }
404 431
405 if (ndev->ops->post_setup) { 432 if (!rc && ndev->ops->post_setup)
406 rc = ndev->ops->post_setup(ndev); 433 rc = ndev->ops->post_setup(ndev);
407 }
408 434
409 if (!rc) { 435 if (!rc) {
410 rc = __nci_request(ndev, nci_init_complete_req, 0, 436 rc = __nci_request(ndev, nci_init_complete_req, 0,
@@ -540,7 +566,7 @@ static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
540 566
541int nci_nfcee_discover(struct nci_dev *ndev, u8 action) 567int nci_nfcee_discover(struct nci_dev *ndev, u8 action)
542{ 568{
543 return nci_request(ndev, nci_nfcee_discover_req, action, 569 return __nci_request(ndev, nci_nfcee_discover_req, action,
544 msecs_to_jiffies(NCI_CMD_TIMEOUT)); 570 msecs_to_jiffies(NCI_CMD_TIMEOUT));
545} 571}
546EXPORT_SYMBOL(nci_nfcee_discover); 572EXPORT_SYMBOL(nci_nfcee_discover);
@@ -561,8 +587,9 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode)
561 cmd.nfcee_id = nfcee_id; 587 cmd.nfcee_id = nfcee_id;
562 cmd.nfcee_mode = nfcee_mode; 588 cmd.nfcee_mode = nfcee_mode;
563 589
564 return nci_request(ndev, nci_nfcee_mode_set_req, (unsigned long)&cmd, 590 return __nci_request(ndev, nci_nfcee_mode_set_req,
565 msecs_to_jiffies(NCI_CMD_TIMEOUT)); 591 (unsigned long)&cmd,
592 msecs_to_jiffies(NCI_CMD_TIMEOUT));
566} 593}
567EXPORT_SYMBOL(nci_nfcee_mode_set); 594EXPORT_SYMBOL(nci_nfcee_mode_set);
568 595
@@ -588,12 +615,19 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
588 if (!cmd) 615 if (!cmd)
589 return -ENOMEM; 616 return -ENOMEM;
590 617
618 if (!number_destination_params)
619 return -EINVAL;
620
591 cmd->destination_type = destination_type; 621 cmd->destination_type = destination_type;
592 cmd->number_destination_params = number_destination_params; 622 cmd->number_destination_params = number_destination_params;
593 memcpy(cmd->params, params, params_len); 623 memcpy(cmd->params, params, params_len);
594 624
595 data.cmd = cmd; 625 data.cmd = cmd;
596 ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX]; 626
627 if (params->length > 0)
628 ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX];
629 else
630 ndev->cur_id = 0;
597 631
598 r = __nci_request(ndev, nci_core_conn_create_req, 632 r = __nci_request(ndev, nci_core_conn_create_req,
599 (unsigned long)&data, 633 (unsigned long)&data,
@@ -612,8 +646,8 @@ static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt)
612 646
613int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) 647int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id)
614{ 648{
615 return nci_request(ndev, nci_core_conn_close_req, conn_id, 649 return __nci_request(ndev, nci_core_conn_close_req, conn_id,
616 msecs_to_jiffies(NCI_CMD_TIMEOUT)); 650 msecs_to_jiffies(NCI_CMD_TIMEOUT));
617} 651}
618EXPORT_SYMBOL(nci_core_conn_close); 652EXPORT_SYMBOL(nci_core_conn_close);
619 653
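
nci_nfcee_discover(), nci_nfcee_mode_set() and nci_core_conn_close() all switch from nci_request() to __nci_request(). The locked wrapper looks roughly like the sketch below; since these helpers can run from a driver's setup/post_setup path, which already executes under req_lock and before NCI_UP is set, they must use the raw variant to avoid deadlocking or being rejected:

    /* rough shape of the locked wrapper, for orientation only */
    static int nci_request(struct nci_dev *ndev,
                           void (*req)(struct nci_dev *ndev,
                                       unsigned long opt),
                           unsigned long opt, __u32 timeout)
    {
        int rc;

        if (!test_bit(NCI_UP, &ndev->flags))
            return -ENETDOWN;

        mutex_lock(&ndev->req_lock);    /* serialize all requests */
        rc = __nci_request(ndev, req, opt, timeout);
        mutex_unlock(&ndev->req_lock);

        return rc;
    }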
@@ -801,9 +835,11 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
801} 835}
802 836
803static void nci_deactivate_target(struct nfc_dev *nfc_dev, 837static void nci_deactivate_target(struct nfc_dev *nfc_dev,
804 struct nfc_target *target) 838 struct nfc_target *target,
839 __u8 mode)
805{ 840{
806 struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); 841 struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
842 u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
807 843
808 pr_debug("entry\n"); 844 pr_debug("entry\n");
809 845
@@ -814,9 +850,14 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
814 850
815 ndev->target_active_prot = 0; 851 ndev->target_active_prot = 0;
816 852
853 switch (mode) {
854 case NFC_TARGET_MODE_SLEEP:
855 nci_mode = NCI_DEACTIVATE_TYPE_SLEEP_MODE;
856 break;
857 }
858
817 if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) { 859 if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) {
818 nci_request(ndev, nci_rf_deactivate_req, 860 nci_request(ndev, nci_rf_deactivate_req, nci_mode,
819 NCI_DEACTIVATE_TYPE_IDLE_MODE,
820 msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); 861 msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
821 } 862 }
822} 863}
@@ -850,7 +891,7 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev)
850 pr_debug("entry\n"); 891 pr_debug("entry\n");
851 892
852 if (nfc_dev->rf_mode == NFC_RF_INITIATOR) { 893 if (nfc_dev->rf_mode == NFC_RF_INITIATOR) {
853 nci_deactivate_target(nfc_dev, NULL); 894 nci_deactivate_target(nfc_dev, NULL, NCI_DEACTIVATE_TYPE_IDLE_MODE);
854 } else { 895 } else {
855 if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE || 896 if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE ||
856 atomic_read(&ndev->state) == NCI_DISCOVERY) { 897 atomic_read(&ndev->state) == NCI_DISCOVERY) {
@@ -1177,7 +1218,7 @@ int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb)
1177} 1218}
1178EXPORT_SYMBOL(nci_recv_frame); 1219EXPORT_SYMBOL(nci_recv_frame);
1179 1220
1180static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) 1221int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb)
1181{ 1222{
1182 pr_debug("len %d\n", skb->len); 1223 pr_debug("len %d\n", skb->len);
1183 1224
@@ -1195,6 +1236,7 @@ static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb)
1195 1236
1196 return ndev->ops->send(ndev, skb); 1237 return ndev->ops->send(ndev, skb);
1197} 1238}
1239EXPORT_SYMBOL(nci_send_frame);
1198 1240
1199/* Send NCI command */ 1241/* Send NCI command */
1200int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) 1242int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
@@ -1226,48 +1268,80 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
1226 1268
1227 return 0; 1269 return 0;
1228} 1270}
1271EXPORT_SYMBOL(nci_send_cmd);
1229 1272
1230/* Proprietary commands API */ 1273/* Proprietary commands API */
1231static struct nci_prop_ops *prop_cmd_lookup(struct nci_dev *ndev, 1274static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops,
1232 __u16 opcode) 1275 size_t n_ops,
1276 __u16 opcode)
1233{ 1277{
1234 size_t i; 1278 size_t i;
1235 struct nci_prop_ops *prop_op; 1279 struct nci_driver_ops *op;
1236 1280
1237 if (!ndev->ops->prop_ops || !ndev->ops->n_prop_ops) 1281 if (!ops || !n_ops)
1238 return NULL; 1282 return NULL;
1239 1283
1240 for (i = 0; i < ndev->ops->n_prop_ops; i++) { 1284 for (i = 0; i < n_ops; i++) {
1241 prop_op = &ndev->ops->prop_ops[i]; 1285 op = &ops[i];
1242 if (prop_op->opcode == opcode) 1286 if (op->opcode == opcode)
1243 return prop_op; 1287 return op;
1244 } 1288 }
1245 1289
1246 return NULL; 1290 return NULL;
1247} 1291}
1248 1292
1249int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, 1293static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode,
1250 struct sk_buff *skb) 1294 struct sk_buff *skb, struct nci_driver_ops *ops,
1295 size_t n_ops)
1251{ 1296{
1252 struct nci_prop_ops *prop_op; 1297 struct nci_driver_ops *op;
1253 1298
1254 prop_op = prop_cmd_lookup(ndev, rsp_opcode); 1299 op = ops_cmd_lookup(ops, n_ops, rsp_opcode);
1255 if (!prop_op || !prop_op->rsp) 1300 if (!op || !op->rsp)
1256 return -ENOTSUPP; 1301 return -ENOTSUPP;
1257 1302
1258 return prop_op->rsp(ndev, skb); 1303 return op->rsp(ndev, skb);
1259} 1304}
1260 1305
1261int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode, 1306static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode,
1262 struct sk_buff *skb) 1307 struct sk_buff *skb, struct nci_driver_ops *ops,
1308 size_t n_ops)
1263{ 1309{
1264 struct nci_prop_ops *prop_op; 1310 struct nci_driver_ops *op;
1265 1311
1266 prop_op = prop_cmd_lookup(ndev, ntf_opcode); 1312 op = ops_cmd_lookup(ops, n_ops, ntf_opcode);
1267 if (!prop_op || !prop_op->ntf) 1313 if (!op || !op->ntf)
1268 return -ENOTSUPP; 1314 return -ENOTSUPP;
1269 1315
1270 return prop_op->ntf(ndev, skb); 1316 return op->ntf(ndev, skb);
1317}
1318
1319int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 opcode,
1320 struct sk_buff *skb)
1321{
1322 return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->prop_ops,
1323 ndev->ops->n_prop_ops);
1324}
1325
1326int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 opcode,
1327 struct sk_buff *skb)
1328{
1329 return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->prop_ops,
1330 ndev->ops->n_prop_ops);
1331}
1332
1333int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode,
1334 struct sk_buff *skb)
1335{
1336 return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->core_ops,
1337 ndev->ops->n_core_ops);
1338}
1339
1340int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode,
1341 struct sk_buff *skb)
1342{
1343 return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->core_ops,
1344 ndev->ops->n_core_ops);
1271} 1345}
1272 1346
1273/* ---- NCI TX Data worker thread ---- */ 1347/* ---- NCI TX Data worker thread ---- */
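
The dispatch refactor above folds the proprietary-only prop_cmd_lookup() into a generic ops_cmd_lookup() over any nci_driver_ops table and instantiates the rsp/ntf paths twice, once for ops->prop_ops and once for the new ops->core_ops. A driver can therefore intercept core responses and notifications the same way it hooks proprietary ones; a hypothetical driver-side table:

    static int example_core_reset_ntf(struct nci_dev *ndev,
                                      struct sk_buff *skb)
    {
        return 0;    /* hypothetical driver-specific handling */
    }

    static struct nci_driver_ops example_core_ops[] = {
        {
            .opcode = NCI_OP_CORE_RESET_NTF,    /* illustrative opcode */
            .ntf    = example_core_reset_ntf,
        },
    };
    /* wired up from struct nci_ops via .core_ops / .n_core_ops */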
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 566466d90048..dbd24254412a 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -90,6 +90,18 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev,
90 nci_pbf_set((__u8 *)hdr, pbf); 90 nci_pbf_set((__u8 *)hdr, pbf);
91} 91}
92 92
93int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id)
94{
95 struct nci_conn_info *conn_info;
96
97 conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id);
98 if (!conn_info)
99 return -EPROTO;
100
101 return conn_info->max_pkt_payload_len;
102}
103EXPORT_SYMBOL(nci_conn_max_data_pkt_payload_size);
104
93static int nci_queue_tx_data_frags(struct nci_dev *ndev, 105static int nci_queue_tx_data_frags(struct nci_dev *ndev,
94 __u8 conn_id, 106 __u8 conn_id,
95 struct sk_buff *skb) { 107 struct sk_buff *skb) {
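
nci_conn_max_data_pkt_payload_size() exposes the negotiated maximum payload for a logical connection, or -EPROTO for an unknown one, letting drivers size buffers before calling nci_send_data(). Sketch:

    int max_len = nci_conn_max_data_pkt_payload_size(ndev, conn_id);
    struct sk_buff *skb;

    if (max_len < 0)
        return max_len;    /* -EPROTO: unknown connection */

    skb = nci_skb_alloc(ndev, max_len + NCI_DATA_HDR_SIZE, GFP_KERNEL);
    if (!skb)
        return -ENOMEM;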
@@ -203,6 +215,7 @@ free_exit:
203exit: 215exit:
204 return rc; 216 return rc;
205} 217}
218EXPORT_SYMBOL(nci_send_data);
206 219
207/* ----------------- NCI RX Data ----------------- */ 220/* ----------------- NCI RX Data ----------------- */
208 221
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 609f92283d1b..2aedac15cb59 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -70,6 +70,7 @@ struct nci_hcp_packet {
70#define NCI_HCI_ANY_SET_PARAMETER 0x01 70#define NCI_HCI_ANY_SET_PARAMETER 0x01
71#define NCI_HCI_ANY_GET_PARAMETER 0x02 71#define NCI_HCI_ANY_GET_PARAMETER 0x02
72#define NCI_HCI_ANY_CLOSE_PIPE 0x04 72#define NCI_HCI_ANY_CLOSE_PIPE 0x04
73#define NCI_HCI_ADM_CLEAR_ALL_PIPE 0x14
73 74
74#define NCI_HFP_NO_CHAINING 0x80 75#define NCI_HFP_NO_CHAINING 0x80
75 76
@@ -78,6 +79,8 @@ struct nci_hcp_packet {
78#define NCI_EVT_HOT_PLUG 0x03 79#define NCI_EVT_HOT_PLUG 0x03
79 80
80#define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 81#define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01
82#define NCI_HCI_ADM_CREATE_PIPE 0x10
83#define NCI_HCI_ADM_DELETE_PIPE 0x11
81 84
82/* HCP headers */ 85/* HCP headers */
83#define NCI_HCI_HCP_PACKET_HEADER_LEN 1 86#define NCI_HCI_HCP_PACKET_HEADER_LEN 1
@@ -101,6 +104,20 @@ struct nci_hcp_packet {
101#define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) 104#define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f)
102#define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) 105#define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f)
103 106
107static int nci_hci_result_to_errno(u8 result)
108{
109 switch (result) {
110 case NCI_HCI_ANY_OK:
111 return 0;
112 case NCI_HCI_ANY_E_REG_PAR_UNKNOWN:
113 return -EOPNOTSUPP;
114 case NCI_HCI_ANY_E_TIMEOUT:
115 return -ETIME;
116 default:
117 return -1;
118 }
119}
120
104/* HCI core */ 121/* HCI core */
105static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) 122static void nci_hci_reset_pipes(struct nci_hci_dev *hdev)
106{ 123{
@@ -146,18 +163,18 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
146 if (!conn_info) 163 if (!conn_info)
147 return -EPROTO; 164 return -EPROTO;
148 165
149 skb = nci_skb_alloc(ndev, 2 + conn_info->max_pkt_payload_len + 166 i = 0;
167 skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len +
150 NCI_DATA_HDR_SIZE, GFP_KERNEL); 168 NCI_DATA_HDR_SIZE, GFP_KERNEL);
151 if (!skb) 169 if (!skb)
152 return -ENOMEM; 170 return -ENOMEM;
153 171
154 skb_reserve(skb, 2 + NCI_DATA_HDR_SIZE); 172 skb_reserve(skb, NCI_DATA_HDR_SIZE + 2);
155 *skb_push(skb, 1) = data_type; 173 *skb_push(skb, 1) = data_type;
156 174
157 i = 0;
158 len = conn_info->max_pkt_payload_len;
159
160 do { 175 do {
176 len = conn_info->max_pkt_payload_len;
177
161 /* If last packet add NCI_HFP_NO_CHAINING */ 178 /* If last packet add NCI_HFP_NO_CHAINING */
162 if (i + conn_info->max_pkt_payload_len - 179 if (i + conn_info->max_pkt_payload_len -
163 (skb->len + 1) >= data_len) { 180 (skb->len + 1) >= data_len) {
@@ -177,9 +194,15 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
177 return r; 194 return r;
178 195
179 i += len; 196 i += len;
197
180 if (i < data_len) { 198 if (i < data_len) {
181 skb_trim(skb, 0); 199 skb = nci_skb_alloc(ndev,
182 skb_pull(skb, len); 200 conn_info->max_pkt_payload_len +
201 NCI_DATA_HDR_SIZE, GFP_KERNEL);
202 if (!skb)
203 return -ENOMEM;
204
205 skb_reserve(skb, NCI_DATA_HDR_SIZE + 1);
183 } 206 }
184 } while (i < data_len); 207 } while (i < data_len);
185 208
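
The chaining fix above matters because nci_send_data() takes ownership of the skb it is handed (it is queued for the TX worker), so the old skb_trim()/skb_pull() reuse of an already-submitted buffer was unsafe. Each continuation fragment now gets a fresh allocation, with one byte of headroom reserved for the HCP message header. Condensed from the hunk, payload copying elided:

    i = 0;
    do {
        len = conn_info->max_pkt_payload_len;
        /* ... copy up to len payload bytes into skb ... */

        r = nci_send_data(ndev, conn_info->conn_id, skb);
        if (r < 0)
            return r;
        i += len;

        if (i < data_len) {
            skb = nci_skb_alloc(ndev,
                                conn_info->max_pkt_payload_len +
                                NCI_DATA_HDR_SIZE, GFP_KERNEL);
            if (!skb)
                return -ENOMEM;
            skb_reserve(skb, NCI_DATA_HDR_SIZE + 1);
        }
    } while (i < data_len);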
@@ -212,7 +235,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
212 const u8 *param, size_t param_len, 235 const u8 *param, size_t param_len,
213 struct sk_buff **skb) 236 struct sk_buff **skb)
214{ 237{
215 struct nci_conn_info *conn_info; 238 struct nci_hcp_message *message;
239 struct nci_conn_info *conn_info;
216 struct nci_data data; 240 struct nci_data data;
217 int r; 241 int r;
218 u8 pipe = ndev->hci_dev->gate2pipe[gate]; 242 u8 pipe = ndev->hci_dev->gate2pipe[gate];
@@ -232,14 +256,34 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
232 256
233 r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, 257 r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
234 msecs_to_jiffies(NCI_DATA_TIMEOUT)); 258 msecs_to_jiffies(NCI_DATA_TIMEOUT));
235 259 if (r == NCI_STATUS_OK) {
236 if (r == NCI_STATUS_OK && skb) 260 message = (struct nci_hcp_message *)conn_info->rx_skb->data;
237 *skb = conn_info->rx_skb; 261 r = nci_hci_result_to_errno(
262 NCI_HCP_MSG_GET_CMD(message->header));
263 skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN);
264
265 if (!r && skb)
266 *skb = conn_info->rx_skb;
267 }
238 268
239 return r; 269 return r;
240} 270}
241EXPORT_SYMBOL(nci_hci_send_cmd); 271EXPORT_SYMBOL(nci_hci_send_cmd);
242 272
273int nci_hci_clear_all_pipes(struct nci_dev *ndev)
274{
275 int r;
276
277 r = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE,
278 NCI_HCI_ADM_CLEAR_ALL_PIPE, NULL, 0, NULL);
279 if (r < 0)
280 return r;
281
282 nci_hci_reset_pipes(ndev->hci_dev);
283 return r;
284}
285EXPORT_SYMBOL(nci_hci_clear_all_pipes);
286
243static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe, 287static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe,
244 u8 event, struct sk_buff *skb) 288 u8 event, struct sk_buff *skb)
245{ 289{
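
nci_hci_send_cmd() (and set_param/get_param below) now strip the one-byte HCP message header from the response and translate its instruction field through nci_hci_result_to_errno(), so callers receive a plain errno plus an skb already positioned at the payload. The new nci_hci_clear_all_pipes() wrapper sends ADM_CLEAR_ALL_PIPE on the admin gate and resets the local gate-to-pipe table; its intended use shows up in the session-init hunk further down:

    /* fresh session: drop pipes left over from a previous session */
    r = nci_hci_clear_all_pipes(ndev);
    if (r < 0)
        goto exit;

    r = nci_hci_dev_connect_gates(ndev,
                                  ndev->hci_dev->init_data.gate_count,
                                  ndev->hci_dev->init_data.gates);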
@@ -328,9 +372,6 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe,
328 struct nci_conn_info *conn_info; 372 struct nci_conn_info *conn_info;
329 u8 status = result; 373 u8 status = result;
330 374
331 if (result != NCI_HCI_ANY_OK)
332 goto exit;
333
334 conn_info = ndev->hci_dev->conn_info; 375 conn_info = ndev->hci_dev->conn_info;
335 if (!conn_info) { 376 if (!conn_info) {
336 status = NCI_STATUS_REJECTED; 377 status = NCI_STATUS_REJECTED;
@@ -340,7 +381,7 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe,
340 conn_info->rx_skb = skb; 381 conn_info->rx_skb = skb;
341 382
342exit: 383exit:
343 nci_req_complete(ndev, status); 384 nci_req_complete(ndev, NCI_STATUS_OK);
344} 385}
345 386
346/* Receive hcp message for pipe, with type and cmd. 387/* Receive hcp message for pipe, with type and cmd.
@@ -366,7 +407,7 @@ static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe,
366 break; 407 break;
367 } 408 }
368 409
369 nci_req_complete(ndev, 0); 410 nci_req_complete(ndev, NCI_STATUS_OK);
370} 411}
371 412
372static void nci_hci_msg_rx_work(struct work_struct *work) 413static void nci_hci_msg_rx_work(struct work_struct *work)
@@ -378,7 +419,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work)
378 u8 pipe, type, instruction; 419 u8 pipe, type, instruction;
379 420
380 while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { 421 while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) {
381 pipe = skb->data[0]; 422 pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]);
382 skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); 423 skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN);
383 message = (struct nci_hcp_message *)skb->data; 424 message = (struct nci_hcp_message *)skb->data;
384 type = NCI_HCP_MSG_GET_TYPE(message->header); 425 type = NCI_HCP_MSG_GET_TYPE(message->header);
@@ -395,7 +436,7 @@ void nci_hci_data_received_cb(void *context,
395{ 436{
396 struct nci_dev *ndev = (struct nci_dev *)context; 437 struct nci_dev *ndev = (struct nci_dev *)context;
397 struct nci_hcp_packet *packet; 438 struct nci_hcp_packet *packet;
398 u8 pipe, type, instruction; 439 u8 pipe, type;
399 struct sk_buff *hcp_skb; 440 struct sk_buff *hcp_skb;
400 struct sk_buff *frag_skb; 441 struct sk_buff *frag_skb;
401 int msg_len; 442 int msg_len;
@@ -415,7 +456,7 @@ void nci_hci_data_received_cb(void *context,
415 456
416 /* it's the last fragment. Does it need re-aggregation? */ 457 /* it's the last fragment. Does it need re-aggregation? */
417 if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { 458 if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) {
418 pipe = packet->header & NCI_HCI_FRAGMENT; 459 pipe = NCI_HCP_MSG_GET_PIPE(packet->header);
419 skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); 460 skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb);
420 461
421 msg_len = 0; 462 msg_len = 0;
@@ -434,7 +475,7 @@ void nci_hci_data_received_cb(void *context,
434 *skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe; 475 *skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe;
435 476
436 skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { 477 skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) {
437 msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; 478 msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN;
438 memcpy(skb_put(hcp_skb, msg_len), frag_skb->data + 479 memcpy(skb_put(hcp_skb, msg_len), frag_skb->data +
439 NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); 480 NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len);
440 } 481 }
@@ -452,11 +493,10 @@ void nci_hci_data_received_cb(void *context,
452 packet = (struct nci_hcp_packet *)hcp_skb->data; 493 packet = (struct nci_hcp_packet *)hcp_skb->data;
453 type = NCI_HCP_MSG_GET_TYPE(packet->message.header); 494 type = NCI_HCP_MSG_GET_TYPE(packet->message.header);
454 if (type == NCI_HCI_HCP_RESPONSE) { 495 if (type == NCI_HCI_HCP_RESPONSE) {
455 pipe = packet->header; 496 pipe = NCI_HCP_MSG_GET_PIPE(packet->header);
456 instruction = NCI_HCP_MSG_GET_CMD(packet->message.header); 497 skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN);
457 skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN + 498 nci_hci_hcp_message_rx(ndev, pipe, type,
458 NCI_HCI_HCP_MESSAGE_HEADER_LEN); 499 NCI_STATUS_OK, hcp_skb);
459 nci_hci_hcp_message_rx(ndev, pipe, type, instruction, hcp_skb);
460 } else { 500 } else {
461 skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); 501 skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb);
462 schedule_work(&ndev->hci_dev->msg_rx_work); 502 schedule_work(&ndev->hci_dev->msg_rx_work);
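
The receive-path fixes above replace raw header reads (and one masking with the wrong constant, NCI_HCI_FRAGMENT) with NCI_HCP_MSG_GET_PIPE(). The HCP packet header is a single byte: bit 7 is the chaining flag and bits 6..0 are the pipe id, so extraction looks like:

    u8 hdr  = skb->data[0];
    u8 pipe = NCI_HCP_MSG_GET_PIPE(hdr);        /* header & 0x7f */
    bool final = hdr & NCI_HFP_NO_CHAINING;     /* bit 7: last fragment */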
@@ -485,9 +525,47 @@ int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe)
485} 525}
486EXPORT_SYMBOL(nci_hci_open_pipe); 526EXPORT_SYMBOL(nci_hci_open_pipe);
487 527
528static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host,
529 u8 dest_gate, int *result)
530{
531 u8 pipe;
532 struct sk_buff *skb;
533 struct nci_hci_create_pipe_params params;
534 struct nci_hci_create_pipe_resp *resp;
535
536 pr_debug("gate=%d\n", dest_gate);
537
538 params.src_gate = NCI_HCI_ADMIN_GATE;
539 params.dest_host = dest_host;
540 params.dest_gate = dest_gate;
541
542 *result = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE,
543 NCI_HCI_ADM_CREATE_PIPE,
544 (u8 *)&params, sizeof(params), &skb);
545 if (*result < 0)
546 return NCI_HCI_INVALID_PIPE;
547
548 resp = (struct nci_hci_create_pipe_resp *)skb->data;
549 pipe = resp->pipe;
550 kfree_skb(skb);
551
552 pr_debug("pipe created=%d\n", pipe);
553
554 return pipe;
555}
556
557static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe)
558{
559 pr_debug("\n");
560
561 return nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE,
562 NCI_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL);
563}
564
488int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, 565int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
489 const u8 *param, size_t param_len) 566 const u8 *param, size_t param_len)
490{ 567{
568 struct nci_hcp_message *message;
491 struct nci_conn_info *conn_info; 569 struct nci_conn_info *conn_info;
492 struct nci_data data; 570 struct nci_data data;
493 int r; 571 int r;
@@ -520,6 +598,12 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
520 r = nci_request(ndev, nci_hci_send_data_req, 598 r = nci_request(ndev, nci_hci_send_data_req,
521 (unsigned long)&data, 599 (unsigned long)&data,
522 msecs_to_jiffies(NCI_DATA_TIMEOUT)); 600 msecs_to_jiffies(NCI_DATA_TIMEOUT));
601 if (r == NCI_STATUS_OK) {
602 message = (struct nci_hcp_message *)conn_info->rx_skb->data;
603 r = nci_hci_result_to_errno(
604 NCI_HCP_MSG_GET_CMD(message->header));
605 skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN);
606 }
523 607
524 kfree(tmp); 608 kfree(tmp);
525 return r; 609 return r;
@@ -529,6 +613,7 @@ EXPORT_SYMBOL(nci_hci_set_param);
529int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, 613int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
530 struct sk_buff **skb) 614 struct sk_buff **skb)
531{ 615{
616 struct nci_hcp_message *message;
532 struct nci_conn_info *conn_info; 617 struct nci_conn_info *conn_info;
533 struct nci_data data; 618 struct nci_data data;
534 int r; 619 int r;
@@ -553,8 +638,15 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
553 r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, 638 r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
554 msecs_to_jiffies(NCI_DATA_TIMEOUT)); 639 msecs_to_jiffies(NCI_DATA_TIMEOUT));
555 640
556 if (r == NCI_STATUS_OK) 641 if (r == NCI_STATUS_OK) {
557 *skb = conn_info->rx_skb; 642 message = (struct nci_hcp_message *)conn_info->rx_skb->data;
643 r = nci_hci_result_to_errno(
644 NCI_HCP_MSG_GET_CMD(message->header));
645 skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN);
646
647 if (!r && skb)
648 *skb = conn_info->rx_skb;
649 }
558 650
559 return r; 651 return r;
560} 652}
@@ -563,6 +655,7 @@ EXPORT_SYMBOL(nci_hci_get_param);
563int nci_hci_connect_gate(struct nci_dev *ndev, 655int nci_hci_connect_gate(struct nci_dev *ndev,
564 u8 dest_host, u8 dest_gate, u8 pipe) 656 u8 dest_host, u8 dest_gate, u8 pipe)
565{ 657{
658 bool pipe_created = false;
566 int r; 659 int r;
567 660
568 if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE) 661 if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE)
@@ -581,12 +674,26 @@ int nci_hci_connect_gate(struct nci_dev *ndev,
581 case NCI_HCI_ADMIN_GATE: 674 case NCI_HCI_ADMIN_GATE:
582 pipe = NCI_HCI_ADMIN_PIPE; 675 pipe = NCI_HCI_ADMIN_PIPE;
583 break; 676 break;
677 default:
678 pipe = nci_hci_create_pipe(ndev, dest_host, dest_gate, &r);
679 if (pipe < 0)
680 return r;
681 pipe_created = true;
682 break;
584 } 683 }
585 684
586open_pipe: 685open_pipe:
587 r = nci_hci_open_pipe(ndev, pipe); 686 r = nci_hci_open_pipe(ndev, pipe);
588 if (r < 0) 687 if (r < 0) {
688 if (pipe_created) {
689 if (nci_hci_delete_pipe(ndev, pipe) < 0) {
690 /* TODO: Cannot clean by deleting pipe...
691 * -> inconsistent state
692 */
693 }
694 }
589 return r; 695 return r;
696 }
590 697
591 ndev->hci_dev->pipes[pipe].gate = dest_gate; 698 ndev->hci_dev->pipes[pipe].gate = dest_gate;
592 ndev->hci_dev->pipes[pipe].host = dest_host; 699 ndev->hci_dev->pipes[pipe].host = dest_host;
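
nci_hci_connect_gate() previously handled only the statically mapped gates; for any other gate it now asks the admin gate to create a pipe, and if opening that pipe then fails it attempts to delete it again rather than leak it (with a TODO acknowledging that a failed delete leaves inconsistent state). A sketch of the fallback, testing against the invalid-pipe sentinel since pipe ids are u8:

    pipe = nci_hci_create_pipe(ndev, dest_host, dest_gate, &r);
    if (pipe == NCI_HCI_INVALID_PIPE)
        return r;              /* r carries the errno from the cmd */
    pipe_created = true;

    r = nci_hci_open_pipe(ndev, pipe);
    if (r < 0 && pipe_created)
        nci_hci_delete_pipe(ndev, pipe);    /* best-effort cleanup */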
@@ -653,6 +760,10 @@ int nci_hci_dev_session_init(struct nci_dev *ndev)
653 /* Restore gate<->pipe table from some proprietary location. */ 760 /* Restore gate<->pipe table from some proprietary location. */
654 r = ndev->ops->hci_load_session(ndev); 761 r = ndev->ops->hci_load_session(ndev);
655 } else { 762 } else {
763 r = nci_hci_clear_all_pipes(ndev);
764 if (r < 0)
765 goto exit;
766
656 r = nci_hci_dev_connect_gates(ndev, 767 r = nci_hci_dev_connect_gates(ndev,
657 ndev->hci_dev->init_data.gate_count, 768 ndev->hci_dev->init_data.gate_count,
658 ndev->hci_dev->init_data.gates); 769 ndev->hci_dev->init_data.gates);
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 5d1c2e391c56..2ada2b39e355 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -759,7 +759,7 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb)
759 skb_pull(skb, NCI_CTRL_HDR_SIZE); 759 skb_pull(skb, NCI_CTRL_HDR_SIZE);
760 760
761 if (nci_opcode_gid(ntf_opcode) == NCI_GID_PROPRIETARY) { 761 if (nci_opcode_gid(ntf_opcode) == NCI_GID_PROPRIETARY) {
762 if (nci_prop_ntf_packet(ndev, ntf_opcode, skb)) { 762 if (nci_prop_ntf_packet(ndev, ntf_opcode, skb) == -ENOTSUPP) {
763 pr_err("unsupported ntf opcode 0x%x\n", 763 pr_err("unsupported ntf opcode 0x%x\n",
764 ntf_opcode); 764 ntf_opcode);
765 } 765 }
@@ -805,6 +805,7 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb)
805 break; 805 break;
806 } 806 }
807 807
808 nci_core_ntf_packet(ndev, ntf_opcode, skb);
808end: 809end:
809 kfree_skb(skb); 810 kfree_skb(skb);
810} 811}
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index 408bd8f857ab..9b6eb913d801 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -355,6 +355,7 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
355 break; 355 break;
356 } 356 }
357 357
358 nci_core_rsp_packet(ndev, rsp_opcode, skb);
358end: 359end:
359 kfree_skb(skb); 360 kfree_skb(skb);
360 361
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index ec250e77763a..d904cd2f1442 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -18,6 +18,8 @@
18 18
19#define pr_fmt(fmt) "nci_spi: %s: " fmt, __func__ 19#define pr_fmt(fmt) "nci_spi: %s: " fmt, __func__
20 20
21#include <linux/module.h>
22
21#include <linux/export.h> 23#include <linux/export.h>
22#include <linux/spi/spi.h> 24#include <linux/spi/spi.h>
23#include <linux/crc-ccitt.h> 25#include <linux/crc-ccitt.h>
@@ -56,6 +58,7 @@ static int __nci_spi_send(struct nci_spi *nspi, struct sk_buff *skb,
56 } 58 }
57 t.cs_change = cs_change; 59 t.cs_change = cs_change;
58 t.delay_usecs = nspi->xfer_udelay; 60 t.delay_usecs = nspi->xfer_udelay;
61 t.speed_hz = nspi->xfer_speed_hz;
59 62
60 spi_message_init(&m); 63 spi_message_init(&m);
61 spi_message_add_tail(&t, &m); 64 spi_message_add_tail(&t, &m);
@@ -142,7 +145,8 @@ struct nci_spi *nci_spi_allocate_spi(struct spi_device *spi,
142 145
143 nspi->acknowledge_mode = acknowledge_mode; 146 nspi->acknowledge_mode = acknowledge_mode;
144 nspi->xfer_udelay = delay; 147 nspi->xfer_udelay = delay;
145 148 /* Use controller max SPI speed by default */
149 nspi->xfer_speed_hz = 0;
146 nspi->spi = spi; 150 nspi->spi = spi;
147 nspi->ndev = ndev; 151 nspi->ndev = ndev;
148 init_completion(&nspi->req_completion); 152 init_completion(&nspi->req_completion);
@@ -195,12 +199,14 @@ static struct sk_buff *__nci_spi_read(struct nci_spi *nspi)
195 tx.tx_buf = req; 199 tx.tx_buf = req;
196 tx.len = 2; 200 tx.len = 2;
197 tx.cs_change = 0; 201 tx.cs_change = 0;
202 tx.speed_hz = nspi->xfer_speed_hz;
198 spi_message_add_tail(&tx, &m); 203 spi_message_add_tail(&tx, &m);
199 204
200 memset(&rx, 0, sizeof(struct spi_transfer)); 205 memset(&rx, 0, sizeof(struct spi_transfer));
201 rx.rx_buf = resp_hdr; 206 rx.rx_buf = resp_hdr;
202 rx.len = 2; 207 rx.len = 2;
203 rx.cs_change = 1; 208 rx.cs_change = 1;
209 rx.speed_hz = nspi->xfer_speed_hz;
204 spi_message_add_tail(&rx, &m); 210 spi_message_add_tail(&rx, &m);
205 211
206 ret = spi_sync(nspi->spi, &m); 212 ret = spi_sync(nspi->spi, &m);
@@ -224,6 +230,7 @@ static struct sk_buff *__nci_spi_read(struct nci_spi *nspi)
224 rx.len = rx_len; 230 rx.len = rx_len;
225 rx.cs_change = 0; 231 rx.cs_change = 0;
226 rx.delay_usecs = nspi->xfer_udelay; 232 rx.delay_usecs = nspi->xfer_udelay;
233 rx.speed_hz = nspi->xfer_speed_hz;
227 spi_message_add_tail(&rx, &m); 234 spi_message_add_tail(&rx, &m);
228 235
229 ret = spi_sync(nspi->spi, &m); 236 ret = spi_sync(nspi->spi, &m);
@@ -320,3 +327,5 @@ done:
320 return skb; 327 return skb;
321} 328}
322EXPORT_SYMBOL_GPL(nci_spi_read); 329EXPORT_SYMBOL_GPL(nci_spi_read);
330
331MODULE_LICENSE("GPL");
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 853172c27f68..f58c1fba1026 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -885,7 +885,7 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
885 target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); 885 target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]);
886 protocol = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); 886 protocol = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]);
887 887
888 nfc_deactivate_target(dev, target_idx); 888 nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP);
889 rc = nfc_activate_target(dev, target_idx, protocol); 889 rc = nfc_activate_target(dev, target_idx, protocol);
890 890
891 nfc_put_device(dev); 891 nfc_put_device(dev);
@@ -1109,10 +1109,8 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
1109 idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); 1109 idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
1110 1110
1111 dev = nfc_get_device(idx); 1111 dev = nfc_get_device(idx);
1112 if (!dev) { 1112 if (!dev)
1113 rc = -ENODEV; 1113 return -ENODEV;
1114 goto exit;
1115 }
1116 1114
1117 device_lock(&dev->dev); 1115 device_lock(&dev->dev);
1118 1116
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 5c93e8412a26..c20b784ad720 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -25,6 +25,9 @@
25#include <net/nfc/nfc.h> 25#include <net/nfc/nfc.h>
26#include <net/sock.h> 26#include <net/sock.h>
27 27
28#define NFC_TARGET_MODE_IDLE 0
29#define NFC_TARGET_MODE_SLEEP 1
30
28struct nfc_protocol { 31struct nfc_protocol {
29 int id; 32 int id;
30 struct proto *proto; 33 struct proto *proto;
@@ -147,7 +150,7 @@ int nfc_dep_link_down(struct nfc_dev *dev);
147 150
148int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol); 151int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol);
149 152
150int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx); 153int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode);
151 154
152int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, 155int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb,
153 data_exchange_cb_t cb, void *cb_context); 156 data_exchange_cb_t cb, void *cb_context);
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index e9a91488fe3d..e386e6c90b17 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -321,7 +321,8 @@ static void rawsock_destruct(struct sock *sk)
321 321
322 if (sk->sk_state == TCP_ESTABLISHED) { 322 if (sk->sk_state == TCP_ESTABLISHED) {
323 nfc_deactivate_target(nfc_rawsock(sk)->dev, 323 nfc_deactivate_target(nfc_rawsock(sk)->dev,
324 nfc_rawsock(sk)->target_idx); 324 nfc_rawsock(sk)->target_idx,
325 NFC_TARGET_MODE_IDLE);
325 nfc_put_device(nfc_rawsock(sk)->dev); 326 nfc_put_device(nfc_rawsock(sk)->dev);
326 } 327 }
327 328
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 315f5330b6e5..c88d0f2d3e01 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -620,7 +620,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
620 return 0; 620 return 0;
621} 621}
622 622
623static int ovs_vport_output(struct sock *sock, struct sk_buff *skb) 623static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb)
624{ 624{
625 struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage); 625 struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
626 struct vport *vport = data->vport; 626 struct vport *vport = data->vport;
@@ -679,12 +679,12 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)
679 skb_pull(skb, hlen); 679 skb_pull(skb, hlen);
680} 680}
681 681
682static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, 682static void ovs_fragment(struct net *net, struct vport *vport,
683 __be16 ethertype) 683 struct sk_buff *skb, u16 mru, __be16 ethertype)
684{ 684{
685 if (skb_network_offset(skb) > MAX_L2_LEN) { 685 if (skb_network_offset(skb) > MAX_L2_LEN) {
686 OVS_NLERR(1, "L2 header too long to fragment"); 686 OVS_NLERR(1, "L2 header too long to fragment");
687 return; 687 goto err;
688 } 688 }
689 689
690 if (ethertype == htons(ETH_P_IP)) { 690 if (ethertype == htons(ETH_P_IP)) {
@@ -700,7 +700,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
700 skb_dst_set_noref(skb, &ovs_dst); 700 skb_dst_set_noref(skb, &ovs_dst);
701 IPCB(skb)->frag_max_size = mru; 701 IPCB(skb)->frag_max_size = mru;
702 702
703 ip_do_fragment(skb->sk, skb, ovs_vport_output); 703 ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
704 refdst_drop(orig_dst); 704 refdst_drop(orig_dst);
705 } else if (ethertype == htons(ETH_P_IPV6)) { 705 } else if (ethertype == htons(ETH_P_IPV6)) {
706 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); 706 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
@@ -708,8 +708,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
708 struct rt6_info ovs_rt; 708 struct rt6_info ovs_rt;
709 709
710 if (!v6ops) { 710 if (!v6ops) {
711 kfree_skb(skb); 711 goto err;
712 return;
713 } 712 }
714 713
715 prepare_frag(vport, skb); 714 prepare_frag(vport, skb);
@@ -722,14 +721,18 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
722 skb_dst_set_noref(skb, &ovs_rt.dst); 721 skb_dst_set_noref(skb, &ovs_rt.dst);
723 IP6CB(skb)->frag_max_size = mru; 722 IP6CB(skb)->frag_max_size = mru;
724 723
725 v6ops->fragment(skb->sk, skb, ovs_vport_output); 724 v6ops->fragment(net, skb->sk, skb, ovs_vport_output);
726 refdst_drop(orig_dst); 725 refdst_drop(orig_dst);
727 } else { 726 } else {
728 WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", 727 WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
729 ovs_vport_name(vport), ntohs(ethertype), mru, 728 ovs_vport_name(vport), ntohs(ethertype), mru,
730 vport->dev->mtu); 729 vport->dev->mtu);
731 kfree_skb(skb); 730 goto err;
732 } 731 }
732
733 return;
734err:
735 kfree_skb(skb);
733} 736}
734 737
735static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, 738static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
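
ovs_fragment() and ovs_vport_output() now receive the datapath's struct net explicitly (read once via read_pnet(&dp->net) in do_output()) to match the namespace-aware ip_do_fragment()/v6ops->fragment() signatures, and the assorted failure exits collapse into a single kfree_skb() at the end. The consolidated shape, with a hypothetical function name and the per-family setup elided:

    static void example_fragment(struct net *net, struct sk_buff *skb,
                                 __be16 ethertype)
    {
        if (ethertype == htons(ETH_P_IP)) {
            /* dst and frag_max_size setup elided */
            ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
            return;
        }
        /* IPv6 and unsupported ethertypes funnel into one free path */
        kfree_skb(skb);
    }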
@@ -743,6 +746,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
743 if (likely(!mru || (skb->len <= mru + ETH_HLEN))) { 746 if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
744 ovs_vport_send(vport, skb); 747 ovs_vport_send(vport, skb);
745 } else if (mru <= vport->dev->mtu) { 748 } else if (mru <= vport->dev->mtu) {
749 struct net *net = read_pnet(&dp->net);
746 __be16 ethertype = key->eth.type; 750 __be16 ethertype = key->eth.type;
747 751
748 if (!is_flow_key_valid(key)) { 752 if (!is_flow_key_valid(key)) {
@@ -752,7 +756,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
752 ethertype = vlan_get_protocol(skb); 756 ethertype = vlan_get_protocol(skb);
753 } 757 }
754 758
755 ovs_fragment(vport, skb, mru, ethertype); 759 ovs_fragment(net, vport, skb, mru, ethertype);
756 } else { 760 } else {
757 kfree_skb(skb); 761 kfree_skb(skb);
758 } 762 }
@@ -765,7 +769,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
765 struct sw_flow_key *key, const struct nlattr *attr, 769 struct sw_flow_key *key, const struct nlattr *attr,
766 const struct nlattr *actions, int actions_len) 770 const struct nlattr *actions, int actions_len)
767{ 771{
768 struct ip_tunnel_info info;
769 struct dp_upcall_info upcall; 772 struct dp_upcall_info upcall;
770 const struct nlattr *a; 773 const struct nlattr *a;
771 int rem; 774 int rem;
@@ -793,11 +796,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
793 if (vport) { 796 if (vport) {
794 int err; 797 int err;
795 798
796 upcall.egress_tun_info = &info; 799 err = dev_fill_metadata_dst(vport->dev, skb);
797 err = ovs_vport_get_egress_tun_info(vport, skb, 800 if (!err)
798 &upcall); 801 upcall.egress_tun_info = skb_tunnel_info(skb);
799 if (err)
800 upcall.egress_tun_info = NULL;
801 } 802 }
802 803
803 break; 804 break;
@@ -968,7 +969,7 @@ static int execute_masked_set_action(struct sk_buff *skb,
968 case OVS_KEY_ATTR_CT_STATE: 969 case OVS_KEY_ATTR_CT_STATE:
969 case OVS_KEY_ATTR_CT_ZONE: 970 case OVS_KEY_ATTR_CT_ZONE:
970 case OVS_KEY_ATTR_CT_MARK: 971 case OVS_KEY_ATTR_CT_MARK:
971 case OVS_KEY_ATTR_CT_LABEL: 972 case OVS_KEY_ATTR_CT_LABELS:
972 err = -EINVAL; 973 err = -EINVAL;
973 break; 974 break;
974 } 975 }
@@ -1099,12 +1100,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
1099 break; 1100 break;
1100 1101
1101 case OVS_ACTION_ATTR_CT: 1102 case OVS_ACTION_ATTR_CT:
1103 if (!is_flow_key_valid(key)) {
1104 err = ovs_flow_key_update(skb, key);
1105 if (err)
1106 return err;
1107 }
1108
1102 err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key, 1109 err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
1103 nla_data(a)); 1110 nla_data(a));
1104 1111
1105 /* Hide stolen IP fragments from user space. */ 1112 /* Hide stolen IP fragments from user space. */
1106 if (err == -EINPROGRESS) 1113 if (err)
1107 return 0; 1114 return err == -EINPROGRESS ? 0 : err;
1108 break; 1115 break;
1109 } 1116 }
1110 1117
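
Two behavioural changes in the OVS_ACTION_ATTR_CT arm above: the flow key is re-extracted if a preceding action invalidated it, and conntrack errors other than -EINPROGRESS (a stolen fragment, hidden from userspace) now propagate instead of being swallowed. Condensed:

    if (!is_flow_key_valid(key)) {
        err = ovs_flow_key_update(skb, key);    /* refresh stale key */
        if (err)
            return err;
    }

    err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key, nla_data(a));
    if (err)
        return err == -EINPROGRESS ? 0 : err;   /* hide stolen frags */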
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 002a755fa07e..c2cc11168fd5 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -37,9 +37,9 @@ struct md_mark {
37}; 37};
38 38
39/* Metadata label for masked write to conntrack label. */ 39/* Metadata label for masked write to conntrack label. */
40struct md_label { 40struct md_labels {
41 struct ovs_key_ct_label value; 41 struct ovs_key_ct_labels value;
42 struct ovs_key_ct_label mask; 42 struct ovs_key_ct_labels mask;
43}; 43};
44 44
45/* Conntrack action context for execution. */ 45/* Conntrack action context for execution. */
@@ -47,10 +47,10 @@ struct ovs_conntrack_info {
47 struct nf_conntrack_helper *helper; 47 struct nf_conntrack_helper *helper;
48 struct nf_conntrack_zone zone; 48 struct nf_conntrack_zone zone;
49 struct nf_conn *ct; 49 struct nf_conn *ct;
50 u32 flags; 50 u8 commit : 1;
51 u16 family; 51 u16 family;
52 struct md_mark mark; 52 struct md_mark mark;
53 struct md_label label; 53 struct md_labels labels;
54}; 54};
55 55
56static u16 key_to_nfproto(const struct sw_flow_key *key) 56static u16 key_to_nfproto(const struct sw_flow_key *key)
@@ -109,21 +109,21 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct)
109#endif 109#endif
110} 110}
111 111
112static void ovs_ct_get_label(const struct nf_conn *ct, 112static void ovs_ct_get_labels(const struct nf_conn *ct,
113 struct ovs_key_ct_label *label) 113 struct ovs_key_ct_labels *labels)
114{ 114{
115 struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; 115 struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
116 116
117 if (cl) { 117 if (cl) {
118 size_t len = cl->words * sizeof(long); 118 size_t len = cl->words * sizeof(long);
119 119
120 if (len > OVS_CT_LABEL_LEN) 120 if (len > OVS_CT_LABELS_LEN)
121 len = OVS_CT_LABEL_LEN; 121 len = OVS_CT_LABELS_LEN;
122 else if (len < OVS_CT_LABEL_LEN) 122 else if (len < OVS_CT_LABELS_LEN)
123 memset(label, 0, OVS_CT_LABEL_LEN); 123 memset(labels, 0, OVS_CT_LABELS_LEN);
124 memcpy(label, cl->bits, len); 124 memcpy(labels, cl->bits, len);
125 } else { 125 } else {
126 memset(label, 0, OVS_CT_LABEL_LEN); 126 memset(labels, 0, OVS_CT_LABELS_LEN);
127 } 127 }
128} 128}
129 129
@@ -134,7 +134,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
134 key->ct.state = state; 134 key->ct.state = state;
135 key->ct.zone = zone->id; 135 key->ct.zone = zone->id;
136 key->ct.mark = ovs_ct_get_mark(ct); 136 key->ct.mark = ovs_ct_get_mark(ct);
137 ovs_ct_get_label(ct, &key->ct.label); 137 ovs_ct_get_labels(ct, &key->ct.labels);
138} 138}
139 139
140/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has 140/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
@@ -151,6 +151,8 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
151 ct = nf_ct_get(skb, &ctinfo); 151 ct = nf_ct_get(skb, &ctinfo);
152 if (ct) { 152 if (ct) {
153 state = ovs_ct_get_state(ctinfo); 153 state = ovs_ct_get_state(ctinfo);
154 if (!nf_ct_is_confirmed(ct))
155 state |= OVS_CS_F_NEW;
154 if (ct->master) 156 if (ct->master)
155 state |= OVS_CS_F_RELATED; 157 state |= OVS_CS_F_RELATED;
156 zone = nf_ct_zone(ct); 158 zone = nf_ct_zone(ct);
@@ -167,7 +169,7 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
167 169
168int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) 170int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
169{ 171{
170 if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) 172 if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
171 return -EMSGSIZE; 173 return -EMSGSIZE;
172 174
173 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 175 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
@@ -179,8 +181,8 @@ int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
179 return -EMSGSIZE; 181 return -EMSGSIZE;
180 182
181 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 183 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
182 nla_put(skb, OVS_KEY_ATTR_CT_LABEL, sizeof(key->ct.label), 184 nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels),
183 &key->ct.label)) 185 &key->ct.labels))
184 return -EMSGSIZE; 186 return -EMSGSIZE;
185 187
186 return 0; 188 return 0;
@@ -213,18 +215,15 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
213#endif 215#endif
214} 216}
215 217
216static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key, 218static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
217 const struct ovs_key_ct_label *label, 219 const struct ovs_key_ct_labels *labels,
218 const struct ovs_key_ct_label *mask) 220 const struct ovs_key_ct_labels *mask)
219{ 221{
220 enum ip_conntrack_info ctinfo; 222 enum ip_conntrack_info ctinfo;
221 struct nf_conn_labels *cl; 223 struct nf_conn_labels *cl;
222 struct nf_conn *ct; 224 struct nf_conn *ct;
223 int err; 225 int err;
224 226
225 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS))
226 return -ENOTSUPP;
227
228 /* The connection could be invalid, in which case set_label is no-op.*/ 227 /* The connection could be invalid, in which case set_label is no-op.*/
229 ct = nf_ct_get(skb, &ctinfo); 228 ct = nf_ct_get(skb, &ctinfo);
230 if (!ct) 229 if (!ct)
@@ -235,15 +234,15 @@ static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key,
235 nf_ct_labels_ext_add(ct); 234 nf_ct_labels_ext_add(ct);
236 cl = nf_ct_labels_find(ct); 235 cl = nf_ct_labels_find(ct);
237 } 236 }
238 if (!cl || cl->words * sizeof(long) < OVS_CT_LABEL_LEN) 237 if (!cl || cl->words * sizeof(long) < OVS_CT_LABELS_LEN)
239 return -ENOSPC; 238 return -ENOSPC;
240 239
241 err = nf_connlabels_replace(ct, (u32 *)label, (u32 *)mask, 240 err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask,
242 OVS_CT_LABEL_LEN / sizeof(u32)); 241 OVS_CT_LABELS_LEN / sizeof(u32));
243 if (err) 242 if (err)
244 return err; 243 return err;
245 244
246 ovs_ct_get_label(ct, &key->ct.label); 245 ovs_ct_get_labels(ct, &key->ct.labels);
247 return 0; 246 return 0;
248} 247}
249 248
@@ -294,6 +293,9 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
294 return helper->help(skb, protoff, ct, ctinfo); 293 return helper->help(skb, protoff, ct, ctinfo);
295} 294}
296 295
296/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
297 * value if 'skb' is freed.
298 */
297static int handle_fragments(struct net *net, struct sw_flow_key *key, 299static int handle_fragments(struct net *net, struct sw_flow_key *key,
298 u16 zone, struct sk_buff *skb) 300 u16 zone, struct sk_buff *skb)
299{ 301{
@@ -304,32 +306,40 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
304 int err; 306 int err;
305 307
306 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 308 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
307 err = ip_defrag(skb, user); 309 err = ip_defrag(net, skb, user);
308 if (err) 310 if (err)
309 return err; 311 return err;
310 312
311 ovs_cb.mru = IPCB(skb)->frag_max_size; 313 ovs_cb.mru = IPCB(skb)->frag_max_size;
312 } else if (key->eth.type == htons(ETH_P_IPV6)) {
313#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 314#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
315 } else if (key->eth.type == htons(ETH_P_IPV6)) {
314 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; 316 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
315 struct sk_buff *reasm; 317 struct sk_buff *reasm;
316 318
317 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); 319 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
318 reasm = nf_ct_frag6_gather(skb, user); 320 reasm = nf_ct_frag6_gather(net, skb, user);
319 if (!reasm) 321 if (!reasm)
320 return -EINPROGRESS; 322 return -EINPROGRESS;
321 323
322 if (skb == reasm) 324 if (skb == reasm) {
325 kfree_skb(skb);
323 return -EINVAL; 326 return -EINVAL;
327 }
328
329 /* Don't free 'skb' even though it is one of the original
330 * fragments, as we're going to morph it into the head.
331 */
332 skb_get(skb);
333 nf_ct_frag6_consume_orig(reasm);
324 334
325 key->ip.proto = ipv6_hdr(reasm)->nexthdr; 335 key->ip.proto = ipv6_hdr(reasm)->nexthdr;
326 skb_morph(skb, reasm); 336 skb_morph(skb, reasm);
337 skb->next = reasm->next;
327 consume_skb(reasm); 338 consume_skb(reasm);
328 ovs_cb.mru = IP6CB(skb)->frag_max_size; 339 ovs_cb.mru = IP6CB(skb)->frag_max_size;
329#else
330 return -EPFNOSUPPORT;
331#endif 340#endif
332 } else { 341 } else {
342 kfree_skb(skb);
333 return -EPFNOSUPPORT; 343 return -EPFNOSUPPORT;
334 } 344 }
335 345
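
The IPv6 branch of handle_fragments() above deserves a closer read: nf_ct_frag6_gather() hands back a reassembled head that may alias one of the queued originals, so the current skb is pinned with skb_get() before the reassembler's own fragment references are released, then morphed into the head. Condensed from the hunk, with the error paths elided and the surrounding declarations assumed:

	/* 'reasm' is the head returned by nf_ct_frag6_gather(net, skb, user) */
	skb_get(skb);                    /* pin 'skb' across the morph */
	nf_ct_frag6_consume_orig(reasm); /* drop the queue's refs on originals */
	key->ip.proto = ipv6_hdr(reasm)->nexthdr;
	skb_morph(skb, reasm);           /* 'skb' takes over the reassembled data */
	skb->next = reasm->next;         /* preserve any chained packets */
	consume_skb(reasm);              /* only the emptied shell remains */
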
@@ -347,7 +357,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
347{ 357{
348 struct nf_conntrack_tuple tuple; 358 struct nf_conntrack_tuple tuple;
349 359
350 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple)) 360 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
351 return NULL; 361 return NULL;
352 return __nf_ct_expect_find(net, zone, &tuple); 362 return __nf_ct_expect_find(net, zone, &tuple);
353} 363}
@@ -377,7 +387,7 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
377 return true; 387 return true;
378} 388}
379 389
380static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, 390static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
381 const struct ovs_conntrack_info *info, 391 const struct ovs_conntrack_info *info,
382 struct sk_buff *skb) 392 struct sk_buff *skb)
383{ 393{
@@ -408,6 +418,8 @@ static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
408 } 418 }
409 } 419 }
410 420
421 ovs_ct_update_key(skb, key, true);
422
411 return 0; 423 return 0;
412} 424}
413 425
@@ -430,8 +442,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
430 err = __ovs_ct_lookup(net, key, info, skb); 442 err = __ovs_ct_lookup(net, key, info, skb);
431 if (err) 443 if (err)
432 return err; 444 return err;
433
434 ovs_ct_update_key(skb, key, true);
435 } 445 }
436 446
437 return 0; 447 return 0;
@@ -460,22 +470,23 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
460 if (nf_conntrack_confirm(skb) != NF_ACCEPT) 470 if (nf_conntrack_confirm(skb) != NF_ACCEPT)
461 return -EINVAL; 471 return -EINVAL;
462 472
463 ovs_ct_update_key(skb, key, true);
464
465 return 0; 473 return 0;
466} 474}
467 475
468static bool label_nonzero(const struct ovs_key_ct_label *label) 476static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
469{ 477{
470 size_t i; 478 size_t i;
471 479
472 for (i = 0; i < sizeof(*label); i++) 480 for (i = 0; i < sizeof(*labels); i++)
473 if (label->ct_label[i]) 481 if (labels->ct_labels[i])
474 return true; 482 return true;
475 483
476 return false; 484 return false;
477} 485}
478 486
487/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
488 * value if 'skb' is freed.
489 */
479int ovs_ct_execute(struct net *net, struct sk_buff *skb, 490int ovs_ct_execute(struct net *net, struct sk_buff *skb,
480 struct sw_flow_key *key, 491 struct sw_flow_key *key,
481 const struct ovs_conntrack_info *info) 492 const struct ovs_conntrack_info *info)
@@ -493,7 +504,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
493 return err; 504 return err;
494 } 505 }
495 506
496 if (info->flags & OVS_CT_F_COMMIT) 507 if (info->commit)
497 err = ovs_ct_commit(net, key, info, skb); 508 err = ovs_ct_commit(net, key, info, skb);
498 else 509 else
499 err = ovs_ct_lookup(net, key, info, skb); 510 err = ovs_ct_lookup(net, key, info, skb);
@@ -506,11 +517,13 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
506 if (err) 517 if (err)
507 goto err; 518 goto err;
508 } 519 }
509 if (label_nonzero(&info->label.mask)) 520 if (labels_nonzero(&info->labels.mask))
510 err = ovs_ct_set_label(skb, key, &info->label.value, 521 err = ovs_ct_set_labels(skb, key, &info->labels.value,
511 &info->label.mask); 522 &info->labels.mask);
512err: 523err:
513 skb_push(skb, nh_ofs); 524 skb_push(skb, nh_ofs);
525 if (err)
526 kfree_skb(skb);
514 return err; 527 return err;
515} 528}
516 529
@@ -539,14 +552,13 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
539} 552}
540 553
541static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { 554static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
542 [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32), 555 [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 },
543 .maxlen = sizeof(u32) },
544 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), 556 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
545 .maxlen = sizeof(u16) }, 557 .maxlen = sizeof(u16) },
546 [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), 558 [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark),
547 .maxlen = sizeof(struct md_mark) }, 559 .maxlen = sizeof(struct md_mark) },
548 [OVS_CT_ATTR_LABEL] = { .minlen = sizeof(struct md_label), 560 [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels),
549 .maxlen = sizeof(struct md_label) }, 561 .maxlen = sizeof(struct md_labels) },
550 [OVS_CT_ATTR_HELPER] = { .minlen = 1, 562 [OVS_CT_ATTR_HELPER] = { .minlen = 1,
551 .maxlen = NF_CT_HELPER_NAME_LEN } 563 .maxlen = NF_CT_HELPER_NAME_LEN }
552}; 564};
@@ -576,8 +588,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
576 } 588 }
577 589
578 switch (type) { 590 switch (type) {
579 case OVS_CT_ATTR_FLAGS: 591 case OVS_CT_ATTR_COMMIT:
580 info->flags = nla_get_u32(a); 592 info->commit = true;
581 break; 593 break;
582#ifdef CONFIG_NF_CONNTRACK_ZONES 594#ifdef CONFIG_NF_CONNTRACK_ZONES
583 case OVS_CT_ATTR_ZONE: 595 case OVS_CT_ATTR_ZONE:
@@ -588,15 +600,23 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
588 case OVS_CT_ATTR_MARK: { 600 case OVS_CT_ATTR_MARK: {
589 struct md_mark *mark = nla_data(a); 601 struct md_mark *mark = nla_data(a);
590 602
603 if (!mark->mask) {
604 OVS_NLERR(log, "ct_mark mask cannot be 0");
605 return -EINVAL;
606 }
591 info->mark = *mark; 607 info->mark = *mark;
592 break; 608 break;
593 } 609 }
594#endif 610#endif
595#ifdef CONFIG_NF_CONNTRACK_LABELS 611#ifdef CONFIG_NF_CONNTRACK_LABELS
596 case OVS_CT_ATTR_LABEL: { 612 case OVS_CT_ATTR_LABELS: {
597 struct md_label *label = nla_data(a); 613 struct md_labels *labels = nla_data(a);
598 614
599 info->label = *label; 615 if (!labels_nonzero(&labels->mask)) {
616 OVS_NLERR(log, "ct_labels mask cannot be 0");
617 return -EINVAL;
618 }
619 info->labels = *labels;
600 break; 620 break;
601 } 621 }
602#endif 622#endif
@@ -633,7 +653,7 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
633 attr == OVS_KEY_ATTR_CT_MARK) 653 attr == OVS_KEY_ATTR_CT_MARK)
634 return true; 654 return true;
635 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 655 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
636 attr == OVS_KEY_ATTR_CT_LABEL) { 656 attr == OVS_KEY_ATTR_CT_LABELS) {
637 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 657 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
638 658
639 return ovs_net->xt_label; 659 return ovs_net->xt_label;
@@ -701,18 +721,19 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
701 if (!start) 721 if (!start)
702 return -EMSGSIZE; 722 return -EMSGSIZE;
703 723
704 if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags)) 724 if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT))
705 return -EMSGSIZE; 725 return -EMSGSIZE;
706 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 726 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
707 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) 727 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
708 return -EMSGSIZE; 728 return -EMSGSIZE;
709 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 729 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
710 nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), 730 nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
711 &ct_info->mark)) 731 &ct_info->mark))
712 return -EMSGSIZE; 732 return -EMSGSIZE;
713 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 733 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
714 nla_put(skb, OVS_CT_ATTR_LABEL, sizeof(ct_info->label), 734 labels_nonzero(&ct_info->labels.mask) &&
715 &ct_info->label)) 735 nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
736 &ct_info->labels))
716 return -EMSGSIZE; 737 return -EMSGSIZE;
717 if (ct_info->helper) { 738 if (ct_info->helper) {
718 if (nla_put_string(skb, OVS_CT_ATTR_HELPER, 739 if (nla_put_string(skb, OVS_CT_ATTR_HELPER,
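
The serialization hunk above mirrors the parse-side change: the u32 OVS_CT_ATTR_FLAGS word is gone, and commit is now a zero-length flag attribute whose mere presence means "commit this connection". A hedged sketch of the idiom (the helper name is hypothetical; nla_put_flag() is the stock netlink API):

	/* A flag attribute carries no payload, which is why the length
	 * table above uses .minlen = .maxlen = 0 for OVS_CT_ATTR_COMMIT.
	 */
	static int put_ct_commit(struct sk_buff *skb, bool commit)
	{
		if (commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT))
			return -EMSGSIZE;
		return 0;	/* absence of the attribute means "no commit" */
	}
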
@@ -737,7 +758,7 @@ void ovs_ct_free_action(const struct nlattr *a)
737 758
738void ovs_ct_init(struct net *net) 759void ovs_ct_init(struct net *net)
739{ 760{
740 unsigned int n_bits = sizeof(struct ovs_key_ct_label) * BITS_PER_BYTE; 761 unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
741 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 762 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
742 763
743 if (nf_connlabels_get(net, n_bits)) { 764 if (nf_connlabels_get(net, n_bits)) {
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index 43f5dd7a5577..a7544f405c16 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -34,6 +34,10 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
34void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); 34void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
35int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); 35int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
36void ovs_ct_free_action(const struct nlattr *a); 36void ovs_ct_free_action(const struct nlattr *a);
37
38#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
39 OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \
40 OVS_CS_F_INVALID | OVS_CS_F_TRACKED)
37#else 41#else
38#include <linux/errno.h> 42#include <linux/errno.h>
39 43
@@ -63,6 +67,7 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
63 struct sw_flow_key *key, 67 struct sw_flow_key *key,
64 const struct ovs_conntrack_info *info) 68 const struct ovs_conntrack_info *info)
65{ 69{
70 kfree_skb(skb);
66 return -ENOTSUPP; 71 return -ENOTSUPP;
67} 72}
68 73
@@ -72,7 +77,7 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb,
72 key->ct.state = 0; 77 key->ct.state = 0;
73 key->ct.zone = 0; 78 key->ct.zone = 0;
74 key->ct.mark = 0; 79 key->ct.mark = 0;
75 memset(&key->ct.label, 0, sizeof(key->ct.label)); 80 memset(&key->ct.labels, 0, sizeof(key->ct.labels));
76} 81}
77 82
78static inline int ovs_ct_put_key(const struct sw_flow_key *key, 83static inline int ovs_ct_put_key(const struct sw_flow_key *key,
@@ -82,5 +87,7 @@ static inline int ovs_ct_put_key(const struct sw_flow_key *key,
82} 87}
83 88
84static inline void ovs_ct_free_action(const struct nlattr *a) { } 89static inline void ovs_ct_free_action(const struct nlattr *a) { }
90
91#define CT_SUPPORTED_MASK 0
85#endif /* CONFIG_NF_CONNTRACK */ 92#endif /* CONFIG_NF_CONNTRACK */
86#endif /* ovs_conntrack.h */ 93#endif /* ovs_conntrack.h */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index b816ff871528..91a8b004dc51 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -91,8 +91,7 @@ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
91static void ovs_notify(struct genl_family *family, 91static void ovs_notify(struct genl_family *family,
92 struct sk_buff *skb, struct genl_info *info) 92 struct sk_buff *skb, struct genl_info *info)
93{ 93{
94 genl_notify(family, skb, genl_info_net(info), info->snd_portid, 94 genl_notify(family, skb, info, 0, GFP_KERNEL);
95 0, info->nlhdr, GFP_KERNEL);
96} 95}
97 96
98/** 97/**
@@ -490,9 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
490 489
491 if (upcall_info->egress_tun_info) { 490 if (upcall_info->egress_tun_info) {
492 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); 491 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
493 err = ovs_nla_put_egress_tunnel_key(user_skb, 492 err = ovs_nla_put_tunnel_info(user_skb,
494 upcall_info->egress_tun_info, 493 upcall_info->egress_tun_info);
495 upcall_info->egress_tun_opts);
496 BUG_ON(err); 494 BUG_ON(err);
497 nla_nest_end(user_skb, nla); 495 nla_nest_end(user_skb, nla);
498 } 496 }
@@ -1177,7 +1175,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1177 info, OVS_FLOW_CMD_NEW, false, 1175 info, OVS_FLOW_CMD_NEW, false,
1178 ufid_flags); 1176 ufid_flags);
1179 1177
1180 if (unlikely(IS_ERR(reply))) { 1178 if (IS_ERR(reply)) {
1181 error = PTR_ERR(reply); 1179 error = PTR_ERR(reply);
1182 goto err_unlock_ovs; 1180 goto err_unlock_ovs;
1183 } 1181 }
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index f88038a99f44..67bdecd9fdc1 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -117,7 +117,6 @@ struct ovs_skb_cb {
117 */ 117 */
118struct dp_upcall_info { 118struct dp_upcall_info {
119 struct ip_tunnel_info *egress_tun_info; 119 struct ip_tunnel_info *egress_tun_info;
120 const void *egress_tun_opts;
121 const struct nlattr *userdata; 120 const struct nlattr *userdata;
122 const struct nlattr *actions; 121 const struct nlattr *actions;
123 int actions_len; 122 int actions_len;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index c8db44ab2ee7..0ea128eeeab2 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -698,8 +698,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
698{ 698{
699 /* Extract metadata from packet. */ 699 /* Extract metadata from packet. */
700 if (tun_info) { 700 if (tun_info) {
701 if (ip_tunnel_info_af(tun_info) != AF_INET) 701 key->tun_proto = ip_tunnel_info_af(tun_info);
702 return -EINVAL;
703 memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key)); 702 memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
704 703
705 if (tun_info->options_len) { 704 if (tun_info->options_len) {
@@ -714,6 +713,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
714 key->tun_opts_len = 0; 713 key->tun_opts_len = 0;
715 } 714 }
716 } else { 715 } else {
716 key->tun_proto = 0;
717 key->tun_opts_len = 0; 717 key->tun_opts_len = 0;
718 memset(&key->tun_key, 0, sizeof(key->tun_key)); 718 memset(&key->tun_key, 0, sizeof(key->tun_key));
719 } 719 }
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index fe527d2dd4b7..1d055c559eaf 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -63,6 +63,7 @@ struct sw_flow_key {
63 u32 skb_mark; /* SKB mark. */ 63 u32 skb_mark; /* SKB mark. */
64 u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ 64 u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
65 } __packed phy; /* Safe when right after 'tun_key'. */ 65 } __packed phy; /* Safe when right after 'tun_key'. */
66 u8 tun_proto; /* Protocol of encapsulating tunnel. */
66 u32 ovs_flow_hash; /* Datapath computed hash value. */ 67 u32 ovs_flow_hash; /* Datapath computed hash value. */
67 u32 recirc_id; /* Recirculation ID. */ 68 u32 recirc_id; /* Recirculation ID. */
68 struct { 69 struct {
@@ -116,7 +117,7 @@ struct sw_flow_key {
116 u16 zone; 117 u16 zone;
117 u32 mark; 118 u32 mark;
118 u8 state; 119 u8 state;
119 struct ovs_key_ct_label label; 120 struct ovs_key_ct_labels labels;
120 } ct; 121 } ct;
121 122
122} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ 123} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
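
The new tun_proto byte discriminates the IPv4/IPv6 address union inside tun_key and doubles as the "tunnel present" test, since 0 is not a valid address family; that is why later hunks replace checks of tun_key.u.ipv4.dst with checks of tun_proto. A one-line sketch, assuming the sw_flow_key layout above (helper name hypothetical):

	static bool flow_key_has_tunnel(const struct sw_flow_key *key)
	{
		return key->tun_proto != 0;	/* AF_INET or AF_INET6 when set */
	}
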
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 5c030a4d7338..907d6fd28ede 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -262,8 +262,8 @@ size_t ovs_tun_key_attr_size(void)
262 * updating this function. 262 * updating this function.
263 */ 263 */
264 return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ 264 return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
265 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ 265 + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
266 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ 266 + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
267 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ 267 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
268 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ 268 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
269 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ 269 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
@@ -291,10 +291,10 @@ size_t ovs_key_attr_size(void)
291 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ 291 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
292 + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ 292 + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
293 + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ 293 + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
294 + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */ 294 + nla_total_size(4) /* OVS_KEY_ATTR_CT_STATE */
295 + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ 295 + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
296 + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ 296 + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
297 + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABEL */ 297 + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
298 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 298 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
299 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 299 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
300 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ 300 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
@@ -323,6 +323,8 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
323 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE }, 323 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE },
324 [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED, 324 [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED,
325 .next = ovs_vxlan_ext_key_lens }, 325 .next = ovs_vxlan_ext_key_lens },
326 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
327 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
326}; 328};
327 329
328/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 330/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
@@ -349,10 +351,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
349 [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, 351 [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
350 .next = ovs_tunnel_key_lens, }, 352 .next = ovs_tunnel_key_lens, },
351 [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, 353 [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
352 [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) }, 354 [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) },
353 [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, 355 [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
354 [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, 356 [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
355 [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) }, 357 [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
356}; 358};
357 359
358static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) 360static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -542,15 +544,15 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
542 return 0; 544 return 0;
543} 545}
544 546
545static int ipv4_tun_from_nlattr(const struct nlattr *attr, 547static int ip_tun_from_nlattr(const struct nlattr *attr,
546 struct sw_flow_match *match, bool is_mask, 548 struct sw_flow_match *match, bool is_mask,
547 bool log) 549 bool log)
548{ 550{
549 struct nlattr *a; 551 bool ttl = false, ipv4 = false, ipv6 = false;
550 int rem;
551 bool ttl = false;
552 __be16 tun_flags = 0; 552 __be16 tun_flags = 0;
553 int opts_type = 0; 553 int opts_type = 0;
554 struct nlattr *a;
555 int rem;
554 556
555 nla_for_each_nested(a, attr, rem) { 557 nla_for_each_nested(a, attr, rem) {
556 int type = nla_type(a); 558 int type = nla_type(a);
@@ -578,10 +580,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
578 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 580 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
579 SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src, 581 SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
580 nla_get_in_addr(a), is_mask); 582 nla_get_in_addr(a), is_mask);
583 ipv4 = true;
581 break; 584 break;
582 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 585 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
583 SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst, 586 SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
584 nla_get_in_addr(a), is_mask); 587 nla_get_in_addr(a), is_mask);
588 ipv4 = true;
589 break;
590 case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
 591 SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
592 nla_get_in6_addr(a), is_mask);
593 ipv6 = true;
594 break;
595 case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
596 SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
597 nla_get_in6_addr(a), is_mask);
598 ipv6 = true;
585 break; 599 break;
586 case OVS_TUNNEL_KEY_ATTR_TOS: 600 case OVS_TUNNEL_KEY_ATTR_TOS:
587 SW_FLOW_KEY_PUT(match, tun_key.tos, 601 SW_FLOW_KEY_PUT(match, tun_key.tos,
@@ -636,28 +650,46 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
636 opts_type = type; 650 opts_type = type;
637 break; 651 break;
638 default: 652 default:
639 OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", 653 OVS_NLERR(log, "Unknown IP tunnel attribute %d",
640 type); 654 type);
641 return -EINVAL; 655 return -EINVAL;
642 } 656 }
643 } 657 }
644 658
645 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); 659 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
660 if (is_mask)
661 SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
662 else
663 SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
664 false);
646 665
647 if (rem > 0) { 666 if (rem > 0) {
648 OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.", 667 OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
649 rem); 668 rem);
650 return -EINVAL; 669 return -EINVAL;
651 } 670 }
652 671
672 if (ipv4 && ipv6) {
673 OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
674 return -EINVAL;
675 }
676
653 if (!is_mask) { 677 if (!is_mask) {
654 if (!match->key->tun_key.u.ipv4.dst) { 678 if (!ipv4 && !ipv6) {
679 OVS_NLERR(log, "IP tunnel dst address not specified");
680 return -EINVAL;
681 }
682 if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
655 OVS_NLERR(log, "IPv4 tunnel dst address is zero"); 683 OVS_NLERR(log, "IPv4 tunnel dst address is zero");
656 return -EINVAL; 684 return -EINVAL;
657 } 685 }
686 if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
687 OVS_NLERR(log, "IPv6 tunnel dst address is zero");
688 return -EINVAL;
689 }
658 690
659 if (!ttl) { 691 if (!ttl) {
660 OVS_NLERR(log, "IPv4 tunnel TTL not specified."); 692 OVS_NLERR(log, "IP tunnel TTL not specified.");
661 return -EINVAL; 693 return -EINVAL;
662 } 694 }
663 } 695 }
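
Taken together, the key-side (non-mask) checks in ip_tun_from_nlattr() above enforce three rules, restated here as a compact sketch with the surrounding flags assumed:

	/* Key (not mask) validation rules, condensed from the hunk above */
	if (ipv4 && ipv6)		/* families are mutually exclusive */
		return -EINVAL;
	if (!is_mask && !ipv4 && !ipv6)	/* a dst address must be present */
		return -EINVAL;
	if (!is_mask && !ttl)		/* TTL is mandatory for keys */
		return -EINVAL;

For masks, the code instead forces tun_proto to exact match (0xff), so a flow can never wildcard the tunnel address family.
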
@@ -682,21 +714,36 @@ static int vxlan_opt_to_nlattr(struct sk_buff *skb,
682 return 0; 714 return 0;
683} 715}
684 716
685static int __ipv4_tun_to_nlattr(struct sk_buff *skb, 717static int __ip_tun_to_nlattr(struct sk_buff *skb,
686 const struct ip_tunnel_key *output, 718 const struct ip_tunnel_key *output,
687 const void *tun_opts, int swkey_tun_opts_len) 719 const void *tun_opts, int swkey_tun_opts_len,
720 unsigned short tun_proto)
688{ 721{
689 if (output->tun_flags & TUNNEL_KEY && 722 if (output->tun_flags & TUNNEL_KEY &&
690 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) 723 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
691 return -EMSGSIZE; 724 return -EMSGSIZE;
692 if (output->u.ipv4.src && 725 switch (tun_proto) {
693 nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, 726 case AF_INET:
694 output->u.ipv4.src)) 727 if (output->u.ipv4.src &&
695 return -EMSGSIZE; 728 nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
696 if (output->u.ipv4.dst && 729 output->u.ipv4.src))
697 nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, 730 return -EMSGSIZE;
698 output->u.ipv4.dst)) 731 if (output->u.ipv4.dst &&
699 return -EMSGSIZE; 732 nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
733 output->u.ipv4.dst))
734 return -EMSGSIZE;
735 break;
736 case AF_INET6:
737 if (!ipv6_addr_any(&output->u.ipv6.src) &&
738 nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
739 &output->u.ipv6.src))
740 return -EMSGSIZE;
741 if (!ipv6_addr_any(&output->u.ipv6.dst) &&
742 nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
743 &output->u.ipv6.dst))
744 return -EMSGSIZE;
745 break;
746 }
700 if (output->tos && 747 if (output->tos &&
701 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos)) 748 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
702 return -EMSGSIZE; 749 return -EMSGSIZE;
@@ -717,7 +764,7 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
717 if ((output->tun_flags & TUNNEL_OAM) && 764 if ((output->tun_flags & TUNNEL_OAM) &&
718 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) 765 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
719 return -EMSGSIZE; 766 return -EMSGSIZE;
720 if (tun_opts) { 767 if (swkey_tun_opts_len) {
721 if (output->tun_flags & TUNNEL_GENEVE_OPT && 768 if (output->tun_flags & TUNNEL_GENEVE_OPT &&
722 nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, 769 nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
723 swkey_tun_opts_len, tun_opts)) 770 swkey_tun_opts_len, tun_opts))
@@ -730,9 +777,10 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
730 return 0; 777 return 0;
731} 778}
732 779
733static int ipv4_tun_to_nlattr(struct sk_buff *skb, 780static int ip_tun_to_nlattr(struct sk_buff *skb,
734 const struct ip_tunnel_key *output, 781 const struct ip_tunnel_key *output,
735 const void *tun_opts, int swkey_tun_opts_len) 782 const void *tun_opts, int swkey_tun_opts_len,
783 unsigned short tun_proto)
736{ 784{
737 struct nlattr *nla; 785 struct nlattr *nla;
738 int err; 786 int err;
@@ -741,7 +789,8 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
741 if (!nla) 789 if (!nla)
742 return -EMSGSIZE; 790 return -EMSGSIZE;
743 791
744 err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len); 792 err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
793 tun_proto);
745 if (err) 794 if (err)
746 return err; 795 return err;
747 796
@@ -749,13 +798,13 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
749 return 0; 798 return 0;
750} 799}
751 800
752int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, 801int ovs_nla_put_tunnel_info(struct sk_buff *skb,
753 const struct ip_tunnel_info *egress_tun_info, 802 struct ip_tunnel_info *tun_info)
754 const void *egress_tun_opts)
755{ 803{
756 return __ipv4_tun_to_nlattr(skb, &egress_tun_info->key, 804 return __ip_tun_to_nlattr(skb, &tun_info->key,
757 egress_tun_opts, 805 ip_tunnel_info_opts(tun_info),
758 egress_tun_info->options_len); 806 tun_info->options_len,
807 ip_tunnel_info_af(tun_info));
759} 808}
760 809
761static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, 810static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
@@ -806,15 +855,21 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
806 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 855 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
807 } 856 }
808 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 857 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
809 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 858 if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
810 is_mask, log) < 0) 859 is_mask, log) < 0)
811 return -EINVAL; 860 return -EINVAL;
812 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 861 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
813 } 862 }
814 863
815 if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) && 864 if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
816 ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { 865 ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
817 u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]); 866 u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
867
868 if (ct_state & ~CT_SUPPORTED_MASK) {
869 OVS_NLERR(log, "ct_state flags %08x unsupported",
870 ct_state);
871 return -EINVAL;
872 }
818 873
819 SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask); 874 SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask);
820 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); 875 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
@@ -833,14 +888,14 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
833 SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask); 888 SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
834 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK); 889 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
835 } 890 }
836 if (*attrs & (1 << OVS_KEY_ATTR_CT_LABEL) && 891 if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
837 ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABEL)) { 892 ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
838 const struct ovs_key_ct_label *cl; 893 const struct ovs_key_ct_labels *cl;
839 894
840 cl = nla_data(a[OVS_KEY_ATTR_CT_LABEL]); 895 cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
841 SW_FLOW_KEY_MEMCPY(match, ct.label, cl->ct_label, 896 SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
842 sizeof(*cl), is_mask); 897 sizeof(*cl), is_mask);
843 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABEL); 898 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
844 } 899 }
845 return 0; 900 return 0;
846} 901}
@@ -1093,6 +1148,9 @@ static void nlattr_set(struct nlattr *attr, u8 val,
1093 } else { 1148 } else {
1094 memset(nla_data(nla), val, nla_len(nla)); 1149 memset(nla_data(nla), val, nla_len(nla));
1095 } 1150 }
1151
1152 if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
1153 *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
1096 } 1154 }
1097} 1155}
1098 1156
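
Two guards keep unsupported ct_state bits out of the flow table: masks supplied by user space are rejected with -EINVAL in the metadata_from_nlattrs() hunk above, while the all-ones mask synthesized by nlattr_set() is silently clamped here. A worked example of the clamp, assuming the uapi flag values OVS_CS_F_NEW=0x01 through OVS_CS_F_TRACKED=0x20:

	/* CT_SUPPORTED_MASK == 0x3f under the assumed flag values:
	 *
	 *   0xffffffff & 0x3f == 0x3f
	 *
	 * so a fully wildcarded flow never claims ct_state bits the
	 * datapath cannot produce.
	 */
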
@@ -1194,7 +1252,7 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
1194 /* The userspace does not send tunnel attributes that 1252 /* The userspace does not send tunnel attributes that
1195 * are 0, but we should not wildcard them nonetheless. 1253 * are 0, but we should not wildcard them nonetheless.
1196 */ 1254 */
1197 if (match->key->tun_key.u.ipv4.dst) 1255 if (match->key->tun_proto)
1198 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 1256 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
1199 0xff, true); 1257 0xff, true);
1200 1258
@@ -1367,14 +1425,14 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1367 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) 1425 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1368 goto nla_put_failure; 1426 goto nla_put_failure;
1369 1427
1370 if ((swkey->tun_key.u.ipv4.dst || is_mask)) { 1428 if ((swkey->tun_proto || is_mask)) {
1371 const void *opts = NULL; 1429 const void *opts = NULL;
1372 1430
1373 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) 1431 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
1374 opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); 1432 opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
1375 1433
1376 if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, 1434 if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
1377 swkey->tun_opts_len)) 1435 swkey->tun_opts_len, swkey->tun_proto))
1378 goto nla_put_failure; 1436 goto nla_put_failure;
1379 } 1437 }
1380 1438
@@ -1877,7 +1935,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
1877 int err = 0, start, opts_type; 1935 int err = 0, start, opts_type;
1878 1936
1879 ovs_match_init(&match, &key, NULL); 1937 ovs_match_init(&match, &key, NULL);
1880 opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); 1938 opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
1881 if (opts_type < 0) 1939 if (opts_type < 0)
1882 return opts_type; 1940 return opts_type;
1883 1941
@@ -1913,6 +1971,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
1913 1971
1914 tun_info = &tun_dst->u.tun_info; 1972 tun_info = &tun_dst->u.tun_info;
1915 tun_info->mode = IP_TUNNEL_INFO_TX; 1973 tun_info->mode = IP_TUNNEL_INFO_TX;
1974 if (key.tun_proto == AF_INET6)
1975 tun_info->mode |= IP_TUNNEL_INFO_IPV6;
1916 tun_info->key = key.tun_key; 1976 tun_info->key = key.tun_key;
1917 1977
1918 /* We need to store the options in the action itself since 1978 /* We need to store the options in the action itself since
@@ -1973,7 +2033,7 @@ static int validate_set(const struct nlattr *a,
1973 case OVS_KEY_ATTR_PRIORITY: 2033 case OVS_KEY_ATTR_PRIORITY:
1974 case OVS_KEY_ATTR_SKB_MARK: 2034 case OVS_KEY_ATTR_SKB_MARK:
1975 case OVS_KEY_ATTR_CT_MARK: 2035 case OVS_KEY_ATTR_CT_MARK:
1976 case OVS_KEY_ATTR_CT_LABEL: 2036 case OVS_KEY_ATTR_CT_LABELS:
1977 case OVS_KEY_ATTR_ETHERNET: 2037 case OVS_KEY_ATTR_ETHERNET:
1978 break; 2038 break;
1979 2039
@@ -2374,10 +2434,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
2374 if (!start) 2434 if (!start)
2375 return -EMSGSIZE; 2435 return -EMSGSIZE;
2376 2436
2377 err = ipv4_tun_to_nlattr(skb, &tun_info->key, 2437 err = ovs_nla_put_tunnel_info(skb, tun_info);
2378 tun_info->options_len ?
2379 ip_tunnel_info_opts(tun_info) : NULL,
2380 tun_info->options_len);
2381 if (err) 2438 if (err)
2382 return err; 2439 return err;
2383 nla_nest_end(skb, start); 2440 nla_nest_end(skb, start);
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 6ca3f0baf449..47dd142eca1c 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -55,9 +55,9 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
55int ovs_nla_get_match(struct net *, struct sw_flow_match *, 55int ovs_nla_get_match(struct net *, struct sw_flow_match *,
56 const struct nlattr *key, const struct nlattr *mask, 56 const struct nlattr *key, const struct nlattr *mask,
57 bool log); 57 bool log);
58int ovs_nla_put_egress_tunnel_key(struct sk_buff *, 58
59 const struct ip_tunnel_info *, 59int ovs_nla_put_tunnel_info(struct sk_buff *skb,
60 const void *egress_tun_opts); 60 struct ip_tunnel_info *tun_info);
61 61
62bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); 62bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
63int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, 63int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index f2ea83ba4763..d073fff82fdb 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -93,7 +93,8 @@ struct sw_flow *ovs_flow_alloc(void)
93 93
94 /* Initialize the default stat node. */ 94 /* Initialize the default stat node. */
95 stats = kmem_cache_alloc_node(flow_stats_cache, 95 stats = kmem_cache_alloc_node(flow_stats_cache,
96 GFP_KERNEL | __GFP_ZERO, 0); 96 GFP_KERNEL | __GFP_ZERO,
97 node_online(0) ? 0 : NUMA_NO_NODE);
97 if (!stats) 98 if (!stats)
98 goto err; 99 goto err;
99 100
@@ -427,7 +428,7 @@ static u32 flow_hash(const struct sw_flow_key *key,
427 428
428static int flow_key_start(const struct sw_flow_key *key) 429static int flow_key_start(const struct sw_flow_key *key)
429{ 430{
430 if (key->tun_key.u.ipv4.dst) 431 if (key->tun_proto)
431 return 0; 432 return 0;
432 else 433 else
433 return rounddown(offsetof(struct sw_flow_key, phy), 434 return rounddown(offsetof(struct sw_flow_key, phy),
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 2735e9c4a3b8..efb736bb6855 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -52,18 +52,6 @@ static int geneve_get_options(const struct vport *vport,
52 return 0; 52 return 0;
53} 53}
54 54
55static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
56 struct dp_upcall_info *upcall)
57{
58 struct geneve_port *geneve_port = geneve_vport(vport);
59 struct net *net = ovs_dp_get_net(vport->dp);
60 __be16 dport = htons(geneve_port->port_no);
61 __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
62
63 return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
64 skb, IPPROTO_UDP, sport, dport);
65}
66
67static struct vport *geneve_tnl_create(const struct vport_parms *parms) 55static struct vport *geneve_tnl_create(const struct vport_parms *parms)
68{ 56{
69 struct net *net = ovs_dp_get_net(parms->dp); 57 struct net *net = ovs_dp_get_net(parms->dp);
@@ -128,9 +116,8 @@ static struct vport_ops ovs_geneve_vport_ops = {
128 .create = geneve_create, 116 .create = geneve_create,
129 .destroy = ovs_netdev_tunnel_destroy, 117 .destroy = ovs_netdev_tunnel_destroy,
130 .get_options = geneve_get_options, 118 .get_options = geneve_get_options,
131 .send = ovs_netdev_send, 119 .send = dev_queue_xmit,
132 .owner = THIS_MODULE, 120 .owner = THIS_MODULE,
133 .get_egress_tun_info = geneve_get_egress_tun_info,
134}; 121};
135 122
136static int __init ovs_geneve_tnl_init(void) 123static int __init ovs_geneve_tnl_init(void)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 4d24481669c9..c3257d78d3d2 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -84,18 +84,10 @@ static struct vport *gre_create(const struct vport_parms *parms)
84 return ovs_netdev_link(vport, parms->name); 84 return ovs_netdev_link(vport, parms->name);
85} 85}
86 86
87static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
88 struct dp_upcall_info *upcall)
89{
90 return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
91 skb, IPPROTO_GRE, 0, 0);
92}
93
94static struct vport_ops ovs_gre_vport_ops = { 87static struct vport_ops ovs_gre_vport_ops = {
95 .type = OVS_VPORT_TYPE_GRE, 88 .type = OVS_VPORT_TYPE_GRE,
96 .create = gre_create, 89 .create = gre_create,
97 .send = ovs_netdev_send, 90 .send = dev_queue_xmit,
98 .get_egress_tun_info = gre_get_egress_tun_info,
99 .destroy = ovs_netdev_tunnel_destroy, 91 .destroy = ovs_netdev_tunnel_destroy,
100 .owner = THIS_MODULE, 92 .owner = THIS_MODULE,
101}; 93};
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 388b8a6bf112..ec76398a792f 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -106,12 +106,45 @@ static void internal_dev_destructor(struct net_device *dev)
106 free_netdev(dev); 106 free_netdev(dev);
107} 107}
108 108
109static struct rtnl_link_stats64 *
110internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
111{
112 int i;
113
114 memset(stats, 0, sizeof(*stats));
115 stats->rx_errors = dev->stats.rx_errors;
116 stats->tx_errors = dev->stats.tx_errors;
117 stats->tx_dropped = dev->stats.tx_dropped;
118 stats->rx_dropped = dev->stats.rx_dropped;
119
120 for_each_possible_cpu(i) {
121 const struct pcpu_sw_netstats *percpu_stats;
122 struct pcpu_sw_netstats local_stats;
123 unsigned int start;
124
125 percpu_stats = per_cpu_ptr(dev->tstats, i);
126
127 do {
128 start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
129 local_stats = *percpu_stats;
130 } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
131
132 stats->rx_bytes += local_stats.rx_bytes;
133 stats->rx_packets += local_stats.rx_packets;
134 stats->tx_bytes += local_stats.tx_bytes;
135 stats->tx_packets += local_stats.tx_packets;
136 }
137
138 return stats;
139}
140
109static const struct net_device_ops internal_dev_netdev_ops = { 141static const struct net_device_ops internal_dev_netdev_ops = {
110 .ndo_open = internal_dev_open, 142 .ndo_open = internal_dev_open,
111 .ndo_stop = internal_dev_stop, 143 .ndo_stop = internal_dev_stop,
112 .ndo_start_xmit = internal_dev_xmit, 144 .ndo_start_xmit = internal_dev_xmit,
113 .ndo_set_mac_address = eth_mac_addr, 145 .ndo_set_mac_address = eth_mac_addr,
114 .ndo_change_mtu = internal_dev_change_mtu, 146 .ndo_change_mtu = internal_dev_change_mtu,
147 .ndo_get_stats64 = internal_get_stats,
115}; 148};
116 149
117static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { 150static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
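
internal_get_stats() above folds the per-CPU counters under a u64_stats seqcount: the begin/retry pair re-reads a CPU's snapshot if a writer raced with the copy, which is what makes 64-bit counters safe to read on 32-bit SMP (on 64-bit kernels the helpers compile away). The same loop is deleted from ovs_vport_get_stats() further down, since dev_get_stats() now reaches this code through ->ndo_get_stats64. The reader-side pattern, assuming the hunk's declarations:

	do {
		start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
		local_stats = *percpu_stats;	/* a torn read is retried */
	} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
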
@@ -161,6 +194,11 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
161 err = -ENOMEM; 194 err = -ENOMEM;
162 goto error_free_vport; 195 goto error_free_vport;
163 } 196 }
197 vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
198 if (!vport->dev->tstats) {
199 err = -ENOMEM;
200 goto error_free_netdev;
201 }
164 202
165 dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); 203 dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
166 internal_dev = internal_dev_priv(vport->dev); 204 internal_dev = internal_dev_priv(vport->dev);
@@ -173,7 +211,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
173 rtnl_lock(); 211 rtnl_lock();
174 err = register_netdevice(vport->dev); 212 err = register_netdevice(vport->dev);
175 if (err) 213 if (err)
176 goto error_free_netdev; 214 goto error_unlock;
177 215
178 dev_set_promiscuity(vport->dev, 1); 216 dev_set_promiscuity(vport->dev, 1);
179 rtnl_unlock(); 217 rtnl_unlock();
@@ -181,8 +219,10 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
181 219
182 return vport; 220 return vport;
183 221
184error_free_netdev: 222error_unlock:
185 rtnl_unlock(); 223 rtnl_unlock();
224 free_percpu(vport->dev->tstats);
225error_free_netdev:
186 free_netdev(vport->dev); 226 free_netdev(vport->dev);
187error_free_vport: 227error_free_vport:
188 ovs_vport_free(vport); 228 ovs_vport_free(vport);
@@ -198,26 +238,25 @@ static void internal_dev_destroy(struct vport *vport)
198 238
199 /* unregister_netdevice() waits for an RCU grace period. */ 239 /* unregister_netdevice() waits for an RCU grace period. */
200 unregister_netdevice(vport->dev); 240 unregister_netdevice(vport->dev);
201 241 free_percpu(vport->dev->tstats);
202 rtnl_unlock(); 242 rtnl_unlock();
203} 243}
204 244
205static void internal_dev_recv(struct vport *vport, struct sk_buff *skb) 245static netdev_tx_t internal_dev_recv(struct sk_buff *skb)
206{ 246{
207 struct net_device *netdev = vport->dev; 247 struct net_device *netdev = skb->dev;
208 struct pcpu_sw_netstats *stats; 248 struct pcpu_sw_netstats *stats;
209 249
210 if (unlikely(!(netdev->flags & IFF_UP))) { 250 if (unlikely(!(netdev->flags & IFF_UP))) {
211 kfree_skb(skb); 251 kfree_skb(skb);
212 netdev->stats.rx_dropped++; 252 netdev->stats.rx_dropped++;
213 return; 253 return NETDEV_TX_OK;
214 } 254 }
215 255
216 skb_dst_drop(skb); 256 skb_dst_drop(skb);
217 nf_reset(skb); 257 nf_reset(skb);
218 secpath_reset(skb); 258 secpath_reset(skb);
219 259
220 skb->dev = netdev;
221 skb->pkt_type = PACKET_HOST; 260 skb->pkt_type = PACKET_HOST;
222 skb->protocol = eth_type_trans(skb, netdev); 261 skb->protocol = eth_type_trans(skb, netdev);
223 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 262 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
@@ -229,6 +268,7 @@ static void internal_dev_recv(struct vport *vport, struct sk_buff *skb)
229 u64_stats_update_end(&stats->syncp); 268 u64_stats_update_end(&stats->syncp);
230 269
231 netif_rx(skb); 270 netif_rx(skb);
271 return NETDEV_TX_OK;
232} 272}
233 273
234static struct vport_ops ovs_internal_vport_ops = { 274static struct vport_ops ovs_internal_vport_ops = {
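
internal_dev_recv() now returns netdev_tx_t and takes only the skb, matching the reworked vport_ops.send signature (see the vport.h hunk at the end): netdev-backed and tunnel vports pass dev_queue_xmit directly, and the internal port -- presumably wired as .send = internal_dev_recv, though the assignment falls outside this hunk -- loops the packet back up the stack via netif_rx(). The common dispatch then reduces to:

	/* Sketch; the real ovs_vport_send() appears in the vport.c diff below */
	skb->dev = vport->dev;
	vport->ops->send(skb);	/* netdev_tx_t (*send)(struct sk_buff *) */
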
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index f7e8dcce7ada..b327368a3848 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -190,37 +190,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)
190} 190}
191EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy); 191EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy);
192 192
193static unsigned int packet_length(const struct sk_buff *skb)
194{
195 unsigned int length = skb->len - ETH_HLEN;
196
197 if (skb->protocol == htons(ETH_P_8021Q))
198 length -= VLAN_HLEN;
199
200 return length;
201}
202
203void ovs_netdev_send(struct vport *vport, struct sk_buff *skb)
204{
205 int mtu = vport->dev->mtu;
206
207 if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
208 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
209 vport->dev->name,
210 packet_length(skb), mtu);
211 vport->dev->stats.tx_errors++;
212 goto drop;
213 }
214
215 skb->dev = vport->dev;
216 dev_queue_xmit(skb);
217 return;
218
219drop:
220 kfree_skb(skb);
221}
222EXPORT_SYMBOL_GPL(ovs_netdev_send);
223
224/* Returns null if this device is not attached to a datapath. */ 193/* Returns null if this device is not attached to a datapath. */
225struct vport *ovs_netdev_get_vport(struct net_device *dev) 194struct vport *ovs_netdev_get_vport(struct net_device *dev)
226{ 195{
@@ -235,7 +204,7 @@ static struct vport_ops ovs_netdev_vport_ops = {
235 .type = OVS_VPORT_TYPE_NETDEV, 204 .type = OVS_VPORT_TYPE_NETDEV,
236 .create = netdev_create, 205 .create = netdev_create,
237 .destroy = netdev_destroy, 206 .destroy = netdev_destroy,
238 .send = ovs_netdev_send, 207 .send = dev_queue_xmit,
239}; 208};
240 209
241int __init ovs_netdev_init(void) 210int __init ovs_netdev_init(void)
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index bf22fcedbc69..19e29c12adcc 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -27,7 +27,6 @@
27struct vport *ovs_netdev_get_vport(struct net_device *dev); 27struct vport *ovs_netdev_get_vport(struct net_device *dev);
28 28
29struct vport *ovs_netdev_link(struct vport *vport, const char *name); 29struct vport *ovs_netdev_link(struct vport *vport, const char *name);
30void ovs_netdev_send(struct vport *vport, struct sk_buff *skb);
31void ovs_netdev_detach_dev(struct vport *); 30void ovs_netdev_detach_dev(struct vport *);
32 31
33int __init ovs_netdev_init(void); 32int __init ovs_netdev_init(void);
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index c11413d5075f..1605691d9414 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -146,31 +146,12 @@ static struct vport *vxlan_create(const struct vport_parms *parms)
146 return ovs_netdev_link(vport, parms->name); 146 return ovs_netdev_link(vport, parms->name);
147} 147}
148 148
149static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
150 struct dp_upcall_info *upcall)
151{
152 struct vxlan_dev *vxlan = netdev_priv(vport->dev);
153 struct net *net = ovs_dp_get_net(vport->dp);
154 __be16 dst_port = vxlan_dev_dst_port(vxlan);
155 __be16 src_port;
156 int port_min;
157 int port_max;
158
159 inet_get_local_port_range(net, &port_min, &port_max);
160 src_port = udp_flow_src_port(net, skb, 0, 0, true);
161
162 return ovs_tunnel_get_egress_info(upcall, net,
163 skb, IPPROTO_UDP,
164 src_port, dst_port);
165}
166
167static struct vport_ops ovs_vxlan_netdev_vport_ops = { 149static struct vport_ops ovs_vxlan_netdev_vport_ops = {
168 .type = OVS_VPORT_TYPE_VXLAN, 150 .type = OVS_VPORT_TYPE_VXLAN,
169 .create = vxlan_create, 151 .create = vxlan_create,
170 .destroy = ovs_netdev_tunnel_destroy, 152 .destroy = ovs_netdev_tunnel_destroy,
171 .get_options = vxlan_get_options, 153 .get_options = vxlan_get_options,
172 .send = ovs_netdev_send, 154 .send = dev_queue_xmit,
173 .get_egress_tun_info = vxlan_get_egress_tun_info,
174}; 155};
175 156
176static int __init ovs_vxlan_tnl_init(void) 157static int __init ovs_vxlan_tnl_init(void)
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index dc81dc619aa2..0ac0fd004d7e 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -280,35 +280,19 @@ void ovs_vport_del(struct vport *vport)
280 */ 280 */
281void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) 281void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
282{ 282{
283 struct net_device *dev = vport->dev; 283 const struct rtnl_link_stats64 *dev_stats;
284 int i; 284 struct rtnl_link_stats64 temp;
285 285
286 memset(stats, 0, sizeof(*stats)); 286 dev_stats = dev_get_stats(vport->dev, &temp);
287 stats->rx_errors = dev->stats.rx_errors; 287 stats->rx_errors = dev_stats->rx_errors;
288 stats->tx_errors = dev->stats.tx_errors; 288 stats->tx_errors = dev_stats->tx_errors;
289 stats->tx_dropped = dev->stats.tx_dropped; 289 stats->tx_dropped = dev_stats->tx_dropped;
290 stats->rx_dropped = dev->stats.rx_dropped; 290 stats->rx_dropped = dev_stats->rx_dropped;
291 291
292 stats->rx_dropped += atomic_long_read(&dev->rx_dropped); 292 stats->rx_bytes = dev_stats->rx_bytes;
293 stats->tx_dropped += atomic_long_read(&dev->tx_dropped); 293 stats->rx_packets = dev_stats->rx_packets;
294 294 stats->tx_bytes = dev_stats->tx_bytes;
295 for_each_possible_cpu(i) { 295 stats->tx_packets = dev_stats->tx_packets;
296 const struct pcpu_sw_netstats *percpu_stats;
297 struct pcpu_sw_netstats local_stats;
298 unsigned int start;
299
300 percpu_stats = per_cpu_ptr(dev->tstats, i);
301
302 do {
303 start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
304 local_stats = *percpu_stats;
305 } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
306
307 stats->rx_bytes += local_stats.rx_bytes;
308 stats->rx_packets += local_stats.rx_packets;
309 stats->tx_bytes += local_stats.tx_bytes;
310 stats->tx_packets += local_stats.tx_packets;
311 }
312} 296}
313 297
314/** 298/**
@@ -460,6 +444,15 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
460 444
461 OVS_CB(skb)->input_vport = vport; 445 OVS_CB(skb)->input_vport = vport;
462 OVS_CB(skb)->mru = 0; 446 OVS_CB(skb)->mru = 0;
447 if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
448 u32 mark;
449
450 mark = skb->mark;
451 skb_scrub_packet(skb, true);
452 skb->mark = mark;
453 tun_info = NULL;
454 }
455
463 /* Extract flow from 'skb' into 'key'. */ 456 /* Extract flow from 'skb' into 'key'. */
464 error = ovs_flow_key_extract(tun_info, skb, &key); 457 error = ovs_flow_key_extract(tun_info, skb, &key);
465 if (unlikely(error)) { 458 if (unlikely(error)) {
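
The namespace-crossing check added to ovs_vport_receive() above scrubs packet state when a tunnel delivers an skb into a different netns, but deliberately preserves skb->mark and drops the tunnel metadata rather than trusting it across the boundary:

	/* Condensed from the hunk above */
	u32 mark = skb->mark;

	skb_scrub_packet(skb, true);	/* xnet == true: full scrub */
	skb->mark = mark;		/* the mark survives the scrub */
	tun_info = NULL;		/* do not trust foreign tunnel metadata */
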
@@ -487,60 +480,32 @@ void ovs_vport_deferred_free(struct vport *vport)
487} 480}
488EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); 481EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
489 482
490int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, 483static unsigned int packet_length(const struct sk_buff *skb)
491 struct net *net,
492 struct sk_buff *skb,
493 u8 ipproto,
494 __be16 tp_src,
495 __be16 tp_dst)
496{ 484{
497 struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info; 485 unsigned int length = skb->len - ETH_HLEN;
498 const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
499 const struct ip_tunnel_key *tun_key;
500 u32 skb_mark = skb->mark;
501 struct rtable *rt;
502 struct flowi4 fl;
503
504 if (unlikely(!tun_info))
505 return -EINVAL;
506 if (ip_tunnel_info_af(tun_info) != AF_INET)
507 return -EINVAL;
508
509 tun_key = &tun_info->key;
510 486
 511 /* Route lookup to get source IP address. 487 if (skb->protocol == htons(ETH_P_8021Q))
512 * The process may need to be changed if the corresponding process 488 length -= VLAN_HLEN;
513 * in vports ops changed.
514 */
515 rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
516 if (IS_ERR(rt))
517 return PTR_ERR(rt);
518
519 ip_rt_put(rt);
520 489
521 /* Generate egress_tun_info based on tun_info, 490 return length;
522 * saddr, tp_src and tp_dst
523 */
524 ip_tunnel_key_init(&egress_tun_info->key,
525 fl.saddr, tun_key->u.ipv4.dst,
526 tun_key->tos,
527 tun_key->ttl,
528 tp_src, tp_dst,
529 tun_key->tun_id,
530 tun_key->tun_flags);
531 egress_tun_info->options_len = tun_info->options_len;
532 egress_tun_info->mode = tun_info->mode;
533 upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info);
534 return 0;
535} 491}
536EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
537 492
538int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, 493void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
539 struct dp_upcall_info *upcall)
540{ 494{
541 /* get_egress_tun_info() is only implemented on tunnel ports. */ 495 int mtu = vport->dev->mtu;
542 if (unlikely(!vport->ops->get_egress_tun_info)) 496
543 return -EINVAL; 497 if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
498 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
499 vport->dev->name,
500 packet_length(skb), mtu);
501 vport->dev->stats.tx_errors++;
502 goto drop;
503 }
504
505 skb->dev = vport->dev;
506 vport->ops->send(skb);
507 return;
544 508
545 return vport->ops->get_egress_tun_info(vport, skb, upcall); 509drop:
510 kfree_skb(skb);
546} 511}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index a413f3ae6a7b..bdfd82a7c064 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -27,7 +27,6 @@
27#include <linux/skbuff.h> 27#include <linux/skbuff.h>
28#include <linux/spinlock.h> 28#include <linux/spinlock.h>
29#include <linux/u64_stats_sync.h> 29#include <linux/u64_stats_sync.h>
30#include <net/route.h>
31 30
32#include "datapath.h" 31#include "datapath.h"
33 32
@@ -53,16 +52,6 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);
53int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); 52int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
54u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); 53u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
55 54
56int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
57 struct net *net,
58 struct sk_buff *,
59 u8 ipproto,
60 __be16 tp_src,
61 __be16 tp_dst);
62
63int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
64 struct dp_upcall_info *upcall);
65
66/** 55/**
67 * struct vport_portids - array of netlink portids of a vport. 56 * struct vport_portids - array of netlink portids of a vport.
68 * must be protected by rcu. 57 * must be protected by rcu.
@@ -140,8 +129,6 @@ struct vport_parms {
140 * have any configuration. 129 * have any configuration.
141 * @send: Send a packet on the device. 130 * @send: Send a packet on the device.
142 * zero for dropped packets or negative for error. 131 * zero for dropped packets or negative for error.
143 * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
144 * a packet.
145 */ 132 */
146struct vport_ops { 133struct vport_ops {
147 enum ovs_vport_type type; 134 enum ovs_vport_type type;
@@ -153,10 +140,7 @@ struct vport_ops {
153 int (*set_options)(struct vport *, struct nlattr *); 140 int (*set_options)(struct vport *, struct nlattr *);
154 int (*get_options)(const struct vport *, struct sk_buff *); 141 int (*get_options)(const struct vport *, struct sk_buff *);
155 142
156 void (*send)(struct vport *, struct sk_buff *); 143 netdev_tx_t (*send) (struct sk_buff *skb);
157 int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
158 struct dp_upcall_info *upcall);
159
160 struct module *owner; 144 struct module *owner;
161 struct list_head list; 145 struct list_head list;
162}; 146};
@@ -234,9 +218,6 @@ static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
234 return rt; 218 return rt;
235} 219}
236 220
237static inline void ovs_vport_send(struct vport *vport, struct sk_buff *skb) 221void ovs_vport_send(struct vport *vport, struct sk_buff *skb);
238{
239 vport->ops->send(vport, skb);
240}
241 222
242#endif /* vport.h */ 223#endif /* vport.h */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index aa4b15c35884..1cf928fb573e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1423,7 +1423,7 @@ static unsigned int fanout_demux_bpf(struct packet_fanout *f,
1423 rcu_read_lock(); 1423 rcu_read_lock();
1424 prog = rcu_dereference(f->bpf_prog); 1424 prog = rcu_dereference(f->bpf_prog);
1425 if (prog) 1425 if (prog)
1426 ret = BPF_PROG_RUN(prog, skb) % num; 1426 ret = bpf_prog_run_clear_cb(prog, skb) % num;
1427 rcu_read_unlock(); 1427 rcu_read_unlock();
1428 1428
1429 return ret; 1429 return ret;
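
For context, fanout demultiplexing is configured from user space via the PACKET_FANOUT socket option; below is a minimal sketch using the standard AF_PACKET API (the interface name and group id 42 are illustrative, and error handling is abbreviated):

	#include <linux/if_packet.h>
	#include <linux/if_ether.h>
	#include <arpa/inet.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <string.h>

	/* Open a raw packet socket, bind it to one interface and join
	 * fanout group 42 in CPU-steering mode.
	 */
	static int open_fanout_socket(const char *ifname)
	{
		int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
		struct sockaddr_ll sll;
		int val;

		if (fd < 0)
			return -1;

		memset(&sll, 0, sizeof(sll));
		sll.sll_family = AF_PACKET;
		sll.sll_protocol = htons(ETH_P_ALL);
		sll.sll_ifindex = if_nametoindex(ifname);
		if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0)
			return -1;

		val = 42 | (PACKET_FANOUT_CPU << 16);	/* group id | mode */
		if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val)) < 0)
			return -1;
		return fd;
	}
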
@@ -1439,17 +1439,17 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1439{ 1439{
1440 struct packet_fanout *f = pt->af_packet_priv; 1440 struct packet_fanout *f = pt->af_packet_priv;
1441 unsigned int num = READ_ONCE(f->num_members); 1441 unsigned int num = READ_ONCE(f->num_members);
1442 struct net *net = read_pnet(&f->net);
1442 struct packet_sock *po; 1443 struct packet_sock *po;
1443 unsigned int idx; 1444 unsigned int idx;
1444 1445
1445 if (!net_eq(dev_net(dev), read_pnet(&f->net)) || 1446 if (!net_eq(dev_net(dev), net) || !num) {
1446 !num) {
1447 kfree_skb(skb); 1447 kfree_skb(skb);
1448 return 0; 1448 return 0;
1449 } 1449 }
1450 1450
1451 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { 1451 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
1452 skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); 1452 skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET);
1453 if (!skb) 1453 if (!skb)
1454 return 0; 1454 return 0;
1455 } 1455 }
@@ -1519,10 +1519,10 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
1519 1519
1520static bool match_fanout_group(struct packet_type *ptype, struct sock *sk) 1520static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
1521{ 1521{
1522 if (ptype->af_packet_priv == (void *)((struct packet_sock *)sk)->fanout) 1522 if (sk->sk_family != PF_PACKET)
1523 return true; 1523 return false;
1524 1524
1525 return false; 1525 return ptype->af_packet_priv == pkt_sk(sk)->fanout;
1526} 1526}
1527 1527
1528static void fanout_init_data(struct packet_fanout *f) 1528static void fanout_init_data(struct packet_fanout *f)
@@ -1567,7 +1567,7 @@ static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
1567 if (copy_from_user(&fprog, data, len)) 1567 if (copy_from_user(&fprog, data, len))
1568 return -EFAULT; 1568 return -EFAULT;
1569 1569
1570 ret = bpf_prog_create_from_user(&new, &fprog, NULL); 1570 ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
1571 if (ret) 1571 if (ret)
1572 return ret; 1572 return ret;
1573 1573
@@ -1741,6 +1741,20 @@ static void fanout_release(struct sock *sk)
1741 kfree_rcu(po->rollover, rcu); 1741 kfree_rcu(po->rollover, rcu);
1742} 1742}
1743 1743
1744static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
1745 struct sk_buff *skb)
1746{
1747 /* Earlier code assumed this would be a VLAN pkt, double-check
1748 * this now that we have the actual packet in hand. We can only
1749 * do this check on Ethernet devices.
1750 */
1751 if (unlikely(dev->type != ARPHRD_ETHER))
1752 return false;
1753
1754 skb_reset_mac_header(skb);
1755 return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
1756}
1757
1744static const struct proto_ops packet_ops; 1758static const struct proto_ops packet_ops;
1745 1759
1746static const struct proto_ops packet_ops_spkt; 1760static const struct proto_ops packet_ops_spkt;
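
Concretely: on a 1500-byte-MTU Ethernet device a tagged frame may carry ETH_HLEN (14) + VLAN_HLEN (4) bytes of header on top of the MTU, i.e. up to 1518 bytes on the wire, so the length checks in the hunks below tolerate the extra 4 bytes only after this helper has confirmed eth_hdr(skb)->h_proto == htons(ETH_P_8021Q).
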
@@ -1902,18 +1916,10 @@ retry:
1902 goto retry; 1916 goto retry;
1903 } 1917 }
1904 1918
1905 if (len > (dev->mtu + dev->hard_header_len + extra_len)) { 1919 if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
1906 /* Earlier code assumed this would be a VLAN pkt, 1920 !packet_extra_vlan_len_allowed(dev, skb)) {
1907 * double-check this now that we have the actual 1921 err = -EMSGSIZE;
1908 * packet in hand. 1922 goto out_unlock;
1909 */
1910 struct ethhdr *ehdr;
1911 skb_reset_mac_header(skb);
1912 ehdr = eth_hdr(skb);
1913 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
1914 err = -EMSGSIZE;
1915 goto out_unlock;
1916 }
1917 } 1923 }
1918 1924
1919 skb->protocol = proto; 1925 skb->protocol = proto;
@@ -1939,16 +1945,16 @@ out_free:
1939 return err; 1945 return err;
1940} 1946}
1941 1947
1942static unsigned int run_filter(const struct sk_buff *skb, 1948static unsigned int run_filter(struct sk_buff *skb,
1943 const struct sock *sk, 1949 const struct sock *sk,
1944 unsigned int res) 1950 unsigned int res)
1945{ 1951{
1946 struct sk_filter *filter; 1952 struct sk_filter *filter;
1947 1953
1948 rcu_read_lock(); 1954 rcu_read_lock();
1949 filter = rcu_dereference(sk->sk_filter); 1955 filter = rcu_dereference(sk->sk_filter);
1950 if (filter != NULL) 1956 if (filter != NULL)
1951 res = SK_RUN_FILTER(filter, skb); 1957 res = bpf_prog_run_clear_cb(filter->prog, skb);
1952 rcu_read_unlock(); 1958 rcu_read_unlock();
1953 1959
1954 return res; 1960 return res;
@@ -2332,6 +2338,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len)
2332 return false; 2338 return false;
2333} 2339}
2334 2340
2341static void tpacket_set_protocol(const struct net_device *dev,
2342 struct sk_buff *skb)
2343{
2344 if (dev->type == ARPHRD_ETHER) {
2345 skb_reset_mac_header(skb);
2346 skb->protocol = eth_hdr(skb)->h_proto;
2347 }
2348}
2349
2335static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, 2350static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2336 void *frame, struct net_device *dev, int size_max, 2351 void *frame, struct net_device *dev, int size_max,
2337 __be16 proto, unsigned char *addr, int hlen) 2352 __be16 proto, unsigned char *addr, int hlen)
@@ -2368,8 +2383,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2368 skb_reserve(skb, hlen); 2383 skb_reserve(skb, hlen);
2369 skb_reset_network_header(skb); 2384 skb_reset_network_header(skb);
2370 2385
2371 if (!packet_use_direct_xmit(po))
2372 skb_probe_transport_header(skb, 0);
2373 if (unlikely(po->tp_tx_has_off)) { 2386 if (unlikely(po->tp_tx_has_off)) {
2374 int off_min, off_max, off; 2387 int off_min, off_max, off;
2375 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); 2388 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2415,6 +2428,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2415 dev->hard_header_len); 2428 dev->hard_header_len);
2416 if (unlikely(err)) 2429 if (unlikely(err))
2417 return err; 2430 return err;
2431 if (!skb->protocol)
2432 tpacket_set_protocol(dev, skb);
2418 2433
2419 data += dev->hard_header_len; 2434 data += dev->hard_header_len;
2420 to_write -= dev->hard_header_len; 2435 to_write -= dev->hard_header_len;
@@ -2449,6 +2464,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2449 len = ((to_write > len_max) ? len_max : to_write); 2464 len = ((to_write > len_max) ? len_max : to_write);
2450 } 2465 }
2451 2466
2467 skb_probe_transport_header(skb, 0);
2468
2452 return tp_len; 2469 return tp_len;
2453} 2470}
2454 2471
@@ -2493,12 +2510,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2493 if (unlikely(!(dev->flags & IFF_UP))) 2510 if (unlikely(!(dev->flags & IFF_UP)))
2494 goto out_put; 2511 goto out_put;
2495 2512
2496 reserve = dev->hard_header_len + VLAN_HLEN; 2513 if (po->sk.sk_socket->type == SOCK_RAW)
2514 reserve = dev->hard_header_len;
2497 size_max = po->tx_ring.frame_size 2515 size_max = po->tx_ring.frame_size
2498 - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); 2516 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2499 2517
2500 if (size_max > dev->mtu + reserve) 2518 if (size_max > dev->mtu + reserve + VLAN_HLEN)
2501 size_max = dev->mtu + reserve; 2519 size_max = dev->mtu + reserve + VLAN_HLEN;
2502 2520
2503 do { 2521 do {
2504 ph = packet_current_frame(po, &po->tx_ring, 2522 ph = packet_current_frame(po, &po->tx_ring,
@@ -2525,18 +2543,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2525 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, 2543 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
2526 addr, hlen); 2544 addr, hlen);
2527 if (likely(tp_len >= 0) && 2545 if (likely(tp_len >= 0) &&
2528 tp_len > dev->mtu + dev->hard_header_len) { 2546 tp_len > dev->mtu + reserve &&
2529 struct ethhdr *ehdr; 2547 !packet_extra_vlan_len_allowed(dev, skb))
2530 /* Earlier code assumed this would be a VLAN pkt, 2548 tp_len = -EMSGSIZE;
2531 * double-check this now that we have the actual
2532 * packet in hand.
2533 */
2534 2549
2535 skb_reset_mac_header(skb);
2536 ehdr = eth_hdr(skb);
2537 if (ehdr->h_proto != htons(ETH_P_8021Q))
2538 tp_len = -EMSGSIZE;
2539 }
2540 if (unlikely(tp_len < 0)) { 2550 if (unlikely(tp_len < 0)) {
2541 if (po->tp_loss) { 2551 if (po->tp_loss) {
2542 __packet_set_status(po, ph, 2552 __packet_set_status(po, ph,
@@ -2630,6 +2640,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2630 __be16 proto; 2640 __be16 proto;
2631 unsigned char *addr; 2641 unsigned char *addr;
2632 int err, reserve = 0; 2642 int err, reserve = 0;
2643 struct sockcm_cookie sockc;
2633 struct virtio_net_hdr vnet_hdr = { 0 }; 2644 struct virtio_net_hdr vnet_hdr = { 0 };
2634 int offset = 0; 2645 int offset = 0;
2635 int vnet_hdr_len; 2646 int vnet_hdr_len;
@@ -2665,6 +2676,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2665 if (unlikely(!(dev->flags & IFF_UP))) 2676 if (unlikely(!(dev->flags & IFF_UP)))
2666 goto out_unlock; 2677 goto out_unlock;
2667 2678
2679 sockc.mark = sk->sk_mark;
2680 if (msg->msg_controllen) {
2681 err = sock_cmsg_send(sk, msg, &sockc);
2682 if (unlikely(err))
2683 goto out_unlock;
2684 }
2685
2668 if (sock->type == SOCK_RAW) 2686 if (sock->type == SOCK_RAW)
2669 reserve = dev->hard_header_len; 2687 reserve = dev->hard_header_len;
2670 if (po->has_vnet_hdr) { 2688 if (po->has_vnet_hdr) {
@@ -2757,24 +2775,16 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2757 2775
2758 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 2776 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
2759 2777
2760 if (!gso_type && (len > dev->mtu + reserve + extra_len)) { 2778 if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
2761 /* Earlier code assumed this would be a VLAN pkt, 2779 !packet_extra_vlan_len_allowed(dev, skb)) {
2762 * double-check this now that we have the actual 2780 err = -EMSGSIZE;
2763 * packet in hand. 2781 goto out_free;
2764 */
2765 struct ethhdr *ehdr;
2766 skb_reset_mac_header(skb);
2767 ehdr = eth_hdr(skb);
2768 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
2769 err = -EMSGSIZE;
2770 goto out_free;
2771 }
2772 } 2782 }
2773 2783
2774 skb->protocol = proto; 2784 skb->protocol = proto;
2775 skb->dev = dev; 2785 skb->dev = dev;
2776 skb->priority = sk->sk_priority; 2786 skb->priority = sk->sk_priority;
2777 skb->mark = sk->sk_mark; 2787 skb->mark = sockc.mark;
2778 2788
2779 packet_pick_tx_queue(dev, skb); 2789 packet_pick_tx_queue(dev, skb);
2780 2790
@@ -2799,8 +2809,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2799 len += vnet_hdr_len; 2809 len += vnet_hdr_len;
2800 } 2810 }
2801 2811
2802 if (!packet_use_direct_xmit(po)) 2812 skb_probe_transport_header(skb, reserve);
2803 skb_probe_transport_header(skb, reserve); 2813
2804 if (unlikely(extra_len == 4)) 2814 if (unlikely(extra_len == 4))
2805 skb->no_fcs = 1; 2815 skb->no_fcs = 1;
2806 2816
@@ -2903,22 +2913,40 @@ static int packet_release(struct socket *sock)
2903 * Attach a packet hook. 2913 * Attach a packet hook.
2904 */ 2914 */
2905 2915
2906static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) 2916static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
2917 __be16 proto)
2907{ 2918{
2908 struct packet_sock *po = pkt_sk(sk); 2919 struct packet_sock *po = pkt_sk(sk);
2909 struct net_device *dev_curr; 2920 struct net_device *dev_curr;
2910 __be16 proto_curr; 2921 __be16 proto_curr;
2911 bool need_rehook; 2922 bool need_rehook;
2923 struct net_device *dev = NULL;
2924 int ret = 0;
2925 bool unlisted = false;
2912 2926
2913 if (po->fanout) { 2927 if (po->fanout)
2914 if (dev)
2915 dev_put(dev);
2916
2917 return -EINVAL; 2928 return -EINVAL;
2918 }
2919 2929
2920 lock_sock(sk); 2930 lock_sock(sk);
2921 spin_lock(&po->bind_lock); 2931 spin_lock(&po->bind_lock);
2932 rcu_read_lock();
2933
2934 if (name) {
2935 dev = dev_get_by_name_rcu(sock_net(sk), name);
2936 if (!dev) {
2937 ret = -ENODEV;
2938 goto out_unlock;
2939 }
2940 } else if (ifindex) {
2941 dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
2942 if (!dev) {
2943 ret = -ENODEV;
2944 goto out_unlock;
2945 }
2946 }
2947
2948 if (dev)
2949 dev_hold(dev);
2922 2950
2923 proto_curr = po->prot_hook.type; 2951 proto_curr = po->prot_hook.type;
2924 dev_curr = po->prot_hook.dev; 2952 dev_curr = po->prot_hook.dev;
@@ -2926,14 +2954,29 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
2926 need_rehook = proto_curr != proto || dev_curr != dev; 2954 need_rehook = proto_curr != proto || dev_curr != dev;
2927 2955
2928 if (need_rehook) { 2956 if (need_rehook) {
2929 unregister_prot_hook(sk, true); 2957 if (po->running) {
2958 rcu_read_unlock();
2959 __unregister_prot_hook(sk, true);
2960 rcu_read_lock();
2961 dev_curr = po->prot_hook.dev;
2962 if (dev)
2963 unlisted = !dev_get_by_index_rcu(sock_net(sk),
2964 dev->ifindex);
2965 }
2930 2966
2931 po->num = proto; 2967 po->num = proto;
2932 po->prot_hook.type = proto; 2968 po->prot_hook.type = proto;
2933 po->prot_hook.dev = dev;
2934 2969
2935 po->ifindex = dev ? dev->ifindex : 0; 2970 if (unlikely(unlisted)) {
2936 packet_cached_dev_assign(po, dev); 2971 dev_put(dev);
2972 po->prot_hook.dev = NULL;
2973 po->ifindex = -1;
2974 packet_cached_dev_reset(po);
2975 } else {
2976 po->prot_hook.dev = dev;
2977 po->ifindex = dev ? dev->ifindex : 0;
2978 packet_cached_dev_assign(po, dev);
2979 }
2937 } 2980 }
2938 if (dev_curr) 2981 if (dev_curr)
2939 dev_put(dev_curr); 2982 dev_put(dev_curr);
@@ -2941,7 +2984,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
2941 if (proto == 0 || !need_rehook) 2984 if (proto == 0 || !need_rehook)
2942 goto out_unlock; 2985 goto out_unlock;
2943 2986
2944 if (!dev || (dev->flags & IFF_UP)) { 2987 if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
2945 register_prot_hook(sk); 2988 register_prot_hook(sk);
2946 } else { 2989 } else {
2947 sk->sk_err = ENETDOWN; 2990 sk->sk_err = ENETDOWN;
@@ -2950,9 +2993,10 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
2950 } 2993 }
2951 2994
2952out_unlock: 2995out_unlock:
2996 rcu_read_unlock();
2953 spin_unlock(&po->bind_lock); 2997 spin_unlock(&po->bind_lock);
2954 release_sock(sk); 2998 release_sock(sk);
2955 return 0; 2999 return ret;
2956} 3000}
2957 3001
2958/* 3002/*
@@ -2964,8 +3008,6 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
2964{ 3008{
2965 struct sock *sk = sock->sk; 3009 struct sock *sk = sock->sk;
2966 char name[15]; 3010 char name[15];
2967 struct net_device *dev;
2968 int err = -ENODEV;
2969 3011
2970 /* 3012 /*
2971 * Check legality 3013 * Check legality
@@ -2975,19 +3017,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
2975 return -EINVAL; 3017 return -EINVAL;
2976 strlcpy(name, uaddr->sa_data, sizeof(name)); 3018 strlcpy(name, uaddr->sa_data, sizeof(name));
2977 3019
2978 dev = dev_get_by_name(sock_net(sk), name); 3020 return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
2979 if (dev)
2980 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
2981 return err;
2982} 3021}
2983 3022
2984static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 3023static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
2985{ 3024{
2986 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; 3025 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
2987 struct sock *sk = sock->sk; 3026 struct sock *sk = sock->sk;
2988 struct net_device *dev = NULL;
2989 int err;
2990
2991 3027
2992 /* 3028 /*
2993 * Check legality 3029 * Check legality
@@ -2998,16 +3034,8 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
2998 if (sll->sll_family != AF_PACKET) 3034 if (sll->sll_family != AF_PACKET)
2999 return -EINVAL; 3035 return -EINVAL;
3000 3036
3001 if (sll->sll_ifindex) { 3037 return packet_do_bind(sk, NULL, sll->sll_ifindex,
3002 err = -ENODEV; 3038 sll->sll_protocol ? : pkt_sk(sk)->num);
3003 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
3004 if (dev == NULL)
3005 goto out;
3006 }
3007 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
3008
3009out:
3010 return err;
3011} 3039}
3012 3040
3013static struct proto packet_proto = { 3041static struct proto packet_proto = {
@@ -4081,7 +4109,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4081 err = -EINVAL; 4109 err = -EINVAL;
4082 if (unlikely((int)req->tp_block_size <= 0)) 4110 if (unlikely((int)req->tp_block_size <= 0))
4083 goto out; 4111 goto out;
4084 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) 4112 if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
4085 goto out; 4113 goto out;
4086 if (po->tp_version >= TPACKET_V3 && 4114 if (po->tp_version >= TPACKET_V3 &&
4087 (int)(req->tp_block_size - 4115 (int)(req->tp_block_size -
@@ -4093,8 +4121,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4093 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) 4121 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
4094 goto out; 4122 goto out;
4095 4123
4096 rb->frames_per_block = req->tp_block_size/req->tp_frame_size; 4124 rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
4097 if (unlikely(rb->frames_per_block <= 0)) 4125 if (unlikely(rb->frames_per_block == 0))
4098 goto out; 4126 goto out;
4099 if (unlikely((rb->frames_per_block * req->tp_block_nr) != 4127 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
4100 req->tp_frame_nr)) 4128 req->tp_frame_nr))
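
The PAGE_ALIGNED() and frames_per_block checks constrain what user space may request; a minimal TPACKET_V3 ring setup that satisfies them looks like this (values are illustrative, error handling abbreviated):

	#include <linux/if_packet.h>
	#include <sys/socket.h>
	#include <string.h>

	static int setup_rx_ring(int fd)
	{
		int version = TPACKET_V3;
		struct tpacket_req3 req;

		if (setsockopt(fd, SOL_PACKET, PACKET_VERSION,
			       &version, sizeof(version)) < 0)
			return -1;

		memset(&req, 0, sizeof(req));
		req.tp_block_size = 1 << 22;	/* 4 MiB, a PAGE_SIZE multiple */
		req.tp_block_nr = 64;
		req.tp_frame_size = 1 << 11;	/* 2 KiB, TPACKET_ALIGNMENT ok */
		req.tp_frame_nr = (req.tp_block_size / req.tp_frame_size)
				  * req.tp_block_nr;
		req.tp_retire_blk_tov = 60;	/* block timeout, ms */

		return setsockopt(fd, SOL_PACKET, PACKET_RX_RING,
				  &req, sizeof(req));
	}
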
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index a2f28a6d4dc5..b5476aebd68d 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -72,13 +72,7 @@ static int rds_release(struct socket *sock)
72 rds_clear_recv_queue(rs); 72 rds_clear_recv_queue(rs);
73 rds_cong_remove_socket(rs); 73 rds_cong_remove_socket(rs);
74 74
75 /*
76 * the binding lookup hash uses rcu, we need to
77 * make sure we synchronize_rcu before we free our
78 * entry
79 */
80 rds_remove_bound(rs); 75 rds_remove_bound(rs);
81 synchronize_rcu();
82 76
83 rds_send_drop_to(rs, NULL); 77 rds_send_drop_to(rs, NULL);
84 rds_rdma_drop_keys(rs); 78 rds_rdma_drop_keys(rs);
@@ -579,6 +573,7 @@ static void rds_exit(void)
579 rds_threads_exit(); 573 rds_threads_exit();
580 rds_stats_exit(); 574 rds_stats_exit();
581 rds_page_exit(); 575 rds_page_exit();
576 rds_bind_lock_destroy();
582 rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info); 577 rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info);
583 rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); 578 rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
584} 579}
@@ -588,9 +583,14 @@ static int rds_init(void)
588{ 583{
589 int ret; 584 int ret;
590 585
591 ret = rds_conn_init(); 586 ret = rds_bind_lock_init();
592 if (ret) 587 if (ret)
593 goto out; 588 goto out;
589
590 ret = rds_conn_init();
591 if (ret)
592 goto out_bind;
593
594 ret = rds_threads_init(); 594 ret = rds_threads_init();
595 if (ret) 595 if (ret)
596 goto out_conn; 596 goto out_conn;
@@ -624,6 +624,8 @@ out_conn:
624 rds_conn_exit(); 624 rds_conn_exit();
625 rds_cong_exit(); 625 rds_cong_exit();
626 rds_page_exit(); 626 rds_page_exit();
627out_bind:
628 rds_bind_lock_destroy();
627out: 629out:
628 return ret; 630 return ret;
629} 631}
diff --git a/net/rds/bind.c b/net/rds/bind.c
index dd666fb9b4e1..b22ea956522b 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -38,51 +38,16 @@
38#include <linux/ratelimit.h> 38#include <linux/ratelimit.h>
39#include "rds.h" 39#include "rds.h"
40 40
41#define BIND_HASH_SIZE 1024 41static struct rhashtable bind_hash_table;
42static struct hlist_head bind_hash_table[BIND_HASH_SIZE];
43static DEFINE_SPINLOCK(rds_bind_lock);
44 42
45static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port) 43static struct rhashtable_params ht_parms = {
46{ 44 .nelem_hint = 768,
47 return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) & 45 .key_len = sizeof(u64),
48 (BIND_HASH_SIZE - 1)); 46 .key_offset = offsetof(struct rds_sock, rs_bound_key),
49} 47 .head_offset = offsetof(struct rds_sock, rs_bound_node),
50 48 .max_size = 16384,
51static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port, 49 .min_size = 1024,
52 struct rds_sock *insert) 50};
53{
54 struct rds_sock *rs;
55 struct hlist_head *head = hash_to_bucket(addr, port);
56 u64 cmp;
57 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
58
59 rcu_read_lock();
60 hlist_for_each_entry_rcu(rs, head, rs_bound_node) {
61 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
62 be16_to_cpu(rs->rs_bound_port);
63
64 if (cmp == needle) {
65 rcu_read_unlock();
66 return rs;
67 }
68 }
69 rcu_read_unlock();
70
71 if (insert) {
72 /*
 73 * make sure our addr and port are set before
 74 * we are added to the list; other threads
 75 * reading under rcu will find us as soon as
 76 * the hlist_add_head_rcu is done
77 */
78 insert->rs_bound_addr = addr;
79 insert->rs_bound_port = port;
80 rds_sock_addref(insert);
81
82 hlist_add_head_rcu(&insert->rs_bound_node, head);
83 }
84 return NULL;
85}
86 51
87/* 52/*
88 * Return the rds_sock bound at the given local address. 53 * Return the rds_sock bound at the given local address.
@@ -92,10 +57,10 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
92 */ 57 */
93struct rds_sock *rds_find_bound(__be32 addr, __be16 port) 58struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
94{ 59{
60 u64 key = ((u64)addr << 32) | port;
95 struct rds_sock *rs; 61 struct rds_sock *rs;
96 62
97 rs = rds_bind_lookup(addr, port, NULL); 63 rs = rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms);
98
99 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) 64 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
100 rds_sock_addref(rs); 65 rds_sock_addref(rs);
101 else 66 else
@@ -103,15 +68,16 @@ struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
103 68
104 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, 69 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
105 ntohs(port)); 70 ntohs(port));
71
106 return rs; 72 return rs;
107} 73}
108 74
109/* returns -ve errno or +ve port */ 75/* returns -ve errno or +ve port */
110static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) 76static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
111{ 77{
112 unsigned long flags;
113 int ret = -EADDRINUSE; 78 int ret = -EADDRINUSE;
114 u16 rover, last; 79 u16 rover, last;
80 u64 key;
115 81
116 if (*port != 0) { 82 if (*port != 0) {
117 rover = be16_to_cpu(*port); 83 rover = be16_to_cpu(*port);
@@ -121,42 +87,49 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
121 last = rover - 1; 87 last = rover - 1;
122 } 88 }
123 89
124 spin_lock_irqsave(&rds_bind_lock, flags);
125
126 do { 90 do {
127 if (rover == 0) 91 if (rover == 0)
128 rover++; 92 rover++;
129 if (!rds_bind_lookup(addr, cpu_to_be16(rover), rs)) { 93
94 key = ((u64)addr << 32) | cpu_to_be16(rover);
95 if (rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms))
96 continue;
97
98 rs->rs_bound_key = key;
99 rs->rs_bound_addr = addr;
100 rs->rs_bound_port = cpu_to_be16(rover);
101 rs->rs_bound_node.next = NULL;
102 rds_sock_addref(rs);
103 if (!rhashtable_insert_fast(&bind_hash_table,
104 &rs->rs_bound_node, ht_parms)) {
130 *port = rs->rs_bound_port; 105 *port = rs->rs_bound_port;
131 ret = 0; 106 ret = 0;
132 rdsdebug("rs %p binding to %pI4:%d\n", 107 rdsdebug("rs %p binding to %pI4:%d\n",
133 rs, &addr, (int)ntohs(*port)); 108 rs, &addr, (int)ntohs(*port));
134 break; 109 break;
110 } else {
111 rds_sock_put(rs);
112 ret = -ENOMEM;
113 break;
135 } 114 }
136 } while (rover++ != last); 115 } while (rover++ != last);
137 116
138 spin_unlock_irqrestore(&rds_bind_lock, flags);
139
140 return ret; 117 return ret;
141} 118}
142 119
143void rds_remove_bound(struct rds_sock *rs) 120void rds_remove_bound(struct rds_sock *rs)
144{ 121{
145 unsigned long flags;
146 122
147 spin_lock_irqsave(&rds_bind_lock, flags); 123 if (!rs->rs_bound_addr)
124 return;
148 125
149 if (rs->rs_bound_addr) { 126 rdsdebug("rs %p unbinding from %pI4:%d\n",
150 rdsdebug("rs %p unbinding from %pI4:%d\n", 127 rs, &rs->rs_bound_addr,
151 rs, &rs->rs_bound_addr, 128 ntohs(rs->rs_bound_port));
152 ntohs(rs->rs_bound_port));
153
154 hlist_del_init_rcu(&rs->rs_bound_node);
155 rds_sock_put(rs);
156 rs->rs_bound_addr = 0;
157 }
158 129
159 spin_unlock_irqrestore(&rds_bind_lock, flags); 130 rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms);
131 rds_sock_put(rs);
132 rs->rs_bound_addr = 0;
160} 133}
161 134
162int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 135int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
@@ -182,7 +155,14 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
182 goto out; 155 goto out;
183 156
184 if (rs->rs_transport) { /* previously bound */ 157 if (rs->rs_transport) { /* previously bound */
185 ret = 0; 158 trans = rs->rs_transport;
159 if (trans->laddr_check(sock_net(sock->sk),
160 sin->sin_addr.s_addr) != 0) {
161 ret = -ENOPROTOOPT;
162 rds_remove_bound(rs);
163 } else {
164 ret = 0;
165 }
186 goto out; 166 goto out;
187 } 167 }
188 trans = rds_trans_get_preferred(sock_net(sock->sk), 168 trans = rds_trans_get_preferred(sock_net(sock->sk),
@@ -200,9 +180,15 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
200 180
201out: 181out:
202 release_sock(sk); 182 release_sock(sk);
203
204 /* we might have called rds_remove_bound on error */
205 if (ret)
206 synchronize_rcu();
207 return ret; 183 return ret;
208} 184}
185
186void rds_bind_lock_destroy(void)
187{
188 rhashtable_destroy(&bind_hash_table);
189}
190
191int rds_bind_lock_init(void)
192{
193 return rhashtable_init(&bind_hash_table, &ht_parms);
194}
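
The conversion works because address and port are packed into a single 64-bit key, so one rhashtable lookup replaces the old bucket walk. A sketch of the packing both the insert and lookup paths rely on (mirroring the hunks above; both halves stay in network byte order, since the packed value only has to be consistent, not numerically meaningful):

	static u64 rds_bound_key(__be32 addr, __be16 port)
	{
		return ((u64)addr << 32) | port;
	}
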
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 49adeef8090c..d4564036a339 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -128,10 +128,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
128 struct rds_transport *loop_trans; 128 struct rds_transport *loop_trans;
129 unsigned long flags; 129 unsigned long flags;
130 int ret; 130 int ret;
131 struct rds_transport *otrans = trans;
132 131
133 if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
134 goto new_conn;
135 rcu_read_lock(); 132 rcu_read_lock();
136 conn = rds_conn_lookup(net, head, laddr, faddr, trans); 133 conn = rds_conn_lookup(net, head, laddr, faddr, trans);
137 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && 134 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
@@ -147,7 +144,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
147 if (conn) 144 if (conn)
148 goto out; 145 goto out;
149 146
150new_conn:
151 conn = kmem_cache_zalloc(rds_conn_slab, gfp); 147 conn = kmem_cache_zalloc(rds_conn_slab, gfp);
152 if (!conn) { 148 if (!conn) {
153 conn = ERR_PTR(-ENOMEM); 149 conn = ERR_PTR(-ENOMEM);
@@ -207,6 +203,7 @@ new_conn:
207 203
208 atomic_set(&conn->c_state, RDS_CONN_DOWN); 204 atomic_set(&conn->c_state, RDS_CONN_DOWN);
209 conn->c_send_gen = 0; 205 conn->c_send_gen = 0;
206 conn->c_outgoing = (is_outgoing ? 1 : 0);
210 conn->c_reconnect_jiffies = 0; 207 conn->c_reconnect_jiffies = 0;
211 INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker); 208 INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
212 INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker); 209 INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
@@ -243,22 +240,13 @@ new_conn:
243 /* Creating normal conn */ 240 /* Creating normal conn */
244 struct rds_connection *found; 241 struct rds_connection *found;
245 242
246 if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP) 243 found = rds_conn_lookup(net, head, laddr, faddr, trans);
247 found = NULL;
248 else
249 found = rds_conn_lookup(net, head, laddr, faddr, trans);
250 if (found) { 244 if (found) {
251 trans->conn_free(conn->c_transport_data); 245 trans->conn_free(conn->c_transport_data);
252 kmem_cache_free(rds_conn_slab, conn); 246 kmem_cache_free(rds_conn_slab, conn);
253 conn = found; 247 conn = found;
254 } else { 248 } else {
255 if ((is_outgoing && otrans->t_type == RDS_TRANS_TCP) || 249 hlist_add_head_rcu(&conn->c_hash_node, head);
256 (otrans->t_type != RDS_TRANS_TCP)) {
257 /* Only the active side should be added to
258 * reconnect list for TCP.
259 */
260 hlist_add_head_rcu(&conn->c_hash_node, head);
261 }
262 rds_cong_add_conn(conn); 250 rds_cong_add_conn(conn);
263 rds_conn_count++; 251 rds_conn_count++;
264 } 252 }
@@ -337,7 +325,9 @@ void rds_conn_shutdown(struct rds_connection *conn)
337 rcu_read_lock(); 325 rcu_read_lock();
338 if (!hlist_unhashed(&conn->c_hash_node)) { 326 if (!hlist_unhashed(&conn->c_hash_node)) {
339 rcu_read_unlock(); 327 rcu_read_unlock();
340 rds_queue_reconnect(conn); 328 if (conn->c_trans->t_type != RDS_TRANS_TCP ||
329 conn->c_outgoing == 1)
330 rds_queue_reconnect(conn);
341 } else { 331 } else {
342 rcu_read_unlock(); 332 rcu_read_unlock();
343 } 333 }
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 2d3f2ab475df..f222885ac0c7 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -43,14 +43,14 @@
43#include "rds.h" 43#include "rds.h"
44#include "ib.h" 44#include "ib.h"
45 45
46static unsigned int fmr_pool_size = RDS_FMR_POOL_SIZE; 46unsigned int rds_ib_fmr_1m_pool_size = RDS_FMR_1M_POOL_SIZE;
47unsigned int fmr_message_size = RDS_FMR_SIZE + 1; /* +1 allows for unaligned MRs */ 47unsigned int rds_ib_fmr_8k_pool_size = RDS_FMR_8K_POOL_SIZE;
48unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; 48unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
49 49
50module_param(fmr_pool_size, int, 0444); 50module_param(rds_ib_fmr_1m_pool_size, int, 0444);
51MODULE_PARM_DESC(fmr_pool_size, " Max number of fmr per HCA"); 51MODULE_PARM_DESC(rds_ib_fmr_1m_pool_size, " Max number of 1M fmr per HCA");
52module_param(fmr_message_size, int, 0444); 52module_param(rds_ib_fmr_8k_pool_size, int, 0444);
53MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer"); 53MODULE_PARM_DESC(rds_ib_fmr_8k_pool_size, " Max number of 8K fmr per HCA");
54module_param(rds_ib_retry_count, int, 0444); 54module_param(rds_ib_retry_count, int, 0444);
55MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); 55MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
56 56
@@ -97,8 +97,10 @@ static void rds_ib_dev_free(struct work_struct *work)
97 struct rds_ib_device *rds_ibdev = container_of(work, 97 struct rds_ib_device *rds_ibdev = container_of(work,
98 struct rds_ib_device, free_work); 98 struct rds_ib_device, free_work);
99 99
100 if (rds_ibdev->mr_pool) 100 if (rds_ibdev->mr_8k_pool)
101 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); 101 rds_ib_destroy_mr_pool(rds_ibdev->mr_8k_pool);
102 if (rds_ibdev->mr_1m_pool)
103 rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool);
102 if (rds_ibdev->pd) 104 if (rds_ibdev->pd)
103 ib_dealloc_pd(rds_ibdev->pd); 105 ib_dealloc_pd(rds_ibdev->pd);
104 106
@@ -148,9 +150,13 @@ static void rds_ib_add_one(struct ib_device *device)
148 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); 150 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
149 151
150 rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32; 152 rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32;
151 rds_ibdev->max_fmrs = dev_attr->max_fmr ? 153 rds_ibdev->max_1m_fmrs = dev_attr->max_mr ?
152 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) : 154 min_t(unsigned int, (dev_attr->max_mr / 2),
153 fmr_pool_size; 155 rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size;
156
157 rds_ibdev->max_8k_fmrs = dev_attr->max_mr ?
158 min_t(unsigned int, ((dev_attr->max_mr / 2) * RDS_MR_8K_SCALE),
159 rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size;
154 160
155 rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom; 161 rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
156 rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom; 162 rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
@@ -162,12 +168,25 @@ static void rds_ib_add_one(struct ib_device *device)
162 goto put_dev; 168 goto put_dev;
163 } 169 }
164 170
165 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); 171 rds_ibdev->mr_1m_pool =
166 if (IS_ERR(rds_ibdev->mr_pool)) { 172 rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);
167 rds_ibdev->mr_pool = NULL; 173 if (IS_ERR(rds_ibdev->mr_1m_pool)) {
174 rds_ibdev->mr_1m_pool = NULL;
168 goto put_dev; 175 goto put_dev;
169 } 176 }
170 177
178 rds_ibdev->mr_8k_pool =
179 rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL);
180 if (IS_ERR(rds_ibdev->mr_8k_pool)) {
181 rds_ibdev->mr_8k_pool = NULL;
182 goto put_dev;
183 }
184
185 rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n",
186 dev_attr->max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
187 rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs,
188 rds_ibdev->max_8k_fmrs);
189
171 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); 190 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
172 INIT_LIST_HEAD(&rds_ibdev->conn_list); 191 INIT_LIST_HEAD(&rds_ibdev->conn_list);
173 192
@@ -317,7 +336,7 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
317 /* Create a CMA ID and try to bind it. This catches both 336 /* Create a CMA ID and try to bind it. This catches both
318 * IB and iWARP capable NICs. 337 * IB and iWARP capable NICs.
319 */ 338 */
320 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); 339 cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
321 if (IS_ERR(cm_id)) 340 if (IS_ERR(cm_id))
322 return PTR_ERR(cm_id); 341 return PTR_ERR(cm_id);
323 342
diff --git a/net/rds/ib.h b/net/rds/ib.h
index aae60fda77f6..b3fdebb57460 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -9,8 +9,11 @@
9#include "rds.h" 9#include "rds.h"
10#include "rdma_transport.h" 10#include "rdma_transport.h"
11 11
12#define RDS_FMR_SIZE 256 12#define RDS_FMR_1M_POOL_SIZE (8192 / 2)
13#define RDS_FMR_POOL_SIZE 8192 13#define RDS_FMR_1M_MSG_SIZE 256
14#define RDS_FMR_8K_MSG_SIZE 2
15#define RDS_MR_8K_SCALE (256 / (RDS_FMR_8K_MSG_SIZE + 1))
16#define RDS_FMR_8K_POOL_SIZE (RDS_MR_8K_SCALE * (8192 / 2))
14 17
15#define RDS_IB_MAX_SGE 8 18#define RDS_IB_MAX_SGE 8
16#define RDS_IB_RECV_SGE 2 19#define RDS_IB_RECV_SGE 2
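
With these constants the two pool sizes work out as (integer arithmetic, shown for clarity):

	RDS_MR_8K_SCALE      = 256 / (RDS_FMR_8K_MSG_SIZE + 1) = 256 / 3 = 85
	RDS_FMR_1M_POOL_SIZE = 8192 / 2                         = 4096
	RDS_FMR_8K_POOL_SIZE = 85 * (8192 / 2)                  = 348160
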
@@ -24,6 +27,9 @@
24 27
25#define RDS_IB_RECYCLE_BATCH_COUNT 32 28#define RDS_IB_RECYCLE_BATCH_COUNT 32
26 29
30#define RDS_IB_WC_MAX 32
31#define RDS_IB_SEND_OP BIT_ULL(63)
32
27extern struct rw_semaphore rds_ib_devices_lock; 33extern struct rw_semaphore rds_ib_devices_lock;
28extern struct list_head rds_ib_devices; 34extern struct list_head rds_ib_devices;
29 35
@@ -69,7 +75,11 @@ struct rds_ib_connect_private {
69 75
70struct rds_ib_send_work { 76struct rds_ib_send_work {
71 void *s_op; 77 void *s_op;
72 struct ib_send_wr s_wr; 78 union {
79 struct ib_send_wr s_wr;
80 struct ib_rdma_wr s_rdma_wr;
81 struct ib_atomic_wr s_atomic_wr;
82 };
73 struct ib_sge s_sge[RDS_IB_MAX_SGE]; 83 struct ib_sge s_sge[RDS_IB_MAX_SGE];
74 unsigned long s_queued; 84 unsigned long s_queued;
75}; 85};
@@ -89,6 +99,20 @@ struct rds_ib_work_ring {
89 atomic_t w_free_ctr; 99 atomic_t w_free_ctr;
90}; 100};
91 101
102/* Rings are posted with all the allocations they'll need to queue the
103 * incoming message to the receiving socket so this can't fail.
104 * All fragments start with a header, so we can make sure we're not receiving
105 * garbage, and we can tell a small 8 byte fragment from an ACK frame.
106 */
107struct rds_ib_ack_state {
108 u64 ack_next;
109 u64 ack_recv;
110 unsigned int ack_required:1;
111 unsigned int ack_next_valid:1;
112 unsigned int ack_recv_valid:1;
113};
114
115
92struct rds_ib_device; 116struct rds_ib_device;
93 117
94struct rds_ib_connection { 118struct rds_ib_connection {
@@ -102,6 +126,12 @@ struct rds_ib_connection {
102 struct ib_pd *i_pd; 126 struct ib_pd *i_pd;
103 struct ib_cq *i_send_cq; 127 struct ib_cq *i_send_cq;
104 struct ib_cq *i_recv_cq; 128 struct ib_cq *i_recv_cq;
129 struct ib_wc i_send_wc[RDS_IB_WC_MAX];
130 struct ib_wc i_recv_wc[RDS_IB_WC_MAX];
131
132 /* interrupt handling */
133 struct tasklet_struct i_send_tasklet;
134 struct tasklet_struct i_recv_tasklet;
105 135
106 /* tx */ 136 /* tx */
107 struct rds_ib_work_ring i_send_ring; 137 struct rds_ib_work_ring i_send_ring;
@@ -112,7 +142,6 @@ struct rds_ib_connection {
112 atomic_t i_signaled_sends; 142 atomic_t i_signaled_sends;
113 143
114 /* rx */ 144 /* rx */
115 struct tasklet_struct i_recv_tasklet;
116 struct mutex i_recv_mutex; 145 struct mutex i_recv_mutex;
117 struct rds_ib_work_ring i_recv_ring; 146 struct rds_ib_work_ring i_recv_ring;
118 struct rds_ib_incoming *i_ibinc; 147 struct rds_ib_incoming *i_ibinc;
@@ -164,6 +193,12 @@ struct rds_ib_connection {
164struct rds_ib_ipaddr { 193struct rds_ib_ipaddr {
165 struct list_head list; 194 struct list_head list;
166 __be32 ipaddr; 195 __be32 ipaddr;
196 struct rcu_head rcu;
197};
198
199enum {
200 RDS_IB_MR_8K_POOL,
201 RDS_IB_MR_1M_POOL,
167}; 202};
168 203
169struct rds_ib_device { 204struct rds_ib_device {
@@ -172,9 +207,12 @@ struct rds_ib_device {
172 struct list_head conn_list; 207 struct list_head conn_list;
173 struct ib_device *dev; 208 struct ib_device *dev;
174 struct ib_pd *pd; 209 struct ib_pd *pd;
175 struct rds_ib_mr_pool *mr_pool;
176 unsigned int fmr_max_remaps;
177 unsigned int max_fmrs; 210 unsigned int max_fmrs;
211 struct rds_ib_mr_pool *mr_1m_pool;
212 struct rds_ib_mr_pool *mr_8k_pool;
213 unsigned int fmr_max_remaps;
214 unsigned int max_8k_fmrs;
215 unsigned int max_1m_fmrs;
178 int max_sge; 216 int max_sge;
179 unsigned int max_wrs; 217 unsigned int max_wrs;
180 unsigned int max_initiator_depth; 218 unsigned int max_initiator_depth;
@@ -197,14 +235,14 @@ struct rds_ib_device {
197struct rds_ib_statistics { 235struct rds_ib_statistics {
198 uint64_t s_ib_connect_raced; 236 uint64_t s_ib_connect_raced;
199 uint64_t s_ib_listen_closed_stale; 237 uint64_t s_ib_listen_closed_stale;
200 uint64_t s_ib_tx_cq_call; 238 uint64_t s_ib_evt_handler_call;
239 uint64_t s_ib_tasklet_call;
201 uint64_t s_ib_tx_cq_event; 240 uint64_t s_ib_tx_cq_event;
202 uint64_t s_ib_tx_ring_full; 241 uint64_t s_ib_tx_ring_full;
203 uint64_t s_ib_tx_throttle; 242 uint64_t s_ib_tx_throttle;
204 uint64_t s_ib_tx_sg_mapping_failure; 243 uint64_t s_ib_tx_sg_mapping_failure;
205 uint64_t s_ib_tx_stalled; 244 uint64_t s_ib_tx_stalled;
206 uint64_t s_ib_tx_credit_updates; 245 uint64_t s_ib_tx_credit_updates;
207 uint64_t s_ib_rx_cq_call;
208 uint64_t s_ib_rx_cq_event; 246 uint64_t s_ib_rx_cq_event;
209 uint64_t s_ib_rx_ring_empty; 247 uint64_t s_ib_rx_ring_empty;
210 uint64_t s_ib_rx_refill_from_cq; 248 uint64_t s_ib_rx_refill_from_cq;
@@ -216,12 +254,18 @@ struct rds_ib_statistics {
216 uint64_t s_ib_ack_send_delayed; 254 uint64_t s_ib_ack_send_delayed;
217 uint64_t s_ib_ack_send_piggybacked; 255 uint64_t s_ib_ack_send_piggybacked;
218 uint64_t s_ib_ack_received; 256 uint64_t s_ib_ack_received;
219 uint64_t s_ib_rdma_mr_alloc; 257 uint64_t s_ib_rdma_mr_8k_alloc;
220 uint64_t s_ib_rdma_mr_free; 258 uint64_t s_ib_rdma_mr_8k_free;
221 uint64_t s_ib_rdma_mr_used; 259 uint64_t s_ib_rdma_mr_8k_used;
222 uint64_t s_ib_rdma_mr_pool_flush; 260 uint64_t s_ib_rdma_mr_8k_pool_flush;
223 uint64_t s_ib_rdma_mr_pool_wait; 261 uint64_t s_ib_rdma_mr_8k_pool_wait;
224 uint64_t s_ib_rdma_mr_pool_depleted; 262 uint64_t s_ib_rdma_mr_8k_pool_depleted;
263 uint64_t s_ib_rdma_mr_1m_alloc;
264 uint64_t s_ib_rdma_mr_1m_free;
265 uint64_t s_ib_rdma_mr_1m_used;
266 uint64_t s_ib_rdma_mr_1m_pool_flush;
267 uint64_t s_ib_rdma_mr_1m_pool_wait;
268 uint64_t s_ib_rdma_mr_1m_pool_depleted;
225 uint64_t s_ib_atomic_cswp; 269 uint64_t s_ib_atomic_cswp;
226 uint64_t s_ib_atomic_fadd; 270 uint64_t s_ib_atomic_fadd;
227}; 271};
@@ -273,7 +317,8 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
273void rds_ib_dev_put(struct rds_ib_device *rds_ibdev); 317void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
274extern struct ib_client rds_ib_client; 318extern struct ib_client rds_ib_client;
275 319
276extern unsigned int fmr_message_size; 320extern unsigned int rds_ib_fmr_1m_pool_size;
321extern unsigned int rds_ib_fmr_8k_pool_size;
277extern unsigned int rds_ib_retry_count; 322extern unsigned int rds_ib_retry_count;
278 323
279extern spinlock_t ib_nodev_conns_lock; 324extern spinlock_t ib_nodev_conns_lock;
@@ -303,7 +348,8 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
303void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 348void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
304void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 349void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
305void rds_ib_destroy_nodev_conns(void); 350void rds_ib_destroy_nodev_conns(void);
306struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); 351struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev,
352 int npages);
307void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); 353void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
308void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); 354void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
309void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, 355void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
@@ -323,7 +369,8 @@ void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
323void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp); 369void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp);
324void rds_ib_inc_free(struct rds_incoming *inc); 370void rds_ib_inc_free(struct rds_incoming *inc);
325int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); 371int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
326void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context); 372void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc,
373 struct rds_ib_ack_state *state);
327void rds_ib_recv_tasklet_fn(unsigned long data); 374void rds_ib_recv_tasklet_fn(unsigned long data);
328void rds_ib_recv_init_ring(struct rds_ib_connection *ic); 375void rds_ib_recv_init_ring(struct rds_ib_connection *ic);
329void rds_ib_recv_clear_ring(struct rds_ib_connection *ic); 376void rds_ib_recv_clear_ring(struct rds_ib_connection *ic);
@@ -331,6 +378,7 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
331void rds_ib_attempt_ack(struct rds_ib_connection *ic); 378void rds_ib_attempt_ack(struct rds_ib_connection *ic);
332void rds_ib_ack_send_complete(struct rds_ib_connection *ic); 379void rds_ib_ack_send_complete(struct rds_ib_connection *ic);
333u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic); 380u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic);
381void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required);
334 382
335/* ib_ring.c */ 383/* ib_ring.c */
336void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr); 384void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr);
@@ -348,7 +396,7 @@ extern wait_queue_head_t rds_ib_ring_empty_wait;
348void rds_ib_xmit_complete(struct rds_connection *conn); 396void rds_ib_xmit_complete(struct rds_connection *conn);
349int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, 397int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
350 unsigned int hdr_off, unsigned int sg, unsigned int off); 398 unsigned int hdr_off, unsigned int sg, unsigned int off);
351void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context); 399void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc);
352void rds_ib_send_init_ring(struct rds_ib_connection *ic); 400void rds_ib_send_init_ring(struct rds_ib_connection *ic);
353void rds_ib_send_clear_ring(struct rds_ib_connection *ic); 401void rds_ib_send_clear_ring(struct rds_ib_connection *ic);
354int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op); 402int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 9043f5c04787..da5a7fb98c77 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -216,6 +216,96 @@ static void rds_ib_cq_event_handler(struct ib_event *event, void *data)
216 event->event, ib_event_msg(event->event), data); 216 event->event, ib_event_msg(event->event), data);
217} 217}
218 218
219/* Plucking the oldest entry from the ring can be done concurrently with
220 * the thread refilling the ring. Each ring operation is protected by
221 * spinlocks and the transient state of refilling doesn't change the
222 * recording of which entry is oldest.
223 *
224 * This relies on IB only calling one cq comp_handler for each cq so that
225 * there will only be one caller of rds_recv_incoming() per RDS connection.
226 */
227static void rds_ib_cq_comp_handler_recv(struct ib_cq *cq, void *context)
228{
229 struct rds_connection *conn = context;
230 struct rds_ib_connection *ic = conn->c_transport_data;
231
232 rdsdebug("conn %p cq %p\n", conn, cq);
233
234 rds_ib_stats_inc(s_ib_evt_handler_call);
235
236 tasklet_schedule(&ic->i_recv_tasklet);
237}
238
239static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq,
240 struct ib_wc *wcs,
241 struct rds_ib_ack_state *ack_state)
242{
243 int nr;
244 int i;
245 struct ib_wc *wc;
246
247 while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
248 for (i = 0; i < nr; i++) {
249 wc = wcs + i;
250 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
251 (unsigned long long)wc->wr_id, wc->status,
252 wc->byte_len, be32_to_cpu(wc->ex.imm_data));
253
254 if (wc->wr_id & RDS_IB_SEND_OP)
255 rds_ib_send_cqe_handler(ic, wc);
256 else
257 rds_ib_recv_cqe_handler(ic, wc, ack_state);
258 }
259 }
260}
261
262static void rds_ib_tasklet_fn_send(unsigned long data)
263{
264 struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
265 struct rds_connection *conn = ic->conn;
266 struct rds_ib_ack_state state;
267
268 rds_ib_stats_inc(s_ib_tasklet_call);
269
270 memset(&state, 0, sizeof(state));
271 poll_cq(ic, ic->i_send_cq, ic->i_send_wc, &state);
272 ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
273 poll_cq(ic, ic->i_send_cq, ic->i_send_wc, &state);
274
275 if (rds_conn_up(conn) &&
276 (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
277 test_bit(0, &conn->c_map_queued)))
278 rds_send_xmit(ic->conn);
279}
280
281static void rds_ib_tasklet_fn_recv(unsigned long data)
282{
283 struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
284 struct rds_connection *conn = ic->conn;
285 struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
286 struct rds_ib_ack_state state;
287
288 if (!rds_ibdev)
289 rds_conn_drop(conn);
290
291 rds_ib_stats_inc(s_ib_tasklet_call);
292
293 memset(&state, 0, sizeof(state));
294 poll_cq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
295 ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
296 poll_cq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
297
298 if (state.ack_next_valid)
299 rds_ib_set_ack(ic, state.ack_next, state.ack_required);
300 if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
301 rds_send_drop_acked(conn, state.ack_recv, NULL);
302 ic->i_ack_recv = state.ack_recv;
303 }
304
305 if (rds_conn_up(conn))
306 rds_ib_attempt_ack(ic);
307}
308
219static void rds_ib_qp_event_handler(struct ib_event *event, void *data) 309static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
220{ 310{
221 struct rds_connection *conn = data; 311 struct rds_connection *conn = data;
@@ -238,6 +328,18 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
238 } 328 }
239} 329}
240 330
331static void rds_ib_cq_comp_handler_send(struct ib_cq *cq, void *context)
332{
333 struct rds_connection *conn = context;
334 struct rds_ib_connection *ic = conn->c_transport_data;
335
336 rdsdebug("conn %p cq %p\n", conn, cq);
337
338 rds_ib_stats_inc(s_ib_evt_handler_call);
339
340 tasklet_schedule(&ic->i_send_tasklet);
341}
342
241/* 343/*
242 * This needs to be very careful to not leave IS_ERR pointers around for 344 * This needs to be very careful to not leave IS_ERR pointers around for
243 * cleanup to trip over. 345 * cleanup to trip over.
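
Both tasklets above use the same drain -> re-arm -> drain sequence: completions that race in after the first ib_poll_cq() pass but before ib_req_notify_cq() would otherwise sit unnoticed until some later event, so the CQ is polled once more after re-arming. A condensed sketch of the pattern (the notify flag differs per CQ: IB_CQ_NEXT_COMP for send, IB_CQ_SOLICITED for recv):

	static void drain_cq(struct rds_ib_connection *ic, struct ib_cq *cq,
			     struct ib_wc *wcs, struct rds_ib_ack_state *state)
	{
		poll_cq(ic, cq, wcs, state);		/* drain the backlog */
		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);	/* re-arm notification */
		poll_cq(ic, cq, wcs, state);		/* close the race window */
	}
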
@@ -271,7 +373,8 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
271 ic->i_pd = rds_ibdev->pd; 373 ic->i_pd = rds_ibdev->pd;
272 374
273 cq_attr.cqe = ic->i_send_ring.w_nr + 1; 375 cq_attr.cqe = ic->i_send_ring.w_nr + 1;
274 ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler, 376
377 ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send,
275 rds_ib_cq_event_handler, conn, 378 rds_ib_cq_event_handler, conn,
276 &cq_attr); 379 &cq_attr);
277 if (IS_ERR(ic->i_send_cq)) { 380 if (IS_ERR(ic->i_send_cq)) {
@@ -282,7 +385,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
282 } 385 }
283 386
284 cq_attr.cqe = ic->i_recv_ring.w_nr; 387 cq_attr.cqe = ic->i_recv_ring.w_nr;
285 ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler, 388 ic->i_recv_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv,
286 rds_ib_cq_event_handler, conn, 389 rds_ib_cq_event_handler, conn,
287 &cq_attr); 390 &cq_attr);
288 if (IS_ERR(ic->i_recv_cq)) { 391 if (IS_ERR(ic->i_recv_cq)) {
@@ -565,7 +668,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
565 668
 566 /* XXX I wonder what effect the port space has */ 669 /* XXX I wonder what effect the port space has */
567 /* delegate cm event handler to rdma_transport */ 670 /* delegate cm event handler to rdma_transport */
568 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, 671 ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
569 RDMA_PS_TCP, IB_QPT_RC); 672 RDMA_PS_TCP, IB_QPT_RC);
570 if (IS_ERR(ic->i_cm_id)) { 673 if (IS_ERR(ic->i_cm_id)) {
571 ret = PTR_ERR(ic->i_cm_id); 674 ret = PTR_ERR(ic->i_cm_id);
@@ -637,6 +740,7 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
637 wait_event(rds_ib_ring_empty_wait, 740 wait_event(rds_ib_ring_empty_wait,
638 rds_ib_ring_empty(&ic->i_recv_ring) && 741 rds_ib_ring_empty(&ic->i_recv_ring) &&
639 (atomic_read(&ic->i_signaled_sends) == 0)); 742 (atomic_read(&ic->i_signaled_sends) == 0));
743 tasklet_kill(&ic->i_send_tasklet);
640 tasklet_kill(&ic->i_recv_tasklet); 744 tasklet_kill(&ic->i_recv_tasklet);
641 745
642 /* first destroy the ib state that generates callbacks */ 746 /* first destroy the ib state that generates callbacks */
@@ -743,8 +847,10 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
743 } 847 }
744 848
745 INIT_LIST_HEAD(&ic->ib_node); 849 INIT_LIST_HEAD(&ic->ib_node);
746 tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn, 850 tasklet_init(&ic->i_send_tasklet, rds_ib_tasklet_fn_send,
747 (unsigned long) ic); 851 (unsigned long)ic);
852 tasklet_init(&ic->i_recv_tasklet, rds_ib_tasklet_fn_recv,
853 (unsigned long)ic);
748 mutex_init(&ic->i_recv_mutex); 854 mutex_init(&ic->i_recv_mutex);
749#ifndef KERNEL_HAS_ATOMIC64 855#ifndef KERNEL_HAS_ATOMIC64
750 spin_lock_init(&ic->i_ack_lock); 856 spin_lock_init(&ic->i_ack_lock);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 251d1ce0b7c7..a2340748ec86 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -65,6 +65,7 @@ struct rds_ib_mr {
65 * Our own little FMR pool 65 * Our own little FMR pool
66 */ 66 */
67struct rds_ib_mr_pool { 67struct rds_ib_mr_pool {
68 unsigned int pool_type;
68 struct mutex flush_lock; /* serialize fmr invalidate */ 69 struct mutex flush_lock; /* serialize fmr invalidate */
69 struct delayed_work flush_worker; /* flush worker */ 70 struct delayed_work flush_worker; /* flush worker */
70 71
@@ -83,7 +84,7 @@ struct rds_ib_mr_pool {
83 struct ib_fmr_attr fmr_attr; 84 struct ib_fmr_attr fmr_attr;
84}; 85};
85 86
86struct workqueue_struct *rds_ib_fmr_wq; 87static struct workqueue_struct *rds_ib_fmr_wq;
87 88
88int rds_ib_fmr_init(void) 89int rds_ib_fmr_init(void)
89{ 90{
@@ -159,10 +160,8 @@ static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
159 } 160 }
160 spin_unlock_irq(&rds_ibdev->spinlock); 161 spin_unlock_irq(&rds_ibdev->spinlock);
161 162
162 if (to_free) { 163 if (to_free)
163 synchronize_rcu(); 164 kfree_rcu(to_free, rcu);
164 kfree(to_free);
165 }
166} 165}
167 166
168int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 167int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
@@ -236,7 +235,8 @@ void rds_ib_destroy_nodev_conns(void)
236 rds_conn_destroy(ic->conn); 235 rds_conn_destroy(ic->conn);
237} 236}
238 237
239struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) 238struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
239 int pool_type)
240{ 240{
241 struct rds_ib_mr_pool *pool; 241 struct rds_ib_mr_pool *pool;
242 242
@@ -244,6 +244,7 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
244 if (!pool) 244 if (!pool)
245 return ERR_PTR(-ENOMEM); 245 return ERR_PTR(-ENOMEM);
246 246
247 pool->pool_type = pool_type;
247 init_llist_head(&pool->free_list); 248 init_llist_head(&pool->free_list);
248 init_llist_head(&pool->drop_list); 249 init_llist_head(&pool->drop_list);
249 init_llist_head(&pool->clean_list); 250 init_llist_head(&pool->clean_list);
@@ -251,28 +252,30 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
251 init_waitqueue_head(&pool->flush_wait); 252 init_waitqueue_head(&pool->flush_wait);
252 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); 253 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
253 254
254 pool->fmr_attr.max_pages = fmr_message_size; 255 if (pool_type == RDS_IB_MR_1M_POOL) {
256 /* +1 allows for unaligned MRs */
257 pool->fmr_attr.max_pages = RDS_FMR_1M_MSG_SIZE + 1;
258 pool->max_items = RDS_FMR_1M_POOL_SIZE;
259 } else {
260 /* pool_type == RDS_IB_MR_8K_POOL */
261 pool->fmr_attr.max_pages = RDS_FMR_8K_MSG_SIZE + 1;
262 pool->max_items = RDS_FMR_8K_POOL_SIZE;
263 }
264
265 pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
255 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; 266 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
256 pool->fmr_attr.page_shift = PAGE_SHIFT; 267 pool->fmr_attr.page_shift = PAGE_SHIFT;
257 pool->max_free_pinned = rds_ibdev->max_fmrs * fmr_message_size / 4;
258
259 /* We never allow more than max_items MRs to be allocated.
260 * When we exceed more than max_items_soft, we start freeing
261 * items more aggressively.
262 * Make sure that max_items > max_items_soft > max_items / 2
263 */
264 pool->max_items_soft = rds_ibdev->max_fmrs * 3 / 4; 268 pool->max_items_soft = rds_ibdev->max_fmrs * 3 / 4;
265 pool->max_items = rds_ibdev->max_fmrs;
266 269
267 return pool; 270 return pool;
268} 271}
269 272
270void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo) 273void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo)
271{ 274{
272 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 275 struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
273 276
274 iinfo->rdma_mr_max = pool->max_items; 277 iinfo->rdma_mr_max = pool_1m->max_items;
275 iinfo->rdma_mr_size = pool->fmr_attr.max_pages; 278 iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages;
276} 279}
277 280
278void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) 281void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
@@ -314,14 +317,28 @@ static inline void wait_clean_list_grace(void)
314 } 317 }
315} 318}
316 319
317static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) 320static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev,
321 int npages)
318{ 322{
319 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 323 struct rds_ib_mr_pool *pool;
320 struct rds_ib_mr *ibmr = NULL; 324 struct rds_ib_mr *ibmr = NULL;
321 int err = 0, iter = 0; 325 int err = 0, iter = 0;
322 326
327 if (npages <= RDS_FMR_8K_MSG_SIZE)
328 pool = rds_ibdev->mr_8k_pool;
329 else
330 pool = rds_ibdev->mr_1m_pool;
331
323 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) 332 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
324 schedule_delayed_work(&pool->flush_worker, 10); 333 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
334
335 	/* Switch pools if one of the pools is reaching its upper limit */
336 if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
337 if (pool->pool_type == RDS_IB_MR_8K_POOL)
338 pool = rds_ibdev->mr_1m_pool;
339 else
340 pool = rds_ibdev->mr_8k_pool;
341 }
325 342
326 while (1) { 343 while (1) {
327 ibmr = rds_ib_reuse_fmr(pool); 344 ibmr = rds_ib_reuse_fmr(pool);
@@ -343,12 +360,18 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
343 atomic_dec(&pool->item_count); 360 atomic_dec(&pool->item_count);
344 361
345 if (++iter > 2) { 362 if (++iter > 2) {
346 rds_ib_stats_inc(s_ib_rdma_mr_pool_depleted); 363 if (pool->pool_type == RDS_IB_MR_8K_POOL)
364 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
365 else
366 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
347 return ERR_PTR(-EAGAIN); 367 return ERR_PTR(-EAGAIN);
348 } 368 }
349 369
350 /* We do have some empty MRs. Flush them out. */ 370 /* We do have some empty MRs. Flush them out. */
351 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait); 371 if (pool->pool_type == RDS_IB_MR_8K_POOL)
372 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait);
373 else
374 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait);
352 rds_ib_flush_mr_pool(pool, 0, &ibmr); 375 rds_ib_flush_mr_pool(pool, 0, &ibmr);
353 if (ibmr) 376 if (ibmr)
354 return ibmr; 377 return ibmr;
@@ -373,7 +396,12 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
373 goto out_no_cigar; 396 goto out_no_cigar;
374 } 397 }
375 398
376 rds_ib_stats_inc(s_ib_rdma_mr_alloc); 399 ibmr->pool = pool;
400 if (pool->pool_type == RDS_IB_MR_8K_POOL)
401 rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
402 else
403 rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);
404
377 return ibmr; 405 return ibmr;
378 406
379out_no_cigar: 407out_no_cigar:
@@ -429,7 +457,7 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm
429 } 457 }
430 458
431 page_cnt += len >> PAGE_SHIFT; 459 page_cnt += len >> PAGE_SHIFT;
432 if (page_cnt > fmr_message_size) 460 if (page_cnt > ibmr->pool->fmr_attr.max_pages)
433 return -EINVAL; 461 return -EINVAL;
434 462
435 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC, 463 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
@@ -461,7 +489,10 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm
461 ibmr->sg_dma_len = sg_dma_len; 489 ibmr->sg_dma_len = sg_dma_len;
462 ibmr->remap_count++; 490 ibmr->remap_count++;
463 491
464 rds_ib_stats_inc(s_ib_rdma_mr_used); 492 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
493 rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
494 else
495 rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
465 ret = 0; 496 ret = 0;
466 497
467out: 498out:
@@ -524,8 +555,7 @@ static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
524 555
525 __rds_ib_teardown_mr(ibmr); 556 __rds_ib_teardown_mr(ibmr);
526 if (pinned) { 557 if (pinned) {
527 struct rds_ib_device *rds_ibdev = ibmr->device; 558 struct rds_ib_mr_pool *pool = ibmr->pool;
528 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
529 559
530 atomic_sub(pinned, &pool->free_pinned); 560 atomic_sub(pinned, &pool->free_pinned);
531 } 561 }
@@ -594,7 +624,7 @@ static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
594 * to free as many MRs as needed to get back to this limit. 624 * to free as many MRs as needed to get back to this limit.
595 */ 625 */
596static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, 626static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
597 int free_all, struct rds_ib_mr **ibmr_ret) 627 int free_all, struct rds_ib_mr **ibmr_ret)
598{ 628{
599 struct rds_ib_mr *ibmr, *next; 629 struct rds_ib_mr *ibmr, *next;
600 struct llist_node *clean_nodes; 630 struct llist_node *clean_nodes;
@@ -605,11 +635,14 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
605 unsigned int nfreed = 0, dirty_to_clean = 0, free_goal; 635 unsigned int nfreed = 0, dirty_to_clean = 0, free_goal;
606 int ret = 0; 636 int ret = 0;
607 637
608 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush); 638 if (pool->pool_type == RDS_IB_MR_8K_POOL)
639 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush);
640 else
641 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_flush);
609 642
610 if (ibmr_ret) { 643 if (ibmr_ret) {
611 DEFINE_WAIT(wait); 644 DEFINE_WAIT(wait);
612 while(!mutex_trylock(&pool->flush_lock)) { 645 while (!mutex_trylock(&pool->flush_lock)) {
613 ibmr = rds_ib_reuse_fmr(pool); 646 ibmr = rds_ib_reuse_fmr(pool);
614 if (ibmr) { 647 if (ibmr) {
615 *ibmr_ret = ibmr; 648 *ibmr_ret = ibmr;
@@ -666,8 +699,12 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
666 list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) { 699 list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
667 unpinned += ibmr->sg_len; 700 unpinned += ibmr->sg_len;
668 __rds_ib_teardown_mr(ibmr); 701 __rds_ib_teardown_mr(ibmr);
669 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) { 702 if (nfreed < free_goal ||
670 rds_ib_stats_inc(s_ib_rdma_mr_free); 703 ibmr->remap_count >= pool->fmr_attr.max_maps) {
704 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
705 rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
706 else
707 rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
671 list_del(&ibmr->unmap_list); 708 list_del(&ibmr->unmap_list);
672 ib_dealloc_fmr(ibmr->fmr); 709 ib_dealloc_fmr(ibmr->fmr);
673 kfree(ibmr); 710 kfree(ibmr);
@@ -719,8 +756,8 @@ static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
719void rds_ib_free_mr(void *trans_private, int invalidate) 756void rds_ib_free_mr(void *trans_private, int invalidate)
720{ 757{
721 struct rds_ib_mr *ibmr = trans_private; 758 struct rds_ib_mr *ibmr = trans_private;
759 struct rds_ib_mr_pool *pool = ibmr->pool;
722 struct rds_ib_device *rds_ibdev = ibmr->device; 760 struct rds_ib_device *rds_ibdev = ibmr->device;
723 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
724 761
725 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); 762 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
726 763
@@ -759,10 +796,11 @@ void rds_ib_flush_mrs(void)
759 796
760 down_read(&rds_ib_devices_lock); 797 down_read(&rds_ib_devices_lock);
761 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 798 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
762 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 799 if (rds_ibdev->mr_8k_pool)
800 rds_ib_flush_mr_pool(rds_ibdev->mr_8k_pool, 0, NULL);
763 801
764 if (pool) 802 if (rds_ibdev->mr_1m_pool)
765 rds_ib_flush_mr_pool(pool, 0, NULL); 803 rds_ib_flush_mr_pool(rds_ibdev->mr_1m_pool, 0, NULL);
766 } 804 }
767 up_read(&rds_ib_devices_lock); 805 up_read(&rds_ib_devices_lock);
768} 806}
@@ -780,12 +818,12 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
780 goto out; 818 goto out;
781 } 819 }
782 820
783 if (!rds_ibdev->mr_pool) { 821 if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) {
784 ret = -ENODEV; 822 ret = -ENODEV;
785 goto out; 823 goto out;
786 } 824 }
787 825
788 ibmr = rds_ib_alloc_fmr(rds_ibdev); 826 ibmr = rds_ib_alloc_fmr(rds_ibdev, nents);
789 if (IS_ERR(ibmr)) { 827 if (IS_ERR(ibmr)) {
790 rds_ib_dev_put(rds_ibdev); 828 rds_ib_dev_put(rds_ibdev);
791 return ibmr; 829 return ibmr;
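The ib_rdma.c hunks above replace the single FMR pool with an 8K and a 1M pool: allocations are sized into one of the two by page count, and an allocation spills to the sibling pool once its first choice is roughly 90% dirty. A stand-alone sketch of that selection policy, as plain C with stand-in fields (dirty, max_items) for the atomics in rds_ib_mr_pool:

	/* Sketch of the two-pool selection policy; msg_size_8k mirrors
	 * RDS_FMR_8K_MSG_SIZE and the 9/10 test mirrors the spill check.
	 */
	enum pool_type { POOL_8K, POOL_1M };

	struct pool {
		enum pool_type type;
		unsigned int dirty;      /* MRs waiting to be flushed */
		unsigned int max_items;  /* hard allocation limit */
	};

	static struct pool *pick_pool(struct pool *p8k, struct pool *p1m,
				      int npages, int msg_size_8k)
	{
		struct pool *pool = (npages <= msg_size_8k) ? p8k : p1m;

		/* spill to the sibling pool once this one is ~90% dirty */
		if (pool->dirty >= pool->max_items * 9 / 10)
			pool = (pool->type == POOL_8K) ? p1m : p8k;
		return pool;
	}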
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index f43831e4186a..977fb86065b7 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -305,7 +305,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
305 gfp_t slab_mask = GFP_NOWAIT; 305 gfp_t slab_mask = GFP_NOWAIT;
306 gfp_t page_mask = GFP_NOWAIT; 306 gfp_t page_mask = GFP_NOWAIT;
307 307
308 if (gfp & __GFP_WAIT) { 308 if (gfp & __GFP_DIRECT_RECLAIM) {
309 slab_mask = GFP_KERNEL; 309 slab_mask = GFP_KERNEL;
310 page_mask = GFP_HIGHUSER; 310 page_mask = GFP_HIGHUSER;
311 } 311 }
@@ -379,7 +379,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
379 struct ib_recv_wr *failed_wr; 379 struct ib_recv_wr *failed_wr;
380 unsigned int posted = 0; 380 unsigned int posted = 0;
381 int ret = 0; 381 int ret = 0;
382 bool can_wait = !!(gfp & __GFP_WAIT); 382 bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
383 u32 pos; 383 u32 pos;
384 384
385 /* the goal here is to just make sure that someone, somewhere 385 /* the goal here is to just make sure that someone, somewhere
@@ -596,8 +596,7 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
596 * wr_id and avoids working with the ring in that case. 596 * wr_id and avoids working with the ring in that case.
597 */ 597 */
598#ifndef KERNEL_HAS_ATOMIC64 598#ifndef KERNEL_HAS_ATOMIC64
599static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, 599void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
600 int ack_required)
601{ 600{
602 unsigned long flags; 601 unsigned long flags;
603 602
@@ -622,8 +621,7 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
622 return seq; 621 return seq;
623} 622}
624#else 623#else
625static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, 624void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
626 int ack_required)
627{ 625{
628 atomic64_set(&ic->i_ack_next, seq); 626 atomic64_set(&ic->i_ack_next, seq);
629 if (ack_required) { 627 if (ack_required) {
@@ -830,20 +828,6 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
830 rds_cong_map_updated(map, uncongested); 828 rds_cong_map_updated(map, uncongested);
831} 829}
832 830
833/*
834 * Rings are posted with all the allocations they'll need to queue the
835 * incoming message to the receiving socket so this can't fail.
836 * All fragments start with a header, so we can make sure we're not receiving
837 * garbage, and we can tell a small 8 byte fragment from an ACK frame.
838 */
839struct rds_ib_ack_state {
840 u64 ack_next;
841 u64 ack_recv;
842 unsigned int ack_required:1;
843 unsigned int ack_next_valid:1;
844 unsigned int ack_recv_valid:1;
845};
846
847static void rds_ib_process_recv(struct rds_connection *conn, 831static void rds_ib_process_recv(struct rds_connection *conn,
848 struct rds_ib_recv_work *recv, u32 data_len, 832 struct rds_ib_recv_work *recv, u32 data_len,
849 struct rds_ib_ack_state *state) 833 struct rds_ib_ack_state *state)
@@ -969,96 +953,50 @@ static void rds_ib_process_recv(struct rds_connection *conn,
969 } 953 }
970} 954}
971 955
972/* 956void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
973 * Plucking the oldest entry from the ring can be done concurrently with 957 struct ib_wc *wc,
974 * the thread refilling the ring. Each ring operation is protected by 958 struct rds_ib_ack_state *state)
975 * spinlocks and the transient state of refilling doesn't change the
976 * recording of which entry is oldest.
977 *
978 * This relies on IB only calling one cq comp_handler for each cq so that
979 * there will only be one caller of rds_recv_incoming() per RDS connection.
980 */
981void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context)
982{
983 struct rds_connection *conn = context;
984 struct rds_ib_connection *ic = conn->c_transport_data;
985
986 rdsdebug("conn %p cq %p\n", conn, cq);
987
988 rds_ib_stats_inc(s_ib_rx_cq_call);
989
990 tasklet_schedule(&ic->i_recv_tasklet);
991}
992
993static inline void rds_poll_cq(struct rds_ib_connection *ic,
994 struct rds_ib_ack_state *state)
995{ 959{
996 struct rds_connection *conn = ic->conn; 960 struct rds_connection *conn = ic->conn;
997 struct ib_wc wc;
998 struct rds_ib_recv_work *recv; 961 struct rds_ib_recv_work *recv;
999 962
1000 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { 963 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
1001 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n", 964 (unsigned long long)wc->wr_id, wc->status,
1002 (unsigned long long)wc.wr_id, wc.status, 965 ib_wc_status_msg(wc->status), wc->byte_len,
1003 ib_wc_status_msg(wc.status), wc.byte_len, 966 be32_to_cpu(wc->ex.imm_data));
1004 be32_to_cpu(wc.ex.imm_data));
1005 rds_ib_stats_inc(s_ib_rx_cq_event);
1006 967
1007 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; 968 rds_ib_stats_inc(s_ib_rx_cq_event);
1008 969 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
1009 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE); 970 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1,
1010 971 DMA_FROM_DEVICE);
1011 /*
1012 * Also process recvs in connecting state because it is possible
1013 * to get a recv completion _before_ the rdmacm ESTABLISHED
1014 * event is processed.
1015 */
1016 if (wc.status == IB_WC_SUCCESS) {
1017 rds_ib_process_recv(conn, recv, wc.byte_len, state);
1018 } else {
1019 /* We expect errors as the qp is drained during shutdown */
1020 if (rds_conn_up(conn) || rds_conn_connecting(conn))
1021 rds_ib_conn_error(conn, "recv completion on %pI4 had "
1022 "status %u (%s), disconnecting and "
1023 "reconnecting\n", &conn->c_faddr,
1024 wc.status,
1025 ib_wc_status_msg(wc.status));
1026 }
1027 972
1028 /* 973 /* Also process recvs in connecting state because it is possible
1029 * rds_ib_process_recv() doesn't always consume the frag, and 974 * to get a recv completion _before_ the rdmacm ESTABLISHED
1030 * we might not have called it at all if the wc didn't indicate 975 * event is processed.
1031 * success. We already unmapped the frag's pages, though, and 976 */
1032 * the following rds_ib_ring_free() call tells the refill path 977 if (wc->status == IB_WC_SUCCESS) {
1033 * that it will not find an allocated frag here. Make sure we 978 rds_ib_process_recv(conn, recv, wc->byte_len, state);
1034 * keep that promise by freeing a frag that's still on the ring. 979 } else {
1035 */ 980 /* We expect errors as the qp is drained during shutdown */
1036 if (recv->r_frag) { 981 if (rds_conn_up(conn) || rds_conn_connecting(conn))
1037 rds_ib_frag_free(ic, recv->r_frag); 982 rds_ib_conn_error(conn, "recv completion on %pI4 had status %u (%s), disconnecting and reconnecting\n",
1038 recv->r_frag = NULL; 983 &conn->c_faddr,
1039 } 984 wc->status,
1040 rds_ib_ring_free(&ic->i_recv_ring, 1); 985 ib_wc_status_msg(wc->status));
1041 } 986 }
1042}
1043 987
1044void rds_ib_recv_tasklet_fn(unsigned long data) 988 /* rds_ib_process_recv() doesn't always consume the frag, and
1045{ 989 * we might not have called it at all if the wc didn't indicate
1046 struct rds_ib_connection *ic = (struct rds_ib_connection *) data; 990 * success. We already unmapped the frag's pages, though, and
1047 struct rds_connection *conn = ic->conn; 991 * the following rds_ib_ring_free() call tells the refill path
1048 struct rds_ib_ack_state state = { 0, }; 992 * that it will not find an allocated frag here. Make sure we
1049 993 * keep that promise by freeing a frag that's still on the ring.
1050 rds_poll_cq(ic, &state); 994 */
1051 ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); 995 if (recv->r_frag) {
1052 rds_poll_cq(ic, &state); 996 rds_ib_frag_free(ic, recv->r_frag);
1053 997 recv->r_frag = NULL;
1054 if (state.ack_next_valid)
1055 rds_ib_set_ack(ic, state.ack_next, state.ack_required);
1056 if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
1057 rds_send_drop_acked(conn, state.ack_recv, NULL);
1058 ic->i_ack_recv = state.ack_recv;
1059 } 998 }
1060 if (rds_conn_up(conn)) 999 rds_ib_ring_free(&ic->i_recv_ring, 1);
1061 rds_ib_attempt_ack(ic);
1062 1000
1063 /* If we ever end up with a really empty receive ring, we're 1001 /* If we ever end up with a really empty receive ring, we're
1064 * in deep trouble, as the sender will definitely see RNR 1002 * in deep trouble, as the sender will definitely see RNR
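The ib_recv.c refactor above inverts control: instead of the CQ comp_handler polling inline, the tasklet drains the CQ and hands each work completion to rds_ib_recv_cqe_handler(), which only records ack state; the accumulated state is applied once per batch. A schematic of the loop the tasklet now owns, modelled loosely on the removed rds_poll_cq() (handle_one and poll_and_ack are hypothetical stand-ins):

	#include <rdma/ib_verbs.h>
	#include <linux/types.h>

	struct ack_state {
		u64 ack_next;
		unsigned int ack_next_valid:1;
		unsigned int ack_required:1;
	};

	/* per-completion work: record ack state instead of acting on it */
	static void handle_one(struct ib_wc *wc, struct ack_state *state)
	{
		state->ack_next = wc->wr_id;	/* placeholder bookkeeping */
		state->ack_next_valid = 1;
	}

	static void poll_and_ack(struct ib_cq *cq)
	{
		struct ib_wc wc;
		struct ack_state state = { 0 };

		while (ib_poll_cq(cq, 1, &wc) > 0)
			handle_one(&wc, &state);

		/* re-arm, then drain again to close the notify race */
		ib_req_notify_cq(cq, IB_CQ_SOLICITED);
		while (ib_poll_cq(cq, 1, &wc) > 0)
			handle_one(&wc, &state);

		if (state.ack_next_valid) {
			/* one ack update per batch; RDS does this via
			 * rds_ib_set_ack(ic, state.ack_next, state.ack_required)
			 */
		}
	}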
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 4e88047086b6..eac30bf486d7 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -195,7 +195,7 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
195 195
196 send->s_op = NULL; 196 send->s_op = NULL;
197 197
198 send->s_wr.wr_id = i; 198 send->s_wr.wr_id = i | RDS_IB_SEND_OP;
199 send->s_wr.sg_list = send->s_sge; 199 send->s_wr.sg_list = send->s_sge;
200 send->s_wr.ex.imm_data = 0; 200 send->s_wr.ex.imm_data = 0;
201 201
@@ -237,81 +237,73 @@ static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
237 * unallocs the next free entry in the ring it doesn't alter which is 237 * unallocs the next free entry in the ring it doesn't alter which is
238 * the next to be freed, which is what this is concerned with. 238 * the next to be freed, which is what this is concerned with.
239 */ 239 */
240void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) 240void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
241{ 241{
242 struct rds_connection *conn = context;
243 struct rds_ib_connection *ic = conn->c_transport_data;
244 struct rds_message *rm = NULL; 242 struct rds_message *rm = NULL;
245 struct ib_wc wc; 243 struct rds_connection *conn = ic->conn;
246 struct rds_ib_send_work *send; 244 struct rds_ib_send_work *send;
247 u32 completed; 245 u32 completed;
248 u32 oldest; 246 u32 oldest;
249 u32 i = 0; 247 u32 i = 0;
250 int ret;
251 int nr_sig = 0; 248 int nr_sig = 0;
252 249
253 rdsdebug("cq %p conn %p\n", cq, conn);
254 rds_ib_stats_inc(s_ib_tx_cq_call);
255 ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
256 if (ret)
257 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
258
259 while (ib_poll_cq(cq, 1, &wc) > 0) {
260 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
261 (unsigned long long)wc.wr_id, wc.status,
262 ib_wc_status_msg(wc.status), wc.byte_len,
263 be32_to_cpu(wc.ex.imm_data));
264 rds_ib_stats_inc(s_ib_tx_cq_event);
265
266 if (wc.wr_id == RDS_IB_ACK_WR_ID) {
267 if (time_after(jiffies, ic->i_ack_queued + HZ/2))
268 rds_ib_stats_inc(s_ib_tx_stalled);
269 rds_ib_ack_send_complete(ic);
270 continue;
271 }
272 250
273 oldest = rds_ib_ring_oldest(&ic->i_send_ring); 251 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
252 (unsigned long long)wc->wr_id, wc->status,
253 ib_wc_status_msg(wc->status), wc->byte_len,
254 be32_to_cpu(wc->ex.imm_data));
255 rds_ib_stats_inc(s_ib_tx_cq_event);
274 256
275 completed = rds_ib_ring_completed(&ic->i_send_ring, wc.wr_id, oldest); 257 if (wc->wr_id == RDS_IB_ACK_WR_ID) {
258 if (time_after(jiffies, ic->i_ack_queued + HZ / 2))
259 rds_ib_stats_inc(s_ib_tx_stalled);
260 rds_ib_ack_send_complete(ic);
261 return;
262 }
276 263
277 for (i = 0; i < completed; i++) { 264 oldest = rds_ib_ring_oldest(&ic->i_send_ring);
278 send = &ic->i_sends[oldest];
279 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
280 nr_sig++;
281 265
282 rm = rds_ib_send_unmap_op(ic, send, wc.status); 266 completed = rds_ib_ring_completed(&ic->i_send_ring,
267 (wc->wr_id & ~RDS_IB_SEND_OP),
268 oldest);
283 269
284 if (time_after(jiffies, send->s_queued + HZ/2)) 270 for (i = 0; i < completed; i++) {
285 rds_ib_stats_inc(s_ib_tx_stalled); 271 send = &ic->i_sends[oldest];
272 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
273 nr_sig++;
286 274
287 if (send->s_op) { 275 rm = rds_ib_send_unmap_op(ic, send, wc->status);
288 if (send->s_op == rm->m_final_op) {
289 /* If anyone waited for this message to get flushed out, wake
290 * them up now */
291 rds_message_unmapped(rm);
292 }
293 rds_message_put(rm);
294 send->s_op = NULL;
295 }
296 276
297 oldest = (oldest + 1) % ic->i_send_ring.w_nr; 277 if (time_after(jiffies, send->s_queued + HZ / 2))
298 } 278 rds_ib_stats_inc(s_ib_tx_stalled);
299 279
300 rds_ib_ring_free(&ic->i_send_ring, completed); 280 if (send->s_op) {
301 rds_ib_sub_signaled(ic, nr_sig); 281 if (send->s_op == rm->m_final_op) {
302 nr_sig = 0; 282 /* If anyone waited for this message to get
303 283 * flushed out, wake them up now
304 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || 284 */
305 test_bit(0, &conn->c_map_queued)) 285 rds_message_unmapped(rm);
306 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 286 }
307 287 rds_message_put(rm);
308 /* We expect errors as the qp is drained during shutdown */ 288 send->s_op = NULL;
309 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
310 rds_ib_conn_error(conn, "send completion on %pI4 had status "
311 "%u (%s), disconnecting and reconnecting\n",
312 &conn->c_faddr, wc.status,
313 ib_wc_status_msg(wc.status));
314 } 289 }
290
291 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
292 }
293
294 rds_ib_ring_free(&ic->i_send_ring, completed);
295 rds_ib_sub_signaled(ic, nr_sig);
296 nr_sig = 0;
297
298 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
299 test_bit(0, &conn->c_map_queued))
300 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
301
302 /* We expect errors as the qp is drained during shutdown */
303 if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
304 rds_ib_conn_error(conn, "send completion on %pI4 had status %u (%s), disconnecting and reconnecting\n",
305 &conn->c_faddr, wc->status,
306 ib_wc_status_msg(wc->status));
315 } 307 }
316} 308}
317 309
@@ -785,23 +777,23 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
785 send->s_queued = jiffies; 777 send->s_queued = jiffies;
786 778
787 if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { 779 if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
788 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP; 780 send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
789 send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare; 781 send->s_atomic_wr.compare_add = op->op_m_cswp.compare;
790 send->s_wr.wr.atomic.swap = op->op_m_cswp.swap; 782 send->s_atomic_wr.swap = op->op_m_cswp.swap;
791 send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask; 783 send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask;
792 send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask; 784 send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask;
793 } else { /* FADD */ 785 } else { /* FADD */
794 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD; 786 send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
795 send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add; 787 send->s_atomic_wr.compare_add = op->op_m_fadd.add;
796 send->s_wr.wr.atomic.swap = 0; 788 send->s_atomic_wr.swap = 0;
797 send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask; 789 send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
798 send->s_wr.wr.atomic.swap_mask = 0; 790 send->s_atomic_wr.swap_mask = 0;
799 } 791 }
800 nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); 792 nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
801 send->s_wr.num_sge = 1; 793 send->s_atomic_wr.wr.num_sge = 1;
802 send->s_wr.next = NULL; 794 send->s_atomic_wr.wr.next = NULL;
803 send->s_wr.wr.atomic.remote_addr = op->op_remote_addr; 795 send->s_atomic_wr.remote_addr = op->op_remote_addr;
804 send->s_wr.wr.atomic.rkey = op->op_rkey; 796 send->s_atomic_wr.rkey = op->op_rkey;
805 send->s_op = op; 797 send->s_op = op;
806 rds_message_addref(container_of(send->s_op, struct rds_message, atomic)); 798 rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
807 799
@@ -826,11 +818,11 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
826 if (nr_sig) 818 if (nr_sig)
827 atomic_add(nr_sig, &ic->i_signaled_sends); 819 atomic_add(nr_sig, &ic->i_signaled_sends);
828 820
829 failed_wr = &send->s_wr; 821 failed_wr = &send->s_atomic_wr.wr;
830 ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr); 822 ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr);
831 rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic, 823 rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
832 send, &send->s_wr, ret, failed_wr); 824 send, &send->s_atomic_wr, ret, failed_wr);
833 BUG_ON(failed_wr != &send->s_wr); 825 BUG_ON(failed_wr != &send->s_atomic_wr.wr);
834 if (ret) { 826 if (ret) {
835 printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 " 827 printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
836 "returned %d\n", &conn->c_faddr, ret); 828 "returned %d\n", &conn->c_faddr, ret);
@@ -839,9 +831,9 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
839 goto out; 831 goto out;
840 } 832 }
841 833
842 if (unlikely(failed_wr != &send->s_wr)) { 834 if (unlikely(failed_wr != &send->s_atomic_wr.wr)) {
843 printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret); 835 printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
844 BUG_ON(failed_wr != &send->s_wr); 836 BUG_ON(failed_wr != &send->s_atomic_wr.wr);
845 } 837 }
846 838
847out: 839out:
@@ -912,22 +904,23 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
912 nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify); 904 nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
913 905
914 send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; 906 send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
915 send->s_wr.wr.rdma.remote_addr = remote_addr; 907 send->s_rdma_wr.remote_addr = remote_addr;
916 send->s_wr.wr.rdma.rkey = op->op_rkey; 908 send->s_rdma_wr.rkey = op->op_rkey;
917 909
918 if (num_sge > max_sge) { 910 if (num_sge > max_sge) {
919 send->s_wr.num_sge = max_sge; 911 send->s_rdma_wr.wr.num_sge = max_sge;
920 num_sge -= max_sge; 912 num_sge -= max_sge;
921 } else { 913 } else {
922 send->s_wr.num_sge = num_sge; 914 send->s_rdma_wr.wr.num_sge = num_sge;
923 } 915 }
924 916
925 send->s_wr.next = NULL; 917 send->s_rdma_wr.wr.next = NULL;
926 918
927 if (prev) 919 if (prev)
928 prev->s_wr.next = &send->s_wr; 920 prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr;
929 921
930 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) { 922 for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
923 scat != &op->op_sg[op->op_count]; j++) {
931 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 924 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
932 send->s_sge[j].addr = 925 send->s_sge[j].addr =
933 ib_sg_dma_address(ic->i_cm_id->device, scat); 926 ib_sg_dma_address(ic->i_cm_id->device, scat);
@@ -942,7 +935,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
942 } 935 }
943 936
944 rdsdebug("send %p wr %p num_sge %u next %p\n", send, 937 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
945 &send->s_wr, send->s_wr.num_sge, send->s_wr.next); 938 &send->s_rdma_wr.wr,
939 send->s_rdma_wr.wr.num_sge,
940 send->s_rdma_wr.wr.next);
946 941
947 prev = send; 942 prev = send;
948 if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) 943 if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@@ -963,11 +958,11 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
963 if (nr_sig) 958 if (nr_sig)
964 atomic_add(nr_sig, &ic->i_signaled_sends); 959 atomic_add(nr_sig, &ic->i_signaled_sends);
965 960
966 failed_wr = &first->s_wr; 961 failed_wr = &first->s_rdma_wr.wr;
967 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 962 ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
968 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, 963 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
969 first, &first->s_wr, ret, failed_wr); 964 first, &first->s_rdma_wr.wr, ret, failed_wr);
970 BUG_ON(failed_wr != &first->s_wr); 965 BUG_ON(failed_wr != &first->s_rdma_wr.wr);
971 if (ret) { 966 if (ret) {
972 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " 967 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
973 "returned %d\n", &conn->c_faddr, ret); 968 "returned %d\n", &conn->c_faddr, ret);
@@ -976,9 +971,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
976 goto out; 971 goto out;
977 } 972 }
978 973
979 if (unlikely(failed_wr != &first->s_wr)) { 974 if (unlikely(failed_wr != &first->s_rdma_wr.wr)) {
980 printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret); 975 printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
981 BUG_ON(failed_wr != &first->s_wr); 976 BUG_ON(failed_wr != &first->s_rdma_wr.wr);
982 } 977 }
983 978
984 979
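In the ib_send.c hunks above, send wr_ids are tagged with RDS_IB_SEND_OP and the tag is masked off (wc->wr_id & ~RDS_IB_SEND_OP) before the ring arithmetic; that tag is what lets a shared event handler demultiplex send and recv completions. A minimal sketch of the tag/untag pair, assuming the flag sits in a high bit that ring indices can never reach (the hunk does not show the actual definition):

	#include <linux/types.h>

	#define MY_SEND_OP (1ULL << 63)	/* assumed placement of the flag bit */

	static inline u64 tag_send(u32 ring_index)
	{
		return (u64)ring_index | MY_SEND_OP;
	}

	static inline bool is_send(u64 wr_id)
	{
		return wr_id & MY_SEND_OP;
	}

	static inline u32 ring_index(u64 wr_id)
	{
		return (u32)(wr_id & ~MY_SEND_OP);	/* mask before ring math */
	}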
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index 2d5965d6e97c..d77e04473056 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -42,14 +42,14 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats);
42static const char *const rds_ib_stat_names[] = { 42static const char *const rds_ib_stat_names[] = {
43 "ib_connect_raced", 43 "ib_connect_raced",
44 "ib_listen_closed_stale", 44 "ib_listen_closed_stale",
45 "ib_tx_cq_call", 45 "ib_evt_handler_call",
46 "ib_tasklet_call",
46 "ib_tx_cq_event", 47 "ib_tx_cq_event",
47 "ib_tx_ring_full", 48 "ib_tx_ring_full",
48 "ib_tx_throttle", 49 "ib_tx_throttle",
49 "ib_tx_sg_mapping_failure", 50 "ib_tx_sg_mapping_failure",
50 "ib_tx_stalled", 51 "ib_tx_stalled",
51 "ib_tx_credit_updates", 52 "ib_tx_credit_updates",
52 "ib_rx_cq_call",
53 "ib_rx_cq_event", 53 "ib_rx_cq_event",
54 "ib_rx_ring_empty", 54 "ib_rx_ring_empty",
55 "ib_rx_refill_from_cq", 55 "ib_rx_refill_from_cq",
@@ -61,12 +61,18 @@ static const char *const rds_ib_stat_names[] = {
61 "ib_ack_send_delayed", 61 "ib_ack_send_delayed",
62 "ib_ack_send_piggybacked", 62 "ib_ack_send_piggybacked",
63 "ib_ack_received", 63 "ib_ack_received",
64 "ib_rdma_mr_alloc", 64 "ib_rdma_mr_8k_alloc",
65 "ib_rdma_mr_free", 65 "ib_rdma_mr_8k_free",
66 "ib_rdma_mr_used", 66 "ib_rdma_mr_8k_used",
67 "ib_rdma_mr_pool_flush", 67 "ib_rdma_mr_8k_pool_flush",
68 "ib_rdma_mr_pool_wait", 68 "ib_rdma_mr_8k_pool_wait",
69 "ib_rdma_mr_pool_depleted", 69 "ib_rdma_mr_8k_pool_depleted",
70 "ib_rdma_mr_1m_alloc",
71 "ib_rdma_mr_1m_free",
72 "ib_rdma_mr_1m_used",
73 "ib_rdma_mr_1m_pool_flush",
74 "ib_rdma_mr_1m_pool_wait",
75 "ib_rdma_mr_1m_pool_depleted",
70 "ib_atomic_cswp", 76 "ib_atomic_cswp",
71 "ib_atomic_fadd", 77 "ib_atomic_fadd",
72}; 78};
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 3df0295c6659..576f1825fc55 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -223,7 +223,7 @@ static int rds_iw_laddr_check(struct net *net, __be32 addr)
223 /* Create a CMA ID and try to bind it. This catches both 223 /* Create a CMA ID and try to bind it. This catches both
224 * IB and iWARP capable NICs. 224 * IB and iWARP capable NICs.
225 */ 225 */
226 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); 226 cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
227 if (IS_ERR(cm_id)) 227 if (IS_ERR(cm_id))
228 return PTR_ERR(cm_id); 228 return PTR_ERR(cm_id);
229 229
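As in the ib.c and iw_cm.c hunks, every rdma_create_id() caller gains a struct net * as its first argument; RDS passes &init_net since its connections are not namespace-aware. A sketch of the updated call, with placeholder names (my_cm_event_handler, my_ctx) standing in for rds_rdma_cm_event_handler and the connection pointer:

	#include <net/net_namespace.h>
	#include <rdma/rdma_cm.h>

	static int my_cm_event_handler(struct rdma_cm_id *id,
				       struct rdma_cm_event *event)
	{
		return 0;	/* delegate to the transport in real code */
	}

	static struct rdma_cm_id *my_create_id(void *my_ctx)
	{
		/* the net namespace is now explicit in the API */
		return rdma_create_id(&init_net, my_cm_event_handler, my_ctx,
				      RDMA_PS_TCP, IB_QPT_RC);
	}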
diff --git a/net/rds/iw.h b/net/rds/iw.h
index cbe6674e31ee..5af01d1758b3 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -74,10 +74,13 @@ struct rds_iw_send_work {
74 struct rm_rdma_op *s_op; 74 struct rm_rdma_op *s_op;
75 struct rds_iw_mapping *s_mapping; 75 struct rds_iw_mapping *s_mapping;
76 struct ib_mr *s_mr; 76 struct ib_mr *s_mr;
77 struct ib_fast_reg_page_list *s_page_list;
78 unsigned char s_remap_count; 77 unsigned char s_remap_count;
79 78
80 struct ib_send_wr s_wr; 79 union {
80 struct ib_send_wr s_send_wr;
81 struct ib_rdma_wr s_rdma_wr;
82 struct ib_reg_wr s_reg_wr;
83 };
81 struct ib_sge s_sge[RDS_IW_MAX_SGE]; 84 struct ib_sge s_sge[RDS_IW_MAX_SGE];
82 unsigned long s_queued; 85 unsigned long s_queued;
83}; 86};
@@ -195,7 +198,7 @@ struct rds_iw_device {
195 198
196/* Magic WR_ID for ACKs */ 199/* Magic WR_ID for ACKs */
197#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL) 200#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL)
198#define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL) 201#define RDS_IW_REG_WR_ID ((u64)0xefefefefefefefefULL)
199#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL) 202#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL)
200 203
201struct rds_iw_statistics { 204struct rds_iw_statistics {
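The union introduced in rds_iw_send_work above works because the extended WR types each embed a struct ib_send_wr as their first member, named wr, so &send->s_rdma_wr.wr and &send->s_reg_wr.wr alias the same storage as s_send_wr and can be chained and posted uniformly. Roughly:

	/* Sketch of why the union is safe: every extended WR leads with the
	 * plain ib_send_wr, so posting through .wr always sees a valid chain.
	 */
	#include <rdma/ib_verbs.h>

	union my_wr {
		struct ib_send_wr send_wr;	/* plain SEND */
		struct ib_rdma_wr rdma_wr;	/* .wr is the leading ib_send_wr */
		struct ib_reg_wr  reg_wr;	/* likewise */
	};

	static void chain(union my_wr *prev, union my_wr *next)
	{
		/* whichever member was populated, the embedded WR lines up */
		prev->rdma_wr.wr.next = &next->rdma_wr.wr;
	}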
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index a6553a6fb2bc..aea4c911bc76 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -524,7 +524,7 @@ int rds_iw_conn_connect(struct rds_connection *conn)
524 524
525 /* XXX I wonder what affect the port space has */ 525 /* XXX I wonder what affect the port space has */
526 /* delegate cm event handler to rdma_transport */ 526 /* delegate cm event handler to rdma_transport */
527 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, 527 ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
528 RDMA_PS_TCP, IB_QPT_RC); 528 RDMA_PS_TCP, IB_QPT_RC);
529 if (IS_ERR(ic->i_cm_id)) { 529 if (IS_ERR(ic->i_cm_id)) {
530 ret = PTR_ERR(ic->i_cm_id); 530 ret = PTR_ERR(ic->i_cm_id);
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 6a8fbd6e69e7..b09a40c1adce 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -47,7 +47,6 @@ struct rds_iw_mr {
47 struct rdma_cm_id *cm_id; 47 struct rdma_cm_id *cm_id;
48 48
49 struct ib_mr *mr; 49 struct ib_mr *mr;
50 struct ib_fast_reg_page_list *page_list;
51 50
52 struct rds_iw_mapping mapping; 51 struct rds_iw_mapping mapping;
53 unsigned char remap_count; 52 unsigned char remap_count;
@@ -75,10 +74,10 @@ struct rds_iw_mr_pool {
75 int max_pages; 74 int max_pages;
76}; 75};
77 76
78static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all); 77static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
79static void rds_iw_mr_pool_flush_worker(struct work_struct *work); 78static void rds_iw_mr_pool_flush_worker(struct work_struct *work);
80static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); 79static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
81static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, 80static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
82 struct rds_iw_mr *ibmr, 81 struct rds_iw_mr *ibmr,
83 struct scatterlist *sg, unsigned int nents); 82 struct scatterlist *sg, unsigned int nents);
84static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); 83static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
@@ -258,19 +257,18 @@ static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg,
258 sg->bytes = 0; 257 sg->bytes = 0;
259} 258}
260 259
261static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, 260static int rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
262 struct rds_iw_scatterlist *sg) 261 struct rds_iw_scatterlist *sg)
263{ 262{
264 struct ib_device *dev = rds_iwdev->dev; 263 struct ib_device *dev = rds_iwdev->dev;
265 u64 *dma_pages = NULL; 264 int i, ret;
266 int i, j, ret;
267 265
268 WARN_ON(sg->dma_len); 266 WARN_ON(sg->dma_len);
269 267
270 sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL); 268 sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
271 if (unlikely(!sg->dma_len)) { 269 if (unlikely(!sg->dma_len)) {
272 printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n"); 270 printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n");
273 return ERR_PTR(-EBUSY); 271 return -EBUSY;
274 } 272 }
275 273
276 sg->bytes = 0; 274 sg->bytes = 0;
@@ -303,31 +301,14 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
303 if (sg->dma_npages > fastreg_message_size) 301 if (sg->dma_npages > fastreg_message_size)
304 goto out_unmap; 302 goto out_unmap;
305 303
306 dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC);
307 if (!dma_pages) {
308 ret = -ENOMEM;
309 goto out_unmap;
310 }
311
312 for (i = j = 0; i < sg->dma_len; ++i) {
313 unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
314 u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
315 u64 end_addr;
316 304
317 end_addr = dma_addr + dma_len;
318 dma_addr &= ~PAGE_MASK;
319 for (; dma_addr < end_addr; dma_addr += PAGE_SIZE)
320 dma_pages[j++] = dma_addr;
321 BUG_ON(j > sg->dma_npages);
322 }
323 305
324 return dma_pages; 306 return 0;
325 307
326out_unmap: 308out_unmap:
327 ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL); 309 ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
328 sg->dma_len = 0; 310 sg->dma_len = 0;
329 kfree(dma_pages); 311 return ret;
330 return ERR_PTR(ret);
331} 312}
332 313
333 314
@@ -440,7 +421,7 @@ static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev)
440 INIT_LIST_HEAD(&ibmr->mapping.m_list); 421 INIT_LIST_HEAD(&ibmr->mapping.m_list);
441 ibmr->mapping.m_mr = ibmr; 422 ibmr->mapping.m_mr = ibmr;
442 423
443 err = rds_iw_init_fastreg(pool, ibmr); 424 err = rds_iw_init_reg(pool, ibmr);
444 if (err) 425 if (err)
445 goto out_no_cigar; 426 goto out_no_cigar;
446 427
@@ -479,14 +460,13 @@ void rds_iw_sync_mr(void *trans_private, int direction)
479 * If the number of MRs allocated exceeds the limit, we also try 460 * If the number of MRs allocated exceeds the limit, we also try
480 * to free as many MRs as needed to get back to this limit. 461 * to free as many MRs as needed to get back to this limit.
481 */ 462 */
482static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) 463static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
483{ 464{
484 struct rds_iw_mr *ibmr, *next; 465 struct rds_iw_mr *ibmr, *next;
485 LIST_HEAD(unmap_list); 466 LIST_HEAD(unmap_list);
486 LIST_HEAD(kill_list); 467 LIST_HEAD(kill_list);
487 unsigned long flags; 468 unsigned long flags;
488 unsigned int nfreed = 0, ncleaned = 0, unpinned = 0; 469 unsigned int nfreed = 0, ncleaned = 0, unpinned = 0;
489 int ret = 0;
490 470
491 rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); 471 rds_iw_stats_inc(s_iw_rdma_mr_pool_flush);
492 472
@@ -538,7 +518,6 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
538 atomic_sub(nfreed, &pool->item_count); 518 atomic_sub(nfreed, &pool->item_count);
539 519
540 mutex_unlock(&pool->flush_lock); 520 mutex_unlock(&pool->flush_lock);
541 return ret;
542} 521}
543 522
544static void rds_iw_mr_pool_flush_worker(struct work_struct *work) 523static void rds_iw_mr_pool_flush_worker(struct work_struct *work)
@@ -622,7 +601,7 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
622 ibmr->cm_id = cm_id; 601 ibmr->cm_id = cm_id;
623 ibmr->device = rds_iwdev; 602 ibmr->device = rds_iwdev;
624 603
625 ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents); 604 ret = rds_iw_map_reg(rds_iwdev->mr_pool, ibmr, sg, nents);
626 if (ret == 0) 605 if (ret == 0)
627 *key_ret = ibmr->mr->rkey; 606 *key_ret = ibmr->mr->rkey;
628 else 607 else
@@ -638,7 +617,7 @@ out:
638} 617}
639 618
640/* 619/*
641 * iWARP fastreg handling 620 * iWARP reg handling
642 * 621 *
643 * The life cycle of a fastreg registration is a bit different from 622 * The life cycle of a fastreg registration is a bit different from
644 * FMRs. 623 * FMRs.
@@ -650,7 +629,7 @@ out:
650 * This creates a bit of a problem for us, as we do not have the destination 629 * This creates a bit of a problem for us, as we do not have the destination
651 * IP in GET_MR, so the connection must be setup prior to the GET_MR call for 630 * IP in GET_MR, so the connection must be setup prior to the GET_MR call for
652 * RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit 631 * RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit
653 * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request 632 * will try to queue a LOCAL_INV (if needed) and a REG_MR work request
654 * before queuing the SEND. When completions for these arrive, they are 633 * before queuing the SEND. When completions for these arrive, they are
655 * dispatched to the MR has a bit set showing that RDMa can be performed. 634 * dispatched to the MR has a bit set showing that RDMa can be performed.
656 * 635 *
@@ -659,11 +638,10 @@ out:
659 * The expectation there is that this invalidation step includes ALL 638 * The expectation there is that this invalidation step includes ALL
660 * PREVIOUSLY FREED MRs. 639 * PREVIOUSLY FREED MRs.
661 */ 640 */
662static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, 641static int rds_iw_init_reg(struct rds_iw_mr_pool *pool,
663 struct rds_iw_mr *ibmr) 642 struct rds_iw_mr *ibmr)
664{ 643{
665 struct rds_iw_device *rds_iwdev = pool->device; 644 struct rds_iw_device *rds_iwdev = pool->device;
666 struct ib_fast_reg_page_list *page_list = NULL;
667 struct ib_mr *mr; 645 struct ib_mr *mr;
668 int err; 646 int err;
669 647
@@ -676,55 +654,44 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
676 return err; 654 return err;
677 } 655 }
678 656
679 /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages
680 * is not filled in.
681 */
682 page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size);
683 if (IS_ERR(page_list)) {
684 err = PTR_ERR(page_list);
685
686 printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err);
687 ib_dereg_mr(mr);
688 return err;
689 }
690
691 ibmr->page_list = page_list;
692 ibmr->mr = mr; 657 ibmr->mr = mr;
693 return 0; 658 return 0;
694} 659}
695 660
696static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) 661static int rds_iw_rdma_reg_mr(struct rds_iw_mapping *mapping)
697{ 662{
698 struct rds_iw_mr *ibmr = mapping->m_mr; 663 struct rds_iw_mr *ibmr = mapping->m_mr;
699 struct ib_send_wr f_wr, *failed_wr; 664 struct rds_iw_scatterlist *m_sg = &mapping->m_sg;
700 int ret; 665 struct ib_reg_wr reg_wr;
666 struct ib_send_wr *failed_wr;
667 int ret, n;
668
669 n = ib_map_mr_sg_zbva(ibmr->mr, m_sg->list, m_sg->len, PAGE_SIZE);
670 if (unlikely(n != m_sg->len))
671 return n < 0 ? n : -EINVAL;
672
673 reg_wr.wr.next = NULL;
674 reg_wr.wr.opcode = IB_WR_REG_MR;
675 reg_wr.wr.wr_id = RDS_IW_REG_WR_ID;
676 reg_wr.wr.num_sge = 0;
677 reg_wr.mr = ibmr->mr;
678 reg_wr.key = mapping->m_rkey;
679 reg_wr.access = IB_ACCESS_LOCAL_WRITE |
680 IB_ACCESS_REMOTE_READ |
681 IB_ACCESS_REMOTE_WRITE;
701 682
702 /* 683 /*
703 * Perform a WR for the fast_reg_mr. Each individual page 684 * Perform a WR for the reg_mr. Each individual page
704 * in the sg list is added to the fast reg page list and placed 685 * in the sg list is added to the fast reg page list and placed
705 * inside the fast_reg_mr WR. The key used is a rolling 8bit 686 * inside the reg_mr WR. The key used is a rolling 8bit
706 * counter, which should guarantee uniqueness. 687 * counter, which should guarantee uniqueness.
707 */ 688 */
708 ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++); 689 ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++);
709 mapping->m_rkey = ibmr->mr->rkey; 690 mapping->m_rkey = ibmr->mr->rkey;
710 691
711 memset(&f_wr, 0, sizeof(f_wr)); 692 failed_wr = &reg_wr.wr;
712 f_wr.wr_id = RDS_IW_FAST_REG_WR_ID; 693 ret = ib_post_send(ibmr->cm_id->qp, &reg_wr.wr, &failed_wr);
713 f_wr.opcode = IB_WR_FAST_REG_MR; 694 BUG_ON(failed_wr != &reg_wr.wr);
714 f_wr.wr.fast_reg.length = mapping->m_sg.bytes;
715 f_wr.wr.fast_reg.rkey = mapping->m_rkey;
716 f_wr.wr.fast_reg.page_list = ibmr->page_list;
717 f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len;
718 f_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
719 f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE |
720 IB_ACCESS_REMOTE_READ |
721 IB_ACCESS_REMOTE_WRITE;
722 f_wr.wr.fast_reg.iova_start = 0;
723 f_wr.send_flags = IB_SEND_SIGNALED;
724
725 failed_wr = &f_wr;
726 ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr);
727 BUG_ON(failed_wr != &f_wr);
728 if (ret) 695 if (ret)
729 printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", 696 printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
730 __func__, __LINE__, ret); 697 __func__, __LINE__, ret);
@@ -756,21 +723,20 @@ out:
756 return ret; 723 return ret;
757} 724}
758 725
759static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, 726static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
760 struct rds_iw_mr *ibmr, 727 struct rds_iw_mr *ibmr,
761 struct scatterlist *sg, 728 struct scatterlist *sg,
762 unsigned int sg_len) 729 unsigned int sg_len)
763{ 730{
764 struct rds_iw_device *rds_iwdev = pool->device; 731 struct rds_iw_device *rds_iwdev = pool->device;
765 struct rds_iw_mapping *mapping = &ibmr->mapping; 732 struct rds_iw_mapping *mapping = &ibmr->mapping;
766 u64 *dma_pages; 733 u64 *dma_pages;
767 int i, ret = 0; 734 int ret = 0;
768 735
769 rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len); 736 rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len);
770 737
771 dma_pages = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg); 738 ret = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
772 if (IS_ERR(dma_pages)) { 739 if (ret) {
773 ret = PTR_ERR(dma_pages);
774 dma_pages = NULL; 740 dma_pages = NULL;
775 goto out; 741 goto out;
776 } 742 }
@@ -780,10 +746,7 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
780 goto out; 746 goto out;
781 } 747 }
782 748
783 for (i = 0; i < mapping->m_sg.dma_npages; ++i) 749 ret = rds_iw_rdma_reg_mr(mapping);
784 ibmr->page_list->page_list[i] = dma_pages[i];
785
786 ret = rds_iw_rdma_build_fastreg(mapping);
787 if (ret) 750 if (ret)
788 goto out; 751 goto out;
789 752
@@ -869,8 +832,6 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
869static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, 832static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool,
870 struct rds_iw_mr *ibmr) 833 struct rds_iw_mr *ibmr)
871{ 834{
872 if (ibmr->page_list)
873 ib_free_fast_reg_page_list(ibmr->page_list);
874 if (ibmr->mr) 835 if (ibmr->mr)
875 ib_dereg_mr(ibmr->mr); 836 ib_dereg_mr(ibmr->mr);
876} 837}
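The iw_rdma.c conversion above drops the hand-built fast_reg page lists: ib_map_mr_sg_zbva() walks the scatterlist into the MR's own page table, and a single IB_WR_REG_MR work request then activates the mapping. A condensed sketch of that sequence, with error handling trimmed and the qp/mr/sg arguments standing in for the surrounding connection state:

	#include <rdma/ib_verbs.h>

	static int reg_mr_sketch(struct ib_qp *qp, struct ib_mr *mr,
				 struct scatterlist *sg, int nents)
	{
		struct ib_reg_wr reg_wr = { };
		struct ib_send_wr *bad_wr;
		int n;

		/* load the scatterlist into the MR's page table */
		n = ib_map_mr_sg_zbva(mr, sg, nents, PAGE_SIZE);
		if (n != nents)
			return n < 0 ? n : -EINVAL;

		/* one REG_MR WR replaces the old FAST_REG_MR + page list */
		reg_wr.wr.opcode = IB_WR_REG_MR;
		reg_wr.mr = mr;
		reg_wr.key = mr->rkey;
		reg_wr.access = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_READ |
				IB_ACCESS_REMOTE_WRITE;

		return ib_post_send(qp, &reg_wr.wr, &bad_wr);
	}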
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 86152ec3b887..e20bd503f4bd 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -137,13 +137,13 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
137 send->s_op = NULL; 137 send->s_op = NULL;
138 send->s_mapping = NULL; 138 send->s_mapping = NULL;
139 139
140 send->s_wr.next = NULL; 140 send->s_send_wr.next = NULL;
141 send->s_wr.wr_id = i; 141 send->s_send_wr.wr_id = i;
142 send->s_wr.sg_list = send->s_sge; 142 send->s_send_wr.sg_list = send->s_sge;
143 send->s_wr.num_sge = 1; 143 send->s_send_wr.num_sge = 1;
144 send->s_wr.opcode = IB_WR_SEND; 144 send->s_send_wr.opcode = IB_WR_SEND;
145 send->s_wr.send_flags = 0; 145 send->s_send_wr.send_flags = 0;
146 send->s_wr.ex.imm_data = 0; 146 send->s_send_wr.ex.imm_data = 0;
147 147
148 sge = rds_iw_data_sge(ic, send->s_sge); 148 sge = rds_iw_data_sge(ic, send->s_sge);
149 sge->lkey = 0; 149 sge->lkey = 0;
@@ -159,13 +159,6 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
159 printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n"); 159 printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n");
160 break; 160 break;
161 } 161 }
162
163 send->s_page_list = ib_alloc_fast_reg_page_list(
164 ic->i_cm_id->device, fastreg_message_size);
165 if (IS_ERR(send->s_page_list)) {
166 printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n");
167 break;
168 }
169 } 162 }
170} 163}
171 164
@@ -177,9 +170,7 @@ void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
177 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { 170 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
178 BUG_ON(!send->s_mr); 171 BUG_ON(!send->s_mr);
179 ib_dereg_mr(send->s_mr); 172 ib_dereg_mr(send->s_mr);
180 BUG_ON(!send->s_page_list); 173 if (send->s_send_wr.opcode == 0xdead)
181 ib_free_fast_reg_page_list(send->s_page_list);
182 if (send->s_wr.opcode == 0xdead)
183 continue; 174 continue;
184 if (send->s_rm) 175 if (send->s_rm)
185 rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); 176 rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
@@ -227,7 +218,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
227 continue; 218 continue;
228 } 219 }
229 220
230 if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) { 221 if (wc.opcode == IB_WC_REG_MR && wc.wr_id == RDS_IW_REG_WR_ID) {
231 ic->i_fastreg_posted = 1; 222 ic->i_fastreg_posted = 1;
232 continue; 223 continue;
233 } 224 }
@@ -247,12 +238,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
247 send = &ic->i_sends[oldest]; 238 send = &ic->i_sends[oldest];
248 239
249 /* In the error case, wc.opcode sometimes contains garbage */ 240 /* In the error case, wc.opcode sometimes contains garbage */
250 switch (send->s_wr.opcode) { 241 switch (send->s_send_wr.opcode) {
251 case IB_WR_SEND: 242 case IB_WR_SEND:
252 if (send->s_rm) 243 if (send->s_rm)
253 rds_iw_send_unmap_rm(ic, send, wc.status); 244 rds_iw_send_unmap_rm(ic, send, wc.status);
254 break; 245 break;
255 case IB_WR_FAST_REG_MR: 246 case IB_WR_REG_MR:
256 case IB_WR_RDMA_WRITE: 247 case IB_WR_RDMA_WRITE:
257 case IB_WR_RDMA_READ: 248 case IB_WR_RDMA_READ:
258 case IB_WR_RDMA_READ_WITH_INV: 249 case IB_WR_RDMA_READ_WITH_INV:
@@ -262,12 +253,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
262 default: 253 default:
263 printk_ratelimited(KERN_NOTICE 254 printk_ratelimited(KERN_NOTICE
264 "RDS/IW: %s: unexpected opcode 0x%x in WR!\n", 255 "RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
265 __func__, send->s_wr.opcode); 256 __func__, send->s_send_wr.opcode);
266 break; 257 break;
267 } 258 }
268 259
269 send->s_wr.opcode = 0xdead; 260 send->s_send_wr.opcode = 0xdead;
270 send->s_wr.num_sge = 1; 261 send->s_send_wr.num_sge = 1;
271 if (time_after(jiffies, send->s_queued + HZ/2)) 262 if (time_after(jiffies, send->s_queued + HZ/2))
272 rds_iw_stats_inc(s_iw_tx_stalled); 263 rds_iw_stats_inc(s_iw_tx_stalled);
273 264
@@ -455,10 +446,10 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
455 446
456 WARN_ON(pos != send - ic->i_sends); 447 WARN_ON(pos != send - ic->i_sends);
457 448
458 send->s_wr.send_flags = send_flags; 449 send->s_send_wr.send_flags = send_flags;
459 send->s_wr.opcode = IB_WR_SEND; 450 send->s_send_wr.opcode = IB_WR_SEND;
460 send->s_wr.num_sge = 2; 451 send->s_send_wr.num_sge = 2;
461 send->s_wr.next = NULL; 452 send->s_send_wr.next = NULL;
462 send->s_queued = jiffies; 453 send->s_queued = jiffies;
463 send->s_op = NULL; 454 send->s_op = NULL;
464 455
@@ -472,7 +463,7 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
472 } else { 463 } else {
473 /* We're sending a packet with no payload. There is only 464 /* We're sending a packet with no payload. There is only
474 * one SGE */ 465 * one SGE */
475 send->s_wr.num_sge = 1; 466 send->s_send_wr.num_sge = 1;
476 sge = &send->s_sge[0]; 467 sge = &send->s_sge[0];
477 } 468 }
478 469
@@ -672,23 +663,23 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
672 */ 663 */
673 if (ic->i_unsignaled_wrs-- == 0) { 664 if (ic->i_unsignaled_wrs-- == 0) {
674 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; 665 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
675 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 666 send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
676 } 667 }
677 668
678 ic->i_unsignaled_bytes -= len; 669 ic->i_unsignaled_bytes -= len;
679 if (ic->i_unsignaled_bytes <= 0) { 670 if (ic->i_unsignaled_bytes <= 0) {
680 ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes; 671 ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
681 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 672 send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
682 } 673 }
683 674
684 /* 675 /*
685 * Always signal the last one if we're stopping due to flow control. 676 * Always signal the last one if we're stopping due to flow control.
686 */ 677 */
687 if (flow_controlled && i == (work_alloc-1)) 678 if (flow_controlled && i == (work_alloc-1))
688 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 679 send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
689 680
690 rdsdebug("send %p wr %p num_sge %u next %p\n", send, 681 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
691 &send->s_wr, send->s_wr.num_sge, send->s_wr.next); 682 &send->s_send_wr, send->s_send_wr.num_sge, send->s_send_wr.next);
692 683
693 sent += len; 684 sent += len;
694 rm->data.op_dmaoff += len; 685 rm->data.op_dmaoff += len;
@@ -722,7 +713,7 @@ add_header:
722 } 713 }
723 714
724 if (prev) 715 if (prev)
725 prev->s_wr.next = &send->s_wr; 716 prev->s_send_wr.next = &send->s_send_wr;
726 prev = send; 717 prev = send;
727 718
728 pos = (pos + 1) % ic->i_send_ring.w_nr; 719 pos = (pos + 1) % ic->i_send_ring.w_nr;
@@ -736,7 +727,7 @@ add_header:
736 /* if we finished the message then send completion owns it */ 727 /* if we finished the message then send completion owns it */
737 if (scat == &rm->data.op_sg[rm->data.op_count]) { 728 if (scat == &rm->data.op_sg[rm->data.op_count]) {
738 prev->s_rm = ic->i_rm; 729 prev->s_rm = ic->i_rm;
739 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 730 prev->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
740 ic->i_rm = NULL; 731 ic->i_rm = NULL;
741 } 732 }
742 733
@@ -748,11 +739,11 @@ add_header:
748 rds_iw_send_add_credits(conn, credit_alloc - i); 739 rds_iw_send_add_credits(conn, credit_alloc - i);
749 740
750 /* XXX need to worry about failed_wr and partial sends. */ 741 /* XXX need to worry about failed_wr and partial sends. */
751 failed_wr = &first->s_wr; 742 failed_wr = &first->s_send_wr;
752 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 743 ret = ib_post_send(ic->i_cm_id->qp, &first->s_send_wr, &failed_wr);
753 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, 744 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
754 first, &first->s_wr, ret, failed_wr); 745 first, &first->s_send_wr, ret, failed_wr);
755 BUG_ON(failed_wr != &first->s_wr); 746 BUG_ON(failed_wr != &first->s_send_wr);
756 if (ret) { 747 if (ret) {
757 printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 " 748 printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
758 "returned %d\n", &conn->c_faddr, ret); 749 "returned %d\n", &conn->c_faddr, ret);
@@ -770,24 +761,26 @@ out:
770 return ret; 761 return ret;
771} 762}
772 763
773static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr) 764static int rds_iw_build_send_reg(struct rds_iw_send_work *send,
765 struct scatterlist *sg,
766 int sg_nents)
774{ 767{
775 BUG_ON(nent > send->s_page_list->max_page_list_len); 768 int n;
776 /* 769
777 * Perform a WR for the fast_reg_mr. Each individual page 770 n = ib_map_mr_sg(send->s_mr, sg, sg_nents, PAGE_SIZE);
778 * in the sg list is added to the fast reg page list and placed 771 if (unlikely(n != sg_nents))
779 * inside the fast_reg_mr WR. 772 return n < 0 ? n : -EINVAL;
780 */ 773
781 send->s_wr.opcode = IB_WR_FAST_REG_MR; 774 send->s_reg_wr.wr.opcode = IB_WR_REG_MR;
782 send->s_wr.wr.fast_reg.length = len; 775 send->s_reg_wr.wr.wr_id = 0;
783 send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey; 776 send->s_reg_wr.wr.num_sge = 0;
784 send->s_wr.wr.fast_reg.page_list = send->s_page_list; 777 send->s_reg_wr.mr = send->s_mr;
785 send->s_wr.wr.fast_reg.page_list_len = nent; 778 send->s_reg_wr.key = send->s_mr->rkey;
786 send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 779 send->s_reg_wr.access = IB_ACCESS_REMOTE_WRITE;
787 send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE;
788 send->s_wr.wr.fast_reg.iova_start = sg_addr;
789 780
790 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); 781 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
782
783 return 0;
791} 784}
792 785
793int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) 786int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
@@ -808,6 +801,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
808 int sent; 801 int sent;
809 int ret; 802 int ret;
810 int num_sge; 803 int num_sge;
804 int sg_nents;
811 805
812 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); 806 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
813 807
@@ -861,9 +855,10 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
861 scat = &op->op_sg[0]; 855 scat = &op->op_sg[0];
862 sent = 0; 856 sent = 0;
863 num_sge = op->op_count; 857 num_sge = op->op_count;
858 sg_nents = 0;
864 859
865 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) { 860 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
866 send->s_wr.send_flags = 0; 861 send->s_rdma_wr.wr.send_flags = 0;
867 send->s_queued = jiffies; 862 send->s_queued = jiffies;
868 863
869 /* 864 /*
@@ -872,7 +867,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
872 */ 867 */
873 if (ic->i_unsignaled_wrs-- == 0) { 868 if (ic->i_unsignaled_wrs-- == 0) {
874 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; 869 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
875 send->s_wr.send_flags = IB_SEND_SIGNALED; 870 send->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
876 } 871 }
877 872
878 /* To avoid the need to have the plumbing to invalidate the fastreg_mr used 873 /* To avoid the need to have the plumbing to invalidate the fastreg_mr used
@@ -880,30 +875,31 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
880 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed. 875 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
881 */ 876 */
882 if (op->op_write) 877 if (op->op_write)
883 send->s_wr.opcode = IB_WR_RDMA_WRITE; 878 send->s_rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
884 else 879 else
885 send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV; 880 send->s_rdma_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
886 881
887 send->s_wr.wr.rdma.remote_addr = remote_addr; 882 send->s_rdma_wr.remote_addr = remote_addr;
888 send->s_wr.wr.rdma.rkey = op->op_rkey; 883 send->s_rdma_wr.rkey = op->op_rkey;
889 send->s_op = op; 884 send->s_op = op;
890 885
891 if (num_sge > rds_iwdev->max_sge) { 886 if (num_sge > rds_iwdev->max_sge) {
892 send->s_wr.num_sge = rds_iwdev->max_sge; 887 send->s_rdma_wr.wr.num_sge = rds_iwdev->max_sge;
893 num_sge -= rds_iwdev->max_sge; 888 num_sge -= rds_iwdev->max_sge;
894 } else 889 } else
895 send->s_wr.num_sge = num_sge; 890 send->s_rdma_wr.wr.num_sge = num_sge;
896 891
897 send->s_wr.next = NULL; 892 send->s_rdma_wr.wr.next = NULL;
898 893
899 if (prev) 894 if (prev)
900 prev->s_wr.next = &send->s_wr; 895 prev->s_send_wr.next = &send->s_rdma_wr.wr;
901 896
902 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) { 897 for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
898 scat != &op->op_sg[op->op_count]; j++) {
903 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 899 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
904 900
905 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) 901 if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV)
906 send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat); 902 sg_nents++;
907 else { 903 else {
908 send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); 904 send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
909 send->s_sge[j].length = len; 905 send->s_sge[j].length = len;
@@ -917,15 +913,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
917 scat++; 913 scat++;
918 } 914 }
919 915
920 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) { 916 if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
921 send->s_wr.num_sge = 1; 917 send->s_rdma_wr.wr.num_sge = 1;
922 send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr; 918 send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
923 send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes; 919 send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
924 send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey; 920 send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
925 } 921 }
926 922
927 rdsdebug("send %p wr %p num_sge %u next %p\n", send, 923 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
928 &send->s_wr, send->s_wr.num_sge, send->s_wr.next); 924 &send->s_rdma_wr,
925 send->s_rdma_wr.wr.num_sge,
926 send->s_rdma_wr.wr.next);
929 927
930 prev = send; 928 prev = send;
931 if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) 929 if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@@ -934,7 +932,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
934 932
935 /* if we finished the message then send completion owns it */ 933 /* if we finished the message then send completion owns it */
936 if (scat == &op->op_sg[op->op_count]) 934 if (scat == &op->op_sg[op->op_count])
937 first->s_wr.send_flags = IB_SEND_SIGNALED; 935 first->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
938 936
939 if (i < work_alloc) { 937 if (i < work_alloc) {
940 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i); 938 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
@@ -948,16 +946,20 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
948 * fastreg_mr (or possibly a dma_mr) 946 * fastreg_mr (or possibly a dma_mr)
949 */ 947 */
950 if (!op->op_write) { 948 if (!op->op_write) {
951 rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos], 949 ret = rds_iw_build_send_reg(&ic->i_sends[fr_pos],
952 op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr); 950 &op->op_sg[0], sg_nents);
951 if (ret) {
952 printk(KERN_WARNING "RDS/IW: failed to reg send mem\n");
953 goto out;
954 }
953 work_alloc++; 955 work_alloc++;
954 } 956 }
955 957
956 failed_wr = &first->s_wr; 958 failed_wr = &first->s_rdma_wr.wr;
957 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 959 ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
958 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, 960 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
959 first, &first->s_wr, ret, failed_wr); 961 first, &first->s_rdma_wr, ret, failed_wr);
960 BUG_ON(failed_wr != &first->s_wr); 962 BUG_ON(failed_wr != &first->s_rdma_wr.wr);
961 if (ret) { 963 if (ret) {
962 printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 " 964 printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
963 "returned %d\n", &conn->c_faddr, ret); 965 "returned %d\n", &conn->c_faddr, ret);
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index b9b40af5345b..9c1fed81bf0f 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -142,8 +142,8 @@ static int rds_rdma_listen_init(void)
142 struct rdma_cm_id *cm_id; 142 struct rdma_cm_id *cm_id;
143 int ret; 143 int ret;
144 144
145 cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, 145 cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL,
146 IB_QPT_RC); 146 RDMA_PS_TCP, IB_QPT_RC);
147 if (IS_ERR(cm_id)) { 147 if (IS_ERR(cm_id)) {
148 ret = PTR_ERR(cm_id); 148 ret = PTR_ERR(cm_id);
149 printk(KERN_ERR "RDS/RDMA: failed to setup listener, " 149 printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
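
Note: rdma_create_id() grew a network-namespace argument in this series; RDS passes &init_net because its listener is global rather than per-namespace. A small sketch of the updated call shape (the helper name here is hypothetical):

#include <rdma/rdma_cm.h>

/* Sketch: the netns is now the first argument to rdma_create_id(). */
static int listen_cm_id_smoke_test(void)
{
	struct rdma_cm_id *cm_id;

	cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler,
			       NULL /* context */, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(cm_id))
		return PTR_ERR(cm_id);
	rdma_destroy_id(cm_id);
	return 0;
}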
diff --git a/net/rds/rds.h b/net/rds/rds.h
index afb4048d0cfd..0e2797bdc316 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -7,6 +7,7 @@
7#include <rdma/rdma_cm.h> 7#include <rdma/rdma_cm.h>
8#include <linux/mutex.h> 8#include <linux/mutex.h>
9#include <linux/rds.h> 9#include <linux/rds.h>
10#include <linux/rhashtable.h>
10 11
11#include "info.h" 12#include "info.h"
12 13
@@ -86,7 +87,9 @@ struct rds_connection {
86 struct hlist_node c_hash_node; 87 struct hlist_node c_hash_node;
87 __be32 c_laddr; 88 __be32 c_laddr;
88 __be32 c_faddr; 89 __be32 c_faddr;
89 unsigned int c_loopback:1; 90 unsigned int c_loopback:1,
91 c_outgoing:1,
92 c_pad_to_32:30;
90 struct rds_connection *c_passive; 93 struct rds_connection *c_passive;
91 94
92 struct rds_cong_map *c_lcong; 95 struct rds_cong_map *c_lcong;
@@ -472,7 +475,8 @@ struct rds_sock {
472 * bound_addr used for both incoming and outgoing, no INADDR_ANY 475 * bound_addr used for both incoming and outgoing, no INADDR_ANY
473 * support. 476 * support.
474 */ 477 */
475 struct hlist_node rs_bound_node; 478 struct rhash_head rs_bound_node;
479 u64 rs_bound_key;
476 __be32 rs_bound_addr; 480 __be32 rs_bound_addr;
477 __be32 rs_conn_addr; 481 __be32 rs_conn_addr;
478 __be16 rs_bound_port; 482 __be16 rs_bound_port;
@@ -603,6 +607,8 @@ extern wait_queue_head_t rds_poll_waitq;
603int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); 607int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
604void rds_remove_bound(struct rds_sock *rs); 608void rds_remove_bound(struct rds_sock *rs);
605struct rds_sock *rds_find_bound(__be32 addr, __be16 port); 609struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
610int rds_bind_lock_init(void);
611void rds_bind_lock_destroy(void);
606 612
607/* cong.c */ 613/* cong.c */
608int rds_cong_get_maps(struct rds_connection *conn); 614int rds_cong_get_maps(struct rds_connection *conn);
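
Note: rs_bound_node changes from an hlist_node to an rhash_head and rs_bound_key caches a 64-bit address/port key, moving the bind table from a fixed hash onto a resizable rhashtable (hence the new rds_bind_lock_init/destroy hooks). A sketch of how such a table could be wired up; the parameter values and the exact key packing are assumptions for illustration, not the RDS code itself:

#include <linux/rhashtable.h>

static struct rhashtable bind_hash_table;

static const struct rhashtable_params ht_parms = {
	.nelem_hint = 768,	/* hypothetical sizing hint */
	.key_len = sizeof(u64),
	.key_offset = offsetof(struct rds_sock, rs_bound_key),
	.head_offset = offsetof(struct rds_sock, rs_bound_node),
};

/* pack address and port into the single u64 lookup key */
static u64 rds_bound_key(__be32 addr, __be16 port)
{
	return ((u64)addr << 32) | port;
}

static struct rds_sock *lookup_bound(__be32 addr, __be16 port)
{
	u64 key = rds_bound_key(addr, port);

	return rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms);
}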
diff --git a/net/rds/send.c b/net/rds/send.c
index 4df61a515b83..827155c2ead1 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -38,6 +38,7 @@
38#include <linux/list.h> 38#include <linux/list.h>
39#include <linux/ratelimit.h> 39#include <linux/ratelimit.h>
40#include <linux/export.h> 40#include <linux/export.h>
41#include <linux/sizes.h>
41 42
42#include "rds.h" 43#include "rds.h"
43 44
@@ -51,7 +52,7 @@
51 * it to 0 will restore the old behavior (where we looped until we had 52 * it to 0 will restore the old behavior (where we looped until we had
52 * drained the queue). 53 * drained the queue).
53 */ 54 */
54static int send_batch_count = 64; 55static int send_batch_count = SZ_1K;
55module_param(send_batch_count, int, 0444); 56module_param(send_batch_count, int, 0444);
56MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue"); 57MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue");
57 58
@@ -223,7 +224,7 @@ restart:
223 * through a lot of messages, lets back off and see 224 * through a lot of messages, lets back off and see
224 * if anyone else jumps in 225 * if anyone else jumps in
225 */ 226 */
226 if (batch_count >= 1024) 227 if (batch_count >= send_batch_count)
227 goto over_batch; 228 goto over_batch;
228 229
229 spin_lock_irqsave(&conn->c_lock, flags); 230 spin_lock_irqsave(&conn->c_lock, flags);
@@ -423,12 +424,15 @@ over_batch:
423 !list_empty(&conn->c_send_queue)) && 424 !list_empty(&conn->c_send_queue)) &&
424 send_gen == conn->c_send_gen) { 425 send_gen == conn->c_send_gen) {
425 rds_stats_inc(s_send_lock_queue_raced); 426 rds_stats_inc(s_send_lock_queue_raced);
426 goto restart; 427 if (batch_count < send_batch_count)
428 goto restart;
429 queue_delayed_work(rds_wq, &conn->c_send_w, 1);
427 } 430 }
428 } 431 }
429out: 432out:
430 return ret; 433 return ret;
431} 434}
435EXPORT_SYMBOL_GPL(rds_send_xmit);
432 436
433static void rds_send_sndbuf_remove(struct rds_sock *rs, struct rds_message *rm) 437static void rds_send_sndbuf_remove(struct rds_sock *rs, struct rds_message *rm)
434{ 438{
@@ -1120,8 +1124,9 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
1120 */ 1124 */
1121 rds_stats_inc(s_send_queued); 1125 rds_stats_inc(s_send_queued);
1122 1126
1123 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) 1127 ret = rds_send_xmit(conn);
1124 rds_send_xmit(conn); 1128 if (ret == -ENOMEM || ret == -EAGAIN)
1129 queue_delayed_work(rds_wq, &conn->c_send_w, 1);
1125 1130
1126 rds_message_put(rm); 1131 rds_message_put(rm);
1127 return payload_len; 1132 return payload_len;
@@ -1177,8 +1182,8 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
1177 rds_stats_inc(s_send_queued); 1182 rds_stats_inc(s_send_queued);
1178 rds_stats_inc(s_send_pong); 1183 rds_stats_inc(s_send_pong);
1179 1184
1180 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) 1185 /* schedule the send work on rds_wq */
1181 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 1186 queue_delayed_work(rds_wq, &conn->c_send_w, 1);
1182 1187
1183 rds_message_put(rm); 1188 rds_message_put(rm);
1184 return 0; 1189 return 0;
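
Note: three related changes reshape the send path here: send_batch_count becomes the actual restart bound (raised to SZ_1K instead of the hard-coded 1024), rds_sendmsg() now calls rds_send_xmit() unconditionally and falls back to the workqueue only on -ENOMEM/-EAGAIN, and rds_send_pong() always schedules the send worker. The effect is that a sender which keeps losing the c_send_lock race defers to the worker instead of spinning. A sketch of the bounded-restart idea, with the queue/generation re-check abstracted into a hypothetical helper:

/* Sketch only: lost_race() stands in for the c_send_queue/c_send_gen
 * re-check in rds_send_xmit() above. Retry inline while under the
 * batch budget; past it, hand off so one caller cannot hog the CPU.
 */
static int xmit_bounded(struct rds_connection *conn)
{
	int batch_count = 0;

restart:
	batch_count++;
	/* ... drain part of the send queue ... */

	if (lost_race(conn)) {
		if (batch_count < send_batch_count)
			goto restart;	/* cheap: retry inline */
		queue_delayed_work(rds_wq, &conn->c_send_w, 1);
	}
	return 0;
}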
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index c42b60bf4c68..9d6ddbacd875 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -67,21 +67,13 @@ void rds_tcp_nonagle(struct socket *sock)
67 set_fs(oldfs); 67 set_fs(oldfs);
68} 68}
69 69
70/* All module specific customizations to the RDS-TCP socket should be done in
71 * rds_tcp_tune() and applied after socket creation. In general these
72 * customizations should be tunable via module_param()
73 */
70void rds_tcp_tune(struct socket *sock) 74void rds_tcp_tune(struct socket *sock)
71{ 75{
72 struct sock *sk = sock->sk;
73
74 rds_tcp_nonagle(sock); 76 rds_tcp_nonagle(sock);
75
76 /*
77 * We're trying to saturate gigabit with the default,
78 * see svc_sock_setbufsize().
79 */
80 lock_sock(sk);
81 sk->sk_sndbuf = RDS_TCP_DEFAULT_BUFSIZE;
82 sk->sk_rcvbuf = RDS_TCP_DEFAULT_BUFSIZE;
83 sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
84 release_sock(sk);
85} 77}
86 78
87u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) 79u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
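
Note: rds_tcp_tune() no longer pins sk_sndbuf/sk_rcvbuf to RDS_TCP_DEFAULT_BUFSIZE, so TCP's buffer autotuning applies again; per the new comment, any socket customization should arrive as a module parameter. A hedged sketch of what such a tunable might look like (the parameter name and helper are assumptions, not part of this series, which only removes the hard-coded sizes):

#include <linux/module.h>
#include <net/sock.h>

static int rds_tcp_sndbuf;	/* hypothetical tunable; 0 = autotune */
module_param(rds_tcp_sndbuf, int, 0444);
MODULE_PARM_DESC(rds_tcp_sndbuf, "override SO_SNDBUF on RDS-TCP sockets");

static void tune_sndbuf(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!rds_tcp_sndbuf)
		return;		/* leave the kernel's autotuning alone */

	lock_sock(sk);
	sk->sk_sndbuf = rds_tcp_sndbuf;
	sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	release_sock(sk);
}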
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 444d78d0bd77..0936a4a32b47 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -110,28 +110,27 @@ int rds_tcp_accept_one(struct socket *sock)
110 goto out; 110 goto out;
111 } 111 }
112 /* An incoming SYN request came in, and TCP just accepted it. 112 /* An incoming SYN request came in, and TCP just accepted it.
113 * We always create a new conn for listen side of TCP, and do not
114 * add it to the c_hash_list.
115 * 113 *
116 * If the client reboots, this conn will need to be cleaned up. 114 * If the client reboots, this conn will need to be cleaned up.
117 * rds_tcp_state_change() will do that cleanup 115 * rds_tcp_state_change() will do that cleanup
118 */ 116 */
119 rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; 117 rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
120 WARN_ON(!rs_tcp || rs_tcp->t_sock); 118 if (rs_tcp->t_sock &&
121 119 ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
122 /* 120 struct sock *nsk = new_sock->sk;
123 * see the comment above rds_queue_delayed_reconnect() 121
124 */ 122 nsk->sk_user_data = NULL;
125 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { 123 nsk->sk_prot->disconnect(nsk, 0);
126 if (rds_conn_state(conn) == RDS_CONN_UP) 124 tcp_done(nsk);
127 rds_tcp_stats_inc(s_tcp_listen_closed_stale); 125 new_sock = NULL;
128 else
129 rds_tcp_stats_inc(s_tcp_connect_raced);
130 rds_conn_drop(conn);
131 ret = 0; 126 ret = 0;
132 goto out; 127 goto out;
128 } else if (rs_tcp->t_sock) {
129 rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
130 conn->c_outgoing = 0;
133 } 131 }
134 132
133 rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
135 rds_tcp_set_callbacks(new_sock, conn); 134 rds_tcp_set_callbacks(new_sock, conn);
136 rds_connect_complete(conn); 135 rds_connect_complete(conn);
137 new_sock = NULL; 136 new_sock = NULL;
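
Note: the accept path now arbitrates dueling connection attempts instead of warning and dropping the conn. Since both peers can SYN at the same time, the IP address comparison picks one deterministic winner, as in this annotated sketch of the rule above:

/* Sketch of the tie-break in rds_tcp_accept_one() above. 'inet' is
 * inet_sk() of the freshly accepted socket, so inet_saddr is the
 * local address and inet_daddr the peer's.
 */
if (rs_tcp->t_sock &&
    ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
	/* smaller side: keep our outgoing socket, tear down the
	 * accepted one (clear sk_user_data, disconnect, tcp_done) */
} else if (rs_tcp->t_sock) {
	/* larger side: restore callbacks on the old socket, mark the
	 * conn as no longer client-initiated (c_outgoing = 0) and
	 * adopt the accepted socket */
}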
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index fbc5ef88bc0e..27a992154804 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -214,8 +214,15 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
214 } 214 }
215 215
216 to_copy = min(tc->t_tinc_data_rem, left); 216 to_copy = min(tc->t_tinc_data_rem, left);
217 pskb_pull(clone, offset); 217 if (!pskb_pull(clone, offset) ||
218 pskb_trim(clone, to_copy); 218 pskb_trim(clone, to_copy)) {
219 pr_warn("rds_tcp_data_recv: pull/trim failed "
220 "left %zu data_rem %zu skb_len %d\n",
221 left, tc->t_tinc_data_rem, skb->len);
222 kfree_skb(clone);
223 desc->error = -ENOMEM;
224 goto out;
225 }
219 skb_queue_tail(&tinc->ti_skb_list, clone); 226 skb_queue_tail(&tinc->ti_skb_list, clone);
220 227
221 rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " 228 rdsdebug("skb %p data %p len %d off %u to_copy %zu -> "
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 53b17ca0dff5..2894e6095e3b 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -83,6 +83,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
83 struct rds_tcp_connection *tc = conn->c_transport_data; 83 struct rds_tcp_connection *tc = conn->c_transport_data;
84 int done = 0; 84 int done = 0;
85 int ret = 0; 85 int ret = 0;
86 int more;
86 87
87 if (hdr_off == 0) { 88 if (hdr_off == 0) {
88 /* 89 /*
@@ -116,12 +117,15 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
116 goto out; 117 goto out;
117 } 118 }
118 119
120 more = rm->data.op_nents > 1 ? (MSG_MORE | MSG_SENDPAGE_NOTLAST) : 0;
119 while (sg < rm->data.op_nents) { 121 while (sg < rm->data.op_nents) {
122 int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;
123
120 ret = tc->t_sock->ops->sendpage(tc->t_sock, 124 ret = tc->t_sock->ops->sendpage(tc->t_sock,
121 sg_page(&rm->data.op_sg[sg]), 125 sg_page(&rm->data.op_sg[sg]),
122 rm->data.op_sg[sg].offset + off, 126 rm->data.op_sg[sg].offset + off,
123 rm->data.op_sg[sg].length - off, 127 rm->data.op_sg[sg].length - off,
124 MSG_DONTWAIT|MSG_NOSIGNAL); 128 flags);
125 rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]), 129 rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]),
126 rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off, 130 rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off,
127 ret); 131 ret);
@@ -134,6 +138,8 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
134 off = 0; 138 off = 0;
135 sg++; 139 sg++;
136 } 140 }
141 if (sg == rm->data.op_nents - 1)
142 more = 0;
137 } 143 }
138 144
139out: 145out:
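
Note: multi-entry messages now pass MSG_MORE | MSG_SENDPAGE_NOTLAST on every page except the final one, letting TCP coalesce the pages into fewer segments instead of pushing one segment per sendpage call. A condensed sketch of the flag handling, assuming 'sock', 'sg', 'nents' and 'ret' from the caller:

/* Sketch: keep MORE/NOTLAST set until the final scatterlist entry,
 * then clear them so TCP pushes the assembled data.
 */
int more = (nents > 1) ? (MSG_MORE | MSG_SENDPAGE_NOTLAST) : 0;
int i;

for (i = 0; i < nents; i++) {
	int flags;

	if (i == nents - 1)
		more = 0;	/* last page: let TCP push */
	flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;

	ret = sock->ops->sendpage(sock, sg_page(&sg[i]),
				  sg[i].offset, sg[i].length, flags);
	if (ret <= 0)
		break;
}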
diff --git a/net/rds/threads.c b/net/rds/threads.c
index dc2402e871fd..454aa6d23327 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -162,7 +162,9 @@ void rds_send_worker(struct work_struct *work)
162 int ret; 162 int ret;
163 163
164 if (rds_conn_state(conn) == RDS_CONN_UP) { 164 if (rds_conn_state(conn) == RDS_CONN_UP) {
165 clear_bit(RDS_LL_SEND_FULL, &conn->c_flags);
165 ret = rds_send_xmit(conn); 166 ret = rds_send_xmit(conn);
167 cond_resched();
166 rdsdebug("conn %p ret %d\n", conn, ret); 168 rdsdebug("conn %p ret %d\n", conn, ret);
167 switch (ret) { 169 switch (ret) {
168 case -EAGAIN: 170 case -EAGAIN:
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 25d60ed15284..1f8a144a5dc2 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -305,7 +305,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
305 305
306 if (!key) 306 if (!key)
307 key = rx->key; 307 key = rx->key;
308 if (key && !key->payload.data) 308 if (key && !key->payload.data[0])
309 key = NULL; /* a no-security key */ 309 key = NULL; /* a no-security key */
310 310
311 bundle = rxrpc_get_bundle(rx, trans, key, service_id, gfp); 311 bundle = rxrpc_get_bundle(rx, trans, key, service_id, gfp);
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 6631f4f1e39b..6c71ed1caf16 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -500,7 +500,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
500 if (bundle->num_conns >= 20) { 500 if (bundle->num_conns >= 20) {
501 _debug("too many conns"); 501 _debug("too many conns");
502 502
503 if (!(gfp & __GFP_WAIT)) { 503 if (!gfpflags_allow_blocking(gfp)) {
504 _leave(" = -EAGAIN"); 504 _leave(" = -EAGAIN");
505 return -EAGAIN; 505 return -EAGAIN;
506 } 506 }
@@ -808,7 +808,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
808 808
809 ASSERTCMP(atomic_read(&conn->usage), >, 0); 809 ASSERTCMP(atomic_read(&conn->usage), >, 0);
810 810
811 conn->put_time = get_seconds(); 811 conn->put_time = ktime_get_seconds();
812 if (atomic_dec_and_test(&conn->usage)) { 812 if (atomic_dec_and_test(&conn->usage)) {
813 _debug("zombie"); 813 _debug("zombie");
814 rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); 814 rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
@@ -852,7 +852,7 @@ static void rxrpc_connection_reaper(struct work_struct *work)
852 852
853 _enter(""); 853 _enter("");
854 854
855 now = get_seconds(); 855 now = ktime_get_seconds();
856 earliest = ULONG_MAX; 856 earliest = ULONG_MAX;
857 857
858 write_lock_bh(&rxrpc_connection_lock); 858 write_lock_bh(&rxrpc_connection_lock);
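
Note: two independent fixes land in ar-connection.c. The reap timestamps move from get_seconds() (wall clock, time_t) to ktime_get_seconds() (monotonic), so settimeofday()/NTP steps can no longer rush or stall the reaper; and the blocking test becomes gfpflags_allow_blocking() since __GFP_WAIT was removed from the gfp API. A small sketch of the timestamp idiom, with an assumed grace period:

#include <linux/timekeeping.h>

/* Sketch: monotonic reap deadlines. The wall clock can jump; the
 * monotonic clock cannot, so the grace period is a true interval.
 */
static bool reap_due(unsigned long put_time)
{
	unsigned long grace = 60;	/* hypothetical 60s grace */

	return (unsigned long)ktime_get_seconds() >= put_time + grace;
}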
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index aef1bd294e17..2934a73a5981 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -208,7 +208,7 @@ struct rxrpc_transport {
208 struct rb_root server_conns; /* server connections on this transport */ 208 struct rb_root server_conns; /* server connections on this transport */
209 struct list_head link; /* link in master session list */ 209 struct list_head link; /* link in master session list */
210 struct sk_buff_head error_queue; /* error packets awaiting processing */ 210 struct sk_buff_head error_queue; /* error packets awaiting processing */
211 time_t put_time; /* time at which to reap */ 211 unsigned long put_time; /* time at which to reap */
212 spinlock_t client_lock; /* client connection allocation lock */ 212 spinlock_t client_lock; /* client connection allocation lock */
213 rwlock_t conn_lock; /* lock for active/dead connections */ 213 rwlock_t conn_lock; /* lock for active/dead connections */
214 atomic_t usage; 214 atomic_t usage;
@@ -256,7 +256,7 @@ struct rxrpc_connection {
256 struct rxrpc_crypt csum_iv; /* packet checksum base */ 256 struct rxrpc_crypt csum_iv; /* packet checksum base */
257 unsigned long events; 257 unsigned long events;
258#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */ 258#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */
259 time_t put_time; /* time at which to reap */ 259 unsigned long put_time; /* time at which to reap */
260 rwlock_t lock; /* access lock */ 260 rwlock_t lock; /* access lock */
261 spinlock_t state_lock; /* state-change lock */ 261 spinlock_t state_lock; /* state-change lock */
262 atomic_t usage; 262 atomic_t usage;
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index db0f39f5ef96..da3cc09f683e 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -148,10 +148,10 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep,
148 token->kad->ticket[6], token->kad->ticket[7]); 148 token->kad->ticket[6], token->kad->ticket[7]);
149 149
150 /* count the number of tokens attached */ 150 /* count the number of tokens attached */
151 prep->type_data[0] = (void *)((unsigned long)prep->type_data[0] + 1); 151 prep->payload.data[1] = (void *)((unsigned long)prep->payload.data[1] + 1);
152 152
153 /* attach the data */ 153 /* attach the data */
154 for (pptoken = (struct rxrpc_key_token **)&prep->payload[0]; 154 for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0];
155 *pptoken; 155 *pptoken;
156 pptoken = &(*pptoken)->next) 156 pptoken = &(*pptoken)->next)
157 continue; 157 continue;
@@ -522,7 +522,7 @@ static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep,
522 goto inval; 522 goto inval;
523 523
524 /* attach the payload */ 524 /* attach the payload */
525 for (pptoken = (struct rxrpc_key_token **)&prep->payload[0]; 525 for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0];
526 *pptoken; 526 *pptoken;
527 pptoken = &(*pptoken)->next) 527 pptoken = &(*pptoken)->next)
528 continue; 528 continue;
@@ -764,10 +764,10 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep)
764 memcpy(&token->kad->ticket, v1->ticket, v1->ticket_length); 764 memcpy(&token->kad->ticket, v1->ticket, v1->ticket_length);
765 765
766 /* count the number of tokens attached */ 766 /* count the number of tokens attached */
767 prep->type_data[0] = (void *)((unsigned long)prep->type_data[0] + 1); 767 prep->payload.data[1] = (void *)((unsigned long)prep->payload.data[1] + 1);
768 768
769 /* attach the data */ 769 /* attach the data */
770 pp = (struct rxrpc_key_token **)&prep->payload[0]; 770 pp = (struct rxrpc_key_token **)&prep->payload.data[0];
771 while (*pp) 771 while (*pp)
772 pp = &(*pp)->next; 772 pp = &(*pp)->next;
773 *pp = token; 773 *pp = token;
@@ -814,7 +814,7 @@ static void rxrpc_free_token_list(struct rxrpc_key_token *token)
814 */ 814 */
815static void rxrpc_free_preparse(struct key_preparsed_payload *prep) 815static void rxrpc_free_preparse(struct key_preparsed_payload *prep)
816{ 816{
817 rxrpc_free_token_list(prep->payload[0]); 817 rxrpc_free_token_list(prep->payload.data[0]);
818} 818}
819 819
820/* 820/*
@@ -831,7 +831,7 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
831 if (prep->datalen != 8) 831 if (prep->datalen != 8)
832 return -EINVAL; 832 return -EINVAL;
833 833
834 memcpy(&prep->type_data, prep->data, 8); 834 memcpy(&prep->payload.data[2], prep->data, 8);
835 835
836 ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); 836 ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
837 if (IS_ERR(ci)) { 837 if (IS_ERR(ci)) {
@@ -842,7 +842,7 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
842 if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0) 842 if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0)
843 BUG(); 843 BUG();
844 844
845 prep->payload[0] = ci; 845 prep->payload.data[0] = ci;
846 _leave(" = 0"); 846 _leave(" = 0");
847 return 0; 847 return 0;
848} 848}
@@ -852,8 +852,8 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
852 */ 852 */
853static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) 853static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep)
854{ 854{
855 if (prep->payload[0]) 855 if (prep->payload.data[0])
856 crypto_free_blkcipher(prep->payload[0]); 856 crypto_free_blkcipher(prep->payload.data[0]);
857} 857}
858 858
859/* 859/*
@@ -861,7 +861,7 @@ static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep)
861 */ 861 */
862static void rxrpc_destroy(struct key *key) 862static void rxrpc_destroy(struct key *key)
863{ 863{
864 rxrpc_free_token_list(key->payload.data); 864 rxrpc_free_token_list(key->payload.data[0]);
865} 865}
866 866
867/* 867/*
@@ -869,9 +869,9 @@ static void rxrpc_destroy(struct key *key)
869 */ 869 */
870static void rxrpc_destroy_s(struct key *key) 870static void rxrpc_destroy_s(struct key *key)
871{ 871{
872 if (key->payload.data) { 872 if (key->payload.data[0]) {
873 crypto_free_blkcipher(key->payload.data); 873 crypto_free_blkcipher(key->payload.data[0]);
874 key->payload.data = NULL; 874 key->payload.data[0] = NULL;
875 } 875 }
876} 876}
877 877
@@ -1070,7 +1070,7 @@ static long rxrpc_read(const struct key *key,
1070 size += 1 * 4; /* token count */ 1070 size += 1 * 4; /* token count */
1071 1071
1072 ntoks = 0; 1072 ntoks = 0;
1073 for (token = key->payload.data; token; token = token->next) { 1073 for (token = key->payload.data[0]; token; token = token->next) {
1074 toksize = 4; /* sec index */ 1074 toksize = 4; /* sec index */
1075 1075
1076 switch (token->security_index) { 1076 switch (token->security_index) {
@@ -1163,7 +1163,7 @@ static long rxrpc_read(const struct key *key,
1163 ENCODE(ntoks); 1163 ENCODE(ntoks);
1164 1164
1165 tok = 0; 1165 tok = 0;
1166 for (token = key->payload.data; token; token = token->next) { 1166 for (token = key->payload.data[0]; token; token = token->next) {
1167 toksize = toksizes[tok++]; 1167 toksize = toksizes[tok++];
1168 ENCODE(toksize); 1168 ENCODE(toksize);
1169 oldxdr = xdr; 1169 oldxdr = xdr;
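
Note: these hunks track the keyrings rework that folded the separate type_data area into a single four-slot payload union (union key_payload, with void *data[4]). As the hunks above use it, rxrpc keeps the token list in slot 0 (formerly payload[0]), the token count in slot 1 (formerly type_data[0]), and, for server keys, an 8-byte copy of the DES key material in slot 2 (formerly type_data). A sketch of the counting idiom under the new layout:

#include <linux/key-type.h>

/* Sketch of the payload partitioning these hunks adopt:
 *   payload.data[0] - head of the rxrpc_key_token list
 *   payload.data[1] - token count, stored as a cast integer
 *   payload.data[2] - 8-byte DES key copy (rxkad server keys)
 */
static void count_token(struct key_preparsed_payload *prep)
{
	prep->payload.data[1] =
		(void *)((unsigned long)prep->payload.data[1] + 1);
}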
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index c0042807bfc6..a40d3afe93b7 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -158,7 +158,7 @@ int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans,
158 service_id = htons(srx->srx_service); 158 service_id = htons(srx->srx_service);
159 } 159 }
160 key = rx->key; 160 key = rx->key;
161 if (key && !rx->key->payload.data) 161 if (key && !rx->key->payload.data[0])
162 key = NULL; 162 key = NULL;
163 bundle = rxrpc_get_bundle(rx, trans, key, service_id, 163 bundle = rxrpc_get_bundle(rx, trans, key, service_id,
164 GFP_KERNEL); 164 GFP_KERNEL);
diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c
index 49b3cc31ee1f..8334474eb26c 100644
--- a/net/rxrpc/ar-security.c
+++ b/net/rxrpc/ar-security.c
@@ -137,9 +137,9 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
137 if (ret < 0) 137 if (ret < 0)
138 return ret; 138 return ret;
139 139
140 if (!key->payload.data) 140 token = key->payload.data[0];
141 if (!token)
141 return -EKEYREJECTED; 142 return -EKEYREJECTED;
142 token = key->payload.data;
143 143
144 sec = rxrpc_security_lookup(token->security_index); 144 sec = rxrpc_security_lookup(token->security_index);
145 if (!sec) 145 if (!sec)
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
index 1976dec84f29..9946467f16b4 100644
--- a/net/rxrpc/ar-transport.c
+++ b/net/rxrpc/ar-transport.c
@@ -189,7 +189,7 @@ void rxrpc_put_transport(struct rxrpc_transport *trans)
189 189
190 ASSERTCMP(atomic_read(&trans->usage), >, 0); 190 ASSERTCMP(atomic_read(&trans->usage), >, 0);
191 191
192 trans->put_time = get_seconds(); 192 trans->put_time = ktime_get_seconds();
193 if (unlikely(atomic_dec_and_test(&trans->usage))) { 193 if (unlikely(atomic_dec_and_test(&trans->usage))) {
194 _debug("zombie"); 194 _debug("zombie");
195 /* let the reaper determine the timeout to avoid a race with 195 /* let the reaper determine the timeout to avoid a race with
@@ -226,7 +226,7 @@ static void rxrpc_transport_reaper(struct work_struct *work)
226 226
227 _enter(""); 227 _enter("");
228 228
229 now = get_seconds(); 229 now = ktime_get_seconds();
230 earliest = ULONG_MAX; 230 earliest = ULONG_MAX;
231 231
232 /* extract all the transports that have been dead too long */ 232 /* extract all the transports that have been dead too long */
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index f226709ebd8f..d7a9ab5a9d9c 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -67,7 +67,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn)
67 67
68 _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key)); 68 _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key));
69 69
70 token = conn->key->payload.data; 70 token = conn->key->payload.data[0];
71 conn->security_ix = token->security_index; 71 conn->security_ix = token->security_index;
72 72
73 ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); 73 ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
@@ -125,7 +125,7 @@ static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
125 if (!conn->key) 125 if (!conn->key)
126 return; 126 return;
127 127
128 token = conn->key->payload.data; 128 token = conn->key->payload.data[0];
129 memcpy(&iv, token->kad->session_key, sizeof(iv)); 129 memcpy(&iv, token->kad->session_key, sizeof(iv));
130 130
131 desc.tfm = conn->cipher; 131 desc.tfm = conn->cipher;
@@ -221,7 +221,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
221 rxkhdr.checksum = 0; 221 rxkhdr.checksum = 0;
222 222
223 /* encrypt from the session key */ 223 /* encrypt from the session key */
224 token = call->conn->key->payload.data; 224 token = call->conn->key->payload.data[0];
225 memcpy(&iv, token->kad->session_key, sizeof(iv)); 225 memcpy(&iv, token->kad->session_key, sizeof(iv));
226 desc.tfm = call->conn->cipher; 226 desc.tfm = call->conn->cipher;
227 desc.info = iv.x; 227 desc.info = iv.x;
@@ -433,7 +433,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
433 skb_to_sgvec(skb, sg, 0, skb->len); 433 skb_to_sgvec(skb, sg, 0, skb->len);
434 434
435 /* decrypt from the session key */ 435 /* decrypt from the session key */
436 token = call->conn->key->payload.data; 436 token = call->conn->key->payload.data[0];
437 memcpy(&iv, token->kad->session_key, sizeof(iv)); 437 memcpy(&iv, token->kad->session_key, sizeof(iv));
438 desc.tfm = call->conn->cipher; 438 desc.tfm = call->conn->cipher;
439 desc.info = iv.x; 439 desc.info = iv.x;
@@ -780,7 +780,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
780 if (conn->security_level < min_level) 780 if (conn->security_level < min_level)
781 goto protocol_error; 781 goto protocol_error;
782 782
783 token = conn->key->payload.data; 783 token = conn->key->payload.data[0];
784 784
785 /* build the response packet */ 785 /* build the response packet */
786 memset(&resp, 0, sizeof(resp)); 786 memset(&resp, 0, sizeof(resp));
@@ -848,12 +848,12 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
848 } 848 }
849 } 849 }
850 850
851 ASSERT(conn->server_key->payload.data != NULL); 851 ASSERT(conn->server_key->payload.data[0] != NULL);
852 ASSERTCMP((unsigned long) ticket & 7UL, ==, 0); 852 ASSERTCMP((unsigned long) ticket & 7UL, ==, 0);
853 853
854 memcpy(&iv, &conn->server_key->type_data, sizeof(iv)); 854 memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv));
855 855
856 desc.tfm = conn->server_key->payload.data; 856 desc.tfm = conn->server_key->payload.data[0];
857 desc.info = iv.x; 857 desc.info = iv.x;
858 desc.flags = 0; 858 desc.flags = 0;
859 859
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 559bfa011bda..0bc6f912f870 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -72,6 +72,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
72 case TC_ACT_PIPE: 72 case TC_ACT_PIPE:
73 case TC_ACT_RECLASSIFY: 73 case TC_ACT_RECLASSIFY:
74 case TC_ACT_OK: 74 case TC_ACT_OK:
75 case TC_ACT_REDIRECT:
75 action = filter_res; 76 action = filter_res;
76 break; 77 break;
77 case TC_ACT_SHOT: 78 case TC_ACT_SHOT:
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 5019a47b9270..bb41699c6c49 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -68,13 +68,13 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
68 } 68 }
69 69
70 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), 70 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
71 proto, &tuple)) 71 proto, ca->net, &tuple))
72 goto out; 72 goto out;
73 73
74 zone.id = ca->zone; 74 zone.id = ca->zone;
75 zone.dir = NF_CT_DEFAULT_ZONE_DIR; 75 zone.dir = NF_CT_DEFAULT_ZONE_DIR;
76 76
77 thash = nf_conntrack_find_get(dev_net(skb->dev), &zone, &tuple); 77 thash = nf_conntrack_find_get(ca->net, &zone, &tuple);
78 if (!thash) 78 if (!thash)
79 goto out; 79 goto out;
80 80
@@ -119,6 +119,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
119 119
120 ci = to_connmark(a); 120 ci = to_connmark(a);
121 ci->tcf_action = parm->action; 121 ci->tcf_action = parm->action;
122 ci->net = net;
122 ci->zone = parm->zone; 123 ci->zone = parm->zone;
123 124
124 tcf_hash_insert(a); 125 tcf_hash_insert(a);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 99c9cc1c7af9..d05869646515 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -189,6 +189,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
189 * worry later - danger - this API seems to have changed 189 * worry later - danger - this API seems to have changed
190 * from earlier kernels 190 * from earlier kernels
191 */ 191 */
192 par.net = dev_net(skb->dev);
192 par.in = skb->dev; 193 par.in = skb->dev;
193 par.out = NULL; 194 par.out = NULL;
194 par.hooknum = ipt->tcfi_hook; 195 par.hooknum = ipt->tcfi_hook;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 2d1be4a760fd..32fcdecdb9e2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -31,13 +31,17 @@
31 31
32#define MIRRED_TAB_MASK 7 32#define MIRRED_TAB_MASK 7
33static LIST_HEAD(mirred_list); 33static LIST_HEAD(mirred_list);
34static DEFINE_SPINLOCK(mirred_list_lock);
34 35
35static void tcf_mirred_release(struct tc_action *a, int bind) 36static void tcf_mirred_release(struct tc_action *a, int bind)
36{ 37{
37 struct tcf_mirred *m = to_mirred(a); 38 struct tcf_mirred *m = to_mirred(a);
38 struct net_device *dev = rcu_dereference_protected(m->tcfm_dev, 1); 39 struct net_device *dev = rcu_dereference_protected(m->tcfm_dev, 1);
39 40
41 /* We could be called either in a RCU callback or with RTNL lock held. */
42 spin_lock_bh(&mirred_list_lock);
40 list_del(&m->tcfm_list); 43 list_del(&m->tcfm_list);
44 spin_unlock_bh(&mirred_list_lock);
41 if (dev) 45 if (dev)
42 dev_put(dev); 46 dev_put(dev);
43} 47}
@@ -103,10 +107,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
103 } else { 107 } else {
104 if (bind) 108 if (bind)
105 return 0; 109 return 0;
106 if (!ovr) { 110
107 tcf_hash_release(a, bind); 111 tcf_hash_release(a, bind);
112 if (!ovr)
108 return -EEXIST; 113 return -EEXIST;
109 }
110 } 114 }
111 m = to_mirred(a); 115 m = to_mirred(a);
112 116
@@ -123,7 +127,9 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
123 } 127 }
124 128
125 if (ret == ACT_P_CREATED) { 129 if (ret == ACT_P_CREATED) {
130 spin_lock_bh(&mirred_list_lock);
126 list_add(&m->tcfm_list, &mirred_list); 131 list_add(&m->tcfm_list, &mirred_list);
132 spin_unlock_bh(&mirred_list_lock);
127 tcf_hash_insert(a); 133 tcf_hash_insert(a);
128 } 134 }
129 135
@@ -173,6 +179,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
173 179
174 skb2->skb_iif = skb->dev->ifindex; 180 skb2->skb_iif = skb->dev->ifindex;
175 skb2->dev = dev; 181 skb2->dev = dev;
182 skb_sender_cpu_clear(skb2);
176 err = dev_queue_xmit(skb2); 183 err = dev_queue_xmit(skb2);
177 184
178 if (err) { 185 if (err) {
@@ -221,7 +228,8 @@ static int mirred_device_event(struct notifier_block *unused,
221 struct tcf_mirred *m; 228 struct tcf_mirred *m;
222 229
223 ASSERT_RTNL(); 230 ASSERT_RTNL();
224 if (event == NETDEV_UNREGISTER) 231 if (event == NETDEV_UNREGISTER) {
232 spin_lock_bh(&mirred_list_lock);
225 list_for_each_entry(m, &mirred_list, tcfm_list) { 233 list_for_each_entry(m, &mirred_list, tcfm_list) {
226 if (rcu_access_pointer(m->tcfm_dev) == dev) { 234 if (rcu_access_pointer(m->tcfm_dev) == dev) {
227 dev_put(dev); 235 dev_put(dev);
@@ -231,6 +239,8 @@ static int mirred_device_event(struct notifier_block *unused,
231 RCU_INIT_POINTER(m->tcfm_dev, NULL); 239 RCU_INIT_POINTER(m->tcfm_dev, NULL);
232 } 240 }
233 } 241 }
242 spin_unlock_bh(&mirred_list_lock);
243 }
234 244
235 return NOTIFY_DONE; 245 return NOTIFY_DONE;
236} 246}
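
Note: mirred_list was being mutated from tcf_mirred_release() (which can run as an RCU callback) while the netdev notifier walks it under RTNL, so a dedicated spinlock now guards every traversal and mutation. The pattern, as a compact self-contained sketch:

#include <linux/list.h>
#include <linux/spinlock.h>

static LIST_HEAD(obj_list);
static DEFINE_SPINLOCK(obj_list_lock);

struct obj {
	struct list_head node;
};

/* Sketch: _bh lock on every list op, since release may run from an
 * RCU callback while a notifier walks the list under RTNL. */
static void obj_add(struct obj *o)
{
	spin_lock_bh(&obj_list_lock);
	list_add(&o->node, &obj_list);
	spin_unlock_bh(&obj_list_lock);
}

static void obj_del(struct obj *o)
{
	spin_lock_bh(&obj_list_lock);
	list_del(&o->node);
	spin_unlock_bh(&obj_list_lock);
}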
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index e5168f8b9640..5faaa5425f7b 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -38,6 +38,7 @@ struct cls_bpf_prog {
38 struct bpf_prog *filter; 38 struct bpf_prog *filter;
39 struct list_head link; 39 struct list_head link;
40 struct tcf_result res; 40 struct tcf_result res;
41 bool exts_integrated;
41 struct tcf_exts exts; 42 struct tcf_exts exts;
42 u32 handle; 43 u32 handle;
43 union { 44 union {
@@ -52,6 +53,7 @@ struct cls_bpf_prog {
52 53
53static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { 54static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
54 [TCA_BPF_CLASSID] = { .type = NLA_U32 }, 55 [TCA_BPF_CLASSID] = { .type = NLA_U32 },
56 [TCA_BPF_FLAGS] = { .type = NLA_U32 },
55 [TCA_BPF_FD] = { .type = NLA_U32 }, 57 [TCA_BPF_FD] = { .type = NLA_U32 },
56 [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, 58 [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
57 [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, 59 [TCA_BPF_OPS_LEN] = { .type = NLA_U16 },
@@ -59,6 +61,20 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
59 .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, 61 .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
60}; 62};
61 63
64static int cls_bpf_exec_opcode(int code)
65{
66 switch (code) {
67 case TC_ACT_OK:
68 case TC_ACT_SHOT:
69 case TC_ACT_STOLEN:
70 case TC_ACT_REDIRECT:
71 case TC_ACT_UNSPEC:
72 return code;
73 default:
74 return TC_ACT_UNSPEC;
75 }
76}
77
62static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, 78static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
63 struct tcf_result *res) 79 struct tcf_result *res)
64{ 80{
@@ -79,6 +95,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
79 list_for_each_entry_rcu(prog, &head->plist, link) { 95 list_for_each_entry_rcu(prog, &head->plist, link) {
80 int filter_res; 96 int filter_res;
81 97
98 qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
99
82 if (at_ingress) { 100 if (at_ingress) {
83 /* It is safe to push/pull even if skb_shared() */ 101 /* It is safe to push/pull even if skb_shared() */
84 __skb_push(skb, skb->mac_len); 102 __skb_push(skb, skb->mac_len);
@@ -88,6 +106,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
88 filter_res = BPF_PROG_RUN(prog->filter, skb); 106 filter_res = BPF_PROG_RUN(prog->filter, skb);
89 } 107 }
90 108
109 if (prog->exts_integrated) {
110 res->class = prog->res.class;
111 res->classid = qdisc_skb_cb(skb)->tc_classid;
112
113 ret = cls_bpf_exec_opcode(filter_res);
114 if (ret == TC_ACT_UNSPEC)
115 continue;
116 break;
117 }
118
91 if (filter_res == 0) 119 if (filter_res == 0)
92 continue; 120 continue;
93 121
@@ -195,8 +223,7 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
195 return ret; 223 return ret;
196} 224}
197 225
198static int cls_bpf_prog_from_ops(struct nlattr **tb, 226static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
199 struct cls_bpf_prog *prog, u32 classid)
200{ 227{
201 struct sock_filter *bpf_ops; 228 struct sock_filter *bpf_ops;
202 struct sock_fprog_kern fprog_tmp; 229 struct sock_fprog_kern fprog_tmp;
@@ -230,15 +257,13 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb,
230 prog->bpf_ops = bpf_ops; 257 prog->bpf_ops = bpf_ops;
231 prog->bpf_num_ops = bpf_num_ops; 258 prog->bpf_num_ops = bpf_num_ops;
232 prog->bpf_name = NULL; 259 prog->bpf_name = NULL;
233
234 prog->filter = fp; 260 prog->filter = fp;
235 prog->res.classid = classid;
236 261
237 return 0; 262 return 0;
238} 263}
239 264
240static int cls_bpf_prog_from_efd(struct nlattr **tb, 265static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
241 struct cls_bpf_prog *prog, u32 classid) 266 const struct tcf_proto *tp)
242{ 267{
243 struct bpf_prog *fp; 268 struct bpf_prog *fp;
244 char *name = NULL; 269 char *name = NULL;
@@ -268,9 +293,10 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb,
268 prog->bpf_ops = NULL; 293 prog->bpf_ops = NULL;
269 prog->bpf_fd = bpf_fd; 294 prog->bpf_fd = bpf_fd;
270 prog->bpf_name = name; 295 prog->bpf_name = name;
271
272 prog->filter = fp; 296 prog->filter = fp;
273 prog->res.classid = classid; 297
298 if (fp->dst_needed)
299 netif_keep_dst(qdisc_dev(tp->q));
274 300
275 return 0; 301 return 0;
276} 302}
@@ -280,16 +306,13 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
280 unsigned long base, struct nlattr **tb, 306 unsigned long base, struct nlattr **tb,
281 struct nlattr *est, bool ovr) 307 struct nlattr *est, bool ovr)
282{ 308{
309 bool is_bpf, is_ebpf, have_exts = false;
283 struct tcf_exts exts; 310 struct tcf_exts exts;
284 bool is_bpf, is_ebpf;
285 u32 classid;
286 int ret; 311 int ret;
287 312
288 is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; 313 is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
289 is_ebpf = tb[TCA_BPF_FD]; 314 is_ebpf = tb[TCA_BPF_FD];
290 315 if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
291 if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
292 !tb[TCA_BPF_CLASSID])
293 return -EINVAL; 316 return -EINVAL;
294 317
295 tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); 318 tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
@@ -297,18 +320,32 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
297 if (ret < 0) 320 if (ret < 0)
298 return ret; 321 return ret;
299 322
300 classid = nla_get_u32(tb[TCA_BPF_CLASSID]); 323 if (tb[TCA_BPF_FLAGS]) {
324 u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
325
326 if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
327 tcf_exts_destroy(&exts);
328 return -EINVAL;
329 }
330
331 have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
332 }
333
334 prog->exts_integrated = have_exts;
301 335
302 ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) : 336 ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
303 cls_bpf_prog_from_efd(tb, prog, classid); 337 cls_bpf_prog_from_efd(tb, prog, tp);
304 if (ret < 0) { 338 if (ret < 0) {
305 tcf_exts_destroy(&exts); 339 tcf_exts_destroy(&exts);
306 return ret; 340 return ret;
307 } 341 }
308 342
309 tcf_bind_filter(tp, &prog->res, base); 343 if (tb[TCA_BPF_CLASSID]) {
310 tcf_exts_change(tp, &prog->exts, &exts); 344 prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
345 tcf_bind_filter(tp, &prog->res, base);
346 }
311 347
348 tcf_exts_change(tp, &prog->exts, &exts);
312 return 0; 349 return 0;
313} 350}
314 351
@@ -429,6 +466,7 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
429{ 466{
430 struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; 467 struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
431 struct nlattr *nest; 468 struct nlattr *nest;
469 u32 bpf_flags = 0;
432 int ret; 470 int ret;
433 471
434 if (prog == NULL) 472 if (prog == NULL)
@@ -440,7 +478,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
440 if (nest == NULL) 478 if (nest == NULL)
441 goto nla_put_failure; 479 goto nla_put_failure;
442 480
443 if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) 481 if (prog->res.classid &&
482 nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
444 goto nla_put_failure; 483 goto nla_put_failure;
445 484
446 if (cls_bpf_is_ebpf(prog)) 485 if (cls_bpf_is_ebpf(prog))
@@ -453,6 +492,11 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
453 if (tcf_exts_dump(skb, &prog->exts) < 0) 492 if (tcf_exts_dump(skb, &prog->exts) < 0)
454 goto nla_put_failure; 493 goto nla_put_failure;
455 494
495 if (prog->exts_integrated)
496 bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
497 if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
498 goto nla_put_failure;
499
456 nla_nest_end(skb, nest); 500 nla_nest_end(skb, nest);
457 501
458 if (tcf_exts_dump_stats(skb, &prog->exts) < 0) 502 if (tcf_exts_dump_stats(skb, &prog->exts) < 0)
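
Note: TCA_BPF_FLAG_ACT_DIRECT lets a cls_bpf program act as classifier and action in a single pass: TCA_BPF_CLASSID becomes optional, the program writes its class into skb->tc_classid (surfaced through qdisc_skb_cb), and its return value is interpreted directly as a TC verdict, sanitized by cls_bpf_exec_opcode(). A hedged sketch of what such a program looks like from the eBPF side (the classid value is arbitrary and the section name is just a common convention):

#include <linux/bpf.h>
#include <linux/pkt_cls.h>

/* Sketch of a direct-action classifier: no separate tc action is
 * attached; the return code *is* the verdict.
 */
__attribute__((section("classifier"), used))
int cls_da(struct __sk_buff *skb)
{
	skb->tc_classid = (1 << 16) | 1;	/* classid 1:1, hypothetical */

	if (skb->len > 1500)
		return TC_ACT_SHOT;		/* drop oversized frames */
	return TC_ACT_OK;			/* classify and pass */
}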
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 536838b657bf..fbfec6a18839 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -22,6 +22,7 @@
22#include <linux/if_vlan.h> 22#include <linux/if_vlan.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/module.h> 24#include <linux/module.h>
25#include <net/inet_sock.h>
25 26
26#include <net/pkt_cls.h> 27#include <net/pkt_cls.h>
27#include <net/ip.h> 28#include <net/ip.h>
@@ -197,8 +198,11 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb)
197 198
198static u32 flow_get_skuid(const struct sk_buff *skb) 199static u32 flow_get_skuid(const struct sk_buff *skb)
199{ 200{
200 if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { 201 struct sock *sk = skb_to_full_sk(skb);
201 kuid_t skuid = skb->sk->sk_socket->file->f_cred->fsuid; 202
203 if (sk && sk->sk_socket && sk->sk_socket->file) {
204 kuid_t skuid = sk->sk_socket->file->f_cred->fsuid;
205
202 return from_kuid(&init_user_ns, skuid); 206 return from_kuid(&init_user_ns, skuid);
203 } 207 }
204 return 0; 208 return 0;
@@ -206,8 +210,11 @@ static u32 flow_get_skuid(const struct sk_buff *skb)
206 210
207static u32 flow_get_skgid(const struct sk_buff *skb) 211static u32 flow_get_skgid(const struct sk_buff *skb)
208{ 212{
209 if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { 213 struct sock *sk = skb_to_full_sk(skb);
210 kgid_t skgid = skb->sk->sk_socket->file->f_cred->fsgid; 214
215 if (sk && sk->sk_socket && sk->sk_socket->file) {
216 kgid_t skgid = sk->sk_socket->file->f_cred->fsgid;
217
211 return from_kgid(&init_user_ns, skgid); 218 return from_kgid(&init_user_ns, skgid);
212 } 219 }
213 return 0; 220 return 0;
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index df0328ba6a48..c66ca9400ab4 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -95,6 +95,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
95 if (skb->skb_iif) 95 if (skb->skb_iif)
96 indev = dev_get_by_index_rcu(em->net, skb->skb_iif); 96 indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
97 97
98 acpar.net = em->net;
98 acpar.in = indev ? indev : dev; 99 acpar.in = indev ? indev : dev;
99 acpar.out = dev; 100 acpar.out = dev;
100 101
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index b5294ce20cd4..f2aabc0089da 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -343,119 +343,145 @@ META_COLLECTOR(int_sk_refcnt)
343 343
344META_COLLECTOR(int_sk_rcvbuf) 344META_COLLECTOR(int_sk_rcvbuf)
345{ 345{
346 if (skip_nonlocal(skb)) { 346 const struct sock *sk = skb_to_full_sk(skb);
347
348 if (!sk) {
347 *err = -1; 349 *err = -1;
348 return; 350 return;
349 } 351 }
350 dst->value = skb->sk->sk_rcvbuf; 352 dst->value = sk->sk_rcvbuf;
351} 353}
352 354
353META_COLLECTOR(int_sk_shutdown) 355META_COLLECTOR(int_sk_shutdown)
354{ 356{
355 if (skip_nonlocal(skb)) { 357 const struct sock *sk = skb_to_full_sk(skb);
358
359 if (!sk) {
356 *err = -1; 360 *err = -1;
357 return; 361 return;
358 } 362 }
359 dst->value = skb->sk->sk_shutdown; 363 dst->value = sk->sk_shutdown;
360} 364}
361 365
362META_COLLECTOR(int_sk_proto) 366META_COLLECTOR(int_sk_proto)
363{ 367{
364 if (skip_nonlocal(skb)) { 368 const struct sock *sk = skb_to_full_sk(skb);
369
370 if (!sk) {
365 *err = -1; 371 *err = -1;
366 return; 372 return;
367 } 373 }
368 dst->value = skb->sk->sk_protocol; 374 dst->value = sk->sk_protocol;
369} 375}
370 376
371META_COLLECTOR(int_sk_type) 377META_COLLECTOR(int_sk_type)
372{ 378{
373 if (skip_nonlocal(skb)) { 379 const struct sock *sk = skb_to_full_sk(skb);
380
381 if (!sk) {
374 *err = -1; 382 *err = -1;
375 return; 383 return;
376 } 384 }
377 dst->value = skb->sk->sk_type; 385 dst->value = sk->sk_type;
378} 386}
379 387
380META_COLLECTOR(int_sk_rmem_alloc) 388META_COLLECTOR(int_sk_rmem_alloc)
381{ 389{
382 if (skip_nonlocal(skb)) { 390 const struct sock *sk = skb_to_full_sk(skb);
391
392 if (!sk) {
383 *err = -1; 393 *err = -1;
384 return; 394 return;
385 } 395 }
386 dst->value = sk_rmem_alloc_get(skb->sk); 396 dst->value = sk_rmem_alloc_get(sk);
387} 397}
388 398
389META_COLLECTOR(int_sk_wmem_alloc) 399META_COLLECTOR(int_sk_wmem_alloc)
390{ 400{
391 if (skip_nonlocal(skb)) { 401 const struct sock *sk = skb_to_full_sk(skb);
402
403 if (!sk) {
392 *err = -1; 404 *err = -1;
393 return; 405 return;
394 } 406 }
395 dst->value = sk_wmem_alloc_get(skb->sk); 407 dst->value = sk_wmem_alloc_get(sk);
396} 408}
397 409
398META_COLLECTOR(int_sk_omem_alloc) 410META_COLLECTOR(int_sk_omem_alloc)
399{ 411{
400 if (skip_nonlocal(skb)) { 412 const struct sock *sk = skb_to_full_sk(skb);
413
414 if (!sk) {
401 *err = -1; 415 *err = -1;
402 return; 416 return;
403 } 417 }
404 dst->value = atomic_read(&skb->sk->sk_omem_alloc); 418 dst->value = atomic_read(&sk->sk_omem_alloc);
405} 419}
406 420
407META_COLLECTOR(int_sk_rcv_qlen) 421META_COLLECTOR(int_sk_rcv_qlen)
408{ 422{
409 if (skip_nonlocal(skb)) { 423 const struct sock *sk = skb_to_full_sk(skb);
424
425 if (!sk) {
410 *err = -1; 426 *err = -1;
411 return; 427 return;
412 } 428 }
413 dst->value = skb->sk->sk_receive_queue.qlen; 429 dst->value = sk->sk_receive_queue.qlen;
414} 430}
415 431
416META_COLLECTOR(int_sk_snd_qlen) 432META_COLLECTOR(int_sk_snd_qlen)
417{ 433{
418 if (skip_nonlocal(skb)) { 434 const struct sock *sk = skb_to_full_sk(skb);
435
436 if (!sk) {
419 *err = -1; 437 *err = -1;
420 return; 438 return;
421 } 439 }
422 dst->value = skb->sk->sk_write_queue.qlen; 440 dst->value = sk->sk_write_queue.qlen;
423} 441}
424 442
425META_COLLECTOR(int_sk_wmem_queued) 443META_COLLECTOR(int_sk_wmem_queued)
426{ 444{
427 if (skip_nonlocal(skb)) { 445 const struct sock *sk = skb_to_full_sk(skb);
446
447 if (!sk) {
428 *err = -1; 448 *err = -1;
429 return; 449 return;
430 } 450 }
431 dst->value = skb->sk->sk_wmem_queued; 451 dst->value = sk->sk_wmem_queued;
432} 452}
433 453
434META_COLLECTOR(int_sk_fwd_alloc) 454META_COLLECTOR(int_sk_fwd_alloc)
435{ 455{
436 if (skip_nonlocal(skb)) { 456 const struct sock *sk = skb_to_full_sk(skb);
457
458 if (!sk) {
437 *err = -1; 459 *err = -1;
438 return; 460 return;
439 } 461 }
440 dst->value = skb->sk->sk_forward_alloc; 462 dst->value = sk->sk_forward_alloc;
441} 463}
442 464
443META_COLLECTOR(int_sk_sndbuf) 465META_COLLECTOR(int_sk_sndbuf)
444{ 466{
445 if (skip_nonlocal(skb)) { 467 const struct sock *sk = skb_to_full_sk(skb);
468
469 if (!sk) {
446 *err = -1; 470 *err = -1;
447 return; 471 return;
448 } 472 }
449 dst->value = skb->sk->sk_sndbuf; 473 dst->value = sk->sk_sndbuf;
450} 474}
451 475
452META_COLLECTOR(int_sk_alloc) 476META_COLLECTOR(int_sk_alloc)
453{ 477{
454 if (skip_nonlocal(skb)) { 478 const struct sock *sk = skb_to_full_sk(skb);
479
480 if (!sk) {
455 *err = -1; 481 *err = -1;
456 return; 482 return;
457 } 483 }
458 dst->value = (__force int) skb->sk->sk_allocation; 484 dst->value = (__force int) sk->sk_allocation;
459} 485}
460 486
461META_COLLECTOR(int_sk_hash) 487META_COLLECTOR(int_sk_hash)
@@ -469,92 +495,112 @@ META_COLLECTOR(int_sk_hash)
469 495
470META_COLLECTOR(int_sk_lingertime) 496META_COLLECTOR(int_sk_lingertime)
471{ 497{
472 if (skip_nonlocal(skb)) { 498 const struct sock *sk = skb_to_full_sk(skb);
499
500 if (!sk) {
473 *err = -1; 501 *err = -1;
474 return; 502 return;
475 } 503 }
476 dst->value = skb->sk->sk_lingertime / HZ; 504 dst->value = sk->sk_lingertime / HZ;
477} 505}
478 506
479META_COLLECTOR(int_sk_err_qlen) 507META_COLLECTOR(int_sk_err_qlen)
480{ 508{
481 if (skip_nonlocal(skb)) { 509 const struct sock *sk = skb_to_full_sk(skb);
510
511 if (!sk) {
482 *err = -1; 512 *err = -1;
483 return; 513 return;
484 } 514 }
485 dst->value = skb->sk->sk_error_queue.qlen; 515 dst->value = sk->sk_error_queue.qlen;
486} 516}
487 517
488META_COLLECTOR(int_sk_ack_bl) 518META_COLLECTOR(int_sk_ack_bl)
489{ 519{
490 if (skip_nonlocal(skb)) { 520 const struct sock *sk = skb_to_full_sk(skb);
521
522 if (!sk) {
491 *err = -1; 523 *err = -1;
492 return; 524 return;
493 } 525 }
494 dst->value = skb->sk->sk_ack_backlog; 526 dst->value = sk->sk_ack_backlog;
495} 527}
496 528
497META_COLLECTOR(int_sk_max_ack_bl) 529META_COLLECTOR(int_sk_max_ack_bl)
498{ 530{
499 if (skip_nonlocal(skb)) { 531 const struct sock *sk = skb_to_full_sk(skb);
532
533 if (!sk) {
500 *err = -1; 534 *err = -1;
501 return; 535 return;
502 } 536 }
503 dst->value = skb->sk->sk_max_ack_backlog; 537 dst->value = sk->sk_max_ack_backlog;
504} 538}
505 539
506META_COLLECTOR(int_sk_prio) 540META_COLLECTOR(int_sk_prio)
507{ 541{
508 if (skip_nonlocal(skb)) { 542 const struct sock *sk = skb_to_full_sk(skb);
543
544 if (!sk) {
509 *err = -1; 545 *err = -1;
510 return; 546 return;
511 } 547 }
512 dst->value = skb->sk->sk_priority; 548 dst->value = sk->sk_priority;
513} 549}
514 550
515META_COLLECTOR(int_sk_rcvlowat) 551META_COLLECTOR(int_sk_rcvlowat)
516{ 552{
517 if (skip_nonlocal(skb)) { 553 const struct sock *sk = skb_to_full_sk(skb);
554
555 if (!sk) {
518 *err = -1; 556 *err = -1;
519 return; 557 return;
520 } 558 }
521 dst->value = skb->sk->sk_rcvlowat; 559 dst->value = sk->sk_rcvlowat;
522} 560}
523 561
524META_COLLECTOR(int_sk_rcvtimeo) 562META_COLLECTOR(int_sk_rcvtimeo)
525{ 563{
526 if (skip_nonlocal(skb)) { 564 const struct sock *sk = skb_to_full_sk(skb);
565
566 if (!sk) {
527 *err = -1; 567 *err = -1;
528 return; 568 return;
529 } 569 }
530 dst->value = skb->sk->sk_rcvtimeo / HZ; 570 dst->value = sk->sk_rcvtimeo / HZ;
531} 571}
532 572
533META_COLLECTOR(int_sk_sndtimeo) 573META_COLLECTOR(int_sk_sndtimeo)
534{ 574{
535 if (skip_nonlocal(skb)) { 575 const struct sock *sk = skb_to_full_sk(skb);
576
577 if (!sk) {
536 *err = -1; 578 *err = -1;
537 return; 579 return;
538 } 580 }
539 dst->value = skb->sk->sk_sndtimeo / HZ; 581 dst->value = sk->sk_sndtimeo / HZ;
540} 582}
541 583
542META_COLLECTOR(int_sk_sendmsg_off) 584META_COLLECTOR(int_sk_sendmsg_off)
543{ 585{
544 if (skip_nonlocal(skb)) { 586 const struct sock *sk = skb_to_full_sk(skb);
587
588 if (!sk) {
545 *err = -1; 589 *err = -1;
546 return; 590 return;
547 } 591 }
548 dst->value = skb->sk->sk_frag.offset; 592 dst->value = sk->sk_frag.offset;
549} 593}
550 594
551META_COLLECTOR(int_sk_write_pend) 595META_COLLECTOR(int_sk_write_pend)
552{ 596{
553 if (skip_nonlocal(skb)) { 597 const struct sock *sk = skb_to_full_sk(skb);
598
599 if (!sk) {
554 *err = -1; 600 *err = -1;
555 return; 601 return;
556 } 602 }
557 dst->value = skb->sk->sk_write_pending; 603 dst->value = sk->sk_write_pending;
558} 604}
559 605
560/************************************************************************** 606/**************************************************************************
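
Every collector in the hunk above follows the same conversion: the old skip_nonlocal() test dereferenced skb->sk directly, which is unsafe once TCP attaches lightweight request sockets to SYN/ACK packets, while skb_to_full_sk() first resolves a request socket to its full listener before any field is read. Below is a minimal userspace sketch of that pattern; the struct layouts and skb_to_full_sk_demo() are invented stand-ins for the kernel types, not the real definitions.

#include <stdio.h>

/* Simplified stand-ins for kernel types -- illustration only. */
struct sock {
    int sk_full;              /* 1 if this is a full socket */
    unsigned int sk_priority;
    struct sock *sk_listener; /* request socket's parent listener */
};

struct sk_buff {
    struct sock *sk;
};

/* Mimics skb_to_full_sk(): map a request socket to its listener,
 * return NULL when the skb carries no socket at all. */
static struct sock *skb_to_full_sk_demo(const struct sk_buff *skb)
{
    struct sock *sk = skb->sk;

    if (sk && !sk->sk_full)
        return sk->sk_listener;
    return sk;
}

/* The collector pattern after the conversion: bail out with *err = -1
 * when no full socket can be derived, otherwise read the field. */
static void int_sk_prio_demo(const struct sk_buff *skb,
                             unsigned int *dst, int *err)
{
    const struct sock *sk = skb_to_full_sk_demo(skb);

    if (!sk) {
        *err = -1;
        return;
    }
    *dst = sk->sk_priority;
}

int main(void)
{
    struct sock listener = { .sk_full = 1, .sk_priority = 6 };
    struct sock req = { .sk_full = 0, .sk_listener = &listener };
    struct sk_buff skb = { .sk = &req };
    unsigned int prio = 0;
    int err = 0;

    int_sk_prio_demo(&skb, &prio, &err);
    printf("err=%d prio=%u\n", err, prio); /* err=0 prio=6 */
    return 0;
}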
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index 094a874b48bc..3fee70d9814f 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -11,7 +11,7 @@
11 * Note: Quantum tunneling is not supported. 11 * Note: Quantum tunneling is not supported.
12 */ 12 */
13 13
14#include <linux/module.h> 14#include <linux/init.h>
15#include <linux/types.h> 15#include <linux/types.h>
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
@@ -37,17 +37,8 @@ static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {
37 .owner = THIS_MODULE, 37 .owner = THIS_MODULE,
38}; 38};
39 39
40static int __init blackhole_module_init(void) 40static int __init blackhole_init(void)
41{ 41{
42 return register_qdisc(&blackhole_qdisc_ops); 42 return register_qdisc(&blackhole_qdisc_ops);
43} 43}
44 44device_initcall(blackhole_init)
45static void __exit blackhole_module_exit(void)
46{
47 unregister_qdisc(&blackhole_qdisc_ops);
48}
49
50module_init(blackhole_module_init)
51module_exit(blackhole_module_exit)
52
53MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 02bfd3d1c4f0..5ffb8b8337c7 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -553,65 +553,6 @@ static void choke_destroy(struct Qdisc *sch)
553 choke_free(q->tab); 553 choke_free(q->tab);
554} 554}
555 555
556static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
557{
558 return NULL;
559}
560
561static unsigned long choke_get(struct Qdisc *sch, u32 classid)
562{
563 return 0;
564}
565
566static void choke_put(struct Qdisc *q, unsigned long cl)
567{
568}
569
570static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
571 u32 classid)
572{
573 return 0;
574}
575
576static struct tcf_proto __rcu **choke_find_tcf(struct Qdisc *sch,
577 unsigned long cl)
578{
579 struct choke_sched_data *q = qdisc_priv(sch);
580
581 if (cl)
582 return NULL;
583 return &q->filter_list;
584}
585
586static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
587 struct sk_buff *skb, struct tcmsg *tcm)
588{
589 tcm->tcm_handle |= TC_H_MIN(cl);
590 return 0;
591}
592
593static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
594{
595 if (!arg->stop) {
596 if (arg->fn(sch, 1, arg) < 0) {
597 arg->stop = 1;
598 return;
599 }
600 arg->count++;
601 }
602}
603
604static const struct Qdisc_class_ops choke_class_ops = {
605 .leaf = choke_leaf,
606 .get = choke_get,
607 .put = choke_put,
608 .tcf_chain = choke_find_tcf,
609 .bind_tcf = choke_bind,
610 .unbind_tcf = choke_put,
611 .dump = choke_dump_class,
612 .walk = choke_walk,
613};
614
615static struct sk_buff *choke_peek_head(struct Qdisc *sch) 556static struct sk_buff *choke_peek_head(struct Qdisc *sch)
616{ 557{
617 struct choke_sched_data *q = qdisc_priv(sch); 558 struct choke_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index c4d45fd8c551..f357f34d02d2 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -35,14 +35,20 @@
35 35
36#define NO_DEFAULT_INDEX (1 << 16) 36#define NO_DEFAULT_INDEX (1 << 16)
37 37
38struct mask_value {
39 u8 mask;
40 u8 value;
41};
42
38struct dsmark_qdisc_data { 43struct dsmark_qdisc_data {
39 struct Qdisc *q; 44 struct Qdisc *q;
40 struct tcf_proto __rcu *filter_list; 45 struct tcf_proto __rcu *filter_list;
41 u8 *mask; /* "owns" the array */ 46 struct mask_value *mv;
42 u8 *value;
43 u16 indices; 47 u16 indices;
48 u8 set_tc_index;
44 u32 default_index; /* index range is 0...0xffff */ 49 u32 default_index; /* index range is 0...0xffff */
45 int set_tc_index; 50#define DSMARK_EMBEDDED_SZ 16
51 struct mask_value embedded[DSMARK_EMBEDDED_SZ];
46}; 52};
47 53
48static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) 54static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
@@ -116,7 +122,6 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
116 struct nlattr *opt = tca[TCA_OPTIONS]; 122 struct nlattr *opt = tca[TCA_OPTIONS];
117 struct nlattr *tb[TCA_DSMARK_MAX + 1]; 123 struct nlattr *tb[TCA_DSMARK_MAX + 1];
118 int err = -EINVAL; 124 int err = -EINVAL;
119 u8 mask = 0;
120 125
121 pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n", 126 pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n",
122 __func__, sch, p, classid, parent, *arg); 127 __func__, sch, p, classid, parent, *arg);
@@ -133,14 +138,11 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
133 if (err < 0) 138 if (err < 0)
134 goto errout; 139 goto errout;
135 140
136 if (tb[TCA_DSMARK_MASK])
137 mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
138
139 if (tb[TCA_DSMARK_VALUE]) 141 if (tb[TCA_DSMARK_VALUE])
140 p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); 142 p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]);
141 143
142 if (tb[TCA_DSMARK_MASK]) 144 if (tb[TCA_DSMARK_MASK])
143 p->mask[*arg - 1] = mask; 145 p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
144 146
145 err = 0; 147 err = 0;
146 148
@@ -155,8 +157,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
155 if (!dsmark_valid_index(p, arg)) 157 if (!dsmark_valid_index(p, arg))
156 return -EINVAL; 158 return -EINVAL;
157 159
158 p->mask[arg - 1] = 0xff; 160 p->mv[arg - 1].mask = 0xff;
159 p->value[arg - 1] = 0; 161 p->mv[arg - 1].value = 0;
160 162
161 return 0; 163 return 0;
162} 164}
@@ -173,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
173 return; 175 return;
174 176
175 for (i = 0; i < p->indices; i++) { 177 for (i = 0; i < p->indices; i++) {
176 if (p->mask[i] == 0xff && !p->value[i]) 178 if (p->mv[i].mask == 0xff && !p->mv[i].value)
177 goto ignore; 179 goto ignore;
178 if (walker->count >= walker->skip) { 180 if (walker->count >= walker->skip) {
179 if (walker->fn(sch, i + 1, walker) < 0) { 181 if (walker->fn(sch, i + 1, walker) < 0) {
@@ -291,12 +293,12 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
291 293
292 switch (tc_skb_protocol(skb)) { 294 switch (tc_skb_protocol(skb)) {
293 case htons(ETH_P_IP): 295 case htons(ETH_P_IP):
294 ipv4_change_dsfield(ip_hdr(skb), p->mask[index], 296 ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask,
295 p->value[index]); 297 p->mv[index].value);
296 break; 298 break;
297 case htons(ETH_P_IPV6): 299 case htons(ETH_P_IPV6):
298 ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index], 300 ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask,
299 p->value[index]); 301 p->mv[index].value);
300 break; 302 break;
301 default: 303 default:
302 /* 304 /*
@@ -304,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
304 * This way, we can send non-IP traffic through dsmark 306 * This way, we can send non-IP traffic through dsmark
305 * and don't need yet another qdisc as a bypass. 307 * and don't need yet another qdisc as a bypass.
306 */ 308 */
307 if (p->mask[index] != 0xff || p->value[index]) 309 if (p->mv[index].mask != 0xff || p->mv[index].value)
308 pr_warn("%s: unsupported protocol %d\n", 310 pr_warn("%s: unsupported protocol %d\n",
309 __func__, ntohs(tc_skb_protocol(skb))); 311 __func__, ntohs(tc_skb_protocol(skb)));
310 break; 312 break;
@@ -346,7 +348,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
346 int err = -EINVAL; 348 int err = -EINVAL;
347 u32 default_index = NO_DEFAULT_INDEX; 349 u32 default_index = NO_DEFAULT_INDEX;
348 u16 indices; 350 u16 indices;
349 u8 *mask; 351 int i;
350 352
351 pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt); 353 pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt);
352 354
@@ -366,18 +368,18 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
366 if (tb[TCA_DSMARK_DEFAULT_INDEX]) 368 if (tb[TCA_DSMARK_DEFAULT_INDEX])
367 default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]); 369 default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]);
368 370
369 mask = kmalloc(indices * 2, GFP_KERNEL); 371 if (indices <= DSMARK_EMBEDDED_SZ)
370 if (mask == NULL) { 372 p->mv = p->embedded;
373 else
374 p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL);
375 if (!p->mv) {
371 err = -ENOMEM; 376 err = -ENOMEM;
372 goto errout; 377 goto errout;
373 } 378 }
374 379 for (i = 0; i < indices; i++) {
375 p->mask = mask; 380 p->mv[i].mask = 0xff;
376 memset(p->mask, 0xff, indices); 381 p->mv[i].value = 0;
377 382 }
378 p->value = p->mask + indices;
379 memset(p->value, 0, indices);
380
381 p->indices = indices; 383 p->indices = indices;
382 p->default_index = default_index; 384 p->default_index = default_index;
383 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); 385 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
@@ -410,7 +412,8 @@ static void dsmark_destroy(struct Qdisc *sch)
410 412
411 tcf_destroy_chain(&p->filter_list); 413 tcf_destroy_chain(&p->filter_list);
412 qdisc_destroy(p->q); 414 qdisc_destroy(p->q);
413 kfree(p->mask); 415 if (p->mv != p->embedded)
416 kfree(p->mv);
414} 417}
415 418
416static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, 419static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
@@ -430,8 +433,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
430 opts = nla_nest_start(skb, TCA_OPTIONS); 433 opts = nla_nest_start(skb, TCA_OPTIONS);
431 if (opts == NULL) 434 if (opts == NULL)
432 goto nla_put_failure; 435 goto nla_put_failure;
433 if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]) || 436 if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) ||
434 nla_put_u8(skb, TCA_DSMARK_VALUE, p->value[cl - 1])) 437 nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value))
435 goto nla_put_failure; 438 goto nla_put_failure;
436 439
437 return nla_nest_end(skb, opts); 440 return nla_nest_end(skb, opts);
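
The dsmark change merges the two parallel u8 arrays into one array of mask/value pairs and skips the heap entirely for small tables: up to DSMARK_EMBEDDED_SZ entries live in storage embedded in the qdisc private area. A standalone sketch of that small-buffer optimization follows; the surrounding struct and function names are invented for illustration, only the allocate/free logic mirrors the patch.

#include <stdio.h>
#include <stdlib.h>

struct mask_value {
    unsigned char mask;
    unsigned char value;
};

#define EMBEDDED_SZ 16

struct table {
    struct mask_value *mv;
    unsigned short indices;
    struct mask_value embedded[EMBEDDED_SZ];
};

/* Use the embedded storage when it fits, heap allocation otherwise;
 * initialize every slot to the "no rewrite" default (mask 0xff,
 * value 0), as the patched dsmark_init() does. */
static int table_init(struct table *t, unsigned short indices)
{
    unsigned short i;

    if (indices <= EMBEDDED_SZ)
        t->mv = t->embedded;
    else {
        t->mv = calloc(indices, sizeof(*t->mv));
        if (!t->mv)
            return -1;
    }
    for (i = 0; i < indices; i++) {
        t->mv[i].mask = 0xff;
        t->mv[i].value = 0;
    }
    t->indices = indices;
    return 0;
}

/* Free only when the heap path was taken -- mirrors dsmark_destroy(). */
static void table_destroy(struct table *t)
{
    if (t->mv != t->embedded)
        free(t->mv);
}

int main(void)
{
    struct table small, big;

    table_init(&small, 8);   /* lives in small.embedded, no malloc */
    table_init(&big, 4096);  /* heap allocated */
    printf("small uses embedded storage: %d\n", small.mv == small.embedded);
    table_destroy(&small);
    table_destroy(&big);
    return 0;
}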
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index f377702d4b91..109b2322778f 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -224,13 +224,16 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
224 if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) 224 if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
225 return &q->internal; 225 return &q->internal;
226 226
227 /* SYNACK messages are attached to a listener socket. 227 /* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket
228 * 1) They are not part of a 'flow' yet 228 * or a listener (SYNCOOKIE mode)
229 * 2) We do not want to rate limit them (eg SYNFLOOD attack), 229 * 1) request sockets are not full blown,
230 * they do not contain sk_pacing_rate
231 * 2) They are not part of a 'flow' yet
232 * 3) We do not want to rate limit them (eg SYNFLOOD attack),
230 * especially if the listener set SO_MAX_PACING_RATE 233 * especially if the listener set SO_MAX_PACING_RATE
231 * 3) We pretend they are orphaned 234 * 4) We pretend they are orphaned
232 */ 235 */
233 if (!sk || sk->sk_state == TCP_LISTEN) { 236 if (!sk || sk_listener(sk)) {
234 unsigned long hash = skb_get_hash(skb) & q->orphan_mask; 237 unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
235 238
236 /* By forcing low order bit to 1, we make sure to not 239 /* By forcing low order bit to 1, we make sure to not
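
fq keys flows by socket pointer; packets without a usable full socket (orphans, and after this patch request/listener sockets too) are keyed by a masked skb hash instead. Forcing the low bit of that synthetic key to 1 guarantees it can never collide with a real struct sock pointer, which is always word-aligned and so has its low bits clear. A sketch of the idea, with invented names and the exact shift treated as illustrative:

#include <stdint.h>
#include <stdio.h>

/* A real socket pointer is at least 4-byte aligned, so bit 0 is 0.
 * Synthetic orphan keys set bit 0, keeping the two key spaces disjoint. */
static uintptr_t flow_key(const void *sk, uint32_t skb_hash,
                          uint32_t orphan_mask)
{
    if (!sk)
        return (((uintptr_t)(skb_hash & orphan_mask)) << 1) | 1UL;
    return (uintptr_t)sk;
}

int main(void)
{
    int dummy_sock; /* stands in for a struct sock */
    uintptr_t k1 = flow_key(&dummy_sock, 0, 1023);
    uintptr_t k2 = flow_key(NULL, 0x12345678, 1023);

    printf("socket key low bit: %lu\n", (unsigned long)(k1 & 1)); /* 0 */
    printf("orphan key low bit: %lu\n", (unsigned long)(k2 & 1)); /* 1 */
    return 0;
}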
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 9d15cb6b8cb1..86b04e31e60b 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -368,6 +368,15 @@ static unsigned int hhf_drop(struct Qdisc *sch)
368 return bucket - q->buckets; 368 return bucket - q->buckets;
369} 369}
370 370
371static unsigned int hhf_qdisc_drop(struct Qdisc *sch)
372{
373 unsigned int prev_backlog;
374
375 prev_backlog = sch->qstats.backlog;
376 hhf_drop(sch);
377 return prev_backlog - sch->qstats.backlog;
378}
379
371static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) 380static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
372{ 381{
373 struct hhf_sched_data *q = qdisc_priv(sch); 382 struct hhf_sched_data *q = qdisc_priv(sch);
@@ -696,7 +705,7 @@ static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
696 .enqueue = hhf_enqueue, 705 .enqueue = hhf_enqueue,
697 .dequeue = hhf_dequeue, 706 .dequeue = hhf_dequeue,
698 .peek = qdisc_peek_dequeued, 707 .peek = qdisc_peek_dequeued,
699 .drop = hhf_drop, 708 .drop = hhf_qdisc_drop,
700 .init = hhf_init, 709 .init = hhf_init,
701 .reset = hhf_reset, 710 .reset = hhf_reset,
702 .destroy = hhf_destroy, 711 .destroy = hhf_destroy,
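
The hhf hunk adapts to the qdisc ->drop convention of reporting how many bytes were freed: the new wrapper snapshots qstats.backlog, performs the drop, and returns the delta, leaving the bucket-index return of hhf_drop() for internal use. A sketch of that adapter pattern (types simplified for illustration):

#include <stdio.h>

struct qdisc {
    unsigned int backlog; /* queued bytes, like sch->qstats.backlog */
};

/* Existing helper: drops one packet, updates backlog, returns an
 * internal index that the outer API does not want. */
static unsigned int do_drop(struct qdisc *q)
{
    q->backlog -= 1500; /* pretend we freed one 1500-byte packet */
    return 3;           /* bucket index, irrelevant to callers */
}

/* Adapter matching the outer convention: return bytes freed. */
static unsigned int qdisc_drop_bytes(struct qdisc *q)
{
    unsigned int prev_backlog = q->backlog;

    do_drop(q);
    return prev_backlog - q->backlog;
}

int main(void)
{
    struct qdisc q = { .backlog = 9000 };

    printf("freed %u bytes\n", qdisc_drop_bytes(&q)); /* freed 1500 bytes */
    return 0;
}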
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b00f1f9611d6..559afd0ee7de 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1590,7 +1590,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
1590/* Set an association id for a given association */ 1590/* Set an association id for a given association */
1591int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) 1591int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
1592{ 1592{
1593 bool preload = !!(gfp & __GFP_WAIT); 1593 bool preload = gfpflags_allow_blocking(gfp);
1594 int ret; 1594 int ret;
1595 1595
1596 /* If the id is already assigned, keep it. */ 1596 /* If the id is already assigned, keep it. */
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 4f15b7d730e1..1543e39f47c3 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -809,8 +809,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
809 if (!has_sha1) 809 if (!has_sha1)
810 return -EINVAL; 810 return -EINVAL;
811 811
812 memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0], 812 for (i = 0; i < hmacs->shmac_num_idents; i++)
813 hmacs->shmac_num_idents * sizeof(__u16)); 813 ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
814 ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) + 814 ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
815 hmacs->shmac_num_idents * sizeof(__u16)); 815 hmacs->shmac_num_idents * sizeof(__u16));
816 return 0; 816 return 0;
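
The sctp_auth_ep_set_hmacs fix is an endianness bug: the identifiers arrive from userspace in host byte order, but hmac_ids goes out on the wire, so each __u16 must be converted with htons() individually; a raw memcpy() only happens to work on big-endian machines. A small userspace demonstration of the difference:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    uint16_t host_ids[2] = { 1, 3 }; /* e.g. HMAC algorithm idents */
    uint16_t wire_ids[2];
    size_t i;

    /* Buggy on little-endian: copies host-order bytes verbatim. */
    memcpy(wire_ids, host_ids, sizeof(host_ids));
    printf("memcpy: %02x %02x\n",
           ((uint8_t *)wire_ids)[0], ((uint8_t *)wire_ids)[1]);

    /* Correct: convert each identifier to network byte order. */
    for (i = 0; i < 2; i++)
        wire_ids[i] = htons(host_ids[i]);
    printf("htons:  %02x %02x\n",
           ((uint8_t *)wire_ids)[0], ((uint8_t *)wire_ids)[1]);
    /* On little-endian: memcpy prints 01 00, htons prints 00 01. */
    return 0;
}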
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 7954c52e1794..763e06a55155 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2494,7 +2494,7 @@ static int sctp_process_param(struct sctp_association *asoc,
2494 __u16 sat; 2494 __u16 sat;
2495 int retval = 1; 2495 int retval = 1;
2496 sctp_scope_t scope; 2496 sctp_scope_t scope;
2497 time_t stale; 2497 u32 stale;
2498 struct sctp_af *af; 2498 struct sctp_af *af;
2499 union sctp_addr_param *addr_param; 2499 union sctp_addr_param *addr_param;
2500 struct sctp_transport *t; 2500 struct sctp_transport *t;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d7eaa7354cf7..6f46aa16cb76 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2306,7 +2306,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
2306 sctp_cmd_seq_t *commands) 2306 sctp_cmd_seq_t *commands)
2307{ 2307{
2308 struct sctp_chunk *chunk = arg; 2308 struct sctp_chunk *chunk = arg;
2309 time_t stale; 2309 u32 stale;
2310 sctp_cookie_preserve_param_t bht; 2310 sctp_cookie_preserve_param_t bht;
2311 sctp_errhdr_t *err; 2311 sctp_errhdr_t *err;
2312 struct sctp_chunk *reply; 2312 struct sctp_chunk *reply;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 17bef01b9aa3..897c01c029ca 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4475,7 +4475,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
4475 } 4475 }
4476 4476
4477 newfile = sock_alloc_file(newsock, 0, NULL); 4477 newfile = sock_alloc_file(newsock, 0, NULL);
4478 if (unlikely(IS_ERR(newfile))) { 4478 if (IS_ERR(newfile)) {
4479 put_unused_fd(retval); 4479 put_unused_fd(retval);
4480 sock_release(newsock); 4480 sock_release(newsock);
4481 return PTR_ERR(newfile); 4481 return PTR_ERR(newfile);
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index a0a431824f63..aab9e3f29755 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -331,7 +331,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
331 * 1/8, rto_alpha would be expressed as 3. 331 * 1/8, rto_alpha would be expressed as 3.
332 */ 332 */
333 tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) 333 tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta)
334 + (((__u32)abs64((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); 334 + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta);
335 tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) 335 tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha)
336 + (rtt >> net->sctp.rto_alpha); 336 + (rtt >> net->sctp.rto_alpha);
337 } else { 337 } else {
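
The transport.c change swaps abs64() for abs() now that the kernel's abs() is type-generic; the expression itself is the standard RTO smoothing of RFC 4960 section 6.3.1, with rto_alpha and rto_beta stored as shift counts (alpha = 1/8 is shift 3, beta = 1/4 is shift 2). A worked userspace sketch of one update step, using llabs() in place of the kernel's abs():

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* RTTVAR = (1 - beta) * RTTVAR + beta * |SRTT - RTT|
 * SRTT   = (1 - alpha) * SRTT  + alpha * RTT
 * with alpha = 1/8 (shift 3) and beta = 1/4 (shift 2). */
static void rto_update(uint32_t *srtt, uint32_t *rttvar, uint32_t rtt)
{
    const unsigned int rto_alpha = 3, rto_beta = 2;

    /* llabs() on the signed difference plays the role of abs((__s64)...) */
    *rttvar = *rttvar - (*rttvar >> rto_beta)
            + ((uint32_t)llabs((int64_t)*srtt - (int64_t)rtt) >> rto_beta);
    *srtt = *srtt - (*srtt >> rto_alpha) + (rtt >> rto_alpha);
}

int main(void)
{
    uint32_t srtt = 100, rttvar = 25;

    rto_update(&srtt, &rttvar, 140); /* one 140ms sample */
    printf("srtt=%u rttvar=%u\n", srtt, rttvar); /* srtt=105 rttvar=29 */
    return 0;
}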
diff --git a/net/socket.c b/net/socket.c
index 9963a0b53a64..dd2c247c99e3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -373,7 +373,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
373 373
374 file = alloc_file(&path, FMODE_READ | FMODE_WRITE, 374 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
375 &socket_file_ops); 375 &socket_file_ops);
376 if (unlikely(IS_ERR(file))) { 376 if (IS_ERR(file)) {
377 /* drop dentry, keep inode */ 377 /* drop dentry, keep inode */
378 ihold(d_inode(path.dentry)); 378 ihold(d_inode(path.dentry));
379 path_put(&path); 379 path_put(&path);
@@ -1303,7 +1303,7 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1303 } 1303 }
1304 1304
1305 newfile1 = sock_alloc_file(sock1, flags, NULL); 1305 newfile1 = sock_alloc_file(sock1, flags, NULL);
1306 if (unlikely(IS_ERR(newfile1))) { 1306 if (IS_ERR(newfile1)) {
1307 err = PTR_ERR(newfile1); 1307 err = PTR_ERR(newfile1);
1308 goto out_put_unused_both; 1308 goto out_put_unused_both;
1309 } 1309 }
@@ -1467,7 +1467,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1467 goto out_put; 1467 goto out_put;
1468 } 1468 }
1469 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name); 1469 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
1470 if (unlikely(IS_ERR(newfile))) { 1470 if (IS_ERR(newfile)) {
1471 err = PTR_ERR(newfile); 1471 err = PTR_ERR(newfile);
1472 put_unused_fd(newfd); 1472 put_unused_fd(newfd);
1473 sock_release(newsock); 1473 sock_release(newsock);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index dace13d7638e..799e65b944b9 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1411,17 +1411,16 @@ gss_key_timeout(struct rpc_cred *rc)
1411{ 1411{
1412 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); 1412 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
1413 struct gss_cl_ctx *ctx; 1413 struct gss_cl_ctx *ctx;
1414 unsigned long now = jiffies; 1414 unsigned long timeout = jiffies + (gss_key_expire_timeo * HZ);
1415 unsigned long expire; 1415 int ret = 0;
1416 1416
1417 rcu_read_lock(); 1417 rcu_read_lock();
1418 ctx = rcu_dereference(gss_cred->gc_ctx); 1418 ctx = rcu_dereference(gss_cred->gc_ctx);
1419 if (ctx) 1419 if (!ctx || time_after(timeout, ctx->gc_expiry))
1420 expire = ctx->gc_expiry - (gss_key_expire_timeo * HZ); 1420 ret = -EACCES;
1421 rcu_read_unlock(); 1421 rcu_read_unlock();
1422 if (!ctx || time_after(now, expire)) 1422
1423 return -EACCES; 1423 return ret;
1424 return 0;
1425} 1424}
1426 1425
1427static int 1426static int
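
The gss_key_timeout rewrite consumes the ctx-derived expiry entirely under rcu_read_lock() and computes now + timeout rather than gc_expiry - timeout, so the subtraction cannot underflow when gc_expiry is small. Both forms lean on time_after(), which is safe across jiffies wraparound because it compares via signed subtraction. A sketch of that idiom (time_after_demo() models the kernel macro):

#include <stdio.h>

/* Wraparound-safe comparison, modeled on the kernel's time_after():
 * true if a is after b, even when the counter has wrapped. */
static int time_after_demo(unsigned long a, unsigned long b)
{
    return (long)(b - a) < 0;
}

int main(void)
{
    unsigned long near_wrap = (unsigned long)-10; /* about to wrap */
    unsigned long wrapped = 5;                    /* just after wrap */

    /* Naive a > b gets this wrong; signed subtraction gets it right. */
    printf("naive:      %d\n", wrapped > near_wrap);               /* 0 */
    printf("time_after: %d\n", time_after_demo(wrapped, near_wrap)); /* 1 */
    return 0;
}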
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 6255d141133b..229956bf8457 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -138,6 +138,14 @@ out_free:
138 */ 138 */
139int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) 139int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
140{ 140{
141 if (!xprt->ops->bc_setup)
142 return 0;
143 return xprt->ops->bc_setup(xprt, min_reqs);
144}
145EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
146
147int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
148{
141 struct rpc_rqst *req; 149 struct rpc_rqst *req;
142 struct list_head tmp_list; 150 struct list_head tmp_list;
143 int i; 151 int i;
@@ -192,7 +200,6 @@ out_free:
192 dprintk("RPC: setup backchannel transport failed\n"); 200 dprintk("RPC: setup backchannel transport failed\n");
193 return -ENOMEM; 201 return -ENOMEM;
194} 202}
195EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
196 203
197/** 204/**
198 * xprt_destroy_backchannel - Destroys the backchannel preallocated structures. 205 * xprt_destroy_backchannel - Destroys the backchannel preallocated structures.
@@ -205,6 +212,13 @@ EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
205 */ 212 */
206void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) 213void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
207{ 214{
215 if (xprt->ops->bc_destroy)
216 xprt->ops->bc_destroy(xprt, max_reqs);
217}
218EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
219
220void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs)
221{
208 struct rpc_rqst *req = NULL, *tmp = NULL; 222 struct rpc_rqst *req = NULL, *tmp = NULL;
209 223
210 dprintk("RPC: destroy backchannel transport\n"); 224 dprintk("RPC: destroy backchannel transport\n");
@@ -227,7 +241,6 @@ out:
227 dprintk("RPC: backchannel list empty= %s\n", 241 dprintk("RPC: backchannel list empty= %s\n",
228 list_empty(&xprt->bc_pa_list) ? "true" : "false"); 242 list_empty(&xprt->bc_pa_list) ? "true" : "false");
229} 243}
230EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
231 244
232static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) 245static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
233{ 246{
@@ -264,6 +277,13 @@ void xprt_free_bc_request(struct rpc_rqst *req)
264{ 277{
265 struct rpc_xprt *xprt = req->rq_xprt; 278 struct rpc_xprt *xprt = req->rq_xprt;
266 279
280 xprt->ops->bc_free_rqst(req);
281}
282
283void xprt_free_bc_rqst(struct rpc_rqst *req)
284{
285 struct rpc_xprt *xprt = req->rq_xprt;
286
267 dprintk("RPC: free backchannel req=%p\n", req); 287 dprintk("RPC: free backchannel req=%p\n", req);
268 288
269 req->rq_connect_cookie = xprt->connect_cookie - 1; 289 req->rq_connect_cookie = xprt->connect_cookie - 1;
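
The backchannel_rqst.c hunks turn xprt_setup_backchannel() and friends into thin dispatchers over per-transport ops (bc_setup, bc_destroy, bc_free_rqst), letting RPC/RDMA supply its own implementations while the original socket code moves to xprt_setup_bc(); a missing hook means "no backchannel support", not an error. A sketch of that optional-hook ops-table pattern, with invented struct names:

#include <stdio.h>

struct xprt;

struct xprt_ops {
    /* Optional hooks: NULL means the transport has no backchannel. */
    int  (*bc_setup)(struct xprt *x, unsigned int min_reqs);
    void (*bc_destroy)(struct xprt *x, unsigned int max_reqs);
};

struct xprt {
    const struct xprt_ops *ops;
};

/* Generic entry point dispatches through the ops table, tolerating
 * transports that leave a hook unset. */
static int setup_backchannel(struct xprt *x, unsigned int min_reqs)
{
    if (!x->ops->bc_setup)
        return 0; /* silently a no-op, as in the patch */
    return x->ops->bc_setup(x, min_reqs);
}

static int tcp_bc_setup(struct xprt *x, unsigned int n)
{
    printf("tcp: preallocating %u backchannel requests\n", n);
    return 0;
}

int main(void)
{
    const struct xprt_ops tcp_ops = { .bc_setup = tcp_bc_setup };
    const struct xprt_ops udp_ops = { 0 }; /* no backchannel */
    struct xprt tcp = { &tcp_ops }, udp = { &udp_ops };

    setup_backchannel(&tcp, 16);
    printf("udp setup -> %d\n", setup_backchannel(&udp, 16)); /* 0 */
    return 0;
}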
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4a2340a54401..5e4f815c2b34 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -41,13 +41,16 @@
41static bool cache_defer_req(struct cache_req *req, struct cache_head *item); 41static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
42static void cache_revisit_request(struct cache_head *item); 42static void cache_revisit_request(struct cache_head *item);
43 43
44static void cache_init(struct cache_head *h) 44static void cache_init(struct cache_head *h, struct cache_detail *detail)
45{ 45{
46 time_t now = seconds_since_boot(); 46 time_t now = seconds_since_boot();
47 INIT_HLIST_NODE(&h->cache_list); 47 INIT_HLIST_NODE(&h->cache_list);
48 h->flags = 0; 48 h->flags = 0;
49 kref_init(&h->ref); 49 kref_init(&h->ref);
50 h->expiry_time = now + CACHE_NEW_EXPIRY; 50 h->expiry_time = now + CACHE_NEW_EXPIRY;
51 if (now <= detail->flush_time)
52 /* ensure it isn't already expired */
53 now = detail->flush_time + 1;
51 h->last_refresh = now; 54 h->last_refresh = now;
52} 55}
53 56
@@ -81,7 +84,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
 81 * we might lose it if we need to 84 * we might lose it if we need to
82 * cache_put it soon. 85 * cache_put it soon.
83 */ 86 */
84 cache_init(new); 87 cache_init(new, detail);
85 detail->init(new, key); 88 detail->init(new, key);
86 89
87 write_lock(&detail->hash_lock); 90 write_lock(&detail->hash_lock);
@@ -116,10 +119,15 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
116 119
117static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); 120static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
118 121
119static void cache_fresh_locked(struct cache_head *head, time_t expiry) 122static void cache_fresh_locked(struct cache_head *head, time_t expiry,
123 struct cache_detail *detail)
120{ 124{
125 time_t now = seconds_since_boot();
126 if (now <= detail->flush_time)
127 /* ensure it isn't immediately treated as expired */
128 now = detail->flush_time + 1;
121 head->expiry_time = expiry; 129 head->expiry_time = expiry;
122 head->last_refresh = seconds_since_boot(); 130 head->last_refresh = now;
123 smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */ 131 smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */
124 set_bit(CACHE_VALID, &head->flags); 132 set_bit(CACHE_VALID, &head->flags);
125} 133}
@@ -149,7 +157,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
149 set_bit(CACHE_NEGATIVE, &old->flags); 157 set_bit(CACHE_NEGATIVE, &old->flags);
150 else 158 else
151 detail->update(old, new); 159 detail->update(old, new);
152 cache_fresh_locked(old, new->expiry_time); 160 cache_fresh_locked(old, new->expiry_time, detail);
153 write_unlock(&detail->hash_lock); 161 write_unlock(&detail->hash_lock);
154 cache_fresh_unlocked(old, detail); 162 cache_fresh_unlocked(old, detail);
155 return old; 163 return old;
@@ -162,7 +170,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
162 cache_put(old, detail); 170 cache_put(old, detail);
163 return NULL; 171 return NULL;
164 } 172 }
165 cache_init(tmp); 173 cache_init(tmp, detail);
166 detail->init(tmp, old); 174 detail->init(tmp, old);
167 175
168 write_lock(&detail->hash_lock); 176 write_lock(&detail->hash_lock);
@@ -173,8 +181,8 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
173 hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); 181 hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
174 detail->entries++; 182 detail->entries++;
175 cache_get(tmp); 183 cache_get(tmp);
176 cache_fresh_locked(tmp, new->expiry_time); 184 cache_fresh_locked(tmp, new->expiry_time, detail);
177 cache_fresh_locked(old, 0); 185 cache_fresh_locked(old, 0, detail);
178 write_unlock(&detail->hash_lock); 186 write_unlock(&detail->hash_lock);
179 cache_fresh_unlocked(tmp, detail); 187 cache_fresh_unlocked(tmp, detail);
180 cache_fresh_unlocked(old, detail); 188 cache_fresh_unlocked(old, detail);
@@ -219,7 +227,8 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
219 rv = cache_is_valid(h); 227 rv = cache_is_valid(h);
220 if (rv == -EAGAIN) { 228 if (rv == -EAGAIN) {
221 set_bit(CACHE_NEGATIVE, &h->flags); 229 set_bit(CACHE_NEGATIVE, &h->flags);
222 cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); 230 cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY,
231 detail);
223 rv = -ENOENT; 232 rv = -ENOENT;
224 } 233 }
225 write_unlock(&detail->hash_lock); 234 write_unlock(&detail->hash_lock);
@@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush);
487 496
488void cache_purge(struct cache_detail *detail) 497void cache_purge(struct cache_detail *detail)
489{ 498{
490 detail->flush_time = LONG_MAX; 499 time_t now = seconds_since_boot();
500 if (detail->flush_time >= now)
501 now = detail->flush_time + 1;
502 /* 'now' is the maximum value any 'last_refresh' can have */
503 detail->flush_time = now;
491 detail->nextcheck = seconds_since_boot(); 504 detail->nextcheck = seconds_since_boot();
492 cache_flush(); 505 cache_flush();
493 detail->flush_time = 1;
494} 506}
495EXPORT_SYMBOL_GPL(cache_purge); 507EXPORT_SYMBOL_GPL(cache_purge);
496 508
@@ -1436,6 +1448,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
1436{ 1448{
1437 char tbuf[20]; 1449 char tbuf[20];
1438 char *bp, *ep; 1450 char *bp, *ep;
1451 time_t then, now;
1439 1452
1440 if (*ppos || count > sizeof(tbuf)-1) 1453 if (*ppos || count > sizeof(tbuf)-1)
1441 return -EINVAL; 1454 return -EINVAL;
@@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
1447 return -EINVAL; 1460 return -EINVAL;
1448 1461
1449 bp = tbuf; 1462 bp = tbuf;
1450 cd->flush_time = get_expiry(&bp); 1463 then = get_expiry(&bp);
1451 cd->nextcheck = seconds_since_boot(); 1464 now = seconds_since_boot();
1465 cd->nextcheck = now;
1466 /* Can only set flush_time to 1 second beyond "now", or
 1467 * possibly 1 second beyond flush_time. This is because
1468 * flush_time never goes backwards so it mustn't get too far
1469 * ahead of time.
1470 */
1471 if (then >= now) {
1472 /* Want to flush everything, so behave like cache_purge() */
1473 if (cd->flush_time >= now)
1474 now = cd->flush_time + 1;
1475 then = now;
1476 }
1477
1478 cd->flush_time = then;
1452 cache_flush(); 1479 cache_flush();
1453 1480
1454 *ppos += count; 1481 *ppos += count;
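
All of the cache.c hunks enforce one invariant: flush_time only moves forward, and a freshly initialized or refreshed entry must get a last_refresh strictly later than flush_time, or it would be treated as already flushed. A condensed sketch of the clamping rule shared by cache_init(), cache_fresh_locked(), cache_purge() and write_flush() (helper names invented):

#include <stdio.h>

typedef long time_secs;

/* Clamp 'now' so a refresh always lands after the flush watermark. */
static time_secs refresh_time(time_secs now, time_secs flush_time)
{
    if (now <= flush_time)
        now = flush_time + 1;
    return now;
}

/* Purge: flush everything by raising the watermark just past the
 * newest possible last_refresh, never by moving it backwards. */
static time_secs purge_time(time_secs now, time_secs flush_time)
{
    if (flush_time >= now)
        now = flush_time + 1;
    return now;
}

int main(void)
{
    time_secs flush = 1000;

    printf("refresh at 990  -> %ld\n", refresh_time(990, flush));  /* 1001 */
    printf("refresh at 2000 -> %ld\n", refresh_time(2000, flush)); /* 2000 */
    printf("purge at 1000   -> %ld\n", purge_time(1000, flush));   /* 1001 */
    return 0;
}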
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a8f579df14d8..bc5b7b5032ca 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1367,11 +1367,6 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
1367 /* reset result send buffer "put" position */ 1367 /* reset result send buffer "put" position */
1368 resv->iov_len = 0; 1368 resv->iov_len = 0;
1369 1369
1370 if (rqstp->rq_prot != IPPROTO_TCP) {
1371 printk(KERN_ERR "No support for Non-TCP transports!\n");
1372 BUG();
1373 }
1374
1375 /* 1370 /*
1376 * Skip the next two words because they've already been 1371 * Skip the next two words because they've already been
1377 * processed in the transport 1372 * processed in the transport
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0c8120229a03..1413cdcc131c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -181,7 +181,7 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
181 struct page **ppage = xdr->pages; 181 struct page **ppage = xdr->pages;
182 size_t base = xdr->page_base; 182 size_t base = xdr->page_base;
183 unsigned int pglen = xdr->page_len; 183 unsigned int pglen = xdr->page_len;
184 unsigned int flags = MSG_MORE; 184 unsigned int flags = MSG_MORE | MSG_SENDPAGE_NOTLAST;
185 int slen; 185 int slen;
186 int len = 0; 186 int len = 0;
187 187
@@ -399,6 +399,31 @@ static int svc_sock_secure_port(struct svc_rqst *rqstp)
399 return svc_port_is_privileged(svc_addr(rqstp)); 399 return svc_port_is_privileged(svc_addr(rqstp));
400} 400}
401 401
402static bool sunrpc_waitqueue_active(wait_queue_head_t *wq)
403{
404 if (!wq)
405 return false;
406 /*
 407 * There should normally be a memory barrier here--see
408 * wq_has_sleeper().
409 *
410 * It appears that isn't currently necessary, though, basically
411 * because callers all appear to have sufficient memory barriers
412 * between the time the relevant change is made and the
413 * time they call these callbacks.
414 *
415 * The nfsd code itself doesn't actually explicitly wait on
416 * these waitqueues, but it may wait on them for example in
417 * sendpage() or sendmsg() calls. (And those may be the only
 418 * places, since it uses nonblocking reads.)
419 *
420 * Maybe we should add the memory barriers anyway, but these are
 421 * hot paths so we'd need to be convinced there's no significant
422 * penalty.
423 */
424 return waitqueue_active(wq);
425}
426
402/* 427/*
403 * INET callback when data has been received on the socket. 428 * INET callback when data has been received on the socket.
404 */ 429 */
@@ -414,7 +439,7 @@ static void svc_udp_data_ready(struct sock *sk)
414 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 439 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
415 svc_xprt_enqueue(&svsk->sk_xprt); 440 svc_xprt_enqueue(&svsk->sk_xprt);
416 } 441 }
417 if (wq && waitqueue_active(wq)) 442 if (sunrpc_waitqueue_active(wq))
418 wake_up_interruptible(wq); 443 wake_up_interruptible(wq);
419} 444}
420 445
@@ -432,7 +457,7 @@ static void svc_write_space(struct sock *sk)
432 svc_xprt_enqueue(&svsk->sk_xprt); 457 svc_xprt_enqueue(&svsk->sk_xprt);
433 } 458 }
434 459
435 if (wq && waitqueue_active(wq)) { 460 if (sunrpc_waitqueue_active(wq)) {
436 dprintk("RPC svc_write_space: someone sleeping on %p\n", 461 dprintk("RPC svc_write_space: someone sleeping on %p\n",
437 svsk); 462 svsk);
438 wake_up_interruptible(wq); 463 wake_up_interruptible(wq);
@@ -787,7 +812,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
787 } 812 }
788 813
789 wq = sk_sleep(sk); 814 wq = sk_sleep(sk);
790 if (wq && waitqueue_active(wq)) 815 if (sunrpc_waitqueue_active(wq))
791 wake_up_interruptible_all(wq); 816 wake_up_interruptible_all(wq);
792} 817}
793 818
@@ -808,7 +833,7 @@ static void svc_tcp_state_change(struct sock *sk)
808 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 833 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
809 svc_xprt_enqueue(&svsk->sk_xprt); 834 svc_xprt_enqueue(&svsk->sk_xprt);
810 } 835 }
811 if (wq && waitqueue_active(wq)) 836 if (sunrpc_waitqueue_active(wq))
812 wake_up_interruptible_all(wq); 837 wake_up_interruptible_all(wq);
813} 838}
814 839
@@ -823,7 +848,7 @@ static void svc_tcp_data_ready(struct sock *sk)
823 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 848 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
824 svc_xprt_enqueue(&svsk->sk_xprt); 849 svc_xprt_enqueue(&svsk->sk_xprt);
825 } 850 }
826 if (wq && waitqueue_active(wq)) 851 if (sunrpc_waitqueue_active(wq))
827 wake_up_interruptible(wq); 852 wake_up_interruptible(wq);
828} 853}
829 854
@@ -1367,7 +1392,6 @@ EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
1367 1392
1368/* 1393/*
1369 * Initialize socket for RPC use and create svc_sock struct 1394 * Initialize socket for RPC use and create svc_sock struct
1370 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
1371 */ 1395 */
1372static struct svc_sock *svc_setup_socket(struct svc_serv *serv, 1396static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1373 struct socket *sock, 1397 struct socket *sock,
@@ -1594,7 +1618,7 @@ static void svc_sock_detach(struct svc_xprt *xprt)
1594 sk->sk_write_space = svsk->sk_owspace; 1618 sk->sk_write_space = svsk->sk_owspace;
1595 1619
1596 wq = sk_sleep(sk); 1620 wq = sk_sleep(sk);
1597 if (wq && waitqueue_active(wq)) 1621 if (sunrpc_waitqueue_active(wq))
1598 wake_up_interruptible(wq); 1622 wake_up_interruptible(wq);
1599} 1623}
1600 1624
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 887f0183b4c6..c88d9bc06f5c 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -76,7 +76,7 @@ static int
76proc_dodebug(struct ctl_table *table, int write, 76proc_dodebug(struct ctl_table *table, int write,
77 void __user *buffer, size_t *lenp, loff_t *ppos) 77 void __user *buffer, size_t *lenp, loff_t *ppos)
78{ 78{
79 char tmpbuf[20], c, *s; 79 char tmpbuf[20], c, *s = NULL;
80 char __user *p; 80 char __user *p;
81 unsigned int value; 81 unsigned int value;
82 size_t left, len; 82 size_t left, len;
@@ -103,23 +103,24 @@ proc_dodebug(struct ctl_table *table, int write,
103 return -EFAULT; 103 return -EFAULT;
104 tmpbuf[left] = '\0'; 104 tmpbuf[left] = '\0';
105 105
106 for (s = tmpbuf, value = 0; '0' <= *s && *s <= '9'; s++, left--) 106 value = simple_strtol(tmpbuf, &s, 0);
107 value = 10 * value + (*s - '0'); 107 if (s) {
108 if (*s && !isspace(*s)) 108 left -= (s - tmpbuf);
109 return -EINVAL; 109 if (left && !isspace(*s))
110 while (left && isspace(*s)) 110 return -EINVAL;
111 left--, s++; 111 while (left && isspace(*s))
112 left--, s++;
113 } else
114 left = 0;
112 *(unsigned int *) table->data = value; 115 *(unsigned int *) table->data = value;
113 /* Display the RPC tasks on writing to rpc_debug */ 116 /* Display the RPC tasks on writing to rpc_debug */
114 if (strcmp(table->procname, "rpc_debug") == 0) 117 if (strcmp(table->procname, "rpc_debug") == 0)
115 rpc_show_tasks(&init_net); 118 rpc_show_tasks(&init_net);
116 } else { 119 } else {
117 if (!access_ok(VERIFY_WRITE, buffer, left)) 120 len = sprintf(tmpbuf, "0x%04x", *(unsigned int *) table->data);
118 return -EFAULT;
119 len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data);
120 if (len > left) 121 if (len > left)
121 len = left; 122 len = left;
122 if (__copy_to_user(buffer, tmpbuf, len)) 123 if (copy_to_user(buffer, tmpbuf, len))
123 return -EFAULT; 124 return -EFAULT;
124 if ((left -= len) > 0) { 125 if ((left -= len) > 0) {
125 if (put_user('\n', (char __user *)buffer + len)) 126 if (put_user('\n', (char __user *)buffer + len))
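
The sysctl hunk replaces a hand-rolled decimal parser with simple_strtol(..., 0), so the debug mask can be written in hex ("0x...") or octal as well as decimal, and it reads back formatted as 0x%04x. A userspace equivalent of the new parsing behavior, using strtol with base 0 (a sketch, not the kernel function):

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse a debug mask the way the patched proc_dodebug() does:
 * base 0 accepts "42", "0x2a", and "052"; trailing whitespace is
 * fine, any other trailing junk is an error. */
static int parse_mask(const char *buf, unsigned int *value)
{
    char *end;

    *value = (unsigned int)strtol(buf, &end, 0);
    while (*end && isspace((unsigned char)*end))
        end++;
    return *end ? -1 : 0;
}

int main(void)
{
    unsigned int v;

    if (parse_mask("0x2a\n", &v) == 0)
        printf("mask = 0x%04x\n", v); /* mask = 0x002a */
    if (parse_mask("12junk", &v) != 0)
        printf("rejected trailing junk\n");
    return 0;
}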
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 48913de240bd..33f99d3004f2 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -5,3 +5,4 @@ rpcrdma-y := transport.o rpc_rdma.o verbs.o \
5 svc_rdma.o svc_rdma_transport.o \ 5 svc_rdma.o svc_rdma_transport.o \
6 svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \ 6 svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
7 module.o 7 module.o
8rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
new file mode 100644
index 000000000000..2dcb44f69e53
--- /dev/null
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -0,0 +1,394 @@
1/*
2 * Copyright (c) 2015 Oracle. All rights reserved.
3 *
4 * Support for backward direction RPCs on RPC/RDMA.
5 */
6
7#include <linux/module.h>
8#include <linux/sunrpc/xprt.h>
9#include <linux/sunrpc/svc.h>
10#include <linux/sunrpc/svc_xprt.h>
11
12#include "xprt_rdma.h"
13
14#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
15# define RPCDBG_FACILITY RPCDBG_TRANS
16#endif
17
18#define RPCRDMA_BACKCHANNEL_DEBUG
19
20static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
21 struct rpc_rqst *rqst)
22{
23 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
24 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
25
26 spin_lock(&buf->rb_reqslock);
27 list_del(&req->rl_all);
28 spin_unlock(&buf->rb_reqslock);
29
30 rpcrdma_destroy_req(&r_xprt->rx_ia, req);
31
32 kfree(rqst);
33}
34
35static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
36 struct rpc_rqst *rqst)
37{
38 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
39 struct rpcrdma_regbuf *rb;
40 struct rpcrdma_req *req;
41 struct xdr_buf *buf;
42 size_t size;
43
44 req = rpcrdma_create_req(r_xprt);
45 if (!req)
46 return -ENOMEM;
47 req->rl_backchannel = true;
48
49 size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
50 rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL);
51 if (IS_ERR(rb))
52 goto out_fail;
53 req->rl_rdmabuf = rb;
54
55 size += RPCRDMA_INLINE_READ_THRESHOLD(rqst);
56 rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL);
57 if (IS_ERR(rb))
58 goto out_fail;
59 rb->rg_owner = req;
60 req->rl_sendbuf = rb;
61 /* so that rpcr_to_rdmar works when receiving a request */
62 rqst->rq_buffer = (void *)req->rl_sendbuf->rg_base;
63
64 buf = &rqst->rq_snd_buf;
65 buf->head[0].iov_base = rqst->rq_buffer;
66 buf->head[0].iov_len = 0;
67 buf->tail[0].iov_base = NULL;
68 buf->tail[0].iov_len = 0;
69 buf->page_len = 0;
70 buf->len = 0;
71 buf->buflen = size;
72
73 return 0;
74
75out_fail:
76 rpcrdma_bc_free_rqst(r_xprt, rqst);
77 return -ENOMEM;
78}
79
80/* Allocate and add receive buffers to the rpcrdma_buffer's
81 * existing list of rep's. These are released when the
82 * transport is destroyed.
83 */
84static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
85 unsigned int count)
86{
87 struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
88 struct rpcrdma_rep *rep;
89 unsigned long flags;
90 int rc = 0;
91
92 while (count--) {
93 rep = rpcrdma_create_rep(r_xprt);
94 if (IS_ERR(rep)) {
95 pr_err("RPC: %s: reply buffer alloc failed\n",
96 __func__);
97 rc = PTR_ERR(rep);
98 break;
99 }
100
101 spin_lock_irqsave(&buffers->rb_lock, flags);
102 list_add(&rep->rr_list, &buffers->rb_recv_bufs);
103 spin_unlock_irqrestore(&buffers->rb_lock, flags);
104 }
105
106 return rc;
107}
108
109/**
110 * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
111 * @xprt: transport associated with these backchannel resources
112 * @reqs: number of concurrent incoming requests to expect
113 *
114 * Returns 0 on success; otherwise a negative errno
115 */
116int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
117{
118 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
119 struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
120 struct rpc_rqst *rqst;
121 unsigned int i;
122 int rc;
123
124 /* The backchannel reply path returns each rpc_rqst to the
125 * bc_pa_list _after_ the reply is sent. If the server is
126 * faster than the client, it can send another backward
127 * direction request before the rpc_rqst is returned to the
128 * list. The client rejects the request in this case.
129 *
130 * Twice as many rpc_rqsts are prepared to ensure there is
131 * always an rpc_rqst available as soon as a reply is sent.
132 */
133 if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
134 goto out_err;
135
136 for (i = 0; i < (reqs << 1); i++) {
137 rqst = kzalloc(sizeof(*rqst), GFP_KERNEL);
138 if (!rqst) {
139 pr_err("RPC: %s: Failed to create bc rpc_rqst\n",
140 __func__);
141 goto out_free;
142 }
143
144 rqst->rq_xprt = &r_xprt->rx_xprt;
145 INIT_LIST_HEAD(&rqst->rq_list);
146 INIT_LIST_HEAD(&rqst->rq_bc_list);
147
148 if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
149 goto out_free;
150
151 spin_lock_bh(&xprt->bc_pa_lock);
152 list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
153 spin_unlock_bh(&xprt->bc_pa_lock);
154 }
155
156 rc = rpcrdma_bc_setup_reps(r_xprt, reqs);
157 if (rc)
158 goto out_free;
159
160 rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs);
161 if (rc)
162 goto out_free;
163
164 buffer->rb_bc_srv_max_requests = reqs;
165 request_module("svcrdma");
166
167 return 0;
168
169out_free:
170 xprt_rdma_bc_destroy(xprt, reqs);
171
172out_err:
173 pr_err("RPC: %s: setup backchannel transport failed\n", __func__);
174 return -ENOMEM;
175}
176
177/**
178 * xprt_rdma_bc_up - Create transport endpoint for backchannel service
179 * @serv: server endpoint
180 * @net: network namespace
181 *
182 * The "xprt" is an implied argument: it supplies the name of the
183 * backchannel transport class.
184 *
185 * Returns zero on success, negative errno on failure
186 */
187int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net)
188{
189 int ret;
190
191 ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0);
192 if (ret < 0)
193 return ret;
194 return 0;
195}
196
197/**
198 * rpcrdma_bc_marshal_reply - Send backwards direction reply
199 * @rqst: buffer containing RPC reply data
200 *
201 * Returns zero on success.
202 */
203int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
204{
205 struct rpc_xprt *xprt = rqst->rq_xprt;
206 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
207 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
208 struct rpcrdma_msg *headerp;
209 size_t rpclen;
210
211 headerp = rdmab_to_msg(req->rl_rdmabuf);
212 headerp->rm_xid = rqst->rq_xid;
213 headerp->rm_vers = rpcrdma_version;
214 headerp->rm_credit =
215 cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests);
216 headerp->rm_type = rdma_msg;
217 headerp->rm_body.rm_chunks[0] = xdr_zero;
218 headerp->rm_body.rm_chunks[1] = xdr_zero;
219 headerp->rm_body.rm_chunks[2] = xdr_zero;
220
221 rpclen = rqst->rq_svec[0].iov_len;
222
223 pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n",
224 __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf));
225 pr_info("RPC: %s: RPC/RDMA: %*ph\n",
226 __func__, (int)RPCRDMA_HDRLEN_MIN, headerp);
227 pr_info("RPC: %s: RPC: %*ph\n",
228 __func__, (int)rpclen, rqst->rq_svec[0].iov_base);
229
230 req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
231 req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN;
232 req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf);
233
234 req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf);
235 req->rl_send_iov[1].length = rpclen;
236 req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf);
237
238 req->rl_niovs = 2;
239 return 0;
240}
241
242/**
243 * xprt_rdma_bc_destroy - Release resources for handling backchannel requests
244 * @xprt: transport associated with these backchannel resources
245 * @reqs: number of incoming requests to destroy; ignored
246 */
247void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
248{
249 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
250 struct rpc_rqst *rqst, *tmp;
251
252 spin_lock_bh(&xprt->bc_pa_lock);
253 list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
254 list_del(&rqst->rq_bc_pa_list);
255 spin_unlock_bh(&xprt->bc_pa_lock);
256
257 rpcrdma_bc_free_rqst(r_xprt, rqst);
258
259 spin_lock_bh(&xprt->bc_pa_lock);
260 }
261 spin_unlock_bh(&xprt->bc_pa_lock);
262}
263
264/**
265 * xprt_rdma_bc_free_rqst - Release a backchannel rqst
266 * @rqst: request to release
267 */
268void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
269{
270 struct rpc_xprt *xprt = rqst->rq_xprt;
271
272 smp_mb__before_atomic();
273 WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
274 clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
275 smp_mb__after_atomic();
276
277 spin_lock_bh(&xprt->bc_pa_lock);
278 list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
279 spin_unlock_bh(&xprt->bc_pa_lock);
280}
281
282/**
283 * rpcrdma_bc_receive_call - Handle a backward direction call
284 * @xprt: transport receiving the call
285 * @rep: receive buffer containing the call
286 *
287 * Called in the RPC reply handler, which runs in a tasklet.
288 * Be quick about it.
289 *
290 * Operational assumptions:
291 * o Backchannel credits are ignored, just as the NFS server
292 * forechannel currently does
293 * o The ULP manages a replay cache (eg, NFSv4.1 sessions).
294 * No replay detection is done at the transport level
295 */
296void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
297 struct rpcrdma_rep *rep)
298{
299 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
300 struct rpcrdma_msg *headerp;
301 struct svc_serv *bc_serv;
302 struct rpcrdma_req *req;
303 struct rpc_rqst *rqst;
304 struct xdr_buf *buf;
305 size_t size;
306 __be32 *p;
307
308 headerp = rdmab_to_msg(rep->rr_rdmabuf);
309#ifdef RPCRDMA_BACKCHANNEL_DEBUG
310 pr_info("RPC: %s: callback XID %08x, length=%u\n",
311 __func__, be32_to_cpu(headerp->rm_xid), rep->rr_len);
312 pr_info("RPC: %s: %*ph\n", __func__, rep->rr_len, headerp);
313#endif
314
315 /* Sanity check:
316 * Need at least enough bytes for RPC/RDMA header, as code
317 * here references the header fields by array offset. Also,
318 * backward calls are always inline, so ensure there
319 * are some bytes beyond the RPC/RDMA header.
320 */
321 if (rep->rr_len < RPCRDMA_HDRLEN_MIN + 24)
322 goto out_short;
323 p = (__be32 *)((unsigned char *)headerp + RPCRDMA_HDRLEN_MIN);
324 size = rep->rr_len - RPCRDMA_HDRLEN_MIN;
325
326 /* Grab a free bc rqst */
327 spin_lock(&xprt->bc_pa_lock);
328 if (list_empty(&xprt->bc_pa_list)) {
329 spin_unlock(&xprt->bc_pa_lock);
330 goto out_overflow;
331 }
332 rqst = list_first_entry(&xprt->bc_pa_list,
333 struct rpc_rqst, rq_bc_pa_list);
334 list_del(&rqst->rq_bc_pa_list);
335 spin_unlock(&xprt->bc_pa_lock);
336#ifdef RPCRDMA_BACKCHANNEL_DEBUG
337 pr_info("RPC: %s: using rqst %p\n", __func__, rqst);
338#endif
339
340 /* Prepare rqst */
341 rqst->rq_reply_bytes_recvd = 0;
342 rqst->rq_bytes_sent = 0;
343 rqst->rq_xid = headerp->rm_xid;
344 set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
345
346 buf = &rqst->rq_rcv_buf;
347 memset(buf, 0, sizeof(*buf));
348 buf->head[0].iov_base = p;
349 buf->head[0].iov_len = size;
350 buf->len = size;
351
352 /* The receive buffer has to be hooked to the rpcrdma_req
353 * so that it can be reposted after the server is done
354 * parsing it but just before sending the backward
355 * direction reply.
356 */
357 req = rpcr_to_rdmar(rqst);
358#ifdef RPCRDMA_BACKCHANNEL_DEBUG
359 pr_info("RPC: %s: attaching rep %p to req %p\n",
360 __func__, rep, req);
361#endif
362 req->rl_reply = rep;
363
364 /* Defeat the retransmit detection logic in send_request */
365 req->rl_connect_cookie = 0;
366
367 /* Queue rqst for ULP's callback service */
368 bc_serv = xprt->bc_serv;
369 spin_lock(&bc_serv->sv_cb_lock);
370 list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
371 spin_unlock(&bc_serv->sv_cb_lock);
372
373 wake_up(&bc_serv->sv_cb_waitq);
374
375 r_xprt->rx_stats.bcall_count++;
376 return;
377
378out_overflow:
379 pr_warn("RPC/RDMA backchannel overflow\n");
380 xprt_disconnect_done(xprt);
381 /* This receive buffer gets reposted automatically
382 * when the connection is re-established.
383 */
384 return;
385
386out_short:
387 pr_warn("RPC/RDMA short backward direction call\n");
388
389 if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
390 xprt_disconnect_done(xprt);
391 else
392 pr_warn("RPC: %s: reposting rep %p\n",
393 __func__, rep);
394}
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 5318951b3b53..88cf9e7269c2 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
151 f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); 151 f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
152 if (IS_ERR(f->fr_mr)) 152 if (IS_ERR(f->fr_mr))
153 goto out_mr_err; 153 goto out_mr_err;
154 f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); 154
155 if (IS_ERR(f->fr_pgl)) 155 f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL);
156 if (!f->sg)
156 goto out_list_err; 157 goto out_list_err;
158
159 sg_init_table(f->sg, depth);
160
157 return 0; 161 return 0;
158 162
159out_mr_err: 163out_mr_err:
@@ -163,9 +167,9 @@ out_mr_err:
163 return rc; 167 return rc;
164 168
165out_list_err: 169out_list_err:
166 rc = PTR_ERR(f->fr_pgl); 170 rc = -ENOMEM;
167 dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", 171 dprintk("RPC: %s: sg allocation failure\n",
168 __func__, rc); 172 __func__);
169 ib_dereg_mr(f->fr_mr); 173 ib_dereg_mr(f->fr_mr);
170 return rc; 174 return rc;
171} 175}
@@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r)
179 if (rc) 183 if (rc)
180 dprintk("RPC: %s: ib_dereg_mr status %i\n", 184 dprintk("RPC: %s: ib_dereg_mr status %i\n",
181 __func__, rc); 185 __func__, rc);
182 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); 186 kfree(r->r.frmr.sg);
183} 187}
184 188
185static int 189static int
@@ -252,8 +256,11 @@ frwr_sendcompletion(struct ib_wc *wc)
252 256
253 /* WARNING: Only wr_id and status are reliable at this point */ 257 /* WARNING: Only wr_id and status are reliable at this point */
254 r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; 258 r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
255 pr_warn("RPC: %s: frmr %p flushed, status %s (%d)\n", 259 if (wc->status == IB_WC_WR_FLUSH_ERR)
256 __func__, r, ib_wc_status_msg(wc->status), wc->status); 260 dprintk("RPC: %s: frmr %p flushed\n", __func__, r);
261 else
262 pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
263 __func__, r, ib_wc_status_msg(wc->status), wc->status);
257 r->r.frmr.fr_state = FRMR_IS_STALE; 264 r->r.frmr.fr_state = FRMR_IS_STALE;
258} 265}
259 266
@@ -312,13 +319,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
312 struct rpcrdma_mw *mw; 319 struct rpcrdma_mw *mw;
313 struct rpcrdma_frmr *frmr; 320 struct rpcrdma_frmr *frmr;
314 struct ib_mr *mr; 321 struct ib_mr *mr;
315 struct ib_send_wr fastreg_wr, *bad_wr; 322 struct ib_reg_wr reg_wr;
323 struct ib_send_wr *bad_wr;
324 int rc, i, n, dma_nents;
316 u8 key; 325 u8 key;
317 int len, pageoff;
318 int i, rc;
319 int seg_len;
320 u64 pa;
321 int page_no;
322 326
323 mw = seg1->rl_mw; 327 mw = seg1->rl_mw;
324 seg1->rl_mw = NULL; 328 seg1->rl_mw = NULL;
@@ -331,64 +335,80 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
331 } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); 335 } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
332 frmr = &mw->r.frmr; 336 frmr = &mw->r.frmr;
333 frmr->fr_state = FRMR_IS_VALID; 337 frmr->fr_state = FRMR_IS_VALID;
338 mr = frmr->fr_mr;
334 339
335 pageoff = offset_in_page(seg1->mr_offset);
336 seg1->mr_offset -= pageoff; /* start of page */
337 seg1->mr_len += pageoff;
338 len = -pageoff;
339 if (nsegs > ia->ri_max_frmr_depth) 340 if (nsegs > ia->ri_max_frmr_depth)
340 nsegs = ia->ri_max_frmr_depth; 341 nsegs = ia->ri_max_frmr_depth;
341 342
342 for (page_no = i = 0; i < nsegs;) { 343 for (i = 0; i < nsegs;) {
343 rpcrdma_map_one(device, seg, direction); 344 if (seg->mr_page)
344 pa = seg->mr_dma; 345 sg_set_page(&frmr->sg[i],
345 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { 346 seg->mr_page,
346 frmr->fr_pgl->page_list[page_no++] = pa; 347 seg->mr_len,
347 pa += PAGE_SIZE; 348 offset_in_page(seg->mr_offset));
348 } 349 else
349 len += seg->mr_len; 350 sg_set_buf(&frmr->sg[i], seg->mr_offset,
351 seg->mr_len);
352
350 ++seg; 353 ++seg;
351 ++i; 354 ++i;
355
352 /* Check for holes */ 356 /* Check for holes */
353 if ((i < nsegs && offset_in_page(seg->mr_offset)) || 357 if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
354 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) 358 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
355 break; 359 break;
356 } 360 }
357 dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", 361 frmr->sg_nents = i;
358 __func__, mw, i, len); 362
359 363 dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction);
360 memset(&fastreg_wr, 0, sizeof(fastreg_wr)); 364 if (!dma_nents) {
361 fastreg_wr.wr_id = (unsigned long)(void *)mw; 365 pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n",
362 fastreg_wr.opcode = IB_WR_FAST_REG_MR; 366 __func__, frmr->sg, frmr->sg_nents);
363 fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff; 367 return -ENOMEM;
364 fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; 368 }
365 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 369
366 fastreg_wr.wr.fast_reg.page_list_len = page_no; 370 n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
367 fastreg_wr.wr.fast_reg.length = len; 371 if (unlikely(n != frmr->sg_nents)) {
368 fastreg_wr.wr.fast_reg.access_flags = writing ? 372 pr_err("RPC: %s: failed to map mr %p (%u/%u)\n",
369 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 373 __func__, frmr->fr_mr, n, frmr->sg_nents);
370 IB_ACCESS_REMOTE_READ; 374 rc = n < 0 ? n : -EINVAL;
371 mr = frmr->fr_mr; 375 goto out_senderr;
376 }
377
378 dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
379 __func__, mw, frmr->sg_nents, mr->length);
380
372 key = (u8)(mr->rkey & 0x000000FF); 381 key = (u8)(mr->rkey & 0x000000FF);
373 ib_update_fast_reg_key(mr, ++key); 382 ib_update_fast_reg_key(mr, ++key);
374 fastreg_wr.wr.fast_reg.rkey = mr->rkey; 383
384 reg_wr.wr.next = NULL;
385 reg_wr.wr.opcode = IB_WR_REG_MR;
386 reg_wr.wr.wr_id = (uintptr_t)mw;
387 reg_wr.wr.num_sge = 0;
388 reg_wr.wr.send_flags = 0;
389 reg_wr.mr = mr;
390 reg_wr.key = mr->rkey;
391 reg_wr.access = writing ?
392 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
393 IB_ACCESS_REMOTE_READ;
375 394
376 DECR_CQCOUNT(&r_xprt->rx_ep); 395 DECR_CQCOUNT(&r_xprt->rx_ep);
377 rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); 396 rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr);
378 if (rc) 397 if (rc)
379 goto out_senderr; 398 goto out_senderr;
380 399
400 seg1->mr_dir = direction;
381 seg1->rl_mw = mw; 401 seg1->rl_mw = mw;
382 seg1->mr_rkey = mr->rkey; 402 seg1->mr_rkey = mr->rkey;
383 seg1->mr_base = seg1->mr_dma + pageoff; 403 seg1->mr_base = mr->iova;
384 seg1->mr_nsegs = i; 404 seg1->mr_nsegs = frmr->sg_nents;
385 seg1->mr_len = len; 405 seg1->mr_len = mr->length;
386 return i; 406
407 return frmr->sg_nents;
387 408
388out_senderr: 409out_senderr:
389 dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); 410 dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
390 while (i--) 411 ib_dma_unmap_sg(device, frmr->sg, dma_nents, direction);
391 rpcrdma_unmap_one(device, --seg);
392 __frwr_queue_recovery(mw); 412 __frwr_queue_recovery(mw);
393 return rc; 413 return rc;
394} 414}
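Condensed, the new frwr_op_map() flow is: build a scatterlist, DMA-map it, load the MR with ib_map_mr_sg(), then post a single IB_WR_REG_MR. A minimal sketch under those assumptions, with a hypothetical helper and error unwinding elided (the 4-argument ib_map_mr_sg() form matches the kernel version this patch targets):

#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

static int example_reg_mr(struct ib_device *dev, struct ib_qp *qp,
			  struct ib_mr *mr, struct scatterlist *sg,
			  int nents, enum dma_data_direction dir)
{
	struct ib_reg_wr reg_wr;
	struct ib_send_wr *bad_wr;
	int n;

	/* 1. DMA-map the scatterlist the caller populated with
	 *    sg_set_page()/sg_set_buf().
	 */
	if (!ib_dma_map_sg(dev, sg, nents, dir))
		return -ENOMEM;

	/* 2. Load the MR's internal page list from the mapped entries. */
	n = ib_map_mr_sg(mr, sg, nents, PAGE_SIZE);
	if (n != nents)
		return n < 0 ? n : -EINVAL;

	/* 3. One REG_MR work request replaces the old FAST_REG_MR
	 *    opcode and its separately allocated fast_reg page list.
	 */
	memset(&reg_wr, 0, sizeof(reg_wr));
	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = IB_ACCESS_REMOTE_READ;
	return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}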
@@ -402,22 +422,22 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
402 struct rpcrdma_mr_seg *seg1 = seg; 422 struct rpcrdma_mr_seg *seg1 = seg;
403 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 423 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
404 struct rpcrdma_mw *mw = seg1->rl_mw; 424 struct rpcrdma_mw *mw = seg1->rl_mw;
425 struct rpcrdma_frmr *frmr = &mw->r.frmr;
405 struct ib_send_wr invalidate_wr, *bad_wr; 426 struct ib_send_wr invalidate_wr, *bad_wr;
406 int rc, nsegs = seg->mr_nsegs; 427 int rc, nsegs = seg->mr_nsegs;
407 428
408 dprintk("RPC: %s: FRMR %p\n", __func__, mw); 429 dprintk("RPC: %s: FRMR %p\n", __func__, mw);
409 430
410 seg1->rl_mw = NULL; 431 seg1->rl_mw = NULL;
411 mw->r.frmr.fr_state = FRMR_IS_INVALID; 432 frmr->fr_state = FRMR_IS_INVALID;
412 433
413 memset(&invalidate_wr, 0, sizeof(invalidate_wr)); 434 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
414 invalidate_wr.wr_id = (unsigned long)(void *)mw; 435 invalidate_wr.wr_id = (unsigned long)(void *)mw;
415 invalidate_wr.opcode = IB_WR_LOCAL_INV; 436 invalidate_wr.opcode = IB_WR_LOCAL_INV;
416 invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey; 437 invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey;
417 DECR_CQCOUNT(&r_xprt->rx_ep); 438 DECR_CQCOUNT(&r_xprt->rx_ep);
418 439
419 while (seg1->mr_nsegs--) 440 ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
420 rpcrdma_unmap_one(ia->ri_device, seg++);
421 read_lock(&ia->ri_qplock); 441 read_lock(&ia->ri_qplock);
422 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); 442 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
423 read_unlock(&ia->ri_qplock); 443 read_unlock(&ia->ri_qplock);
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index bc8bd6577467..c10d9699441c 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -441,6 +441,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
441 enum rpcrdma_chunktype rtype, wtype; 441 enum rpcrdma_chunktype rtype, wtype;
442 struct rpcrdma_msg *headerp; 442 struct rpcrdma_msg *headerp;
443 443
444#if defined(CONFIG_SUNRPC_BACKCHANNEL)
445 if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
446 return rpcrdma_bc_marshal_reply(rqst);
447#endif
448
444 /* 449 /*
445 * rpclen gets amount of data in first buffer, which is the 450 * rpclen gets amount of data in first buffer, which is the
446 * pre-registered buffer. 451 * pre-registered buffer.
@@ -711,6 +716,37 @@ rpcrdma_connect_worker(struct work_struct *work)
711 spin_unlock_bh(&xprt->transport_lock); 716 spin_unlock_bh(&xprt->transport_lock);
712} 717}
713 718
719#if defined(CONFIG_SUNRPC_BACKCHANNEL)
720/* By convention, backchannel calls arrive via rdma_msg type
721 * messages, and never populate the chunk lists. This makes
722 * the RPC/RDMA header small and fixed in size, so it is
723 * straightforward to check the RPC header's direction field.
724 */
725static bool
726rpcrdma_is_bcall(struct rpcrdma_msg *headerp)
727{
728 __be32 *p = (__be32 *)headerp;
729
730 if (headerp->rm_type != rdma_msg)
731 return false;
732 if (headerp->rm_body.rm_chunks[0] != xdr_zero)
733 return false;
734 if (headerp->rm_body.rm_chunks[1] != xdr_zero)
735 return false;
736 if (headerp->rm_body.rm_chunks[2] != xdr_zero)
737 return false;
738
739 /* sanity */
740 if (p[7] != headerp->rm_xid)
741 return false;
742 /* call direction */
743 if (p[8] != cpu_to_be32(RPC_CALL))
744 return false;
745
746 return true;
747}
748#endif /* CONFIG_SUNRPC_BACKCHANNEL */
749
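The word offsets in rpcrdma_is_bcall() follow from the fixed header shape the function has just verified. Assuming the standard RPC-over-RDMA v1 layout, the __be32 view of such a message is:

/* With all three chunk lists empty, the transport header is exactly
 * seven __be32 words, so the embedded RPC message begins at p[7]:
 *
 *   p[0] rm_xid              p[4] read list   (xdr_zero)
 *   p[1] rm_vers             p[5] write list  (xdr_zero)
 *   p[2] rm_credit           p[6] reply chunk (xdr_zero)
 *   p[3] rm_type (rdma_msg)
 *
 *   p[7] RPC XID        -- sanity: must repeat p[0]
 *   p[8] RPC direction  -- RPC_CALL marks a backchannel request
 */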
714/* 750/*
715 * This function is called when an async event is posted to 751 * This function is called when an async event is posted to
716 * the connection which changes the connection state. All it 752 * the connection which changes the connection state. All it
@@ -723,8 +759,8 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
723 schedule_delayed_work(&ep->rep_connect_worker, 0); 759 schedule_delayed_work(&ep->rep_connect_worker, 0);
724} 760}
725 761
726/* 762/* Process received RPC/RDMA messages.
727 * Called as a tasklet to do req/reply match and complete a request 763 *
728 * Errors must result in the RPC task either being awakened, or 764 * Errors must result in the RPC task either being awakened, or
729 * allowed to timeout, to discover the errors at that time. 765 * allowed to timeout, to discover the errors at that time.
730 */ 766 */
@@ -741,52 +777,32 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
741 unsigned long cwnd; 777 unsigned long cwnd;
742 u32 credits; 778 u32 credits;
743 779
744 /* Check status. If bad, signal disconnect and return rep to pool */ 780 dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
745 if (rep->rr_len == ~0U) { 781
746 rpcrdma_recv_buffer_put(rep); 782 if (rep->rr_len == RPCRDMA_BAD_LEN)
747 if (r_xprt->rx_ep.rep_connected == 1) { 783 goto out_badstatus;
748 r_xprt->rx_ep.rep_connected = -EIO; 784 if (rep->rr_len < RPCRDMA_HDRLEN_MIN)
749 rpcrdma_conn_func(&r_xprt->rx_ep); 785 goto out_shortreply;
750 } 786
751 return;
752 }
753 if (rep->rr_len < RPCRDMA_HDRLEN_MIN) {
754 dprintk("RPC: %s: short/invalid reply\n", __func__);
755 goto repost;
756 }
757 headerp = rdmab_to_msg(rep->rr_rdmabuf); 787 headerp = rdmab_to_msg(rep->rr_rdmabuf);
758 if (headerp->rm_vers != rpcrdma_version) { 788 if (headerp->rm_vers != rpcrdma_version)
759 dprintk("RPC: %s: invalid version %d\n", 789 goto out_badversion;
760 __func__, be32_to_cpu(headerp->rm_vers)); 790#if defined(CONFIG_SUNRPC_BACKCHANNEL)
761 goto repost; 791 if (rpcrdma_is_bcall(headerp))
762 } 792 goto out_bcall;
793#endif
763 794
764 /* Get XID and try for a match. */ 795 /* Match incoming rpcrdma_rep to an rpcrdma_req to
765 spin_lock(&xprt->transport_lock); 796 * get context for handling any incoming chunks.
797 */
798 spin_lock_bh(&xprt->transport_lock);
766 rqst = xprt_lookup_rqst(xprt, headerp->rm_xid); 799 rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
767 if (rqst == NULL) { 800 if (!rqst)
768 spin_unlock(&xprt->transport_lock); 801 goto out_nomatch;
769 dprintk("RPC: %s: reply 0x%p failed "
770 "to match any request xid 0x%08x len %d\n",
771 __func__, rep, be32_to_cpu(headerp->rm_xid),
772 rep->rr_len);
773repost:
774 r_xprt->rx_stats.bad_reply_count++;
775 if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
776 rpcrdma_recv_buffer_put(rep);
777 802
778 return;
779 }
780
781 /* get request object */
782 req = rpcr_to_rdmar(rqst); 803 req = rpcr_to_rdmar(rqst);
783 if (req->rl_reply) { 804 if (req->rl_reply)
784 spin_unlock(&xprt->transport_lock); 805 goto out_duplicate;
785 dprintk("RPC: %s: duplicate reply 0x%p to RPC "
786 "request 0x%p: xid 0x%08x\n", __func__, rep, req,
787 be32_to_cpu(headerp->rm_xid));
788 goto repost;
789 }
790 806
791 dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" 807 dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
792 " RPC request 0x%p xid 0x%08x\n", 808 " RPC request 0x%p xid 0x%08x\n",
@@ -883,8 +899,50 @@ badheader:
883 if (xprt->cwnd > cwnd) 899 if (xprt->cwnd > cwnd)
884 xprt_release_rqst_cong(rqst->rq_task); 900 xprt_release_rqst_cong(rqst->rq_task);
885 901
902 xprt_complete_rqst(rqst->rq_task, status);
903 spin_unlock_bh(&xprt->transport_lock);
886 dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", 904 dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
887 __func__, xprt, rqst, status); 905 __func__, xprt, rqst, status);
888 xprt_complete_rqst(rqst->rq_task, status); 906 return;
889 spin_unlock(&xprt->transport_lock); 907
908out_badstatus:
909 rpcrdma_recv_buffer_put(rep);
910 if (r_xprt->rx_ep.rep_connected == 1) {
911 r_xprt->rx_ep.rep_connected = -EIO;
912 rpcrdma_conn_func(&r_xprt->rx_ep);
913 }
914 return;
915
916#if defined(CONFIG_SUNRPC_BACKCHANNEL)
917out_bcall:
918 rpcrdma_bc_receive_call(r_xprt, rep);
919 return;
920#endif
921
922out_shortreply:
923 dprintk("RPC: %s: short/invalid reply\n", __func__);
924 goto repost;
925
926out_badversion:
927 dprintk("RPC: %s: invalid version %d\n",
928 __func__, be32_to_cpu(headerp->rm_vers));
929 goto repost;
930
931out_nomatch:
932 spin_unlock_bh(&xprt->transport_lock);
933 dprintk("RPC: %s: no match for incoming xid 0x%08x len %d\n",
934 __func__, be32_to_cpu(headerp->rm_xid),
935 rep->rr_len);
936 goto repost;
937
938out_duplicate:
939 spin_unlock_bh(&xprt->transport_lock);
940 dprintk("RPC: %s: "
941 "duplicate reply %p to RPC request %p: xid 0x%08x\n",
942 __func__, rep, req, be32_to_cpu(headerp->rm_xid));
943
944repost:
945 r_xprt->rx_stats.bad_reply_count++;
946 if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
947 rpcrdma_recv_buffer_put(rep);
890} 948}
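Two things change shape in this hunk at once: error handling moves to out-of-line labels so the hot path reads straight through, and transport_lock switches to the _bh variants because the reply handler now runs from a workqueue (process context) while other users of that lock can run in softirq context. The control flow reduced to a sketch (free variables stand for the surrounding context; this is not the kernel code verbatim):

static void example_reply_handler(struct rpcrdma_rep *rep)
{
	if (rep->rr_len == RPCRDMA_BAD_LEN)
		goto out_badstatus;
	if (rep->rr_len < RPCRDMA_HDRLEN_MIN)
		goto out_shortreply;

	spin_lock_bh(&xprt->transport_lock);	/* excludes softirq users */
	/* ... match XID, update credits, complete the RPC ... */
	spin_unlock_bh(&xprt->transport_lock);
	return;

out_badstatus:
	/* connection is gone: return the buffer, signal disconnect */
	return;
out_shortreply:
	/* cold path: count the bad reply and repost the receive buffer */
	r_xprt->rx_stats.bad_reply_count++;
}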
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 2cd252f023a5..1b7051bdbdc8 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -239,6 +239,9 @@ void svc_rdma_cleanup(void)
239 unregister_sysctl_table(svcrdma_table_header); 239 unregister_sysctl_table(svcrdma_table_header);
240 svcrdma_table_header = NULL; 240 svcrdma_table_header = NULL;
241 } 241 }
242#if defined(CONFIG_SUNRPC_BACKCHANNEL)
243 svc_unreg_xprt_class(&svc_rdma_bc_class);
244#endif
242 svc_unreg_xprt_class(&svc_rdma_class); 245 svc_unreg_xprt_class(&svc_rdma_class);
243 kmem_cache_destroy(svc_rdma_map_cachep); 246 kmem_cache_destroy(svc_rdma_map_cachep);
244 kmem_cache_destroy(svc_rdma_ctxt_cachep); 247 kmem_cache_destroy(svc_rdma_ctxt_cachep);
@@ -286,6 +289,9 @@ int svc_rdma_init(void)
286 289
287 /* Register RDMA with the SVC transport switch */ 290 /* Register RDMA with the SVC transport switch */
288 svc_reg_xprt_class(&svc_rdma_class); 291 svc_reg_xprt_class(&svc_rdma_class);
292#if defined(CONFIG_SUNRPC_BACKCHANNEL)
293 svc_reg_xprt_class(&svc_rdma_bc_class);
294#endif
289 return 0; 295 return 0;
290 err1: 296 err1:
291 kmem_cache_destroy(svc_rdma_map_cachep); 297 kmem_cache_destroy(svc_rdma_map_cachep);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 5f6ca47092b0..ff4f01e527ec 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -126,7 +126,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
126 u64 rs_offset, 126 u64 rs_offset,
127 bool last) 127 bool last)
128{ 128{
129 struct ib_send_wr read_wr; 129 struct ib_rdma_wr read_wr;
130 int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; 130 int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
131 struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); 131 struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
132 int ret, read, pno; 132 int ret, read, pno;
@@ -180,16 +180,16 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
180 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 180 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
181 181
182 memset(&read_wr, 0, sizeof(read_wr)); 182 memset(&read_wr, 0, sizeof(read_wr));
183 read_wr.wr_id = (unsigned long)ctxt; 183 read_wr.wr.wr_id = (unsigned long)ctxt;
184 read_wr.opcode = IB_WR_RDMA_READ; 184 read_wr.wr.opcode = IB_WR_RDMA_READ;
185 ctxt->wr_op = read_wr.opcode; 185 ctxt->wr_op = read_wr.wr.opcode;
186 read_wr.send_flags = IB_SEND_SIGNALED; 186 read_wr.wr.send_flags = IB_SEND_SIGNALED;
187 read_wr.wr.rdma.rkey = rs_handle; 187 read_wr.rkey = rs_handle;
188 read_wr.wr.rdma.remote_addr = rs_offset; 188 read_wr.remote_addr = rs_offset;
189 read_wr.sg_list = ctxt->sge; 189 read_wr.wr.sg_list = ctxt->sge;
190 read_wr.num_sge = pages_needed; 190 read_wr.wr.num_sge = pages_needed;
191 191
192 ret = svc_rdma_send(xprt, &read_wr); 192 ret = svc_rdma_send(xprt, &read_wr.wr);
193 if (ret) { 193 if (ret) {
194 pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); 194 pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
195 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 195 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -219,14 +219,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
219 u64 rs_offset, 219 u64 rs_offset,
220 bool last) 220 bool last)
221{ 221{
222 struct ib_send_wr read_wr; 222 struct ib_rdma_wr read_wr;
223 struct ib_send_wr inv_wr; 223 struct ib_send_wr inv_wr;
224 struct ib_send_wr fastreg_wr; 224 struct ib_reg_wr reg_wr;
225 u8 key; 225 u8 key;
226 int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; 226 int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
227 struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); 227 struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
228 struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); 228 struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
229 int ret, read, pno; 229 int ret, read, pno, dma_nents, n;
230 u32 pg_off = *page_offset; 230 u32 pg_off = *page_offset;
231 u32 pg_no = *page_no; 231 u32 pg_no = *page_no;
232 232
@@ -235,17 +235,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
235 235
236 ctxt->direction = DMA_FROM_DEVICE; 236 ctxt->direction = DMA_FROM_DEVICE;
237 ctxt->frmr = frmr; 237 ctxt->frmr = frmr;
238 pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); 238 nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len);
239 read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, 239 read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length);
240 rs_length);
241 240
242 frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
243 frmr->direction = DMA_FROM_DEVICE; 241 frmr->direction = DMA_FROM_DEVICE;
244 frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); 242 frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
245 frmr->map_len = pages_needed << PAGE_SHIFT; 243 frmr->sg_nents = nents;
246 frmr->page_list_len = pages_needed;
247 244
248 for (pno = 0; pno < pages_needed; pno++) { 245 for (pno = 0; pno < nents; pno++) {
249 int len = min_t(int, rs_length, PAGE_SIZE - pg_off); 246 int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
250 247
251 head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; 248 head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
@@ -253,17 +250,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
253 head->arg.len += len; 250 head->arg.len += len;
254 if (!pg_off) 251 if (!pg_off)
255 head->count++; 252 head->count++;
253
254 sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no],
255 len, pg_off);
256
256 rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; 257 rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
257 rqstp->rq_next_page = rqstp->rq_respages + 1; 258 rqstp->rq_next_page = rqstp->rq_respages + 1;
258 frmr->page_list->page_list[pno] =
259 ib_dma_map_page(xprt->sc_cm_id->device,
260 head->arg.pages[pg_no], 0,
261 PAGE_SIZE, DMA_FROM_DEVICE);
262 ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
263 frmr->page_list->page_list[pno]);
264 if (ret)
265 goto err;
266 atomic_inc(&xprt->sc_dma_used);
267 259
268 /* adjust offset and wrap to next page if needed */ 260 /* adjust offset and wrap to next page if needed */
269 pg_off += len; 261 pg_off += len;
@@ -279,43 +271,57 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
279 else 271 else
280 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 272 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
281 273
274 dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
275 frmr->sg, frmr->sg_nents,
276 frmr->direction);
277 if (!dma_nents) {
278 pr_err("svcrdma: failed to dma map sg %p\n",
279 frmr->sg);
280 return -ENOMEM;
281 }
282 atomic_inc(&xprt->sc_dma_used);
283
284 n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
285 if (unlikely(n != frmr->sg_nents)) {
286 pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
287 frmr->mr, n, frmr->sg_nents);
288 return n < 0 ? n : -EINVAL;
289 }
290
282 /* Bump the key */ 291 /* Bump the key */
283 key = (u8)(frmr->mr->lkey & 0x000000FF); 292 key = (u8)(frmr->mr->lkey & 0x000000FF);
284 ib_update_fast_reg_key(frmr->mr, ++key); 293 ib_update_fast_reg_key(frmr->mr, ++key);
285 294
286 ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; 295 ctxt->sge[0].addr = frmr->mr->iova;
287 ctxt->sge[0].lkey = frmr->mr->lkey; 296 ctxt->sge[0].lkey = frmr->mr->lkey;
288 ctxt->sge[0].length = read; 297 ctxt->sge[0].length = frmr->mr->length;
289 ctxt->count = 1; 298 ctxt->count = 1;
290 ctxt->read_hdr = head; 299 ctxt->read_hdr = head;
291 300
292 /* Prepare FASTREG WR */ 301 /* Prepare REG WR */
293 memset(&fastreg_wr, 0, sizeof(fastreg_wr)); 302 reg_wr.wr.opcode = IB_WR_REG_MR;
294 fastreg_wr.opcode = IB_WR_FAST_REG_MR; 303 reg_wr.wr.wr_id = 0;
295 fastreg_wr.send_flags = IB_SEND_SIGNALED; 304 reg_wr.wr.send_flags = IB_SEND_SIGNALED;
296 fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; 305 reg_wr.wr.num_sge = 0;
297 fastreg_wr.wr.fast_reg.page_list = frmr->page_list; 306 reg_wr.mr = frmr->mr;
298 fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; 307 reg_wr.key = frmr->mr->lkey;
299 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 308 reg_wr.access = frmr->access_flags;
300 fastreg_wr.wr.fast_reg.length = frmr->map_len; 309 reg_wr.wr.next = &read_wr.wr;
301 fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
302 fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
303 fastreg_wr.next = &read_wr;
304 310
305 /* Prepare RDMA_READ */ 311 /* Prepare RDMA_READ */
306 memset(&read_wr, 0, sizeof(read_wr)); 312 memset(&read_wr, 0, sizeof(read_wr));
307 read_wr.send_flags = IB_SEND_SIGNALED; 313 read_wr.wr.send_flags = IB_SEND_SIGNALED;
308 read_wr.wr.rdma.rkey = rs_handle; 314 read_wr.rkey = rs_handle;
309 read_wr.wr.rdma.remote_addr = rs_offset; 315 read_wr.remote_addr = rs_offset;
310 read_wr.sg_list = ctxt->sge; 316 read_wr.wr.sg_list = ctxt->sge;
311 read_wr.num_sge = 1; 317 read_wr.wr.num_sge = 1;
312 if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { 318 if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
313 read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; 319 read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
314 read_wr.wr_id = (unsigned long)ctxt; 320 read_wr.wr.wr_id = (unsigned long)ctxt;
315 read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; 321 read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
316 } else { 322 } else {
317 read_wr.opcode = IB_WR_RDMA_READ; 323 read_wr.wr.opcode = IB_WR_RDMA_READ;
318 read_wr.next = &inv_wr; 324 read_wr.wr.next = &inv_wr;
319 /* Prepare invalidate */ 325 /* Prepare invalidate */
320 memset(&inv_wr, 0, sizeof(inv_wr)); 326 memset(&inv_wr, 0, sizeof(inv_wr));
321 inv_wr.wr_id = (unsigned long)ctxt; 327 inv_wr.wr_id = (unsigned long)ctxt;
@@ -323,10 +329,10 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
323 inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; 329 inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
324 inv_wr.ex.invalidate_rkey = frmr->mr->lkey; 330 inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
325 } 331 }
326 ctxt->wr_op = read_wr.opcode; 332 ctxt->wr_op = read_wr.wr.opcode;
327 333
328 /* Post the chain */ 334 /* Post the chain */
329 ret = svc_rdma_send(xprt, &fastreg_wr); 335 ret = svc_rdma_send(xprt, &reg_wr.wr);
330 if (ret) { 336 if (ret) {
331 pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); 337 pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
332 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 338 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -340,7 +346,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
340 atomic_inc(&rdma_stat_read); 346 atomic_inc(&rdma_stat_read);
341 return ret; 347 return ret;
342 err: 348 err:
343 svc_rdma_unmap_dma(ctxt); 349 ib_dma_unmap_sg(xprt->sc_cm_id->device,
350 frmr->sg, frmr->sg_nents, frmr->direction);
344 svc_rdma_put_context(ctxt, 0); 351 svc_rdma_put_context(ctxt, 0);
345 svc_rdma_put_frmr(xprt, frmr); 352 svc_rdma_put_frmr(xprt, frmr);
346 return ret; 353 return ret;
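The registration, the RDMA Read, and (when the device lacks read-with-invalidate) the local invalidate are now linked through the embedded send WR and posted as one chain. In outline, with the surrounding variables assumed from context:

struct ib_reg_wr reg_wr = { };
struct ib_rdma_wr read_wr = { };
struct ib_send_wr inv_wr = { };

reg_wr.wr.opcode = IB_WR_REG_MR;
reg_wr.wr.next = &read_wr.wr;		/* register first */

read_wr.wr.opcode = IB_WR_RDMA_READ;
read_wr.wr.next = &inv_wr;		/* then read, then invalidate */

inv_wr.opcode = IB_WR_LOCAL_INV;
inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
					/* FENCE: read completes before
					 * the rkey is invalidated */
ret = svc_rdma_send(xprt, &reg_wr.wr);	/* post the head of the chain */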
@@ -533,7 +540,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
533 rqstp->rq_arg.page_base = head->arg.page_base; 540 rqstp->rq_arg.page_base = head->arg.page_base;
534 541
535 /* rq_respages starts after the last arg page */ 542 /* rq_respages starts after the last arg page */
536 rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; 543 rqstp->rq_respages = &rqstp->rq_pages[page_no];
537 rqstp->rq_next_page = rqstp->rq_respages + 1; 544 rqstp->rq_next_page = rqstp->rq_respages + 1;
538 545
539 /* Rebuild rq_arg head and tail. */ 546 /* Rebuild rq_arg head and tail. */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 1dfae8317065..969a1ab75fc3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -217,7 +217,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
217 u32 xdr_off, int write_len, 217 u32 xdr_off, int write_len,
218 struct svc_rdma_req_map *vec) 218 struct svc_rdma_req_map *vec)
219{ 219{
220 struct ib_send_wr write_wr; 220 struct ib_rdma_wr write_wr;
221 struct ib_sge *sge; 221 struct ib_sge *sge;
222 int xdr_sge_no; 222 int xdr_sge_no;
223 int sge_no; 223 int sge_no;
@@ -282,17 +282,17 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
282 /* Prepare WRITE WR */ 282 /* Prepare WRITE WR */
283 memset(&write_wr, 0, sizeof write_wr); 283 memset(&write_wr, 0, sizeof write_wr);
284 ctxt->wr_op = IB_WR_RDMA_WRITE; 284 ctxt->wr_op = IB_WR_RDMA_WRITE;
285 write_wr.wr_id = (unsigned long)ctxt; 285 write_wr.wr.wr_id = (unsigned long)ctxt;
286 write_wr.sg_list = &sge[0]; 286 write_wr.wr.sg_list = &sge[0];
287 write_wr.num_sge = sge_no; 287 write_wr.wr.num_sge = sge_no;
288 write_wr.opcode = IB_WR_RDMA_WRITE; 288 write_wr.wr.opcode = IB_WR_RDMA_WRITE;
289 write_wr.send_flags = IB_SEND_SIGNALED; 289 write_wr.wr.send_flags = IB_SEND_SIGNALED;
290 write_wr.wr.rdma.rkey = rmr; 290 write_wr.rkey = rmr;
291 write_wr.wr.rdma.remote_addr = to; 291 write_wr.remote_addr = to;
292 292
293 /* Post It */ 293 /* Post It */
294 atomic_inc(&rdma_stat_write); 294 atomic_inc(&rdma_stat_write);
295 if (svc_rdma_send(xprt, &write_wr)) 295 if (svc_rdma_send(xprt, &write_wr.wr))
296 goto err; 296 goto err;
297 return write_len - bc; 297 return write_len - bc;
298 err: 298 err:
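This is the mechanical pattern of the whole series: opcode-specific work requests (ib_rdma_wr, ib_reg_wr) embed struct ib_send_wr as their first member, so generic fields move behind .wr and ib_post_send() takes &container.wr. Reduced to essentials, with free variables (sge, sge_no, rmr, to, qp, bad_wr) assumed from the surrounding function:

struct ib_rdma_wr write_wr;

memset(&write_wr, 0, sizeof(write_wr));
write_wr.wr.opcode = IB_WR_RDMA_WRITE;	/* generic field: behind .wr */
write_wr.wr.sg_list = sge;
write_wr.wr.num_sge = sge_no;
write_wr.rkey = rmr;			/* RDMA-specific: on the wrapper */
write_wr.remote_addr = to;
ret = ib_post_send(qp, &write_wr.wr, &bad_wr);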
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index fcc3eb80c265..b348b4adef29 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -56,6 +56,7 @@
56 56
57#define RPCDBG_FACILITY RPCDBG_SVCXPRT 57#define RPCDBG_FACILITY RPCDBG_SVCXPRT
58 58
59static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int);
59static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 60static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
60 struct net *net, 61 struct net *net,
61 struct sockaddr *sa, int salen, 62 struct sockaddr *sa, int salen,
@@ -95,6 +96,63 @@ struct svc_xprt_class svc_rdma_class = {
95 .xcl_ident = XPRT_TRANSPORT_RDMA, 96 .xcl_ident = XPRT_TRANSPORT_RDMA,
96}; 97};
97 98
99#if defined(CONFIG_SUNRPC_BACKCHANNEL)
100static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
101 struct sockaddr *, int, int);
102static void svc_rdma_bc_detach(struct svc_xprt *);
103static void svc_rdma_bc_free(struct svc_xprt *);
104
105static struct svc_xprt_ops svc_rdma_bc_ops = {
106 .xpo_create = svc_rdma_bc_create,
107 .xpo_detach = svc_rdma_bc_detach,
108 .xpo_free = svc_rdma_bc_free,
109 .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
110 .xpo_secure_port = svc_rdma_secure_port,
111};
112
113struct svc_xprt_class svc_rdma_bc_class = {
114 .xcl_name = "rdma-bc",
115 .xcl_owner = THIS_MODULE,
116 .xcl_ops = &svc_rdma_bc_ops,
117 .xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN)
118};
119
120static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
121 struct net *net,
122 struct sockaddr *sa, int salen,
123 int flags)
124{
125 struct svcxprt_rdma *cma_xprt;
126 struct svc_xprt *xprt;
127
128 cma_xprt = rdma_create_xprt(serv, 0);
129 if (!cma_xprt)
130 return ERR_PTR(-ENOMEM);
131 xprt = &cma_xprt->sc_xprt;
132
133 svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
134 serv->sv_bc_xprt = xprt;
135
136 dprintk("svcrdma: %s(%p)\n", __func__, xprt);
137 return xprt;
138}
139
140static void svc_rdma_bc_detach(struct svc_xprt *xprt)
141{
142 dprintk("svcrdma: %s(%p)\n", __func__, xprt);
143}
144
145static void svc_rdma_bc_free(struct svc_xprt *xprt)
146{
147 struct svcxprt_rdma *rdma =
148 container_of(xprt, struct svcxprt_rdma, sc_xprt);
149
150 dprintk("svcrdma: %s(%p)\n", __func__, xprt);
151 if (xprt)
152 kfree(rdma);
153}
154#endif /* CONFIG_SUNRPC_BACKCHANNEL */
155
98struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) 156struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
99{ 157{
100 struct svc_rdma_op_ctxt *ctxt; 158 struct svc_rdma_op_ctxt *ctxt;
@@ -692,8 +750,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
692 if (!cma_xprt) 750 if (!cma_xprt)
693 return ERR_PTR(-ENOMEM); 751 return ERR_PTR(-ENOMEM);
694 752
695 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, 753 listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt,
696 IB_QPT_RC); 754 RDMA_PS_TCP, IB_QPT_RC);
697 if (IS_ERR(listen_id)) { 755 if (IS_ERR(listen_id)) {
698 ret = PTR_ERR(listen_id); 756 ret = PTR_ERR(listen_id);
699 dprintk("svcrdma: rdma_create_id failed = %d\n", ret); 757 dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
@@ -732,7 +790,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
732static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) 790static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
733{ 791{
734 struct ib_mr *mr; 792 struct ib_mr *mr;
735 struct ib_fast_reg_page_list *pl; 793 struct scatterlist *sg;
736 struct svc_rdma_fastreg_mr *frmr; 794 struct svc_rdma_fastreg_mr *frmr;
737 u32 num_sg; 795 u32 num_sg;
738 796
@@ -745,13 +803,14 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
745 if (IS_ERR(mr)) 803 if (IS_ERR(mr))
746 goto err_free_frmr; 804 goto err_free_frmr;
747 805
748 pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, 806 sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
749 num_sg); 807 if (!sg)
750 if (IS_ERR(pl))
751 goto err_free_mr; 808 goto err_free_mr;
752 809
810 sg_init_table(sg, RPCSVC_MAXPAGES);
811
753 frmr->mr = mr; 812 frmr->mr = mr;
754 frmr->page_list = pl; 813 frmr->sg = sg;
755 INIT_LIST_HEAD(&frmr->frmr_list); 814 INIT_LIST_HEAD(&frmr->frmr_list);
756 return frmr; 815 return frmr;
757 816
@@ -771,8 +830,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
771 frmr = list_entry(xprt->sc_frmr_q.next, 830 frmr = list_entry(xprt->sc_frmr_q.next,
772 struct svc_rdma_fastreg_mr, frmr_list); 831 struct svc_rdma_fastreg_mr, frmr_list);
773 list_del_init(&frmr->frmr_list); 832 list_del_init(&frmr->frmr_list);
833 kfree(frmr->sg);
774 ib_dereg_mr(frmr->mr); 834 ib_dereg_mr(frmr->mr);
775 ib_free_fast_reg_page_list(frmr->page_list);
776 kfree(frmr); 835 kfree(frmr);
777 } 836 }
778} 837}
@@ -786,8 +845,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
786 frmr = list_entry(rdma->sc_frmr_q.next, 845 frmr = list_entry(rdma->sc_frmr_q.next,
787 struct svc_rdma_fastreg_mr, frmr_list); 846 struct svc_rdma_fastreg_mr, frmr_list);
788 list_del_init(&frmr->frmr_list); 847 list_del_init(&frmr->frmr_list);
789 frmr->map_len = 0; 848 frmr->sg_nents = 0;
790 frmr->page_list_len = 0;
791 } 849 }
792 spin_unlock_bh(&rdma->sc_frmr_q_lock); 850 spin_unlock_bh(&rdma->sc_frmr_q_lock);
793 if (frmr) 851 if (frmr)
@@ -796,25 +854,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
796 return rdma_alloc_frmr(rdma); 854 return rdma_alloc_frmr(rdma);
797} 855}
798 856
799static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
800 struct svc_rdma_fastreg_mr *frmr)
801{
802 int page_no;
803 for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
804 dma_addr_t addr = frmr->page_list->page_list[page_no];
805 if (ib_dma_mapping_error(frmr->mr->device, addr))
806 continue;
807 atomic_dec(&xprt->sc_dma_used);
808 ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
809 frmr->direction);
810 }
811}
812
813void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, 857void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
814 struct svc_rdma_fastreg_mr *frmr) 858 struct svc_rdma_fastreg_mr *frmr)
815{ 859{
816 if (frmr) { 860 if (frmr) {
817 frmr_unmap_dma(rdma, frmr); 861 ib_dma_unmap_sg(rdma->sc_cm_id->device,
862 frmr->sg, frmr->sg_nents, frmr->direction);
863 atomic_dec(&rdma->sc_dma_used);
818 spin_lock_bh(&rdma->sc_frmr_q_lock); 864 spin_lock_bh(&rdma->sc_frmr_q_lock);
819 WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); 865 WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
820 list_add(&frmr->frmr_list, &rdma->sc_frmr_q); 866 list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 41e452bc580c..8c545f7d7525 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -676,7 +676,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
676static int 676static int
677xprt_rdma_enable_swap(struct rpc_xprt *xprt) 677xprt_rdma_enable_swap(struct rpc_xprt *xprt)
678{ 678{
679 return -EINVAL; 679 return 0;
680} 680}
681 681
682static void 682static void
@@ -705,7 +705,13 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
705 .print_stats = xprt_rdma_print_stats, 705 .print_stats = xprt_rdma_print_stats,
706 .enable_swap = xprt_rdma_enable_swap, 706 .enable_swap = xprt_rdma_enable_swap,
707 .disable_swap = xprt_rdma_disable_swap, 707 .disable_swap = xprt_rdma_disable_swap,
708 .inject_disconnect = xprt_rdma_inject_disconnect 708 .inject_disconnect = xprt_rdma_inject_disconnect,
709#if defined(CONFIG_SUNRPC_BACKCHANNEL)
710 .bc_setup = xprt_rdma_bc_setup,
711 .bc_up = xprt_rdma_bc_up,
712 .bc_free_rqst = xprt_rdma_bc_free_rqst,
713 .bc_destroy = xprt_rdma_bc_destroy,
714#endif
709}; 715};
710 716
711static struct xprt_class xprt_rdma = { 717static struct xprt_class xprt_rdma = {
@@ -732,6 +738,7 @@ void xprt_rdma_cleanup(void)
732 dprintk("RPC: %s: xprt_unregister returned %i\n", 738 dprintk("RPC: %s: xprt_unregister returned %i\n",
733 __func__, rc); 739 __func__, rc);
734 740
741 rpcrdma_destroy_wq();
735 frwr_destroy_recovery_wq(); 742 frwr_destroy_recovery_wq();
736} 743}
737 744
@@ -743,8 +750,15 @@ int xprt_rdma_init(void)
743 if (rc) 750 if (rc)
744 return rc; 751 return rc;
745 752
753 rc = rpcrdma_alloc_wq();
754 if (rc) {
755 frwr_destroy_recovery_wq();
756 return rc;
757 }
758
746 rc = xprt_register_transport(&xprt_rdma); 759 rc = xprt_register_transport(&xprt_rdma);
747 if (rc) { 760 if (rc) {
761 rpcrdma_destroy_wq();
748 frwr_destroy_recovery_wq(); 762 frwr_destroy_recovery_wq();
749 return rc; 763 return rc;
750 } 764 }
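The init path now has two workqueues to set up, and each failure unwinds exactly what preceded it, in reverse order. The shape as a sketch (frwr_alloc_recovery_wq() is assumed here as the create-side counterpart of the destroy call shown above):

static int example_init(void)
{
	int rc;

	rc = frwr_alloc_recovery_wq();
	if (rc)
		return rc;
	rc = rpcrdma_alloc_wq();
	if (rc)
		goto out_recovery;
	rc = xprt_register_transport(&xprt_rdma);
	if (rc)
		goto out_wq;
	return 0;

out_wq:
	rpcrdma_destroy_wq();		/* undo step 2 */
out_recovery:
	frwr_destroy_recovery_wq();	/* undo step 1 */
	return rc;
}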
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8a477e27bad7..eadd1655145a 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -68,47 +68,33 @@
68 * internal functions 68 * internal functions
69 */ 69 */
70 70
71/* 71static struct workqueue_struct *rpcrdma_receive_wq;
72 * handle replies in tasklet context, using a single, global list
73 * rdma tasklet function -- just turn around and call the func
74 * for all replies on the list
75 */
76
77static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
78static LIST_HEAD(rpcrdma_tasklets_g);
79 72
80static void 73int
81rpcrdma_run_tasklet(unsigned long data) 74rpcrdma_alloc_wq(void)
82{ 75{
83 struct rpcrdma_rep *rep; 76 struct workqueue_struct *recv_wq;
84 unsigned long flags;
85
86 data = data;
87 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
88 while (!list_empty(&rpcrdma_tasklets_g)) {
89 rep = list_entry(rpcrdma_tasklets_g.next,
90 struct rpcrdma_rep, rr_list);
91 list_del(&rep->rr_list);
92 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
93 77
94 rpcrdma_reply_handler(rep); 78 recv_wq = alloc_workqueue("xprtrdma_receive",
79 WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI,
80 0);
81 if (!recv_wq)
82 return -ENOMEM;
95 83
96 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); 84 rpcrdma_receive_wq = recv_wq;
97 } 85 return 0;
98 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
99} 86}
100 87
101static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); 88void
102 89rpcrdma_destroy_wq(void)
103static void
104rpcrdma_schedule_tasklet(struct list_head *sched_list)
105{ 90{
106 unsigned long flags; 91 struct workqueue_struct *wq;
107 92
108 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); 93 if (rpcrdma_receive_wq) {
109 list_splice_tail(sched_list, &rpcrdma_tasklets_g); 94 wq = rpcrdma_receive_wq;
110 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); 95 rpcrdma_receive_wq = NULL;
111 tasklet_schedule(&rpcrdma_tasklet_g); 96 destroy_workqueue(wq);
97 }
112} 98}
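Replacing the global tasklet with a workqueue splits the receive path in two: the completion upcall, which runs in atomic context, only queues work, and the reply handler runs later in process context. Both ends of that hand-off, sketched with the identifiers introduced in this patch:

/* once, at rep creation: bind the work item to its handler */
INIT_WORK(&rep->rr_work, rpcrdma_receive_worker);

/* from the completion upcall (atomic context): defer, don't process */
queue_work(rpcrdma_receive_wq, &rep->rr_work);

/* Flag choice: WQ_MEM_RECLAIM guarantees forward progress when
 * replies are needed to complete memory writeback; WQ_UNBOUND and
 * WQ_HIGHPRI trade CPU locality for low reply latency.
 */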
113 99
114static void 100static void
@@ -158,63 +144,54 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc)
158 } 144 }
159} 145}
160 146
161static int 147/* The common case is a single send completion is waiting. By
162rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) 148 * passing two WC entries to ib_poll_cq, a return code of 1
149 * means there is exactly one WC waiting and no more. We don't
150 * have to invoke ib_poll_cq again to know that the CQ has been
151 * properly drained.
152 */
153static void
154rpcrdma_sendcq_poll(struct ib_cq *cq)
163{ 155{
164 struct ib_wc *wcs; 156 struct ib_wc *pos, wcs[2];
165 int budget, count, rc; 157 int count, rc;
166 158
167 budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
168 do { 159 do {
169 wcs = ep->rep_send_wcs; 160 pos = wcs;
170 161
171 rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); 162 rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos);
172 if (rc <= 0) 163 if (rc < 0)
173 return rc; 164 break;
174 165
175 count = rc; 166 count = rc;
176 while (count-- > 0) 167 while (count-- > 0)
177 rpcrdma_sendcq_process_wc(wcs++); 168 rpcrdma_sendcq_process_wc(pos++);
178 } while (rc == RPCRDMA_POLLSIZE && --budget); 169 } while (rc == ARRAY_SIZE(wcs));
179 return 0; 170 return;
180} 171}
181 172
182/* 173/* Handle provider send completion upcalls.
183 * Handle send, fast_reg_mr, and local_inv completions.
184 *
185 * Send events are typically suppressed and thus do not result
186 * in an upcall. Occasionally one is signaled, however. This
187 * prevents the provider's completion queue from wrapping and
188 * losing a completion.
189 */ 174 */
190static void 175static void
191rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) 176rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
192{ 177{
193 struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; 178 do {
194 int rc; 179 rpcrdma_sendcq_poll(cq);
195 180 } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
196 rc = rpcrdma_sendcq_poll(cq, ep); 181 IB_CQ_REPORT_MISSED_EVENTS) > 0);
197 if (rc) { 182}
198 dprintk("RPC: %s: ib_poll_cq failed: %i\n",
199 __func__, rc);
200 return;
201 }
202 183
203 rc = ib_req_notify_cq(cq, 184static void
204 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); 185rpcrdma_receive_worker(struct work_struct *work)
205 if (rc == 0) 186{
206 return; 187 struct rpcrdma_rep *rep =
207 if (rc < 0) { 188 container_of(work, struct rpcrdma_rep, rr_work);
208 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
209 __func__, rc);
210 return;
211 }
212 189
213 rpcrdma_sendcq_poll(cq, ep); 190 rpcrdma_reply_handler(rep);
214} 191}
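Both upcalls now share one drain-then-re-arm loop. Polling with a fixed-size array means a short return count proves the CQ is empty, and re-arming with IB_CQ_REPORT_MISSED_EVENTS closes the race where a completion lands between the last poll and the notify request. The generic shape, as a sketch:

static void example_cq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct ib_wc wcs[2];
	int rc, i;

	do {
		do {
			rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), wcs);
			for (i = 0; i < rc; i++)
				;	/* process wcs[i] here */
		} while (rc == ARRAY_SIZE(wcs));
		/* CQ drained; re-arm. A positive return means more
		 * completions may have arrived while re-arming, so
		 * loop and poll again.
		 */
	} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				  IB_CQ_REPORT_MISSED_EVENTS) > 0);
}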
215 192
216static void 193static void
217rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) 194rpcrdma_recvcq_process_wc(struct ib_wc *wc)
218{ 195{
219 struct rpcrdma_rep *rep = 196 struct rpcrdma_rep *rep =
220 (struct rpcrdma_rep *)(unsigned long)wc->wr_id; 197 (struct rpcrdma_rep *)(unsigned long)wc->wr_id;
@@ -237,91 +214,60 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
237 prefetch(rdmab_to_msg(rep->rr_rdmabuf)); 214 prefetch(rdmab_to_msg(rep->rr_rdmabuf));
238 215
239out_schedule: 216out_schedule:
240 list_add_tail(&rep->rr_list, sched_list); 217 queue_work(rpcrdma_receive_wq, &rep->rr_work);
241 return; 218 return;
219
242out_fail: 220out_fail:
243 if (wc->status != IB_WC_WR_FLUSH_ERR) 221 if (wc->status != IB_WC_WR_FLUSH_ERR)
244 pr_err("RPC: %s: rep %p: %s\n", 222 pr_err("RPC: %s: rep %p: %s\n",
245 __func__, rep, ib_wc_status_msg(wc->status)); 223 __func__, rep, ib_wc_status_msg(wc->status));
246 rep->rr_len = ~0U; 224 rep->rr_len = RPCRDMA_BAD_LEN;
247 goto out_schedule; 225 goto out_schedule;
248} 226}
249 227
250static int 228/* The wc array is on stack: automatic memory is always CPU-local.
251rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) 229 *
230 * struct ib_wc is 64 bytes, making the poll array potentially
231 * large. But this is at the bottom of the call chain. Further
232 * substantial work is done in another thread.
233 */
234static void
235rpcrdma_recvcq_poll(struct ib_cq *cq)
252{ 236{
253 struct list_head sched_list; 237 struct ib_wc *pos, wcs[4];
254 struct ib_wc *wcs; 238 int count, rc;
255 int budget, count, rc;
256 239
257 INIT_LIST_HEAD(&sched_list);
258 budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
259 do { 240 do {
260 wcs = ep->rep_recv_wcs; 241 pos = wcs;
261 242
262 rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); 243 rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos);
263 if (rc <= 0) 244 if (rc < 0)
264 goto out_schedule; 245 break;
265 246
266 count = rc; 247 count = rc;
267 while (count-- > 0) 248 while (count-- > 0)
268 rpcrdma_recvcq_process_wc(wcs++, &sched_list); 249 rpcrdma_recvcq_process_wc(pos++);
269 } while (rc == RPCRDMA_POLLSIZE && --budget); 250 } while (rc == ARRAY_SIZE(wcs));
270 rc = 0;
271
272out_schedule:
273 rpcrdma_schedule_tasklet(&sched_list);
274 return rc;
275} 251}
276 252
277/* 253/* Handle provider receive completion upcalls.
278 * Handle receive completions.
279 *
280 * It is reentrant but processes single events in order to maintain
281 * ordering of receives to keep server credits.
282 *
283 * It is the responsibility of the scheduled tasklet to return
284 * recv buffers to the pool. NOTE: this affects synchronization of
285 * connection shutdown. That is, the structures required for
286 * the completion of the reply handler must remain intact until
287 * all memory has been reclaimed.
288 */ 254 */
289static void 255static void
290rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) 256rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
291{ 257{
292 struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; 258 do {
293 int rc; 259 rpcrdma_recvcq_poll(cq);
294 260 } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
295 rc = rpcrdma_recvcq_poll(cq, ep); 261 IB_CQ_REPORT_MISSED_EVENTS) > 0);
296 if (rc) {
297 dprintk("RPC: %s: ib_poll_cq failed: %i\n",
298 __func__, rc);
299 return;
300 }
301
302 rc = ib_req_notify_cq(cq,
303 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
304 if (rc == 0)
305 return;
306 if (rc < 0) {
307 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
308 __func__, rc);
309 return;
310 }
311
312 rpcrdma_recvcq_poll(cq, ep);
313} 262}
314 263
315static void 264static void
316rpcrdma_flush_cqs(struct rpcrdma_ep *ep) 265rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
317{ 266{
318 struct ib_wc wc; 267 struct ib_wc wc;
319 LIST_HEAD(sched_list);
320 268
321 while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0) 269 while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
322 rpcrdma_recvcq_process_wc(&wc, &sched_list); 270 rpcrdma_recvcq_process_wc(&wc);
323 if (!list_empty(&sched_list))
324 rpcrdma_schedule_tasklet(&sched_list);
325 while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0) 271 while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
326 rpcrdma_sendcq_process_wc(&wc); 272 rpcrdma_sendcq_process_wc(&wc);
327} 273}
@@ -432,7 +378,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
432 378
433 init_completion(&ia->ri_done); 379 init_completion(&ia->ri_done);
434 380
435 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); 381 id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
382 IB_QPT_RC);
436 if (IS_ERR(id)) { 383 if (IS_ERR(id)) {
437 rc = PTR_ERR(id); 384 rc = PTR_ERR(id);
438 dprintk("RPC: %s: rdma_create_id() failed %i\n", 385 dprintk("RPC: %s: rdma_create_id() failed %i\n",
@@ -543,11 +490,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
543 } 490 }
544 491
545 if (memreg == RPCRDMA_FRMR) { 492 if (memreg == RPCRDMA_FRMR) {
546 /* Requires both frmr reg and local dma lkey */ 493 if (!(devattr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) ||
547 if (((devattr->device_cap_flags & 494 (devattr->max_fast_reg_page_list_len == 0)) {
548 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
549 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
550 (devattr->max_fast_reg_page_list_len == 0)) {
551 dprintk("RPC: %s: FRMR registration " 495 dprintk("RPC: %s: FRMR registration "
552 "not supported by HCA\n", __func__); 496 "not supported by HCA\n", __func__);
553 memreg = RPCRDMA_MTHCAFMR; 497 memreg = RPCRDMA_MTHCAFMR;
@@ -557,6 +501,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
557 if (!ia->ri_device->alloc_fmr) { 501 if (!ia->ri_device->alloc_fmr) {
558 dprintk("RPC: %s: MTHCAFMR registration " 502 dprintk("RPC: %s: MTHCAFMR registration "
559 "not supported by HCA\n", __func__); 503 "not supported by HCA\n", __func__);
504 rc = -EINVAL;
560 goto out3; 505 goto out3;
561 } 506 }
562 } 507 }
@@ -624,6 +569,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
624 struct ib_device_attr *devattr = &ia->ri_devattr; 569 struct ib_device_attr *devattr = &ia->ri_devattr;
625 struct ib_cq *sendcq, *recvcq; 570 struct ib_cq *sendcq, *recvcq;
626 struct ib_cq_init_attr cq_attr = {}; 571 struct ib_cq_init_attr cq_attr = {};
572 unsigned int max_qp_wr;
627 int rc, err; 573 int rc, err;
628 574
629 if (devattr->max_sge < RPCRDMA_MAX_IOVS) { 575 if (devattr->max_sge < RPCRDMA_MAX_IOVS) {
@@ -632,18 +578,27 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
632 return -ENOMEM; 578 return -ENOMEM;
633 } 579 }
634 580
581 if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
582 dprintk("RPC: %s: insufficient wqe's available\n",
583 __func__);
584 return -ENOMEM;
585 }
586 max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS;
587
635 /* check provider's send/recv wr limits */ 588 /* check provider's send/recv wr limits */
636 if (cdata->max_requests > devattr->max_qp_wr) 589 if (cdata->max_requests > max_qp_wr)
637 cdata->max_requests = devattr->max_qp_wr; 590 cdata->max_requests = max_qp_wr;
638 591
639 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; 592 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
640 ep->rep_attr.qp_context = ep; 593 ep->rep_attr.qp_context = ep;
641 ep->rep_attr.srq = NULL; 594 ep->rep_attr.srq = NULL;
642 ep->rep_attr.cap.max_send_wr = cdata->max_requests; 595 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
596 ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
643 rc = ia->ri_ops->ro_open(ia, ep, cdata); 597 rc = ia->ri_ops->ro_open(ia, ep, cdata);
644 if (rc) 598 if (rc)
645 return rc; 599 return rc;
646 ep->rep_attr.cap.max_recv_wr = cdata->max_requests; 600 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
601 ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
647 ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS; 602 ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS;
648 ep->rep_attr.cap.max_recv_sge = 1; 603 ep->rep_attr.cap.max_recv_sge = 1;
649 ep->rep_attr.cap.max_inline_data = 0; 604 ep->rep_attr.cap.max_inline_data = 0;
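The new bounds check reserves headroom for backchannel operations out of the device's per-QP WR limit before sizing the forward channel, then adds the reservation back into the QP caps. As a worked example (assuming, hypothetically, RPCRDMA_BACKWARD_WRS = 8 and a device max_qp_wr of 16384):

/* max_qp_wr       = 16384 - 8 = 16376   usable for forward requests
 * max_requests    = min(cdata->max_requests, 16376)
 * cap.max_send_wr = max_requests + 8    forward + reserved backchannel
 * cap.max_recv_wr = max_requests + 8
 */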
@@ -671,7 +626,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
671 626
672 cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; 627 cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1;
673 sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall, 628 sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall,
674 rpcrdma_cq_async_error_upcall, ep, &cq_attr); 629 rpcrdma_cq_async_error_upcall, NULL, &cq_attr);
675 if (IS_ERR(sendcq)) { 630 if (IS_ERR(sendcq)) {
676 rc = PTR_ERR(sendcq); 631 rc = PTR_ERR(sendcq);
677 dprintk("RPC: %s: failed to create send CQ: %i\n", 632 dprintk("RPC: %s: failed to create send CQ: %i\n",
@@ -688,7 +643,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
688 643
689 cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1; 644 cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1;
690 recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall, 645 recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall,
691 rpcrdma_cq_async_error_upcall, ep, &cq_attr); 646 rpcrdma_cq_async_error_upcall, NULL, &cq_attr);
692 if (IS_ERR(recvcq)) { 647 if (IS_ERR(recvcq)) {
693 rc = PTR_ERR(recvcq); 648 rc = PTR_ERR(recvcq);
694 dprintk("RPC: %s: failed to create recv CQ: %i\n", 649 dprintk("RPC: %s: failed to create recv CQ: %i\n",
@@ -887,7 +842,21 @@ retry:
887 } 842 }
888 rc = ep->rep_connected; 843 rc = ep->rep_connected;
889 } else { 844 } else {
845 struct rpcrdma_xprt *r_xprt;
846 unsigned int extras;
847
890 dprintk("RPC: %s: connected\n", __func__); 848 dprintk("RPC: %s: connected\n", __func__);
849
850 r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
851 extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
852
853 if (extras) {
854 rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
855 if (rc)
856 pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
857 __func__, rc);
858 rc = 0;
859 }
891 } 860 }
892 861
893out: 862out:
@@ -924,20 +893,25 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
924 } 893 }
925} 894}
926 895
927static struct rpcrdma_req * 896struct rpcrdma_req *
928rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) 897rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
929{ 898{
899 struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
930 struct rpcrdma_req *req; 900 struct rpcrdma_req *req;
931 901
932 req = kzalloc(sizeof(*req), GFP_KERNEL); 902 req = kzalloc(sizeof(*req), GFP_KERNEL);
933 if (req == NULL) 903 if (req == NULL)
934 return ERR_PTR(-ENOMEM); 904 return ERR_PTR(-ENOMEM);
935 905
906 INIT_LIST_HEAD(&req->rl_free);
907 spin_lock(&buffer->rb_reqslock);
908 list_add(&req->rl_all, &buffer->rb_allreqs);
909 spin_unlock(&buffer->rb_reqslock);
936 req->rl_buffer = &r_xprt->rx_buf; 910 req->rl_buffer = &r_xprt->rx_buf;
937 return req; 911 return req;
938} 912}
939 913
940static struct rpcrdma_rep * 914struct rpcrdma_rep *
941rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) 915rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
942{ 916{
943 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; 917 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
@@ -959,6 +933,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
959 933
960 rep->rr_device = ia->ri_device; 934 rep->rr_device = ia->ri_device;
961 rep->rr_rxprt = r_xprt; 935 rep->rr_rxprt = r_xprt;
936 INIT_WORK(&rep->rr_work, rpcrdma_receive_worker);
962 return rep; 937 return rep;
963 938
964out_free: 939out_free:
@@ -972,44 +947,21 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
972{ 947{
973 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 948 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
974 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 949 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
975 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
976 char *p;
977 size_t len;
978 int i, rc; 950 int i, rc;
979 951
980 buf->rb_max_requests = cdata->max_requests; 952 buf->rb_max_requests = r_xprt->rx_data.max_requests;
953 buf->rb_bc_srv_max_requests = 0;
981 spin_lock_init(&buf->rb_lock); 954 spin_lock_init(&buf->rb_lock);
982 955
983 /* Need to allocate:
984 * 1. arrays for send and recv pointers
985 * 2. arrays of struct rpcrdma_req to fill in pointers
986 * 3. array of struct rpcrdma_rep for replies
987 * Send/recv buffers in req/rep need to be registered
988 */
989 len = buf->rb_max_requests *
990 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
991
992 p = kzalloc(len, GFP_KERNEL);
993 if (p == NULL) {
994 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
995 __func__, len);
996 rc = -ENOMEM;
997 goto out;
998 }
999 buf->rb_pool = p; /* for freeing it later */
1000
1001 buf->rb_send_bufs = (struct rpcrdma_req **) p;
1002 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
1003 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1004 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1005
1006 rc = ia->ri_ops->ro_init(r_xprt); 956 rc = ia->ri_ops->ro_init(r_xprt);
1007 if (rc) 957 if (rc)
1008 goto out; 958 goto out;
1009 959
960 INIT_LIST_HEAD(&buf->rb_send_bufs);
961 INIT_LIST_HEAD(&buf->rb_allreqs);
962 spin_lock_init(&buf->rb_reqslock);
1010 for (i = 0; i < buf->rb_max_requests; i++) { 963 for (i = 0; i < buf->rb_max_requests; i++) {
1011 struct rpcrdma_req *req; 964 struct rpcrdma_req *req;
1012 struct rpcrdma_rep *rep;
1013 965
1014 req = rpcrdma_create_req(r_xprt); 966 req = rpcrdma_create_req(r_xprt);
1015 if (IS_ERR(req)) { 967 if (IS_ERR(req)) {
@@ -1018,7 +970,13 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1018 rc = PTR_ERR(req); 970 rc = PTR_ERR(req);
1019 goto out; 971 goto out;
1020 } 972 }
1021 buf->rb_send_bufs[i] = req; 973 req->rl_backchannel = false;
974 list_add(&req->rl_free, &buf->rb_send_bufs);
975 }
976
977 INIT_LIST_HEAD(&buf->rb_recv_bufs);
978 for (i = 0; i < buf->rb_max_requests + 2; i++) {
979 struct rpcrdma_rep *rep;
1022 980
1023 rep = rpcrdma_create_rep(r_xprt); 981 rep = rpcrdma_create_rep(r_xprt);
1024 if (IS_ERR(rep)) { 982 if (IS_ERR(rep)) {
@@ -1027,7 +985,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
1027 rc = PTR_ERR(rep); 985 rc = PTR_ERR(rep);
1028 goto out; 986 goto out;
1029 } 987 }
1030 buf->rb_recv_bufs[i] = rep; 988 list_add(&rep->rr_list, &buf->rb_recv_bufs);
1031 } 989 }
1032 990
1033 return 0; 991 return 0;
@@ -1036,22 +994,38 @@ out:
1036 return rc; 994 return rc;
1037} 995}
1038 996
997static struct rpcrdma_req *
998rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
999{
1000 struct rpcrdma_req *req;
1001
1002 req = list_first_entry(&buf->rb_send_bufs,
1003 struct rpcrdma_req, rl_free);
1004 list_del(&req->rl_free);
1005 return req;
1006}
1007
1008static struct rpcrdma_rep *
1009rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf)
1010{
1011 struct rpcrdma_rep *rep;
1012
1013 rep = list_first_entry(&buf->rb_recv_bufs,
1014 struct rpcrdma_rep, rr_list);
1015 list_del(&rep->rr_list);
1016 return rep;
1017}
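The _locked suffix is the contract: callers already hold rb_lock, which lets rpcrdma_buffer_get() below take a request and pair it with a reply buffer inside a single critical section. The caller-side pattern, sketched (emptiness check for the send list elided):

spin_lock(&buffers->rb_lock);
req = rpcrdma_buffer_get_req_locked(buffers);
if (!list_empty(&buffers->rb_recv_bufs))
	req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
spin_unlock(&buffers->rb_lock);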
1018
1039static void 1019static void
1040rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep) 1020rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
1041{ 1021{
1042 if (!rep)
1043 return;
1044
1045 rpcrdma_free_regbuf(ia, rep->rr_rdmabuf); 1022 rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
1046 kfree(rep); 1023 kfree(rep);
1047} 1024}
1048 1025
1049static void 1026void
1050rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) 1027rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
1051{ 1028{
1052 if (!req)
1053 return;
1054
1055 rpcrdma_free_regbuf(ia, req->rl_sendbuf); 1029 rpcrdma_free_regbuf(ia, req->rl_sendbuf);
1056 rpcrdma_free_regbuf(ia, req->rl_rdmabuf); 1030 rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
1057 kfree(req); 1031 kfree(req);
@@ -1061,25 +1035,29 @@ void
1061rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) 1035rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1062{ 1036{
1063 struct rpcrdma_ia *ia = rdmab_to_ia(buf); 1037 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1064 int i;
1065 1038
1066 /* clean up in reverse order from create 1039 while (!list_empty(&buf->rb_recv_bufs)) {
1067 * 1. recv mr memory (mr free, then kfree) 1040 struct rpcrdma_rep *rep;
1068 * 2. send mr memory (mr free, then kfree)
1069 * 3. MWs
1070 */
1071 dprintk("RPC: %s: entering\n", __func__);
1072 1041
1073 for (i = 0; i < buf->rb_max_requests; i++) { 1042 rep = rpcrdma_buffer_get_rep_locked(buf);
1074 if (buf->rb_recv_bufs) 1043 rpcrdma_destroy_rep(ia, rep);
1075 rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]);
1076 if (buf->rb_send_bufs)
1077 rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
1078 } 1044 }
1079 1045
1080 ia->ri_ops->ro_destroy(buf); 1046 spin_lock(&buf->rb_reqslock);
1047 while (!list_empty(&buf->rb_allreqs)) {
1048 struct rpcrdma_req *req;
1049
1050 req = list_first_entry(&buf->rb_allreqs,
1051 struct rpcrdma_req, rl_all);
1052 list_del(&req->rl_all);
1053
1054 spin_unlock(&buf->rb_reqslock);
1055 rpcrdma_destroy_req(ia, req);
1056 spin_lock(&buf->rb_reqslock);
1057 }
1058 spin_unlock(&buf->rb_reqslock);
1081 1059
1082 kfree(buf->rb_pool); 1060 ia->ri_ops->ro_destroy(buf);
1083} 1061}
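rpcrdma_buffer_destroy() drops rb_reqslock around each rpcrdma_destroy_req() call so the free path does not have to run under a spinlock; that is safe only because rl_all is unlinked before the lock is released, leaving no path by which another walker can reach the dying request. The idiom in isolation, with hypothetical names:

spin_lock(&lock);
while (!list_empty(&head)) {
	entry = list_first_entry(&head, struct entry_type, list);
	list_del(&entry->list);	/* unreachable once unlinked */
	spin_unlock(&lock);
	free_entry(entry);	/* free without holding the lock */
	spin_lock(&lock);	/* retake before testing the list */
}
spin_unlock(&lock);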
1084 1062
1085struct rpcrdma_mw * 1063struct rpcrdma_mw *
@@ -1111,53 +1089,34 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
 	spin_unlock(&buf->rb_mwlock);
 }
 
-static void
-rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
-{
-	buf->rb_send_bufs[--buf->rb_send_index] = req;
-	req->rl_niovs = 0;
-	if (req->rl_reply) {
-		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
-		req->rl_reply = NULL;
-	}
-}
-
 /*
  * Get a set of request/reply buffers.
  *
- * Reply buffer (if needed) is attached to send buffer upon return.
- * Rule:
- * rb_send_index and rb_recv_index MUST always be pointing to the
- * *next* available buffer (non-NULL). They are incremented after
- * removing buffers, and decremented *before* returning them.
+ * Reply buffer (if available) is attached to send buffer upon return.
  */
 struct rpcrdma_req *
 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
 {
 	struct rpcrdma_req *req;
-	unsigned long flags;
-
-	spin_lock_irqsave(&buffers->rb_lock, flags);
 
-	if (buffers->rb_send_index == buffers->rb_max_requests) {
-		spin_unlock_irqrestore(&buffers->rb_lock, flags);
-		dprintk("RPC: %s: out of request buffers\n", __func__);
-		return ((struct rpcrdma_req *)NULL);
-	}
-
-	req = buffers->rb_send_bufs[buffers->rb_send_index];
-	if (buffers->rb_send_index < buffers->rb_recv_index) {
-		dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
-			__func__,
-			buffers->rb_recv_index - buffers->rb_send_index);
-		req->rl_reply = NULL;
-	} else {
-		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
-		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
-	}
-	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
+	spin_lock(&buffers->rb_lock);
+	if (list_empty(&buffers->rb_send_bufs))
+		goto out_reqbuf;
+	req = rpcrdma_buffer_get_req_locked(buffers);
+	if (list_empty(&buffers->rb_recv_bufs))
+		goto out_repbuf;
+	req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
+	spin_unlock(&buffers->rb_lock);
+	return req;
 
-	spin_unlock_irqrestore(&buffers->rb_lock, flags);
+out_reqbuf:
+	spin_unlock(&buffers->rb_lock);
+	pr_warn("RPC: %s: out of request buffers\n", __func__);
+	return NULL;
+out_repbuf:
+	spin_unlock(&buffers->rb_lock);
+	pr_warn("RPC: %s: out of reply buffers\n", __func__);
+	req->rl_reply = NULL;
 	return req;
 }
 
@@ -1169,30 +1128,31 @@ void
 rpcrdma_buffer_put(struct rpcrdma_req *req)
 {
 	struct rpcrdma_buffer *buffers = req->rl_buffer;
-	unsigned long flags;
+	struct rpcrdma_rep *rep = req->rl_reply;
 
-	spin_lock_irqsave(&buffers->rb_lock, flags);
-	rpcrdma_buffer_put_sendbuf(req, buffers);
-	spin_unlock_irqrestore(&buffers->rb_lock, flags);
+	req->rl_niovs = 0;
+	req->rl_reply = NULL;
+
+	spin_lock(&buffers->rb_lock);
+	list_add_tail(&req->rl_free, &buffers->rb_send_bufs);
+	if (rep)
+		list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
+	spin_unlock(&buffers->rb_lock);
 }
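Taken together, rpcrdma_buffer_get() and rpcrdma_buffer_put() now treat the send and receive buffers as free lists guarded by rb_lock rather than indexed arrays. For readers outside the kernel tree, here is a minimal user-space C model of the same get/put discipline; a pthread mutex stands in for the spinlock, and every name is illustrative rather than taken from the patch:

#include <pthread.h>
#include <stddef.h>

struct buf {
	struct buf *next;		/* models the rl_free / rr_list linkage */
};

struct pool {
	pthread_mutex_t lock;		/* models rb_lock */
	struct buf *free_list;		/* models rb_send_bufs / rb_recv_bufs */
};

/* models rpcrdma_buffer_get_req_locked(): caller holds pool->lock */
static struct buf *pool_get_locked(struct pool *p)
{
	struct buf *b = p->free_list;

	if (b)
		p->free_list = b->next;
	return b;
}

/* models rpcrdma_buffer_get(): NULL when the pool is exhausted */
struct buf *pool_get(struct pool *p)
{
	struct buf *b;

	pthread_mutex_lock(&p->lock);
	b = pool_get_locked(p);
	pthread_mutex_unlock(&p->lock);
	return b;
}

/* models rpcrdma_buffer_put(): returning a buffer never fails */
void pool_put(struct pool *p, struct buf *b)
{
	pthread_mutex_lock(&p->lock);
	b->next = p->free_list;
	p->free_list = b;
	pthread_mutex_unlock(&p->lock);
}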
 
 /*
  * Recover reply buffers from pool.
- * This happens when recovering from error conditions.
- * Post-increment counter/array index.
+ * This happens when recovering from disconnect.
  */
 void
 rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
 {
 	struct rpcrdma_buffer *buffers = req->rl_buffer;
-	unsigned long flags;
 
-	spin_lock_irqsave(&buffers->rb_lock, flags);
-	if (buffers->rb_recv_index < buffers->rb_max_requests) {
-		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
-		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
-	}
-	spin_unlock_irqrestore(&buffers->rb_lock, flags);
+	spin_lock(&buffers->rb_lock);
+	if (!list_empty(&buffers->rb_recv_bufs))
+		req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
+	spin_unlock(&buffers->rb_lock);
 }
 
 /*
@@ -1203,11 +1163,10 @@ void
 rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
 {
 	struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
-	unsigned long flags;
 
-	spin_lock_irqsave(&buffers->rb_lock, flags);
-	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
-	spin_unlock_irqrestore(&buffers->rb_lock, flags);
+	spin_lock(&buffers->rb_lock);
+	list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
+	spin_unlock(&buffers->rb_lock);
 }
 
 /*
@@ -1364,6 +1323,47 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
 	return rc;
 }
 
+/**
+ * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
+ * @r_xprt: transport associated with these backchannel resources
+ * @count: minimum number of incoming requests expected
+ *
+ * Returns zero if all requested buffers were posted, or a negative errno.
+ */
+int
+rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
+{
+	struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	struct rpcrdma_rep *rep;
+	unsigned long flags;
+	int rc;
+
+	while (count--) {
+		spin_lock_irqsave(&buffers->rb_lock, flags);
+		if (list_empty(&buffers->rb_recv_bufs))
+			goto out_reqbuf;
+		rep = rpcrdma_buffer_get_rep_locked(buffers);
+		spin_unlock_irqrestore(&buffers->rb_lock, flags);
+
+		rc = rpcrdma_ep_post_recv(ia, ep, rep);
+		if (rc)
+			goto out_rc;
+	}
+
+	return 0;
+
+out_reqbuf:
+	spin_unlock_irqrestore(&buffers->rb_lock, flags);
+	pr_warn("%s: no extra receive buffers\n", __func__);
+	return -ENOMEM;
+
+out_rc:
+	rpcrdma_recv_buffer_put(rep);
+	return rc;
+}
+
 /* How many chunk list items fit within our inline buffers?
  */
 unsigned int
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c09414e6f91b..ac7f8d4f632a 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -77,9 +77,6 @@ struct rpcrdma_ia {
  * RDMA Endpoint -- one per transport instance
  */
 
-#define RPCRDMA_WC_BUDGET	(128)
-#define RPCRDMA_POLLSIZE	(16)
-
 struct rpcrdma_ep {
 	atomic_t		rep_cqcount;
 	int			rep_cqinit;
@@ -89,8 +86,6 @@ struct rpcrdma_ep {
 	struct rdma_conn_param	rep_remote_cma;
 	struct sockaddr_storage	rep_remote_addr;
 	struct delayed_work	rep_connect_worker;
-	struct ib_wc		rep_send_wcs[RPCRDMA_POLLSIZE];
-	struct ib_wc		rep_recv_wcs[RPCRDMA_POLLSIZE];
 };
 
 /*
@@ -106,6 +101,16 @@ struct rpcrdma_ep {
  */
 #define RPCRDMA_IGNORE_COMPLETION	(0ULL)
 
+/* Pre-allocate extra Work Requests for handling backward receives
+ * and sends. This is a fixed value because the Work Queues are
+ * allocated when the forward channel is set up.
+ */
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+#define RPCRDMA_BACKWARD_WRS		(8)
+#else
+#define RPCRDMA_BACKWARD_WRS		(0)
+#endif
+
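The reserve is consumed when the queue pair is sized as the forward channel is created. A sketch of that arithmetic (struct ib_qp_init_attr and its cap fields are the verbs API; the helper function and its max_requests parameter are illustrative, not lifted from the patch):

static void size_wqs(struct ib_qp_init_attr *attr, unsigned int max_requests)
{
	/* forward-channel slots plus the fixed backchannel reserve */
	attr->cap.max_send_wr = max_requests + RPCRDMA_BACKWARD_WRS;
	attr->cap.max_recv_wr = max_requests + RPCRDMA_BACKWARD_WRS;
}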
 /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
  *
  * The below structure appears at the front of a large region of kmalloc'd
@@ -169,10 +174,13 @@ struct rpcrdma_rep {
 	unsigned int		rr_len;
 	struct ib_device	*rr_device;
 	struct rpcrdma_xprt	*rr_rxprt;
+	struct work_struct	rr_work;
 	struct list_head	rr_list;
 	struct rpcrdma_regbuf	*rr_rdmabuf;
 };
 
+#define RPCRDMA_BAD_LEN		(~0U)
+
 /*
  * struct rpcrdma_mw - external memory region metadata
  *
@@ -193,7 +201,8 @@ enum rpcrdma_frmr_state {
 };
 
 struct rpcrdma_frmr {
-	struct ib_fast_reg_page_list	*fr_pgl;
+	struct scatterlist		*sg;
+	int				sg_nents;
 	struct ib_mr			*fr_mr;
 	enum rpcrdma_frmr_state		fr_state;
 	struct work_struct		fr_work;
@@ -255,6 +264,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
 #define RPCRDMA_MAX_IOVS	(2)
 
 struct rpcrdma_req {
+	struct list_head	rl_free;
 	unsigned int		rl_niovs;
 	unsigned int		rl_nchunks;
 	unsigned int		rl_connect_cookie;
@@ -264,6 +274,9 @@ struct rpcrdma_req {
 	struct rpcrdma_regbuf	*rl_rdmabuf;
 	struct rpcrdma_regbuf	*rl_sendbuf;
 	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];
+
+	struct list_head	rl_all;
+	bool			rl_backchannel;
 };
 
 static inline struct rpcrdma_req *
@@ -288,12 +301,14 @@ struct rpcrdma_buffer {
 	struct list_head	rb_all;
 	char			*rb_pool;
 
-	spinlock_t		rb_lock;	/* protect buf arrays */
+	spinlock_t		rb_lock;	/* protect buf lists */
+	struct list_head	rb_send_bufs;
+	struct list_head	rb_recv_bufs;
 	u32			rb_max_requests;
-	int			rb_send_index;
-	int			rb_recv_index;
-	struct rpcrdma_req	**rb_send_bufs;
-	struct rpcrdma_rep	**rb_recv_bufs;
+
+	u32			rb_bc_srv_max_requests;
+	spinlock_t		rb_reqslock;	/* protect rb_allreqs */
+	struct list_head	rb_allreqs;
 };
 #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
 
@@ -339,6 +354,7 @@ struct rpcrdma_stats {
 	unsigned long		failed_marshal_count;
 	unsigned long		bad_reply_count;
 	unsigned long		nomsg_call_count;
+	unsigned long		bcall_count;
 };
 
 /*
@@ -414,6 +430,9 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
 /*
  * Buffer calls - xprtrdma/verbs.c
  */
+struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
+struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
+void rpcrdma_destroy_req(struct rpcrdma_ia *, struct rpcrdma_req *);
 int rpcrdma_buffer_create(struct rpcrdma_xprt *);
 void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
 
@@ -430,10 +449,14 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
 			 struct rpcrdma_regbuf *);
 
 unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
+int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);
 
 int frwr_alloc_recovery_wq(void);
 void frwr_destroy_recovery_wq(void);
 
+int rpcrdma_alloc_wq(void);
+void rpcrdma_destroy_wq(void);
+
 /*
  * Wrappers for chunk registration, shared by read/write chunk code.
  */
@@ -494,6 +517,18 @@ int rpcrdma_marshal_req(struct rpc_rqst *);
 int xprt_rdma_init(void);
 void xprt_rdma_cleanup(void);
 
+/* Backchannel calls - xprtrdma/backchannel.c
+ */
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
+int xprt_rdma_bc_up(struct svc_serv *, struct net *);
+int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
+void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
+int rpcrdma_bc_marshal_reply(struct rpc_rqst *);
+void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
+void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
+#endif	/* CONFIG_SUNRPC_BACKCHANNEL */
+
 /* Temporary NFS request map cache. Created in svc_rdma.c */
 extern struct kmem_cache *svc_rdma_map_cachep;
 /* WR context cache. Created in svc_rdma.c */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1a85e0ed0b48..1d1a70498910 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -360,8 +360,10 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
 		int flags = XS_SENDMSG_FLAGS;
 
 		remainder -= len;
-		if (remainder != 0 || more)
+		if (more)
 			flags |= MSG_MORE;
+		if (remainder != 0)
+			flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE;
 		err = do_sendpage(sock, *ppage, base, len, flags);
 		if (remainder == 0 || err != len)
 			break;
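The effect of the change is easier to see with the flag selection pulled out into one place: MSG_MORE alone now only signals that the caller has further xdr sections to send, while remaining pages within the current section additionally set MSG_SENDPAGE_NOTLAST. This helper is illustrative only, assuming the kernel's XS_SENDMSG_FLAGS, MSG_MORE and MSG_SENDPAGE_NOTLAST definitions are in scope:

static int xs_sendpage_flags(size_t remainder, int more)
{
	int flags = XS_SENDMSG_FLAGS;

	if (more)		/* caller still has another xdr section */
		flags |= MSG_MORE;
	if (remainder != 0)	/* more pages left in this section */
		flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE;
	return flags;
}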
@@ -823,6 +825,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
823 825
824 kernel_sock_shutdown(sock, SHUT_RDWR); 826 kernel_sock_shutdown(sock, SHUT_RDWR);
825 827
828 mutex_lock(&transport->recv_mutex);
826 write_lock_bh(&sk->sk_callback_lock); 829 write_lock_bh(&sk->sk_callback_lock);
827 transport->inet = NULL; 830 transport->inet = NULL;
828 transport->sock = NULL; 831 transport->sock = NULL;
@@ -833,6 +836,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
833 xprt_clear_connected(xprt); 836 xprt_clear_connected(xprt);
834 write_unlock_bh(&sk->sk_callback_lock); 837 write_unlock_bh(&sk->sk_callback_lock);
835 xs_sock_reset_connection_flags(xprt); 838 xs_sock_reset_connection_flags(xprt);
839 mutex_unlock(&transport->recv_mutex);
836 840
837 trace_rpc_socket_close(xprt, sock); 841 trace_rpc_socket_close(xprt, sock);
838 sock_release(sock); 842 sock_release(sock);
@@ -886,6 +890,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
886 890
887 cancel_delayed_work_sync(&transport->connect_worker); 891 cancel_delayed_work_sync(&transport->connect_worker);
888 xs_close(xprt); 892 xs_close(xprt);
893 cancel_work_sync(&transport->recv_worker);
889 xs_xprt_free(xprt); 894 xs_xprt_free(xprt);
890 module_put(THIS_MODULE); 895 module_put(THIS_MODULE);
891} 896}
@@ -906,44 +911,36 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
906} 911}
907 912
908/** 913/**
909 * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets 914 * xs_local_data_read_skb
910 * @sk: socket with data to read 915 * @xprt: transport
916 * @sk: socket
917 * @skb: skbuff
911 * 918 *
912 * Currently this assumes we can read the whole reply in a single gulp. 919 * Currently this assumes we can read the whole reply in a single gulp.
913 */ 920 */
914static void xs_local_data_ready(struct sock *sk) 921static void xs_local_data_read_skb(struct rpc_xprt *xprt,
922 struct sock *sk,
923 struct sk_buff *skb)
915{ 924{
916 struct rpc_task *task; 925 struct rpc_task *task;
917 struct rpc_xprt *xprt;
918 struct rpc_rqst *rovr; 926 struct rpc_rqst *rovr;
919 struct sk_buff *skb; 927 int repsize, copied;
920 int err, repsize, copied;
921 u32 _xid; 928 u32 _xid;
922 __be32 *xp; 929 __be32 *xp;
923 930
924 read_lock_bh(&sk->sk_callback_lock);
925 dprintk("RPC: %s...\n", __func__);
926 xprt = xprt_from_sock(sk);
927 if (xprt == NULL)
928 goto out;
929
930 skb = skb_recv_datagram(sk, 0, 1, &err);
931 if (skb == NULL)
932 goto out;
933
934 repsize = skb->len - sizeof(rpc_fraghdr); 931 repsize = skb->len - sizeof(rpc_fraghdr);
935 if (repsize < 4) { 932 if (repsize < 4) {
936 dprintk("RPC: impossible RPC reply size %d\n", repsize); 933 dprintk("RPC: impossible RPC reply size %d\n", repsize);
937 goto dropit; 934 return;
938 } 935 }
939 936
940 /* Copy the XID from the skb... */ 937 /* Copy the XID from the skb... */
941 xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid); 938 xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid);
942 if (xp == NULL) 939 if (xp == NULL)
943 goto dropit; 940 return;
944 941
945 /* Look up and lock the request corresponding to the given XID */ 942 /* Look up and lock the request corresponding to the given XID */
946 spin_lock(&xprt->transport_lock); 943 spin_lock_bh(&xprt->transport_lock);
947 rovr = xprt_lookup_rqst(xprt, *xp); 944 rovr = xprt_lookup_rqst(xprt, *xp);
948 if (!rovr) 945 if (!rovr)
949 goto out_unlock; 946 goto out_unlock;
@@ -961,50 +958,68 @@ static void xs_local_data_ready(struct sock *sk)
961 xprt_complete_rqst(task, copied); 958 xprt_complete_rqst(task, copied);
962 959
963 out_unlock: 960 out_unlock:
964 spin_unlock(&xprt->transport_lock); 961 spin_unlock_bh(&xprt->transport_lock);
965 dropit: 962}
966 skb_free_datagram(sk, skb); 963
967 out: 964static void xs_local_data_receive(struct sock_xprt *transport)
968 read_unlock_bh(&sk->sk_callback_lock); 965{
966 struct sk_buff *skb;
967 struct sock *sk;
968 int err;
969
970 mutex_lock(&transport->recv_mutex);
971 sk = transport->inet;
972 if (sk == NULL)
973 goto out;
974 for (;;) {
975 skb = skb_recv_datagram(sk, 0, 1, &err);
976 if (skb == NULL)
977 break;
978 xs_local_data_read_skb(&transport->xprt, sk, skb);
979 skb_free_datagram(sk, skb);
980 }
981out:
982 mutex_unlock(&transport->recv_mutex);
983}
984
985static void xs_local_data_receive_workfn(struct work_struct *work)
986{
987 struct sock_xprt *transport =
988 container_of(work, struct sock_xprt, recv_worker);
989 xs_local_data_receive(transport);
969} 990}
970 991
971/** 992/**
972 * xs_udp_data_ready - "data ready" callback for UDP sockets 993 * xs_udp_data_read_skb - receive callback for UDP sockets
973 * @sk: socket with data to read 994 * @xprt: transport
995 * @sk: socket
996 * @skb: skbuff
974 * 997 *
975 */ 998 */
976static void xs_udp_data_ready(struct sock *sk) 999static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
1000 struct sock *sk,
1001 struct sk_buff *skb)
977{ 1002{
978 struct rpc_task *task; 1003 struct rpc_task *task;
979 struct rpc_xprt *xprt;
980 struct rpc_rqst *rovr; 1004 struct rpc_rqst *rovr;
981 struct sk_buff *skb; 1005 int repsize, copied;
982 int err, repsize, copied;
983 u32 _xid; 1006 u32 _xid;
984 __be32 *xp; 1007 __be32 *xp;
985 1008
986 read_lock_bh(&sk->sk_callback_lock);
987 dprintk("RPC: xs_udp_data_ready...\n");
988 if (!(xprt = xprt_from_sock(sk)))
989 goto out;
990
991 if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
992 goto out;
993
994 repsize = skb->len - sizeof(struct udphdr); 1009 repsize = skb->len - sizeof(struct udphdr);
995 if (repsize < 4) { 1010 if (repsize < 4) {
996 dprintk("RPC: impossible RPC reply size %d!\n", repsize); 1011 dprintk("RPC: impossible RPC reply size %d!\n", repsize);
997 goto dropit; 1012 return;
998 } 1013 }
999 1014
1000 /* Copy the XID from the skb... */ 1015 /* Copy the XID from the skb... */
1001 xp = skb_header_pointer(skb, sizeof(struct udphdr), 1016 xp = skb_header_pointer(skb, sizeof(struct udphdr),
1002 sizeof(_xid), &_xid); 1017 sizeof(_xid), &_xid);
1003 if (xp == NULL) 1018 if (xp == NULL)
1004 goto dropit; 1019 return;
1005 1020
1006 /* Look up and lock the request corresponding to the given XID */ 1021 /* Look up and lock the request corresponding to the given XID */
1007 spin_lock(&xprt->transport_lock); 1022 spin_lock_bh(&xprt->transport_lock);
1008 rovr = xprt_lookup_rqst(xprt, *xp); 1023 rovr = xprt_lookup_rqst(xprt, *xp);
1009 if (!rovr) 1024 if (!rovr)
1010 goto out_unlock; 1025 goto out_unlock;
@@ -1025,10 +1040,54 @@ static void xs_udp_data_ready(struct sock *sk)
1025 xprt_complete_rqst(task, copied); 1040 xprt_complete_rqst(task, copied);
1026 1041
1027 out_unlock: 1042 out_unlock:
1028 spin_unlock(&xprt->transport_lock); 1043 spin_unlock_bh(&xprt->transport_lock);
1029 dropit: 1044}
1030 skb_free_datagram(sk, skb); 1045
1031 out: 1046static void xs_udp_data_receive(struct sock_xprt *transport)
1047{
1048 struct sk_buff *skb;
1049 struct sock *sk;
1050 int err;
1051
1052 mutex_lock(&transport->recv_mutex);
1053 sk = transport->inet;
1054 if (sk == NULL)
1055 goto out;
1056 for (;;) {
1057 skb = skb_recv_datagram(sk, 0, 1, &err);
1058 if (skb == NULL)
1059 break;
1060 xs_udp_data_read_skb(&transport->xprt, sk, skb);
1061 skb_free_datagram(sk, skb);
1062 }
1063out:
1064 mutex_unlock(&transport->recv_mutex);
1065}
1066
1067static void xs_udp_data_receive_workfn(struct work_struct *work)
1068{
1069 struct sock_xprt *transport =
1070 container_of(work, struct sock_xprt, recv_worker);
1071 xs_udp_data_receive(transport);
1072}
1073
1074/**
1076 * xs_data_ready - "data ready" callback for AF_LOCAL and UDP sockets
1076 * @sk: socket with data to read
1077 *
1078 */
1079static void xs_data_ready(struct sock *sk)
1080{
1081 struct rpc_xprt *xprt;
1082
1083 read_lock_bh(&sk->sk_callback_lock);
1084 dprintk("RPC: xs_data_ready...\n");
1085 xprt = xprt_from_sock(sk);
1086 if (xprt != NULL) {
1087 struct sock_xprt *transport = container_of(xprt,
1088 struct sock_xprt, xprt);
1089 queue_work(rpciod_workqueue, &transport->recv_worker);
1090 }
1032 read_unlock_bh(&sk->sk_callback_lock); 1091 read_unlock_bh(&sk->sk_callback_lock);
1033} 1092}
1034 1093
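xs_data_ready() illustrates the general rule behind this refactoring: sk_data_ready runs in softirq context and must not sleep, so it only schedules work, and the actual socket reads move to a work item that runs in process context and can take recv_mutex. A stripped-down sketch of that shape, with illustrative names (the real code queues on rpciod_workqueue rather than system_wq):

struct my_transport {
	struct work_struct recv_worker;	/* INIT_WORK()ed at setup time */
	struct mutex recv_mutex;	/* serializes receive processing */
};

static void my_data_ready(struct sock *sk)
{
	struct my_transport *t = sk->sk_user_data;

	queue_work(system_wq, &t->recv_worker);	/* no sleeping here */
}

static void my_recv_workfn(struct work_struct *work)
{
	struct my_transport *t =
		container_of(work, struct my_transport, recv_worker);

	mutex_lock(&t->recv_mutex);	/* safe: process context */
	/* drain the socket here */
	mutex_unlock(&t->recv_mutex);
}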
@@ -1243,12 +1302,12 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
1243 dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); 1302 dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid));
1244 1303
1245 /* Find and lock the request corresponding to this xid */ 1304 /* Find and lock the request corresponding to this xid */
1246 spin_lock(&xprt->transport_lock); 1305 spin_lock_bh(&xprt->transport_lock);
1247 req = xprt_lookup_rqst(xprt, transport->tcp_xid); 1306 req = xprt_lookup_rqst(xprt, transport->tcp_xid);
1248 if (!req) { 1307 if (!req) {
1249 dprintk("RPC: XID %08x request not found!\n", 1308 dprintk("RPC: XID %08x request not found!\n",
1250 ntohl(transport->tcp_xid)); 1309 ntohl(transport->tcp_xid));
1251 spin_unlock(&xprt->transport_lock); 1310 spin_unlock_bh(&xprt->transport_lock);
1252 return -1; 1311 return -1;
1253 } 1312 }
1254 1313
@@ -1257,7 +1316,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
1257 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) 1316 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
1258 xprt_complete_rqst(req->rq_task, transport->tcp_copied); 1317 xprt_complete_rqst(req->rq_task, transport->tcp_copied);
1259 1318
1260 spin_unlock(&xprt->transport_lock); 1319 spin_unlock_bh(&xprt->transport_lock);
1261 return 0; 1320 return 0;
1262} 1321}
1263 1322
@@ -1277,10 +1336,10 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
1277 struct rpc_rqst *req; 1336 struct rpc_rqst *req;
1278 1337
1279 /* Look up and lock the request corresponding to the given XID */ 1338 /* Look up and lock the request corresponding to the given XID */
1280 spin_lock(&xprt->transport_lock); 1339 spin_lock_bh(&xprt->transport_lock);
1281 req = xprt_lookup_bc_request(xprt, transport->tcp_xid); 1340 req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
1282 if (req == NULL) { 1341 if (req == NULL) {
1283 spin_unlock(&xprt->transport_lock); 1342 spin_unlock_bh(&xprt->transport_lock);
1284 printk(KERN_WARNING "Callback slot table overflowed\n"); 1343 printk(KERN_WARNING "Callback slot table overflowed\n");
1285 xprt_force_disconnect(xprt); 1344 xprt_force_disconnect(xprt);
1286 return -1; 1345 return -1;
@@ -1291,7 +1350,7 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
1291 1350
1292 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) 1351 if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
1293 xprt_complete_bc_request(req, transport->tcp_copied); 1352 xprt_complete_bc_request(req, transport->tcp_copied);
1294 spin_unlock(&xprt->transport_lock); 1353 spin_unlock_bh(&xprt->transport_lock);
1295 1354
1296 return 0; 1355 return 0;
1297} 1356}
@@ -1306,6 +1365,17 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
1306 xs_tcp_read_reply(xprt, desc) : 1365 xs_tcp_read_reply(xprt, desc) :
1307 xs_tcp_read_callback(xprt, desc); 1366 xs_tcp_read_callback(xprt, desc);
1308} 1367}
1368
1369static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
1370{
1371 int ret;
1372
1373 ret = svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0,
1374 SVC_SOCK_ANONYMOUS);
1375 if (ret < 0)
1376 return ret;
1377 return 0;
1378}
1309#else 1379#else
1310static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, 1380static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
1311 struct xdr_skb_reader *desc) 1381 struct xdr_skb_reader *desc)
@@ -1391,6 +1461,44 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
1391 return len - desc.count; 1461 return len - desc.count;
1392} 1462}
1393 1463
1464static void xs_tcp_data_receive(struct sock_xprt *transport)
1465{
1466 struct rpc_xprt *xprt = &transport->xprt;
1467 struct sock *sk;
1468 read_descriptor_t rd_desc = {
1469 .count = 2*1024*1024,
1470 .arg.data = xprt,
1471 };
1472 unsigned long total = 0;
1473 int read = 0;
1474
1475 mutex_lock(&transport->recv_mutex);
1476 sk = transport->inet;
1477 if (sk == NULL)
1478 goto out;
1479
1480 /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
1481 for (;;) {
1482 lock_sock(sk);
1483 read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
1484 release_sock(sk);
1485 if (read <= 0)
1486 break;
1487 total += read;
1488 rd_desc.count = 65536;
1489 }
1490out:
1491 mutex_unlock(&transport->recv_mutex);
1492 trace_xs_tcp_data_ready(xprt, read, total);
1493}
1494
1495static void xs_tcp_data_receive_workfn(struct work_struct *work)
1496{
1497 struct sock_xprt *transport =
1498 container_of(work, struct sock_xprt, recv_worker);
1499 xs_tcp_data_receive(transport);
1500}
1501
1394/** 1502/**
1395 * xs_tcp_data_ready - "data ready" callback for TCP sockets 1503 * xs_tcp_data_ready - "data ready" callback for TCP sockets
1396 * @sk: socket with data to read 1504 * @sk: socket with data to read
@@ -1398,34 +1506,24 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
  */
 static void xs_tcp_data_ready(struct sock *sk)
 {
+	struct sock_xprt *transport;
 	struct rpc_xprt *xprt;
-	read_descriptor_t rd_desc;
-	int read;
-	unsigned long total = 0;
 
 	dprintk("RPC: xs_tcp_data_ready...\n");
 
 	read_lock_bh(&sk->sk_callback_lock);
-	if (!(xprt = xprt_from_sock(sk))) {
-		read = 0;
+	if (!(xprt = xprt_from_sock(sk)))
 		goto out;
-	}
+	transport = container_of(xprt, struct sock_xprt, xprt);
+
 	/* Any data means we had a useful conversation, so
-	 * we don't need to delay the next reconnect
+	 * we don't need to delay the next reconnect
 	 */
 	if (xprt->reestablish_timeout)
 		xprt->reestablish_timeout = 0;
+	queue_work(rpciod_workqueue, &transport->recv_worker);
 
-	/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
-	rd_desc.arg.data = xprt;
-	do {
-		rd_desc.count = 65536;
-		read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
-		if (read > 0)
-			total += read;
-	} while (read > 0);
 out:
-	trace_xs_tcp_data_ready(xprt, read, total);
 	read_unlock_bh(&sk->sk_callback_lock);
 }
1431 1529
@@ -1873,7 +1971,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
1873 xs_save_old_callbacks(transport, sk); 1971 xs_save_old_callbacks(transport, sk);
1874 1972
1875 sk->sk_user_data = xprt; 1973 sk->sk_user_data = xprt;
1876 sk->sk_data_ready = xs_local_data_ready; 1974 sk->sk_data_ready = xs_data_ready;
1877 sk->sk_write_space = xs_udp_write_space; 1975 sk->sk_write_space = xs_udp_write_space;
1878 sk->sk_error_report = xs_error_report; 1976 sk->sk_error_report = xs_error_report;
1879 sk->sk_allocation = GFP_NOIO; 1977 sk->sk_allocation = GFP_NOIO;
@@ -2059,7 +2157,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
2059 xs_save_old_callbacks(transport, sk); 2157 xs_save_old_callbacks(transport, sk);
2060 2158
2061 sk->sk_user_data = xprt; 2159 sk->sk_user_data = xprt;
2062 sk->sk_data_ready = xs_udp_data_ready; 2160 sk->sk_data_ready = xs_data_ready;
2063 sk->sk_write_space = xs_udp_write_space; 2161 sk->sk_write_space = xs_udp_write_space;
2064 sk->sk_allocation = GFP_NOIO; 2162 sk->sk_allocation = GFP_NOIO;
2065 2163
@@ -2472,7 +2570,7 @@ static int bc_send_request(struct rpc_task *task)
2472{ 2570{
2473 struct rpc_rqst *req = task->tk_rqstp; 2571 struct rpc_rqst *req = task->tk_rqstp;
2474 struct svc_xprt *xprt; 2572 struct svc_xprt *xprt;
2475 u32 len; 2573 int len;
2476 2574
2477 dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid)); 2575 dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid));
2478 /* 2576 /*
@@ -2580,6 +2678,12 @@ static struct rpc_xprt_ops xs_tcp_ops = {
2580 .enable_swap = xs_enable_swap, 2678 .enable_swap = xs_enable_swap,
2581 .disable_swap = xs_disable_swap, 2679 .disable_swap = xs_disable_swap,
2582 .inject_disconnect = xs_inject_disconnect, 2680 .inject_disconnect = xs_inject_disconnect,
2681#ifdef CONFIG_SUNRPC_BACKCHANNEL
2682 .bc_setup = xprt_setup_bc,
2683 .bc_up = xs_tcp_bc_up,
2684 .bc_free_rqst = xprt_free_bc_rqst,
2685 .bc_destroy = xprt_destroy_bc,
2686#endif
2583}; 2687};
2584 2688
2585/* 2689/*
@@ -2650,6 +2754,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2650 } 2754 }
2651 2755
2652 new = container_of(xprt, struct sock_xprt, xprt); 2756 new = container_of(xprt, struct sock_xprt, xprt);
2757 mutex_init(&new->recv_mutex);
2653 memcpy(&xprt->addr, args->dstaddr, args->addrlen); 2758 memcpy(&xprt->addr, args->dstaddr, args->addrlen);
2654 xprt->addrlen = args->addrlen; 2759 xprt->addrlen = args->addrlen;
2655 if (args->srcaddr) 2760 if (args->srcaddr)
@@ -2703,6 +2808,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
2703 xprt->ops = &xs_local_ops; 2808 xprt->ops = &xs_local_ops;
2704 xprt->timeout = &xs_local_default_timeout; 2809 xprt->timeout = &xs_local_default_timeout;
2705 2810
2811 INIT_WORK(&transport->recv_worker, xs_local_data_receive_workfn);
2706 INIT_DELAYED_WORK(&transport->connect_worker, 2812 INIT_DELAYED_WORK(&transport->connect_worker,
2707 xs_dummy_setup_socket); 2813 xs_dummy_setup_socket);
2708 2814
@@ -2774,21 +2880,20 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2774 2880
2775 xprt->timeout = &xs_udp_default_timeout; 2881 xprt->timeout = &xs_udp_default_timeout;
2776 2882
2883 INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn);
2884 INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket);
2885
2777 switch (addr->sa_family) { 2886 switch (addr->sa_family) {
2778 case AF_INET: 2887 case AF_INET:
2779 if (((struct sockaddr_in *)addr)->sin_port != htons(0)) 2888 if (((struct sockaddr_in *)addr)->sin_port != htons(0))
2780 xprt_set_bound(xprt); 2889 xprt_set_bound(xprt);
2781 2890
2782 INIT_DELAYED_WORK(&transport->connect_worker,
2783 xs_udp_setup_socket);
2784 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); 2891 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
2785 break; 2892 break;
2786 case AF_INET6: 2893 case AF_INET6:
2787 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) 2894 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
2788 xprt_set_bound(xprt); 2895 xprt_set_bound(xprt);
2789 2896
2790 INIT_DELAYED_WORK(&transport->connect_worker,
2791 xs_udp_setup_socket);
2792 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); 2897 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
2793 break; 2898 break;
2794 default: 2899 default:
@@ -2853,21 +2958,20 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2853 xprt->ops = &xs_tcp_ops; 2958 xprt->ops = &xs_tcp_ops;
2854 xprt->timeout = &xs_tcp_default_timeout; 2959 xprt->timeout = &xs_tcp_default_timeout;
2855 2960
2961 INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
2962 INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
2963
2856 switch (addr->sa_family) { 2964 switch (addr->sa_family) {
2857 case AF_INET: 2965 case AF_INET:
2858 if (((struct sockaddr_in *)addr)->sin_port != htons(0)) 2966 if (((struct sockaddr_in *)addr)->sin_port != htons(0))
2859 xprt_set_bound(xprt); 2967 xprt_set_bound(xprt);
2860 2968
2861 INIT_DELAYED_WORK(&transport->connect_worker,
2862 xs_tcp_setup_socket);
2863 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); 2969 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
2864 break; 2970 break;
2865 case AF_INET6: 2971 case AF_INET6:
2866 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) 2972 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
2867 xprt_set_bound(xprt); 2973 xprt_set_bound(xprt);
2868 2974
2869 INIT_DELAYED_WORK(&transport->connect_worker,
2870 xs_tcp_setup_socket);
2871 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); 2975 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
2872 break; 2976 break;
2873 default: 2977 default:
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index fda38f830a10..f34e535e93bd 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * net/switchdev/switchdev.c - Switch device API 2 * net/switchdev/switchdev.c - Switch device API
3 * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> 3 * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
4 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> 4 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
@@ -15,11 +15,166 @@
15#include <linux/mutex.h> 15#include <linux/mutex.h>
16#include <linux/notifier.h> 16#include <linux/notifier.h>
17#include <linux/netdevice.h> 17#include <linux/netdevice.h>
18#include <linux/etherdevice.h>
18#include <linux/if_bridge.h> 19#include <linux/if_bridge.h>
20#include <linux/list.h>
21#include <linux/workqueue.h>
22#include <linux/if_vlan.h>
19#include <net/ip_fib.h> 23#include <net/ip_fib.h>
20#include <net/switchdev.h> 24#include <net/switchdev.h>
21 25
22/** 26/**
27 * switchdev_trans_item_enqueue - Enqueue data item to transaction queue
28 *
29 * @trans: transaction
30 * @data: pointer to data being queued
31 * @destructor: data destructor
32 * @tritem: transaction item being queued
33 *
34 * Enqueue data item to transaction queue. tritem is typically placed in
35 * container pointed at by data pointer. Destructor is called on
36 * transaction abort and after successful commit phase in case
37 * the caller did not dequeue the item before.
38 */
39void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
40 void *data, void (*destructor)(void const *),
41 struct switchdev_trans_item *tritem)
42{
43 tritem->data = data;
44 tritem->destructor = destructor;
45 list_add_tail(&tritem->list, &trans->item_list);
46}
47EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);
48
49static struct switchdev_trans_item *
50__switchdev_trans_item_dequeue(struct switchdev_trans *trans)
51{
52 struct switchdev_trans_item *tritem;
53
54 if (list_empty(&trans->item_list))
55 return NULL;
56 tritem = list_first_entry(&trans->item_list,
57 struct switchdev_trans_item, list);
58 list_del(&tritem->list);
59 return tritem;
60}
61
62/**
63 * switchdev_trans_item_dequeue - Dequeue data item from transaction queue
64 *
65 * @trans: transaction
66 */
67void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
68{
69 struct switchdev_trans_item *tritem;
70
71 tritem = __switchdev_trans_item_dequeue(trans);
72 BUG_ON(!tritem);
73 return tritem->data;
74}
75EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);
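A driver typically pairs these two calls across the prepare and commit phases: allocate and enqueue in prepare (with kfree as the destructor, so an aborted transaction cleans up automatically), then dequeue and consume in commit. The handler below is a hypothetical example, not code from any in-tree driver:

struct my_vlan_entry {
	struct switchdev_trans_item tritem;
	u16 vid;
	/* ... hardware-specific state ... */
};

static int my_port_vlan_add(struct net_device *dev, u16 vid,
			    struct switchdev_trans *trans)
{
	struct my_vlan_entry *entry;

	if (trans->ph_prepare) {
		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry)
			return -ENOMEM;
		entry->vid = vid;
		switchdev_trans_item_enqueue(trans, entry, kfree,
					     &entry->tritem);
		return 0;
	}

	entry = switchdev_trans_item_dequeue(trans);
	/* commit phase: program the hardware using entry, then free it */
	kfree(entry);
	return 0;
}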
76
77static void switchdev_trans_init(struct switchdev_trans *trans)
78{
79 INIT_LIST_HEAD(&trans->item_list);
80}
81
82static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
83{
84 struct switchdev_trans_item *tritem;
85
86 while ((tritem = __switchdev_trans_item_dequeue(trans)))
87 tritem->destructor(tritem->data);
88}
89
90static void switchdev_trans_items_warn_destroy(struct net_device *dev,
91 struct switchdev_trans *trans)
92{
93 WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
94 dev->name);
95 switchdev_trans_items_destroy(trans);
96}
97
98static LIST_HEAD(deferred);
99static DEFINE_SPINLOCK(deferred_lock);
100
101typedef void switchdev_deferred_func_t(struct net_device *dev,
102 const void *data);
103
104struct switchdev_deferred_item {
105 struct list_head list;
106 struct net_device *dev;
107 switchdev_deferred_func_t *func;
108 unsigned long data[0];
109};
110
111static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
112{
113 struct switchdev_deferred_item *dfitem;
114
115 spin_lock_bh(&deferred_lock);
116 if (list_empty(&deferred)) {
117 dfitem = NULL;
118 goto unlock;
119 }
120 dfitem = list_first_entry(&deferred,
121 struct switchdev_deferred_item, list);
122 list_del(&dfitem->list);
123unlock:
124 spin_unlock_bh(&deferred_lock);
125 return dfitem;
126}
127
128/**
129 * switchdev_deferred_process - Process ops in deferred queue
130 *
131 * Called to flush the ops currently queued in deferred ops queue.
132 * rtnl_lock must be held.
133 */
134void switchdev_deferred_process(void)
135{
136 struct switchdev_deferred_item *dfitem;
137
138 ASSERT_RTNL();
139
140 while ((dfitem = switchdev_deferred_dequeue())) {
141 dfitem->func(dfitem->dev, dfitem->data);
142 dev_put(dfitem->dev);
143 kfree(dfitem);
144 }
145}
146EXPORT_SYMBOL_GPL(switchdev_deferred_process);
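The queue itself is small: each deferred op copies its payload into a trailing flexible array so the caller's storage need not outlive the call, takes a device reference, and is drained later under rtnl. A user-space model of just the queue mechanics (a pthread mutex stands in for the spinlock; the kernel version is FIFO via list_add_tail, while this toy is LIFO for brevity):

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct def_item {
	struct def_item *next;
	void (*func)(const void *data);
	size_t len;
	unsigned char data[];		/* payload copied in */
};

static struct def_item *head;
static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;

/* models switchdev_deferred_enqueue(): copy the payload, then queue */
int defer(void (*func)(const void *), const void *data, size_t len)
{
	struct def_item *it = malloc(sizeof(*it) + len);

	if (!it)
		return -1;
	it->func = func;
	it->len = len;
	memcpy(it->data, data, len);
	pthread_mutex_lock(&qlock);
	it->next = head;		/* LIFO here; the patch is FIFO */
	head = it;
	pthread_mutex_unlock(&qlock);
	return 0;
}

/* models switchdev_deferred_process(): drain the queue, calling
 * each op without the queue lock held */
void process_deferred(void)
{
	pthread_mutex_lock(&qlock);
	while (head) {
		struct def_item *it = head;

		head = it->next;
		pthread_mutex_unlock(&qlock);
		it->func(it->data);
		free(it);
		pthread_mutex_lock(&qlock);
	}
	pthread_mutex_unlock(&qlock);
}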
147
148static void switchdev_deferred_process_work(struct work_struct *work)
149{
150 rtnl_lock();
151 switchdev_deferred_process();
152 rtnl_unlock();
153}
154
155static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);
156
157static int switchdev_deferred_enqueue(struct net_device *dev,
158 const void *data, size_t data_len,
159 switchdev_deferred_func_t *func)
160{
161 struct switchdev_deferred_item *dfitem;
162
163 dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
164 if (!dfitem)
165 return -ENOMEM;
166 dfitem->dev = dev;
167 dfitem->func = func;
168 memcpy(dfitem->data, data, data_len);
169 dev_hold(dev);
170 spin_lock_bh(&deferred_lock);
171 list_add_tail(&dfitem->list, &deferred);
172 spin_unlock_bh(&deferred_lock);
173 schedule_work(&deferred_process_work);
174 return 0;
175}
176
177/**
23 * switchdev_port_attr_get - Get port attribute 178 * switchdev_port_attr_get - Get port attribute
24 * 179 *
25 * @dev: port device 180 * @dev: port device
@@ -31,7 +186,7 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
31 struct net_device *lower_dev; 186 struct net_device *lower_dev;
32 struct list_head *iter; 187 struct list_head *iter;
33 struct switchdev_attr first = { 188 struct switchdev_attr first = {
34 .id = SWITCHDEV_ATTR_UNDEFINED 189 .id = SWITCHDEV_ATTR_ID_UNDEFINED
35 }; 190 };
36 int err = -EOPNOTSUPP; 191 int err = -EOPNOTSUPP;
37 192
@@ -51,7 +206,7 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
51 err = switchdev_port_attr_get(lower_dev, attr); 206 err = switchdev_port_attr_get(lower_dev, attr);
52 if (err) 207 if (err)
53 break; 208 break;
54 if (first.id == SWITCHDEV_ATTR_UNDEFINED) 209 if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
55 first = *attr; 210 first = *attr;
56 else if (memcmp(&first, attr, sizeof(*attr))) 211 else if (memcmp(&first, attr, sizeof(*attr)))
57 return -ENODATA; 212 return -ENODATA;
@@ -62,18 +217,21 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
62EXPORT_SYMBOL_GPL(switchdev_port_attr_get); 217EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
63 218
64static int __switchdev_port_attr_set(struct net_device *dev, 219static int __switchdev_port_attr_set(struct net_device *dev,
65 struct switchdev_attr *attr) 220 const struct switchdev_attr *attr,
221 struct switchdev_trans *trans)
66{ 222{
67 const struct switchdev_ops *ops = dev->switchdev_ops; 223 const struct switchdev_ops *ops = dev->switchdev_ops;
68 struct net_device *lower_dev; 224 struct net_device *lower_dev;
69 struct list_head *iter; 225 struct list_head *iter;
70 int err = -EOPNOTSUPP; 226 int err = -EOPNOTSUPP;
71 227
72 if (ops && ops->switchdev_port_attr_set) 228 if (ops && ops->switchdev_port_attr_set) {
73 return ops->switchdev_port_attr_set(dev, attr); 229 err = ops->switchdev_port_attr_set(dev, attr, trans);
230 goto done;
231 }
74 232
75 if (attr->flags & SWITCHDEV_F_NO_RECURSE) 233 if (attr->flags & SWITCHDEV_F_NO_RECURSE)
76 return err; 234 goto done;
77 235
78 /* Switch device port(s) may be stacked under 236 /* Switch device port(s) may be stacked under
79 * bond/team/vlan dev, so recurse down to set attr on 237 * bond/team/vlan dev, so recurse down to set attr on
@@ -81,80 +239,25 @@ static int __switchdev_port_attr_set(struct net_device *dev,
81 */ 239 */
82 240
83 netdev_for_each_lower_dev(dev, lower_dev, iter) { 241 netdev_for_each_lower_dev(dev, lower_dev, iter) {
84 err = __switchdev_port_attr_set(lower_dev, attr); 242 err = __switchdev_port_attr_set(lower_dev, attr, trans);
85 if (err) 243 if (err)
86 break; 244 break;
87 } 245 }
88 246
89 return err; 247done:
90} 248 if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
91 249 err = 0;
92struct switchdev_attr_set_work {
93 struct work_struct work;
94 struct net_device *dev;
95 struct switchdev_attr attr;
96};
97
98static void switchdev_port_attr_set_work(struct work_struct *work)
99{
100 struct switchdev_attr_set_work *asw =
101 container_of(work, struct switchdev_attr_set_work, work);
102 int err;
103
104 rtnl_lock();
105 err = switchdev_port_attr_set(asw->dev, &asw->attr);
106 if (err && err != -EOPNOTSUPP)
107 netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
108 err, asw->attr.id);
109 rtnl_unlock();
110
111 dev_put(asw->dev);
112 kfree(work);
113}
114 250
115static int switchdev_port_attr_set_defer(struct net_device *dev, 251 return err;
116 struct switchdev_attr *attr)
117{
118 struct switchdev_attr_set_work *asw;
119
120 asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
121 if (!asw)
122 return -ENOMEM;
123
124 INIT_WORK(&asw->work, switchdev_port_attr_set_work);
125
126 dev_hold(dev);
127 asw->dev = dev;
128 memcpy(&asw->attr, attr, sizeof(asw->attr));
129
130 schedule_work(&asw->work);
131
132 return 0;
133} 252}
134 253
135/** 254static int switchdev_port_attr_set_now(struct net_device *dev,
136 * switchdev_port_attr_set - Set port attribute 255 const struct switchdev_attr *attr)
137 *
138 * @dev: port device
139 * @attr: attribute to set
140 *
141 * Use a 2-phase prepare-commit transaction model to ensure
142 * system is not left in a partially updated state due to
143 * failure from driver/device.
144 */
145int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
146{ 256{
257 struct switchdev_trans trans;
147 int err; 258 int err;
148 259
149 if (!rtnl_is_locked()) { 260 switchdev_trans_init(&trans);
150 /* Running prepare-commit transaction across stacked
151 * devices requires nothing moves, so if rtnl_lock is
152 * not held, schedule a worker thread to hold rtnl_lock
153 * while setting attr.
154 */
155
156 return switchdev_port_attr_set_defer(dev, attr);
157 }
158 261
159 /* Phase I: prepare for attr set. Driver/device should fail 262 /* Phase I: prepare for attr set. Driver/device should fail
160 * here if there are going to be issues in the commit phase, 263 * here if there are going to be issues in the commit phase,
@@ -163,18 +266,16 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
163 * but should not commit the attr. 266 * but should not commit the attr.
164 */ 267 */
165 268
166 attr->trans = SWITCHDEV_TRANS_PREPARE; 269 trans.ph_prepare = true;
167 err = __switchdev_port_attr_set(dev, attr); 270 err = __switchdev_port_attr_set(dev, attr, &trans);
168 if (err) { 271 if (err) {
169 /* Prepare phase failed: abort the transaction. Any 272 /* Prepare phase failed: abort the transaction. Any
170 * resources reserved in the prepare phase are 273 * resources reserved in the prepare phase are
171 * released. 274 * released.
172 */ 275 */
173 276
174 if (err != -EOPNOTSUPP) { 277 if (err != -EOPNOTSUPP)
175 attr->trans = SWITCHDEV_TRANS_ABORT; 278 switchdev_trans_items_destroy(&trans);
176 __switchdev_port_attr_set(dev, attr);
177 }
178 279
179 return err; 280 return err;
180 } 281 }
@@ -184,17 +285,75 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
184 * because the driver said everything was OK in phase I. 285 * because the driver said everything was OK in phase I.
185 */ 286 */
186 287
187 attr->trans = SWITCHDEV_TRANS_COMMIT; 288 trans.ph_prepare = false;
188 err = __switchdev_port_attr_set(dev, attr); 289 err = __switchdev_port_attr_set(dev, attr, &trans);
189 WARN(err, "%s: Commit of attribute (id=%d) failed.\n", 290 WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
190 dev->name, attr->id); 291 dev->name, attr->id);
292 switchdev_trans_items_warn_destroy(dev, &trans);
191 293
192 return err; 294 return err;
193} 295}
296
297static void switchdev_port_attr_set_deferred(struct net_device *dev,
298 const void *data)
299{
300 const struct switchdev_attr *attr = data;
301 int err;
302
303 err = switchdev_port_attr_set_now(dev, attr);
304 if (err && err != -EOPNOTSUPP)
305 netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
306 err, attr->id);
307}
308
309static int switchdev_port_attr_set_defer(struct net_device *dev,
310 const struct switchdev_attr *attr)
311{
312 return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
313 switchdev_port_attr_set_deferred);
314}
315
316/**
317 * switchdev_port_attr_set - Set port attribute
318 *
319 * @dev: port device
320 * @attr: attribute to set
321 *
322 * Use a 2-phase prepare-commit transaction model to ensure
323 * system is not left in a partially updated state due to
324 * failure from driver/device.
325 *
326 * rtnl_lock must be held and must not be in atomic section,
327 * in case SWITCHDEV_F_DEFER flag is not set.
328 */
329int switchdev_port_attr_set(struct net_device *dev,
330 const struct switchdev_attr *attr)
331{
332 if (attr->flags & SWITCHDEV_F_DEFER)
333 return switchdev_port_attr_set_defer(dev, attr);
334 ASSERT_RTNL();
335 return switchdev_port_attr_set_now(dev, attr);
336}
194EXPORT_SYMBOL_GPL(switchdev_port_attr_set); 337EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
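A hypothetical caller that cannot take rtnl_lock (say, under a spinlock in the bridge STP machinery) sets SWITCHDEV_F_DEFER and lets the op run later from the deferred queue; the attribute value is copied by the enqueue path, so stack storage is fine. The attribute ID and union member below follow the renamed API in this series, but treat the exact names as assumptions:

static int my_set_stp_state_atomic(struct net_device *dev, u8 state)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_STP_STATE,
		.flags = SWITCHDEV_F_DEFER,
		.u.stp_state = state,	/* e.g. BR_STATE_FORWARDING */
	};

	/* queues the op and returns; the set runs later under rtnl */
	return switchdev_port_attr_set(dev, &attr);
}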
195 338
339static size_t switchdev_obj_size(const struct switchdev_obj *obj)
340{
341 switch (obj->id) {
342 case SWITCHDEV_OBJ_ID_PORT_VLAN:
343 return sizeof(struct switchdev_obj_port_vlan);
344 case SWITCHDEV_OBJ_ID_IPV4_FIB:
345 return sizeof(struct switchdev_obj_ipv4_fib);
346 case SWITCHDEV_OBJ_ID_PORT_FDB:
347 return sizeof(struct switchdev_obj_port_fdb);
348 default:
349 BUG();
350 }
351 return 0;
352}
353
196static int __switchdev_port_obj_add(struct net_device *dev, 354static int __switchdev_port_obj_add(struct net_device *dev,
197 struct switchdev_obj *obj) 355 const struct switchdev_obj *obj,
356 struct switchdev_trans *trans)
198{ 357{
199 const struct switchdev_ops *ops = dev->switchdev_ops; 358 const struct switchdev_ops *ops = dev->switchdev_ops;
200 struct net_device *lower_dev; 359 struct net_device *lower_dev;
@@ -202,7 +361,7 @@ static int __switchdev_port_obj_add(struct net_device *dev,
202 int err = -EOPNOTSUPP; 361 int err = -EOPNOTSUPP;
203 362
204 if (ops && ops->switchdev_port_obj_add) 363 if (ops && ops->switchdev_port_obj_add)
205 return ops->switchdev_port_obj_add(dev, obj); 364 return ops->switchdev_port_obj_add(dev, obj, trans);
206 365
207 /* Switch device port(s) may be stacked under 366 /* Switch device port(s) may be stacked under
208 * bond/team/vlan dev, so recurse down to add object on 367 * bond/team/vlan dev, so recurse down to add object on
@@ -210,7 +369,7 @@ static int __switchdev_port_obj_add(struct net_device *dev,
210 */ 369 */
211 370
212 netdev_for_each_lower_dev(dev, lower_dev, iter) { 371 netdev_for_each_lower_dev(dev, lower_dev, iter) {
213 err = __switchdev_port_obj_add(lower_dev, obj); 372 err = __switchdev_port_obj_add(lower_dev, obj, trans);
214 if (err) 373 if (err)
215 break; 374 break;
216 } 375 }
@@ -218,24 +377,16 @@ static int __switchdev_port_obj_add(struct net_device *dev,
218 return err; 377 return err;
219} 378}
220 379
221/** 380static int switchdev_port_obj_add_now(struct net_device *dev,
222 * switchdev_port_obj_add - Add port object 381 const struct switchdev_obj *obj)
223 *
224 * @dev: port device
225 * @obj: object to add
226 *
227 * Use a 2-phase prepare-commit transaction model to ensure
228 * system is not left in a partially updated state due to
229 * failure from driver/device.
230 *
231 * rtnl_lock must be held.
232 */
233int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
234{ 382{
383 struct switchdev_trans trans;
235 int err; 384 int err;
236 385
237 ASSERT_RTNL(); 386 ASSERT_RTNL();
238 387
388 switchdev_trans_init(&trans);
389
239 /* Phase I: prepare for obj add. Driver/device should fail 390 /* Phase I: prepare for obj add. Driver/device should fail
240 * here if there are going to be issues in the commit phase, 391 * here if there are going to be issues in the commit phase,
241 * such as lack of resources or support. The driver/device 392 * such as lack of resources or support. The driver/device
@@ -243,18 +394,16 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
243 * but should not commit the obj. 394 * but should not commit the obj.
244 */ 395 */
245 396
246 obj->trans = SWITCHDEV_TRANS_PREPARE; 397 trans.ph_prepare = true;
247 err = __switchdev_port_obj_add(dev, obj); 398 err = __switchdev_port_obj_add(dev, obj, &trans);
248 if (err) { 399 if (err) {
249 /* Prepare phase failed: abort the transaction. Any 400 /* Prepare phase failed: abort the transaction. Any
250 * resources reserved in the prepare phase are 401 * resources reserved in the prepare phase are
251 * released. 402 * released.
252 */ 403 */
253 404
254 if (err != -EOPNOTSUPP) { 405 if (err != -EOPNOTSUPP)
255 obj->trans = SWITCHDEV_TRANS_ABORT; 406 switchdev_trans_items_destroy(&trans);
256 __switchdev_port_obj_add(dev, obj);
257 }
258 407
259 return err; 408 return err;
260 } 409 }
@@ -264,21 +413,59 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
264 * because the driver said everything was OK in phase I. 413 * because the driver said everything was OK in phase I.
265 */ 414 */
266 415
267 obj->trans = SWITCHDEV_TRANS_COMMIT; 416 trans.ph_prepare = false;
268 err = __switchdev_port_obj_add(dev, obj); 417 err = __switchdev_port_obj_add(dev, obj, &trans);
269 WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); 418 WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
419 switchdev_trans_items_warn_destroy(dev, &trans);
270 420
271 return err; 421 return err;
272} 422}
273EXPORT_SYMBOL_GPL(switchdev_port_obj_add); 423
424static void switchdev_port_obj_add_deferred(struct net_device *dev,
425 const void *data)
426{
427 const struct switchdev_obj *obj = data;
428 int err;
429
430 err = switchdev_port_obj_add_now(dev, obj);
431 if (err && err != -EOPNOTSUPP)
432 netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
433 err, obj->id);
434}
435
436static int switchdev_port_obj_add_defer(struct net_device *dev,
437 const struct switchdev_obj *obj)
438{
439 return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
440 switchdev_port_obj_add_deferred);
441}
274 442
275/** 443/**
276 * switchdev_port_obj_del - Delete port object 444 * switchdev_port_obj_add - Add port object
277 * 445 *
278 * @dev: port device 446 * @dev: port device
279 * @obj: object to delete
448 * @obj: object to add
449 *
450 * Use a 2-phase prepare-commit transaction model to ensure
451 * system is not left in a partially updated state due to
452 * failure from driver/device.
453 *
454 * rtnl_lock must be held and must not be in atomic section,
455 * in case SWITCHDEV_F_DEFER flag is not set.
280 */ 456 */
281int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) 457int switchdev_port_obj_add(struct net_device *dev,
458 const struct switchdev_obj *obj)
459{
460 if (obj->flags & SWITCHDEV_F_DEFER)
461 return switchdev_port_obj_add_defer(dev, obj);
462 ASSERT_RTNL();
463 return switchdev_port_obj_add_now(dev, obj);
464}
465EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
466
467static int switchdev_port_obj_del_now(struct net_device *dev,
468 const struct switchdev_obj *obj)
282{ 469{
283 const struct switchdev_ops *ops = dev->switchdev_ops; 470 const struct switchdev_ops *ops = dev->switchdev_ops;
284 struct net_device *lower_dev; 471 struct net_device *lower_dev;
@@ -294,30 +481,75 @@ int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
294 */ 481 */
295 482
296 netdev_for_each_lower_dev(dev, lower_dev, iter) { 483 netdev_for_each_lower_dev(dev, lower_dev, iter) {
297 err = switchdev_port_obj_del(lower_dev, obj); 484 err = switchdev_port_obj_del_now(lower_dev, obj);
298 if (err) 485 if (err)
299 break; 486 break;
300 } 487 }
301 488
302 return err; 489 return err;
303} 490}
491
492static void switchdev_port_obj_del_deferred(struct net_device *dev,
493 const void *data)
494{
495 const struct switchdev_obj *obj = data;
496 int err;
497
498 err = switchdev_port_obj_del_now(dev, obj);
499 if (err && err != -EOPNOTSUPP)
500 netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
501 err, obj->id);
502}
503
504static int switchdev_port_obj_del_defer(struct net_device *dev,
505 const struct switchdev_obj *obj)
506{
507 return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
508 switchdev_port_obj_del_deferred);
509}
510
511/**
512 * switchdev_port_obj_del - Delete port object
513 *
514 * @dev: port device
516 * @obj: object to delete
517 *
518 * rtnl_lock must be held and must not be in atomic section,
519 * in case SWITCHDEV_F_DEFER flag is not set.
520 */
521int switchdev_port_obj_del(struct net_device *dev,
522 const struct switchdev_obj *obj)
523{
524 if (obj->flags & SWITCHDEV_F_DEFER)
525 return switchdev_port_obj_del_defer(dev, obj);
526 ASSERT_RTNL();
527 return switchdev_port_obj_del_now(dev, obj);
528}
304EXPORT_SYMBOL_GPL(switchdev_port_obj_del); 529EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
305 530
306/** 531/**
307 * switchdev_port_obj_dump - Dump port objects 532 * switchdev_port_obj_dump - Dump port objects
308 * 533 *
309 * @dev: port device 534 * @dev: port device
310 * @obj: object to dump 536 * @obj: object to dump
537 * @cb: function to call with a filled object
538 *
539 * rtnl_lock must be held.
311 */ 540 */
312int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj) 541int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
542 switchdev_obj_dump_cb_t *cb)
313{ 543{
314 const struct switchdev_ops *ops = dev->switchdev_ops; 544 const struct switchdev_ops *ops = dev->switchdev_ops;
315 struct net_device *lower_dev; 545 struct net_device *lower_dev;
316 struct list_head *iter; 546 struct list_head *iter;
317 int err = -EOPNOTSUPP; 547 int err = -EOPNOTSUPP;
318 548
549 ASSERT_RTNL();
550
319 if (ops && ops->switchdev_port_obj_dump) 551 if (ops && ops->switchdev_port_obj_dump)
320 return ops->switchdev_port_obj_dump(dev, obj); 552 return ops->switchdev_port_obj_dump(dev, obj, cb);
321 553
322 /* Switch device port(s) may be stacked under 554 /* Switch device port(s) may be stacked under
323 * bond/team/vlan dev, so recurse down to dump objects on 555 * bond/team/vlan dev, so recurse down to dump objects on
@@ -325,7 +557,7 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
325 */ 557 */
326 558
327 netdev_for_each_lower_dev(dev, lower_dev, iter) { 559 netdev_for_each_lower_dev(dev, lower_dev, iter) {
328 err = switchdev_port_obj_dump(lower_dev, obj); 560 err = switchdev_port_obj_dump(lower_dev, obj, cb);
329 break; 561 break;
330 } 562 }
331 563
@@ -397,7 +629,7 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
397EXPORT_SYMBOL_GPL(call_switchdev_notifiers); 629EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
398 630
399struct switchdev_vlan_dump { 631struct switchdev_vlan_dump {
400 struct switchdev_obj obj; 632 struct switchdev_obj_port_vlan vlan;
401 struct sk_buff *skb; 633 struct sk_buff *skb;
402 u32 filter_mask; 634 u32 filter_mask;
403 u16 flags; 635 u16 flags;
@@ -405,8 +637,7 @@ struct switchdev_vlan_dump {
405 u16 end; 637 u16 end;
406}; 638};
407 639
408static int switchdev_port_vlan_dump_put(struct net_device *dev, 640static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
409 struct switchdev_vlan_dump *dump)
410{ 641{
411 struct bridge_vlan_info vinfo; 642 struct bridge_vlan_info vinfo;
412 643
@@ -436,12 +667,11 @@ static int switchdev_port_vlan_dump_put(struct net_device *dev,
436 return 0; 667 return 0;
437} 668}
438 669
439static int switchdev_port_vlan_dump_cb(struct net_device *dev, 670static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj)
440 struct switchdev_obj *obj)
441{ 671{
672 struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
442 struct switchdev_vlan_dump *dump = 673 struct switchdev_vlan_dump *dump =
443 container_of(obj, struct switchdev_vlan_dump, obj); 674 container_of(vlan, struct switchdev_vlan_dump, vlan);
444 struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
445 int err = 0; 675 int err = 0;
446 676
447 if (vlan->vid_begin > vlan->vid_end) 677 if (vlan->vid_begin > vlan->vid_end)
@@ -452,7 +682,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev,
452 for (dump->begin = dump->end = vlan->vid_begin; 682 for (dump->begin = dump->end = vlan->vid_begin;
453 dump->begin <= vlan->vid_end; 683 dump->begin <= vlan->vid_end;
454 dump->begin++, dump->end++) { 684 dump->begin++, dump->end++) {
455 err = switchdev_port_vlan_dump_put(dev, dump); 685 err = switchdev_port_vlan_dump_put(dump);
456 if (err) 686 if (err)
457 return err; 687 return err;
458 } 688 }
@@ -464,7 +694,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev,
464 /* prepend */ 694 /* prepend */
465 dump->begin = vlan->vid_begin; 695 dump->begin = vlan->vid_begin;
466 } else { 696 } else {
467 err = switchdev_port_vlan_dump_put(dev, dump); 697 err = switchdev_port_vlan_dump_put(dump);
468 dump->flags = vlan->flags; 698 dump->flags = vlan->flags;
469 dump->begin = vlan->vid_begin; 699 dump->begin = vlan->vid_begin;
470 dump->end = vlan->vid_end; 700 dump->end = vlan->vid_end;
@@ -476,7 +706,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev,
476 /* append */ 706 /* append */
477 dump->end = vlan->vid_end; 707 dump->end = vlan->vid_end;
478 } else { 708 } else {
479 err = switchdev_port_vlan_dump_put(dev, dump); 709 err = switchdev_port_vlan_dump_put(dump);
480 dump->flags = vlan->flags; 710 dump->flags = vlan->flags;
481 dump->begin = vlan->vid_begin; 711 dump->begin = vlan->vid_begin;
482 dump->end = vlan->vid_end; 712 dump->end = vlan->vid_end;
@@ -493,10 +723,7 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
493 u32 filter_mask) 723 u32 filter_mask)
494{ 724{
495 struct switchdev_vlan_dump dump = { 725 struct switchdev_vlan_dump dump = {
496 .obj = { 726 .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
497 .id = SWITCHDEV_OBJ_PORT_VLAN,
498 .cb = switchdev_port_vlan_dump_cb,
499 },
500 .skb = skb, 727 .skb = skb,
501 .filter_mask = filter_mask, 728 .filter_mask = filter_mask,
502 }; 729 };
@@ -504,12 +731,13 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
504 731
505 if ((filter_mask & RTEXT_FILTER_BRVLAN) || 732 if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
506 (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { 733 (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
507 err = switchdev_port_obj_dump(dev, &dump.obj); 734 err = switchdev_port_obj_dump(dev, &dump.vlan.obj,
735 switchdev_port_vlan_dump_cb);
508 if (err) 736 if (err)
509 goto err_out; 737 goto err_out;
510 if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) 738 if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
511 /* last one */ 739 /* last one */
512 err = switchdev_port_vlan_dump_put(dev, &dump); 740 err = switchdev_port_vlan_dump_put(&dump);
513 } 741 }
514 742
515err_out: 743err_out:
@@ -529,10 +757,10 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
529 int nlflags) 757 int nlflags)
530{ 758{
531 struct switchdev_attr attr = { 759 struct switchdev_attr attr = {
532 .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, 760 .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
533 }; 761 };
534 u16 mode = BRIDGE_MODE_UNDEF; 762 u16 mode = BRIDGE_MODE_UNDEF;
535 u32 mask = BR_LEARNING | BR_LEARNING_SYNC; 763 u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD;
536 int err; 764 int err;
537 765
538 err = switchdev_port_attr_get(dev, &attr); 766 err = switchdev_port_attr_get(dev, &attr);
@@ -550,7 +778,7 @@ static int switchdev_port_br_setflag(struct net_device *dev,
550 unsigned long brport_flag) 778 unsigned long brport_flag)
551{ 779{
552 struct switchdev_attr attr = { 780 struct switchdev_attr attr = {
553 .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, 781 .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
554 }; 782 };
555 u8 flag = nla_get_u8(nlattr); 783 u8 flag = nla_get_u8(nlattr);
556 int err; 784 int err;
@@ -603,6 +831,9 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
603 err = switchdev_port_br_setflag(dev, attr, 831 err = switchdev_port_br_setflag(dev, attr,
604 BR_LEARNING_SYNC); 832 BR_LEARNING_SYNC);
605 break; 833 break;
834 case IFLA_BRPORT_UNICAST_FLOOD:
835 err = switchdev_port_br_setflag(dev, attr, BR_FLOOD);
836 break;
606 default: 837 default:
607 err = -EOPNOTSUPP; 838 err = -EOPNOTSUPP;
608 break; 839 break;
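For orientation, the new IFLA_BRPORT_UNICAST_FLOOD case reduces to pushing one bridge-port flag down as a switchdev attribute. A hedged sketch with a hypothetical caller; note that real code read-modify-writes the existing brport_flags rather than overwriting them, as switchdev_port_br_setflag() does above:

static int example_set_flood(struct net_device *dev, bool on)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
		.u.brport_flags = on ? BR_FLOOD : 0,	/* clobbers other flags */
	};

	return switchdev_port_attr_set(dev, &attr);
}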
@@ -617,14 +848,13 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
617static int switchdev_port_br_afspec(struct net_device *dev, 848static int switchdev_port_br_afspec(struct net_device *dev,
618 struct nlattr *afspec, 849 struct nlattr *afspec,
619 int (*f)(struct net_device *dev, 850 int (*f)(struct net_device *dev,
620 struct switchdev_obj *obj)) 851 const struct switchdev_obj *obj))
621{ 852{
622 struct nlattr *attr; 853 struct nlattr *attr;
623 struct bridge_vlan_info *vinfo; 854 struct bridge_vlan_info *vinfo;
624 struct switchdev_obj obj = { 855 struct switchdev_obj_port_vlan vlan = {
625 .id = SWITCHDEV_OBJ_PORT_VLAN, 856 .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
626 }; 857 };
627 struct switchdev_obj_vlan *vlan = &obj.u.vlan;
628 int rem; 858 int rem;
629 int err; 859 int err;
630 860
@@ -634,30 +864,35 @@ static int switchdev_port_br_afspec(struct net_device *dev,
634 if (nla_len(attr) != sizeof(struct bridge_vlan_info)) 864 if (nla_len(attr) != sizeof(struct bridge_vlan_info))
635 return -EINVAL; 865 return -EINVAL;
636 vinfo = nla_data(attr); 866 vinfo = nla_data(attr);
637 vlan->flags = vinfo->flags; 867 if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
868 return -EINVAL;
869 vlan.flags = vinfo->flags;
638 if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { 870 if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
639 if (vlan->vid_begin) 871 if (vlan.vid_begin)
872 return -EINVAL;
873 vlan.vid_begin = vinfo->vid;
874 /* don't allow range of pvids */
875 if (vlan.flags & BRIDGE_VLAN_INFO_PVID)
640 return -EINVAL; 876 return -EINVAL;
641 vlan->vid_begin = vinfo->vid;
642 } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { 877 } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
643 if (!vlan->vid_begin) 878 if (!vlan.vid_begin)
644 return -EINVAL; 879 return -EINVAL;
645 vlan->vid_end = vinfo->vid; 880 vlan.vid_end = vinfo->vid;
646 if (vlan->vid_end <= vlan->vid_begin) 881 if (vlan.vid_end <= vlan.vid_begin)
647 return -EINVAL; 882 return -EINVAL;
648 err = f(dev, &obj); 883 err = f(dev, &vlan.obj);
649 if (err) 884 if (err)
650 return err; 885 return err;
651 memset(vlan, 0, sizeof(*vlan)); 886 vlan.vid_begin = 0;
652 } else { 887 } else {
653 if (vlan->vid_begin) 888 if (vlan.vid_begin)
654 return -EINVAL; 889 return -EINVAL;
655 vlan->vid_begin = vinfo->vid; 890 vlan.vid_begin = vinfo->vid;
656 vlan->vid_end = vinfo->vid; 891 vlan.vid_end = vinfo->vid;
657 err = f(dev, &obj); 892 err = f(dev, &vlan.obj);
658 if (err) 893 if (err)
659 return err; 894 return err;
660 memset(vlan, 0, sizeof(*vlan)); 895 vlan.vid_begin = 0;
661 } 896 }
662 } 897 }
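The loop above accepts either a single IFLA_BRIDGE_VLAN_INFO entry (begin == end) or a RANGE_BEGIN/RANGE_END pair, rejecting a PVID on a range and non-monotonic bounds. A sketch of the two entries a well-formed range request would carry, with illustrative VIDs; real callers emit these via nla_put() inside an IFLA_AF_SPEC nest:

struct bridge_vlan_info range_begin = {
	.flags = BRIDGE_VLAN_INFO_RANGE_BEGIN,	/* must not include _PVID */
	.vid   = 100,				/* 1..VLAN_VID_MASK-1 */
};

struct bridge_vlan_info range_end = {
	.flags = BRIDGE_VLAN_INFO_RANGE_END,
	.vid   = 199,				/* must be > begin vid */
};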
663 898
@@ -739,15 +974,13 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
739 struct net_device *dev, const unsigned char *addr, 974 struct net_device *dev, const unsigned char *addr,
740 u16 vid, u16 nlm_flags) 975 u16 vid, u16 nlm_flags)
741{ 976{
742 struct switchdev_obj obj = { 977 struct switchdev_obj_port_fdb fdb = {
743 .id = SWITCHDEV_OBJ_PORT_FDB, 978 .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
744 .u.fdb = { 979 .vid = vid,
745 .addr = addr,
746 .vid = vid,
747 },
748 }; 980 };
749 981
750 return switchdev_port_obj_add(dev, &obj); 982 ether_addr_copy(fdb.addr, addr);
983 return switchdev_port_obj_add(dev, &fdb.obj);
751} 984}
752EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); 985EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
753 986
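Because the address is now an embedded ETH_ALEN array rather than a pointer, the object survives the byte-wise copy that switchdev_deferred_enqueue() performs. A hedged sketch of a caller relying on that, with illustrative names:

static int example_fdb_evict(struct net_device *dev,
			     const unsigned char *mac, u16 vid)
{
	struct switchdev_obj_port_fdb fdb = {
		.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
		.obj.flags = SWITCHDEV_F_DEFER,	/* safe: addr is copied too */
		.vid = vid,
	};

	ether_addr_copy(fdb.addr, mac);
	return switchdev_port_obj_del(dev, &fdb.obj);
}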
@@ -766,30 +999,29 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
766 struct net_device *dev, const unsigned char *addr, 999 struct net_device *dev, const unsigned char *addr,
767 u16 vid) 1000 u16 vid)
768{ 1001{
769 struct switchdev_obj obj = { 1002 struct switchdev_obj_port_fdb fdb = {
770 .id = SWITCHDEV_OBJ_PORT_FDB, 1003 .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
771 .u.fdb = { 1004 .vid = vid,
772 .addr = addr,
773 .vid = vid,
774 },
775 }; 1005 };
776 1006
777 return switchdev_port_obj_del(dev, &obj); 1007 ether_addr_copy(fdb.addr, addr);
1008 return switchdev_port_obj_del(dev, &fdb.obj);
778} 1009}
779EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); 1010EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
780 1011
781struct switchdev_fdb_dump { 1012struct switchdev_fdb_dump {
782 struct switchdev_obj obj; 1013 struct switchdev_obj_port_fdb fdb;
1014 struct net_device *dev;
783 struct sk_buff *skb; 1015 struct sk_buff *skb;
784 struct netlink_callback *cb; 1016 struct netlink_callback *cb;
785 int idx; 1017 int idx;
786}; 1018};
787 1019
788static int switchdev_port_fdb_dump_cb(struct net_device *dev, 1020static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
789 struct switchdev_obj *obj)
790{ 1021{
1022 struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
791 struct switchdev_fdb_dump *dump = 1023 struct switchdev_fdb_dump *dump =
792 container_of(obj, struct switchdev_fdb_dump, obj); 1024 container_of(fdb, struct switchdev_fdb_dump, fdb);
793 u32 portid = NETLINK_CB(dump->cb->skb).portid; 1025 u32 portid = NETLINK_CB(dump->cb->skb).portid;
794 u32 seq = dump->cb->nlh->nlmsg_seq; 1026 u32 seq = dump->cb->nlh->nlmsg_seq;
795 struct nlmsghdr *nlh; 1027 struct nlmsghdr *nlh;
@@ -809,13 +1041,13 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev,
809 ndm->ndm_pad2 = 0; 1041 ndm->ndm_pad2 = 0;
810 ndm->ndm_flags = NTF_SELF; 1042 ndm->ndm_flags = NTF_SELF;
811 ndm->ndm_type = 0; 1043 ndm->ndm_type = 0;
812 ndm->ndm_ifindex = dev->ifindex; 1044 ndm->ndm_ifindex = dump->dev->ifindex;
813 ndm->ndm_state = obj->u.fdb.ndm_state; 1045 ndm->ndm_state = fdb->ndm_state;
814 1046
815 if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr)) 1047 if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
816 goto nla_put_failure; 1048 goto nla_put_failure;
817 1049
818 if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid)) 1050 if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
819 goto nla_put_failure; 1051 goto nla_put_failure;
820 1052
821 nlmsg_end(dump->skb, nlh); 1053 nlmsg_end(dump->skb, nlh);
@@ -845,16 +1077,14 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
845 struct net_device *filter_dev, int idx) 1077 struct net_device *filter_dev, int idx)
846{ 1078{
847 struct switchdev_fdb_dump dump = { 1079 struct switchdev_fdb_dump dump = {
848 .obj = { 1080 .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
849 .id = SWITCHDEV_OBJ_PORT_FDB, 1081 .dev = dev,
850 .cb = switchdev_port_fdb_dump_cb,
851 },
852 .skb = skb, 1082 .skb = skb,
853 .cb = cb, 1083 .cb = cb,
854 .idx = idx, 1084 .idx = idx,
855 }; 1085 };
856 1086
857 switchdev_port_obj_dump(dev, &dump.obj); 1087 switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb);
858 return dump.idx; 1088 return dump.idx;
859} 1089}
860EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); 1090EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
@@ -885,12 +1115,14 @@ static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
885static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) 1115static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
886{ 1116{
887 struct switchdev_attr attr = { 1117 struct switchdev_attr attr = {
888 .id = SWITCHDEV_ATTR_PORT_PARENT_ID, 1118 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
889 }; 1119 };
890 struct switchdev_attr prev_attr; 1120 struct switchdev_attr prev_attr;
891 struct net_device *dev = NULL; 1121 struct net_device *dev = NULL;
892 int nhsel; 1122 int nhsel;
893 1123
1124 ASSERT_RTNL();
1125
894 /* For this route, all nexthop devs must be on the same switch. */ 1126 /* For this route, all nexthop devs must be on the same switch. */
895 1127
896 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 1128 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
@@ -932,21 +1164,20 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
932int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, 1164int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
933 u8 tos, u8 type, u32 nlflags, u32 tb_id) 1165 u8 tos, u8 type, u32 nlflags, u32 tb_id)
934{ 1166{
935 struct switchdev_obj fib_obj = { 1167 struct switchdev_obj_ipv4_fib ipv4_fib = {
936 .id = SWITCHDEV_OBJ_IPV4_FIB, 1168 .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
937 .u.ipv4_fib = { 1169 .dst = dst,
938 .dst = dst, 1170 .dst_len = dst_len,
939 .dst_len = dst_len, 1171 .tos = tos,
940 .fi = fi, 1172 .type = type,
941 .tos = tos, 1173 .nlflags = nlflags,
942 .type = type, 1174 .tb_id = tb_id,
943 .nlflags = nlflags,
944 .tb_id = tb_id,
945 },
946 }; 1175 };
947 struct net_device *dev; 1176 struct net_device *dev;
948 int err = 0; 1177 int err = 0;
949 1178
1179 memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi));
1180
950 /* Don't offload route if using custom ip rules or if 1181 /* Don't offload route if using custom ip rules or if
951 * IPv4 FIB offloading has been disabled completely. 1182 * IPv4 FIB offloading has been disabled completely.
952 */ 1183 */
@@ -963,7 +1194,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
963 if (!dev) 1194 if (!dev)
964 return 0; 1195 return 0;
965 1196
966 err = switchdev_port_obj_add(dev, &fib_obj); 1197 err = switchdev_port_obj_add(dev, &ipv4_fib.obj);
967 if (!err) 1198 if (!err)
968 fi->fib_flags |= RTNH_F_OFFLOAD; 1199 fi->fib_flags |= RTNH_F_OFFLOAD;
969 1200
@@ -986,21 +1217,20 @@ EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
986int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, 1217int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
987 u8 tos, u8 type, u32 tb_id) 1218 u8 tos, u8 type, u32 tb_id)
988{ 1219{
989 struct switchdev_obj fib_obj = { 1220 struct switchdev_obj_ipv4_fib ipv4_fib = {
990 .id = SWITCHDEV_OBJ_IPV4_FIB, 1221 .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
991 .u.ipv4_fib = { 1222 .dst = dst,
992 .dst = dst, 1223 .dst_len = dst_len,
993 .dst_len = dst_len, 1224 .tos = tos,
994 .fi = fi, 1225 .type = type,
995 .tos = tos, 1226 .nlflags = 0,
996 .type = type, 1227 .tb_id = tb_id,
997 .nlflags = 0,
998 .tb_id = tb_id,
999 },
1000 }; 1228 };
1001 struct net_device *dev; 1229 struct net_device *dev;
1002 int err = 0; 1230 int err = 0;
1003 1231
1232 memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi));
1233
1004 if (!(fi->fib_flags & RTNH_F_OFFLOAD)) 1234 if (!(fi->fib_flags & RTNH_F_OFFLOAD))
1005 return 0; 1235 return 0;
1006 1236
@@ -1008,7 +1238,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
1008 if (!dev) 1238 if (!dev)
1009 return 0; 1239 return 0;
1010 1240
1011 err = switchdev_port_obj_del(dev, &fib_obj); 1241 err = switchdev_port_obj_del(dev, &ipv4_fib.obj);
1012 if (!err) 1242 if (!err)
1013 fi->fib_flags &= ~RTNH_F_OFFLOAD; 1243 fi->fib_flags &= ~RTNH_F_OFFLOAD;
1014 1244
@@ -1040,11 +1270,11 @@ static bool switchdev_port_same_parent_id(struct net_device *a,
1040 struct net_device *b) 1270 struct net_device *b)
1041{ 1271{
1042 struct switchdev_attr a_attr = { 1272 struct switchdev_attr a_attr = {
1043 .id = SWITCHDEV_ATTR_PORT_PARENT_ID, 1273 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
1044 .flags = SWITCHDEV_F_NO_RECURSE, 1274 .flags = SWITCHDEV_F_NO_RECURSE,
1045 }; 1275 };
1046 struct switchdev_attr b_attr = { 1276 struct switchdev_attr b_attr = {
1047 .id = SWITCHDEV_ATTR_PORT_PARENT_ID, 1277 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
1048 .flags = SWITCHDEV_F_NO_RECURSE, 1278 .flags = SWITCHDEV_F_NO_RECURSE,
1049 }; 1279 };
1050 1280
@@ -1123,10 +1353,11 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
1123 u32 mark = dev->ifindex; 1353 u32 mark = dev->ifindex;
1124 u32 reset_mark = 0; 1354 u32 reset_mark = 0;
1125 1355
1126 if (group_dev && joining) { 1356 if (group_dev) {
1127 mark = switchdev_port_fwd_mark_get(dev, group_dev); 1357 ASSERT_RTNL();
1128 } else if (group_dev && !joining) { 1358 if (joining)
1129 if (dev->offload_fwd_mark == mark) 1359 mark = switchdev_port_fwd_mark_get(dev, group_dev);
1360 else if (dev->offload_fwd_mark == mark)
1130 /* Ohoh, this port was the mark reference port, 1361 /* Ohoh, this port was the mark reference port,
1131 * but it's leaving the group, so reset the 1362 * but it's leaving the group, so reset the
1132 * mark for the remaining ports in the group. 1363 * mark for the remaining ports in the group.
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index e7000be321b0..ed98c1fc3de1 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -94,10 +94,14 @@ __init int net_sysctl_init(void)
94 goto out; 94 goto out;
95 ret = register_pernet_subsys(&sysctl_pernet_ops); 95 ret = register_pernet_subsys(&sysctl_pernet_ops);
96 if (ret) 96 if (ret)
97 goto out; 97 goto out1;
98 register_sysctl_root(&net_sysctl_root); 98 register_sysctl_root(&net_sysctl_root);
99out: 99out:
100 return ret; 100 return ret;
101out1:
102 unregister_sysctl_table(net_header);
103 net_header = NULL;
104 goto out;
101} 105}
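The fix follows the usual kernel unwind idiom: a failing step jumps to a label that undoes only what already succeeded, then rejoins the common exit with the error code intact. A generic sketch with hypothetical step functions:

int example_step_a(void);
int example_step_b(void);
void example_undo_a(void);

int example_init(void)
{
	int ret;

	ret = example_step_a();		/* cf. register_sysctl() */
	if (ret)
		goto out;
	ret = example_step_b();		/* cf. register_pernet_subsys() */
	if (ret)
		goto undo_a;
out:
	return ret;
undo_a:
	example_undo_a();		/* roll back step a only */
	goto out;
}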
102 106
103struct ctl_table_header *register_net_sysctl(struct net *net, 107struct ctl_table_header *register_net_sysctl(struct net *net,
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 41042de3ae9b..9dc239dfe192 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -35,741 +35,301 @@
35 * POSSIBILITY OF SUCH DAMAGE. 35 * POSSIBILITY OF SUCH DAMAGE.
36 */ 36 */
37 37
38#include <linux/tipc_config.h>
38#include "socket.h" 39#include "socket.h"
39#include "msg.h" 40#include "msg.h"
40#include "bcast.h" 41#include "bcast.h"
41#include "name_distr.h" 42#include "name_distr.h"
42#include "core.h" 43#include "link.h"
44#include "node.h"
43 45
44#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ 46#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */
45#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ 47#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
46 48
47const char tipc_bclink_name[] = "broadcast-link"; 49const char tipc_bclink_name[] = "broadcast-link";
48 50
49static void tipc_nmap_diff(struct tipc_node_map *nm_a, 51/**
50 struct tipc_node_map *nm_b, 52 * struct tipc_bc_base - base structure for keeping broadcast send state
51 struct tipc_node_map *nm_diff); 53 * @link: broadcast send link structure
52static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); 54 * @inputq: data input queue; will only carry SOCK_WAKEUP messages
53static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); 55 * @dest: array keeping number of reachable destinations per bearer
54 56 * @primary_bearer: a bearer having links to all broadcast destinations, if any
55static void tipc_bclink_lock(struct net *net) 57 */
56{ 58struct tipc_bc_base {
57 struct tipc_net *tn = net_generic(net, tipc_net_id); 59 struct tipc_link *link;
58 60 struct sk_buff_head inputq;
59 spin_lock_bh(&tn->bclink->lock); 61 int dests[MAX_BEARERS];
60} 62 int primary_bearer;
61 63};
62static void tipc_bclink_unlock(struct net *net)
63{
64 struct tipc_net *tn = net_generic(net, tipc_net_id);
65
66 spin_unlock_bh(&tn->bclink->lock);
67}
68
69void tipc_bclink_input(struct net *net)
70{
71 struct tipc_net *tn = net_generic(net, tipc_net_id);
72
73 tipc_sk_mcast_rcv(net, &tn->bclink->arrvq, &tn->bclink->inputq);
74}
75
76uint tipc_bclink_get_mtu(void)
77{
78 return MAX_PKT_DEFAULT_MCAST;
79}
80
81static u32 bcbuf_acks(struct sk_buff *buf)
82{
83 return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle;
84}
85
86static void bcbuf_set_acks(struct sk_buff *buf, u32 acks)
87{
88 TIPC_SKB_CB(buf)->handle = (void *)(unsigned long)acks;
89}
90
91static void bcbuf_decr_acks(struct sk_buff *buf)
92{
93 bcbuf_set_acks(buf, bcbuf_acks(buf) - 1);
94}
95 64
96void tipc_bclink_add_node(struct net *net, u32 addr) 65static struct tipc_bc_base *tipc_bc_base(struct net *net)
97{ 66{
98 struct tipc_net *tn = net_generic(net, tipc_net_id); 67 return tipc_net(net)->bcbase;
99
100 tipc_bclink_lock(net);
101 tipc_nmap_add(&tn->bclink->bcast_nodes, addr);
102 tipc_bclink_unlock(net);
103} 68}
104 69
105void tipc_bclink_remove_node(struct net *net, u32 addr) 70int tipc_bcast_get_mtu(struct net *net)
106{ 71{
107 struct tipc_net *tn = net_generic(net, tipc_net_id); 72 return tipc_link_mtu(tipc_bc_sndlink(net));
108
109 tipc_bclink_lock(net);
110 tipc_nmap_remove(&tn->bclink->bcast_nodes, addr);
111
112 /* Last node? => reset backlog queue */
113 if (!tn->bclink->bcast_nodes.count)
114 tipc_link_purge_backlog(&tn->bclink->link);
115
116 tipc_bclink_unlock(net);
117} 73}
118 74
119static void bclink_set_last_sent(struct net *net) 75/* tipc_bcbase_select_primary(): find a bearer with links to all destinations,
 76 * if any, and make it the primary bearer
77 */
78static void tipc_bcbase_select_primary(struct net *net)
120{ 79{
121 struct tipc_net *tn = net_generic(net, tipc_net_id); 80 struct tipc_bc_base *bb = tipc_bc_base(net);
122 struct tipc_link *bcl = tn->bcl; 81 int all_dests = tipc_link_bc_peers(bb->link);
82 int i, mtu;
123 83
124 bcl->silent_intv_cnt = mod(bcl->snd_nxt - 1); 84 bb->primary_bearer = INVALID_BEARER_ID;
125}
126 85
127u32 tipc_bclink_get_last_sent(struct net *net) 86 if (!all_dests)
128{ 87 return;
129 struct tipc_net *tn = net_generic(net, tipc_net_id);
130 88
131 return tn->bcl->silent_intv_cnt; 89 for (i = 0; i < MAX_BEARERS; i++) {
132} 90 if (!bb->dests[i])
91 continue;
133 92
134static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) 93 mtu = tipc_bearer_mtu(net, i);
135{ 94 if (mtu < tipc_link_mtu(bb->link))
136 node->bclink.last_sent = less_eq(node->bclink.last_sent, seqno) ? 95 tipc_link_set_mtu(bb->link, mtu);
137 seqno : node->bclink.last_sent;
138}
139 96
140/** 97 if (bb->dests[i] < all_dests)
141 * tipc_bclink_retransmit_to - get most recent node to request retransmission 98 continue;
142 *
143 * Called with bclink_lock locked
144 */
145struct tipc_node *tipc_bclink_retransmit_to(struct net *net)
146{
147 struct tipc_net *tn = net_generic(net, tipc_net_id);
148
149 return tn->bclink->retransmit_to;
150}
151 99
152/** 100 bb->primary_bearer = i;
153 * bclink_retransmit_pkt - retransmit broadcast packets
154 * @after: sequence number of last packet to *not* retransmit
155 * @to: sequence number of last packet to retransmit
156 *
157 * Called with bclink_lock locked
158 */
159static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to)
160{
161 struct sk_buff *skb;
162 struct tipc_link *bcl = tn->bcl;
163 101
164 skb_queue_walk(&bcl->transmq, skb) { 102 /* Reduce risk that all nodes select same primary */
165 if (more(buf_seqno(skb), after)) { 103 if ((i ^ tipc_own_addr(net)) & 1)
166 tipc_link_retransmit(bcl, skb, mod(to - after));
167 break; 104 break;
168 }
169 } 105 }
170} 106}
171 107
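The final parity test spreads load when several bearers qualify: a node keeps the first qualifying bearer whose index differs from its own address in the low bit, otherwise the last qualifying one. A minimal extraction of that logic, with a hypothetical helper name:

static int pick_primary(u32 own_addr, const int *qualifying, int n)
{
	int i, primary = -1;

	for (i = 0; i < n; i++) {
		primary = qualifying[i];
		if ((primary ^ own_addr) & 1)	/* parities differ: stop */
			break;
	}
	return primary;
}

/* With qualifying bearers {0, 1}: an odd-addressed node picks 0,
 * an even-addressed node picks 1.
 */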
172/** 108void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id)
173 * bclink_prepare_wakeup - prepare users for wakeup after congestion
174 * @bcl: broadcast link
175 * @resultq: queue for users which can be woken up
176 * Move a number of waiting users, as permitted by available space in
177 * the send queue, from link wait queue to specified queue for wakeup
178 */
179static void bclink_prepare_wakeup(struct tipc_link *bcl, struct sk_buff_head *resultq)
180{ 109{
181 int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,}; 110 struct tipc_bc_base *bb = tipc_bc_base(net);
182 int imp, lim;
183 struct sk_buff *skb, *tmp;
184
185 skb_queue_walk_safe(&bcl->wakeupq, skb, tmp) {
186 imp = TIPC_SKB_CB(skb)->chain_imp;
187 lim = bcl->window + bcl->backlog[imp].limit;
188 pnd[imp] += TIPC_SKB_CB(skb)->chain_sz;
189 if ((pnd[imp] + bcl->backlog[imp].len) >= lim)
190 continue;
191 skb_unlink(skb, &bcl->wakeupq);
192 skb_queue_tail(resultq, skb);
193 }
194}
195 111
196/** 112 tipc_bcast_lock(net);
197 * tipc_bclink_wakeup_users - wake up pending users 113 bb->dests[bearer_id]++;
198 * 114 tipc_bcbase_select_primary(net);
199 * Called with no locks taken 115 tipc_bcast_unlock(net);
200 */
201void tipc_bclink_wakeup_users(struct net *net)
202{
203 struct tipc_net *tn = net_generic(net, tipc_net_id);
204 struct tipc_link *bcl = tn->bcl;
205 struct sk_buff_head resultq;
206
207 skb_queue_head_init(&resultq);
208 bclink_prepare_wakeup(bcl, &resultq);
209 tipc_sk_rcv(net, &resultq);
210} 116}
211 117
212/** 118void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id)
213 * tipc_bclink_acknowledge - handle acknowledgement of broadcast packets
214 * @n_ptr: node that sent acknowledgement info
215 * @acked: broadcast sequence # that has been acknowledged
216 *
217 * Node is locked, bclink_lock unlocked.
218 */
219void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
220{ 119{
221 struct sk_buff *skb, *tmp; 120 struct tipc_bc_base *bb = tipc_bc_base(net);
222 unsigned int released = 0;
223 struct net *net = n_ptr->net;
224 struct tipc_net *tn = net_generic(net, tipc_net_id);
225
226 if (unlikely(!n_ptr->bclink.recv_permitted))
227 return;
228
229 tipc_bclink_lock(net);
230
231 /* Bail out if tx queue is empty (no clean up is required) */
232 skb = skb_peek(&tn->bcl->transmq);
233 if (!skb)
234 goto exit;
235
236 /* Determine which messages need to be acknowledged */
237 if (acked == INVALID_LINK_SEQ) {
238 /*
239 * Contact with specified node has been lost, so need to
240 * acknowledge sent messages only (if other nodes still exist)
241 * or both sent and unsent messages (otherwise)
242 */
243 if (tn->bclink->bcast_nodes.count)
244 acked = tn->bcl->silent_intv_cnt;
245 else
246 acked = tn->bcl->snd_nxt;
247 } else {
248 /*
249 * Bail out if specified sequence number does not correspond
250 * to a message that has been sent and not yet acknowledged
251 */
252 if (less(acked, buf_seqno(skb)) ||
253 less(tn->bcl->silent_intv_cnt, acked) ||
254 less_eq(acked, n_ptr->bclink.acked))
255 goto exit;
256 }
257
258 /* Skip over packets that node has previously acknowledged */
259 skb_queue_walk(&tn->bcl->transmq, skb) {
260 if (more(buf_seqno(skb), n_ptr->bclink.acked))
261 break;
262 }
263
264 /* Update packets that node is now acknowledging */
265 skb_queue_walk_from_safe(&tn->bcl->transmq, skb, tmp) {
266 if (more(buf_seqno(skb), acked))
267 break;
268 bcbuf_decr_acks(skb);
269 bclink_set_last_sent(net);
270 if (bcbuf_acks(skb) == 0) {
271 __skb_unlink(skb, &tn->bcl->transmq);
272 kfree_skb(skb);
273 released = 1;
274 }
275 }
276 n_ptr->bclink.acked = acked;
277 121
278 /* Try resolving broadcast link congestion, if necessary */ 122 tipc_bcast_lock(net);
279 if (unlikely(skb_peek(&tn->bcl->backlogq))) { 123 bb->dests[bearer_id]--;
280 tipc_link_push_packets(tn->bcl); 124 tipc_bcbase_select_primary(net);
281 bclink_set_last_sent(net); 125 tipc_bcast_unlock(net);
282 }
283 if (unlikely(released && !skb_queue_empty(&tn->bcl->wakeupq)))
284 n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS;
285exit:
286 tipc_bclink_unlock(net);
287} 126}
288 127
289/** 128/* tipc_bcbase_xmit - broadcast a packet queue across one or more bearers
290 * tipc_bclink_update_link_state - update broadcast link state
291 * 129 *
292 * RCU and node lock set 130 * Note that number of reachable destinations, as indicated in the dests[]
131 * array, may transitionally differ from the number of destinations indicated
132 * in each sent buffer. We can sustain this. Excess destination nodes will
133 * drop and never acknowledge the unexpected packets, and missing destinations
134 * will either require retransmission (if they are just about to be added to
135 * the bearer), or be removed from the buffer's 'ackers' counter (if they
136 * just went down)
293 */ 137 */
294void tipc_bclink_update_link_state(struct tipc_node *n_ptr, 138static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq)
295 u32 last_sent)
296{ 139{
297 struct sk_buff *buf; 140 int bearer_id;
298 struct net *net = n_ptr->net; 141 struct tipc_bc_base *bb = tipc_bc_base(net);
299 struct tipc_net *tn = net_generic(net, tipc_net_id); 142 struct sk_buff *skb, *_skb;
143 struct sk_buff_head _xmitq;
300 144
301 /* Ignore "stale" link state info */ 145 if (skb_queue_empty(xmitq))
302 if (less_eq(last_sent, n_ptr->bclink.last_in))
303 return; 146 return;
304 147
305 /* Update link synchronization state; quit if in sync */ 148 /* The typical case: at least one bearer has links to all nodes */
306 bclink_update_last_sent(n_ptr, last_sent); 149 bearer_id = bb->primary_bearer;
307 150 if (bearer_id >= 0) {
308 if (n_ptr->bclink.last_sent == n_ptr->bclink.last_in) 151 tipc_bearer_bc_xmit(net, bearer_id, xmitq);
309 return; 152 return;
310
311 /* Update out-of-sync state; quit if loss is still unconfirmed */
312 if ((++n_ptr->bclink.oos_state) == 1) {
313 if (n_ptr->bclink.deferred_size < (TIPC_MIN_LINK_WIN / 2))
314 return;
315 n_ptr->bclink.oos_state++;
316 } 153 }
317 154
318 /* Don't NACK if one has been recently sent (or seen) */ 155 /* We have to transmit across all bearers */
319 if (n_ptr->bclink.oos_state & 0x1) 156 skb_queue_head_init(&_xmitq);
320 return; 157 for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
321 158 if (!bb->dests[bearer_id])
322 /* Send NACK */ 159 continue;
323 buf = tipc_buf_acquire(INT_H_SIZE);
324 if (buf) {
325 struct tipc_msg *msg = buf_msg(buf);
326 struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferdq);
327 u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent;
328
329 tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG,
330 INT_H_SIZE, n_ptr->addr);
331 msg_set_non_seq(msg, 1);
332 msg_set_mc_netid(msg, tn->net_id);
333 msg_set_bcast_ack(msg, n_ptr->bclink.last_in);
334 msg_set_bcgap_after(msg, n_ptr->bclink.last_in);
335 msg_set_bcgap_to(msg, to);
336
337 tipc_bclink_lock(net);
338 tipc_bearer_send(net, MAX_BEARERS, buf, NULL);
339 tn->bcl->stats.sent_nacks++;
340 tipc_bclink_unlock(net);
341 kfree_skb(buf);
342
343 n_ptr->bclink.oos_state++;
344 }
345}
346
347void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *hdr)
348{
349 u16 last = msg_last_bcast(hdr);
350 int mtyp = msg_type(hdr);
351 160
352 if (unlikely(msg_user(hdr) != LINK_PROTOCOL)) 161 skb_queue_walk(xmitq, skb) {
353 return; 162 _skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
354 if (mtyp == STATE_MSG) { 163 if (!_skb)
355 tipc_bclink_update_link_state(n, last); 164 break;
356 return; 165 __skb_queue_tail(&_xmitq, _skb);
166 }
167 tipc_bearer_bc_xmit(net, bearer_id, &_xmitq);
357 } 168 }
358 /* Compatibility: older nodes don't know BCAST_PROTOCOL synchronization, 169 __skb_queue_purge(xmitq);
359 * and transfer synch info in LINK_PROTOCOL messages. 170 __skb_queue_purge(&_xmitq);
360 */
361 if (tipc_node_is_up(n))
362 return;
363 if ((mtyp != RESET_MSG) && (mtyp != ACTIVATE_MSG))
364 return;
365 n->bclink.last_sent = last;
366 n->bclink.last_in = last;
367 n->bclink.oos_state = 0;
368} 171}
369 172
370/** 173/* tipc_bcast_xmit - deliver buffer chain to all nodes in cluster
371 * bclink_peek_nack - monitor retransmission requests sent by other nodes
372 *
373 * Delay any upcoming NACK by this node if another node has already
374 * requested the first message this node is going to ask for.
375 */
376static void bclink_peek_nack(struct net *net, struct tipc_msg *msg)
377{
378 struct tipc_node *n_ptr = tipc_node_find(net, msg_destnode(msg));
379
380 if (unlikely(!n_ptr))
381 return;
382
383 tipc_node_lock(n_ptr);
384 if (n_ptr->bclink.recv_permitted &&
385 (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) &&
386 (n_ptr->bclink.last_in == msg_bcgap_after(msg)))
387 n_ptr->bclink.oos_state = 2;
388 tipc_node_unlock(n_ptr);
389 tipc_node_put(n_ptr);
390}
391
392/* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster
393 * and to identified node local sockets 174 * and to identified node local sockets
394 * @net: the applicable net namespace 175 * @net: the applicable net namespace
395 * @list: chain of buffers containing message 176 * @list: chain of buffers containing message
396 * Consumes the buffer chain, except when returning -ELINKCONG 177 * Consumes the buffer chain, except when returning -ELINKCONG
397 * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE 178 * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
398 */ 179 */
399int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) 180int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list)
400{ 181{
401 struct tipc_net *tn = net_generic(net, tipc_net_id); 182 struct tipc_link *l = tipc_bc_sndlink(net);
402 struct tipc_link *bcl = tn->bcl; 183 struct sk_buff_head xmitq, inputq, rcvq;
403 struct tipc_bclink *bclink = tn->bclink;
404 int rc = 0; 184 int rc = 0;
405 int bc = 0;
406 struct sk_buff *skb;
407 struct sk_buff_head arrvq;
408 struct sk_buff_head inputq;
409 185
410 /* Prepare clone of message for local node */ 186 __skb_queue_head_init(&rcvq);
411 skb = tipc_msg_reassemble(list); 187 __skb_queue_head_init(&xmitq);
412 if (unlikely(!skb)) 188 skb_queue_head_init(&inputq);
413 return -EHOSTUNREACH;
414 189
415 /* Broadcast to all nodes */ 190 /* Prepare message clone for local node */
416 if (likely(bclink)) { 191 if (unlikely(!tipc_msg_reassemble(list, &rcvq)))
417 tipc_bclink_lock(net); 192 return -EHOSTUNREACH;
418 if (likely(bclink->bcast_nodes.count)) {
419 rc = __tipc_link_xmit(net, bcl, list);
420 if (likely(!rc)) {
421 u32 len = skb_queue_len(&bcl->transmq);
422
423 bclink_set_last_sent(net);
424 bcl->stats.queue_sz_counts++;
425 bcl->stats.accu_queue_sz += len;
426 }
427 bc = 1;
428 }
429 tipc_bclink_unlock(net);
430 }
431 193
432 if (unlikely(!bc)) 194 tipc_bcast_lock(net);
433 __skb_queue_purge(list); 195 if (tipc_link_bc_peers(l))
196 rc = tipc_link_xmit(l, list, &xmitq);
197 tipc_bcast_unlock(net);
434 198
199 /* Don't send to local node if adding to link failed */
435 if (unlikely(rc)) { 200 if (unlikely(rc)) {
436 kfree_skb(skb); 201 __skb_queue_purge(&rcvq);
437 return rc; 202 return rc;
438 } 203 }
439 /* Deliver message clone */
440 __skb_queue_head_init(&arrvq);
441 skb_queue_head_init(&inputq);
442 __skb_queue_tail(&arrvq, skb);
443 tipc_sk_mcast_rcv(net, &arrvq, &inputq);
444 return rc;
445}
446 204
 447/** 205 /* Broadcast to all nodes, including local node */
448 * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet 206 tipc_bcbase_xmit(net, &xmitq);
449 * 207 tipc_sk_mcast_rcv(net, &rcvq, &inputq);
450 * Called with both sending node's lock and bclink_lock taken. 208 __skb_queue_purge(list);
451 */ 209 return 0;
452static void bclink_accept_pkt(struct tipc_node *node, u32 seqno)
453{
454 struct tipc_net *tn = net_generic(node->net, tipc_net_id);
455
456 bclink_update_last_sent(node, seqno);
457 node->bclink.last_in = seqno;
458 node->bclink.oos_state = 0;
459 tn->bcl->stats.recv_info++;
460
461 /*
462 * Unicast an ACK periodically, ensuring that
463 * all nodes in the cluster don't ACK at the same time
464 */
465 if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) {
466 tipc_link_proto_xmit(node_active_link(node, node->addr),
467 STATE_MSG, 0, 0, 0, 0);
468 tn->bcl->stats.sent_acks++;
469 }
470} 210}
471 211
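A hedged sketch of a caller honouring the ownership rule documented above; the function name is illustrative:

static int example_bcast_send(struct net *net, struct sk_buff_head *pkts)
{
	int rc = tipc_bcast_xmit(net, pkts);

	if (rc == -ELINKCONG)
		return rc;	/* pkts still owned by caller; may retry */

	return rc;		/* 0, -EHOSTUNREACH or -EMSGSIZE: consumed */
}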
472/** 212/* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link
473 * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards
474 * 213 *
475 * RCU is locked, no other locks set 214 * RCU is locked, no other locks set
476 */ 215 */
477void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) 216int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb)
478{ 217{
479 struct tipc_net *tn = net_generic(net, tipc_net_id); 218 struct tipc_msg *hdr = buf_msg(skb);
480 struct tipc_link *bcl = tn->bcl; 219 struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
481 struct tipc_msg *msg = buf_msg(buf); 220 struct sk_buff_head xmitq;
482 struct tipc_node *node; 221 int rc;
483 u32 next_in;
484 u32 seqno;
485 int deferred = 0;
486 int pos = 0;
487 struct sk_buff *iskb;
488 struct sk_buff_head *arrvq, *inputq;
489
490 /* Screen out unwanted broadcast messages */
491 if (msg_mc_netid(msg) != tn->net_id)
492 goto exit;
493
494 node = tipc_node_find(net, msg_prevnode(msg));
495 if (unlikely(!node))
496 goto exit;
497
498 tipc_node_lock(node);
499 if (unlikely(!node->bclink.recv_permitted))
500 goto unlock;
501
502 /* Handle broadcast protocol message */
503 if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) {
504 if (msg_type(msg) != STATE_MSG)
505 goto unlock;
506 if (msg_destnode(msg) == tn->own_addr) {
507 tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
508 tipc_bclink_lock(net);
509 bcl->stats.recv_nacks++;
510 tn->bclink->retransmit_to = node;
511 bclink_retransmit_pkt(tn, msg_bcgap_after(msg),
512 msg_bcgap_to(msg));
513 tipc_bclink_unlock(net);
514 tipc_node_unlock(node);
515 } else {
516 tipc_node_unlock(node);
517 bclink_peek_nack(net, msg);
518 }
519 tipc_node_put(node);
520 goto exit;
521 }
522
523 /* Handle in-sequence broadcast message */
524 seqno = msg_seqno(msg);
525 next_in = mod(node->bclink.last_in + 1);
526 arrvq = &tn->bclink->arrvq;
527 inputq = &tn->bclink->inputq;
528
529 if (likely(seqno == next_in)) {
530receive:
531 /* Deliver message to destination */
532 if (likely(msg_isdata(msg))) {
533 tipc_bclink_lock(net);
534 bclink_accept_pkt(node, seqno);
535 spin_lock_bh(&inputq->lock);
536 __skb_queue_tail(arrvq, buf);
537 spin_unlock_bh(&inputq->lock);
538 node->action_flags |= TIPC_BCAST_MSG_EVT;
539 tipc_bclink_unlock(net);
540 tipc_node_unlock(node);
541 } else if (msg_user(msg) == MSG_BUNDLER) {
542 tipc_bclink_lock(net);
543 bclink_accept_pkt(node, seqno);
544 bcl->stats.recv_bundles++;
545 bcl->stats.recv_bundled += msg_msgcnt(msg);
546 pos = 0;
547 while (tipc_msg_extract(buf, &iskb, &pos)) {
548 spin_lock_bh(&inputq->lock);
549 __skb_queue_tail(arrvq, iskb);
550 spin_unlock_bh(&inputq->lock);
551 }
552 node->action_flags |= TIPC_BCAST_MSG_EVT;
553 tipc_bclink_unlock(net);
554 tipc_node_unlock(node);
555 } else if (msg_user(msg) == MSG_FRAGMENTER) {
556 tipc_bclink_lock(net);
557 bclink_accept_pkt(node, seqno);
558 tipc_buf_append(&node->bclink.reasm_buf, &buf);
559 if (unlikely(!buf && !node->bclink.reasm_buf)) {
560 tipc_bclink_unlock(net);
561 goto unlock;
562 }
563 bcl->stats.recv_fragments++;
564 if (buf) {
565 bcl->stats.recv_fragmented++;
566 msg = buf_msg(buf);
567 tipc_bclink_unlock(net);
568 goto receive;
569 }
570 tipc_bclink_unlock(net);
571 tipc_node_unlock(node);
572 } else {
573 tipc_bclink_lock(net);
574 bclink_accept_pkt(node, seqno);
575 tipc_bclink_unlock(net);
576 tipc_node_unlock(node);
577 kfree_skb(buf);
578 }
579 buf = NULL;
580
581 /* Determine new synchronization state */
582 tipc_node_lock(node);
583 if (unlikely(!tipc_node_is_up(node)))
584 goto unlock;
585 222
586 if (node->bclink.last_in == node->bclink.last_sent) 223 __skb_queue_head_init(&xmitq);
587 goto unlock;
588 224
589 if (skb_queue_empty(&node->bclink.deferdq)) { 225 if (msg_mc_netid(hdr) != tipc_netid(net) || !tipc_link_is_up(l)) {
590 node->bclink.oos_state = 1; 226 kfree_skb(skb);
591 goto unlock; 227 return 0;
592 }
593
594 msg = buf_msg(skb_peek(&node->bclink.deferdq));
595 seqno = msg_seqno(msg);
596 next_in = mod(next_in + 1);
597 if (seqno != next_in)
598 goto unlock;
599
600 /* Take in-sequence message from deferred queue & deliver it */
601 buf = __skb_dequeue(&node->bclink.deferdq);
602 goto receive;
603 }
604
605 /* Handle out-of-sequence broadcast message */
606 if (less(next_in, seqno)) {
607 deferred = tipc_link_defer_pkt(&node->bclink.deferdq,
608 buf);
609 bclink_update_last_sent(node, seqno);
610 buf = NULL;
611 } 228 }
612 229
613 tipc_bclink_lock(net); 230 tipc_bcast_lock(net);
614 231 if (msg_user(hdr) == BCAST_PROTOCOL)
615 if (deferred) 232 rc = tipc_link_bc_nack_rcv(l, skb, &xmitq);
616 bcl->stats.deferred_recv++;
617 else 233 else
618 bcl->stats.duplicates++; 234 rc = tipc_link_rcv(l, skb, NULL);
235 tipc_bcast_unlock(net);
619 236
620 tipc_bclink_unlock(net); 237 tipc_bcbase_xmit(net, &xmitq);
621 238
 622unlock: 239 /* Any socket wakeup messages? */
623 tipc_node_unlock(node); 240 if (!skb_queue_empty(inputq))
624 tipc_node_put(node); 241 tipc_sk_rcv(net, inputq);
625exit:
626 kfree_skb(buf);
627}
628 242
629u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) 243 return rc;
630{
631 return (n_ptr->bclink.recv_permitted &&
632 (tipc_bclink_get_last_sent(n_ptr->net) != n_ptr->bclink.acked));
633} 244}
634 245
635 246/* tipc_bcast_ack_rcv - receive and handle a broadcast acknowledge
636/**
637 * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer
638 *
639 * Send packet over as many bearers as necessary to reach all nodes
640 * that have joined the broadcast link.
641 * 247 *
642 * Returns 0 (packet sent successfully) under all circumstances, 248 * RCU is locked, no other locks set
643 * since the broadcast link's pseudo-bearer never blocks
644 */ 249 */
645static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, 250void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked)
646 struct tipc_bearer *unused1,
647 struct tipc_media_addr *unused2)
648{ 251{
649 int bp_index; 252 struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
650 struct tipc_msg *msg = buf_msg(buf); 253 struct sk_buff_head xmitq;
651 struct tipc_net *tn = net_generic(net, tipc_net_id);
652 struct tipc_bcbearer *bcbearer = tn->bcbearer;
653 struct tipc_bclink *bclink = tn->bclink;
654
655 /* Prepare broadcast link message for reliable transmission,
656 * if first time trying to send it;
657 * preparation is skipped for broadcast link protocol messages
658 * since they are sent in an unreliable manner and don't need it
659 */
660 if (likely(!msg_non_seq(buf_msg(buf)))) {
661 bcbuf_set_acks(buf, bclink->bcast_nodes.count);
662 msg_set_non_seq(msg, 1);
663 msg_set_mc_netid(msg, tn->net_id);
664 tn->bcl->stats.sent_info++;
665 if (WARN_ON(!bclink->bcast_nodes.count)) {
666 dump_stack();
667 return 0;
668 }
669 }
670 254
671 /* Send buffer over bearers until all targets reached */ 255 __skb_queue_head_init(&xmitq);
672 bcbearer->remains = bclink->bcast_nodes;
673
674 for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
675 struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary;
676 struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary;
677 struct tipc_bearer *bp[2] = {p, s};
678 struct tipc_bearer *b = bp[msg_link_selector(msg)];
679 struct sk_buff *tbuf;
680
681 if (!p)
682 break; /* No more bearers to try */
683 if (!b)
684 b = p;
685 tipc_nmap_diff(&bcbearer->remains, &b->nodes,
686 &bcbearer->remains_new);
687 if (bcbearer->remains_new.count == bcbearer->remains.count)
688 continue; /* Nothing added by bearer pair */
689
690 if (bp_index == 0) {
691 /* Use original buffer for first bearer */
692 tipc_bearer_send(net, b->identity, buf, &b->bcast_addr);
693 } else {
694 /* Avoid concurrent buffer access */
695 tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC);
696 if (!tbuf)
697 break;
698 tipc_bearer_send(net, b->identity, tbuf,
699 &b->bcast_addr);
700 kfree_skb(tbuf); /* Bearer keeps a clone */
701 }
702 if (bcbearer->remains_new.count == 0)
703 break; /* All targets reached */
704 256
705 bcbearer->remains = bcbearer->remains_new; 257 tipc_bcast_lock(net);
706 } 258 tipc_link_bc_ack_rcv(l, acked, &xmitq);
259 tipc_bcast_unlock(net);
707 260
708 return 0; 261 tipc_bcbase_xmit(net, &xmitq);
262
 263 /* Any socket wakeup messages? */
264 if (!skb_queue_empty(inputq))
265 tipc_sk_rcv(net, inputq);
709} 266}
710 267
 711/** 268/* tipc_bcast_sync_rcv - check and update rcv link with peer's send state
712 * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer 269 *
270 * RCU is locked, no other locks set
713 */ 271 */
714void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, 272void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
715 u32 node, bool action) 273 struct tipc_msg *hdr)
716{ 274{
717 struct tipc_net *tn = net_generic(net, tipc_net_id); 275 struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
718 struct tipc_bcbearer *bcbearer = tn->bcbearer; 276 struct sk_buff_head xmitq;
719 struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp;
720 struct tipc_bcbearer_pair *bp_curr;
721 struct tipc_bearer *b;
722 int b_index;
723 int pri;
724 277
725 tipc_bclink_lock(net); 278 __skb_queue_head_init(&xmitq);
726 279
727 if (action) 280 tipc_bcast_lock(net);
728 tipc_nmap_add(nm_ptr, node); 281 if (msg_type(hdr) == STATE_MSG) {
729 else 282 tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq);
730 tipc_nmap_remove(nm_ptr, node); 283 tipc_link_bc_sync_rcv(l, hdr, &xmitq);
284 } else {
285 tipc_link_bc_init_rcv(l, hdr);
286 }
287 tipc_bcast_unlock(net);
731 288
732 /* Group bearers by priority (can assume max of two per priority) */ 289 tipc_bcbase_xmit(net, &xmitq);
733 memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp));
734 290
 735 rcu_read_lock(); 291 /* Any socket wakeup messages? */
736 for (b_index = 0; b_index < MAX_BEARERS; b_index++) { 292 if (!skb_queue_empty(inputq))
737 b = rcu_dereference_rtnl(tn->bearer_list[b_index]); 293 tipc_sk_rcv(net, inputq);
738 if (!b || !b->nodes.count) 294}
739 continue;
740
741 if (!bp_temp[b->priority].primary)
742 bp_temp[b->priority].primary = b;
743 else
744 bp_temp[b->priority].secondary = b;
745 }
746 rcu_read_unlock();
747 295
748 /* Create array of bearer pairs for broadcasting */ 296/* tipc_bcast_add_peer - add a peer node to broadcast link and bearer
749 bp_curr = bcbearer->bpairs; 297 *
750 memset(bcbearer->bpairs, 0, sizeof(bcbearer->bpairs)); 298 * RCU is locked, node lock is set
299 */
300void tipc_bcast_add_peer(struct net *net, struct tipc_link *uc_l,
301 struct sk_buff_head *xmitq)
302{
303 struct tipc_link *snd_l = tipc_bc_sndlink(net);
751 304
752 for (pri = TIPC_MAX_LINK_PRI; pri >= 0; pri--) { 305 tipc_bcast_lock(net);
306 tipc_link_add_bc_peer(snd_l, uc_l, xmitq);
307 tipc_bcbase_select_primary(net);
308 tipc_bcast_unlock(net);
309}
753 310
754 if (!bp_temp[pri].primary) 311/* tipc_bcast_remove_peer - remove a peer node from broadcast link and bearer
755 continue; 312 *
313 * RCU is locked, node lock is set
314 */
315void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l)
316{
317 struct tipc_link *snd_l = tipc_bc_sndlink(net);
318 struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
319 struct sk_buff_head xmitq;
756 320
757 bp_curr->primary = bp_temp[pri].primary; 321 __skb_queue_head_init(&xmitq);
758 322
759 if (bp_temp[pri].secondary) { 323 tipc_bcast_lock(net);
760 if (tipc_nmap_equal(&bp_temp[pri].primary->nodes, 324 tipc_link_remove_bc_peer(snd_l, rcv_l, &xmitq);
761 &bp_temp[pri].secondary->nodes)) { 325 tipc_bcbase_select_primary(net);
762 bp_curr->secondary = bp_temp[pri].secondary; 326 tipc_bcast_unlock(net);
763 } else {
764 bp_curr++;
765 bp_curr->primary = bp_temp[pri].secondary;
766 }
767 }
768 327
769 bp_curr++; 328 tipc_bcbase_xmit(net, &xmitq);
770 }
771 329
 772 tipc_bclink_unlock(net); 330 /* Any socket wakeup messages? */
331 if (!skb_queue_empty(inputq))
332 tipc_sk_rcv(net, inputq);
773} 333}
774 334
775static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, 335static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb,
@@ -835,7 +395,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
835 if (!bcl) 395 if (!bcl)
836 return 0; 396 return 0;
837 397
838 tipc_bclink_lock(net); 398 tipc_bcast_lock(net);
839 399
840 hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, 400 hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
841 NLM_F_MULTI, TIPC_NL_LINK_GET); 401 NLM_F_MULTI, TIPC_NL_LINK_GET);
@@ -870,7 +430,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
870 if (err) 430 if (err)
871 goto attr_msg_full; 431 goto attr_msg_full;
872 432
873 tipc_bclink_unlock(net); 433 tipc_bcast_unlock(net);
874 nla_nest_end(msg->skb, attrs); 434 nla_nest_end(msg->skb, attrs);
875 genlmsg_end(msg->skb, hdr); 435 genlmsg_end(msg->skb, hdr);
876 436
@@ -881,7 +441,7 @@ prop_msg_full:
881attr_msg_full: 441attr_msg_full:
882 nla_nest_cancel(msg->skb, attrs); 442 nla_nest_cancel(msg->skb, attrs);
883msg_full: 443msg_full:
884 tipc_bclink_unlock(net); 444 tipc_bcast_unlock(net);
885 genlmsg_cancel(msg->skb, hdr); 445 genlmsg_cancel(msg->skb, hdr);
886 446
887 return -EMSGSIZE; 447 return -EMSGSIZE;
@@ -895,25 +455,25 @@ int tipc_bclink_reset_stats(struct net *net)
895 if (!bcl) 455 if (!bcl)
896 return -ENOPROTOOPT; 456 return -ENOPROTOOPT;
897 457
898 tipc_bclink_lock(net); 458 tipc_bcast_lock(net);
899 memset(&bcl->stats, 0, sizeof(bcl->stats)); 459 memset(&bcl->stats, 0, sizeof(bcl->stats));
900 tipc_bclink_unlock(net); 460 tipc_bcast_unlock(net);
901 return 0; 461 return 0;
902} 462}
903 463
904int tipc_bclink_set_queue_limits(struct net *net, u32 limit) 464static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit)
905{ 465{
906 struct tipc_net *tn = net_generic(net, tipc_net_id); 466 struct tipc_link *l = tipc_bc_sndlink(net);
907 struct tipc_link *bcl = tn->bcl;
908 467
909 if (!bcl) 468 if (!l)
910 return -ENOPROTOOPT; 469 return -ENOPROTOOPT;
911 if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) 470 if (limit < BCLINK_WIN_MIN)
471 limit = BCLINK_WIN_MIN;
472 if (limit > TIPC_MAX_LINK_WIN)
912 return -EINVAL; 473 return -EINVAL;
913 474 tipc_bcast_lock(net);
914 tipc_bclink_lock(net); 475 tipc_link_set_queue_limits(l, limit);
915 tipc_link_set_queue_limits(bcl, limit); 476 tipc_bcast_unlock(net);
916 tipc_bclink_unlock(net);
917 return 0; 477 return 0;
918} 478}
919 479
@@ -935,123 +495,51 @@ int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[])
935 495
936 win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); 496 win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
937 497
938 return tipc_bclink_set_queue_limits(net, win); 498 return tipc_bc_link_set_queue_limits(net, win);
939} 499}
940 500
941int tipc_bclink_init(struct net *net) 501int tipc_bcast_init(struct net *net)
942{ 502{
943 struct tipc_net *tn = net_generic(net, tipc_net_id); 503 struct tipc_net *tn = tipc_net(net);
944 struct tipc_bcbearer *bcbearer; 504 struct tipc_bc_base *bb = NULL;
945 struct tipc_bclink *bclink; 505 struct tipc_link *l = NULL;
946 struct tipc_link *bcl;
947
948 bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC);
949 if (!bcbearer)
950 return -ENOMEM;
951
952 bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC);
953 if (!bclink) {
954 kfree(bcbearer);
955 return -ENOMEM;
956 }
957 506
958 bcl = &bclink->link; 507 bb = kzalloc(sizeof(*bb), GFP_ATOMIC);
959 bcbearer->bearer.media = &bcbearer->media; 508 if (!bb)
960 bcbearer->media.send_msg = tipc_bcbearer_send; 509 goto enomem;
961 sprintf(bcbearer->media.name, "tipc-broadcast"); 510 tn->bcbase = bb;
962 511 spin_lock_init(&tipc_net(net)->bclock);
963 spin_lock_init(&bclink->lock);
964 __skb_queue_head_init(&bcl->transmq);
965 __skb_queue_head_init(&bcl->backlogq);
966 __skb_queue_head_init(&bcl->deferdq);
967 skb_queue_head_init(&bcl->wakeupq);
968 bcl->snd_nxt = 1;
969 spin_lock_init(&bclink->node.lock);
970 __skb_queue_head_init(&bclink->arrvq);
971 skb_queue_head_init(&bclink->inputq);
972 bcl->owner = &bclink->node;
973 bcl->owner->net = net;
974 bcl->mtu = MAX_PKT_DEFAULT_MCAST;
975 tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
976 bcl->bearer_id = MAX_BEARERS;
977 rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer);
978 bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg;
979 msg_set_prevnode(bcl->pmsg, tn->own_addr);
980 strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
981 tn->bcbearer = bcbearer;
982 tn->bclink = bclink;
983 tn->bcl = bcl;
984 return 0;
985}
986 512
987void tipc_bclink_stop(struct net *net) 513 if (!tipc_link_bc_create(net, 0, 0,
988{ 514 U16_MAX,
989 struct tipc_net *tn = net_generic(net, tipc_net_id); 515 BCLINK_WIN_DEFAULT,
990 516 0,
991 tipc_bclink_lock(net); 517 &bb->inputq,
992 tipc_link_purge_queues(tn->bcl); 518 NULL,
993 tipc_bclink_unlock(net); 519 NULL,
994 520 &l))
995 RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL); 521 goto enomem;
996 synchronize_net(); 522 bb->link = l;
997 kfree(tn->bcbearer); 523 tn->bcl = l;
998 kfree(tn->bclink); 524 return 0;
525enomem:
526 kfree(bb);
527 kfree(l);
528 return -ENOMEM;
999} 529}
1000 530
1001/** 531void tipc_bcast_reinit(struct net *net)
1002 * tipc_nmap_add - add a node to a node map
1003 */
1004static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
1005{ 532{
1006 int n = tipc_node(node); 533 struct tipc_bc_base *b = tipc_bc_base(net);
1007 int w = n / WSIZE;
1008 u32 mask = (1 << (n % WSIZE));
1009 534
1010 if ((nm_ptr->map[w] & mask) == 0) { 535 msg_set_prevnode(b->link->pmsg, tipc_own_addr(net));
1011 nm_ptr->count++;
1012 nm_ptr->map[w] |= mask;
1013 }
1014} 536}
1015 537
1016/** 538void tipc_bcast_stop(struct net *net)
1017 * tipc_nmap_remove - remove a node from a node map
1018 */
1019static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
1020{ 539{
1021 int n = tipc_node(node); 540 struct tipc_net *tn = net_generic(net, tipc_net_id);
1022 int w = n / WSIZE;
1023 u32 mask = (1 << (n % WSIZE));
1024
1025 if ((nm_ptr->map[w] & mask) != 0) {
1026 nm_ptr->map[w] &= ~mask;
1027 nm_ptr->count--;
1028 }
1029}
1030 541
1031/** 542 synchronize_net();
1032 * tipc_nmap_diff - find differences between node maps 543 kfree(tn->bcbase);
1033 * @nm_a: input node map A 544 kfree(tn->bcl);
1034 * @nm_b: input node map B
1035 * @nm_diff: output node map A-B (i.e. nodes of A that are not in B)
1036 */
1037static void tipc_nmap_diff(struct tipc_node_map *nm_a,
1038 struct tipc_node_map *nm_b,
1039 struct tipc_node_map *nm_diff)
1040{
1041 int stop = ARRAY_SIZE(nm_a->map);
1042 int w;
1043 int b;
1044 u32 map;
1045
1046 memset(nm_diff, 0, sizeof(*nm_diff));
1047 for (w = 0; w < stop; w++) {
1048 map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]);
1049 nm_diff->map[w] = map;
1050 if (map != 0) {
1051 for (b = 0 ; b < WSIZE; b++) {
1052 if (map & (1 << b))
1053 nm_diff->count++;
1054 }
1055 }
1056 }
1057} 545}
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index d74c69bcf60b..2855b9356a15 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -37,102 +37,44 @@
37#ifndef _TIPC_BCAST_H 37#ifndef _TIPC_BCAST_H
38#define _TIPC_BCAST_H 38#define _TIPC_BCAST_H
39 39
40#include <linux/tipc_config.h> 40#include "core.h"
41#include "link.h"
42#include "node.h"
43 41
44/** 42struct tipc_node;
45 * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link 43struct tipc_msg;
46 * @primary: pointer to primary bearer 44struct tipc_nl_msg;
47 * @secondary: pointer to secondary bearer 45struct tipc_node_map;
48 *
49 * Bearers must have same priority and same set of reachable destinations
50 * to be paired.
51 */
52
53struct tipc_bcbearer_pair {
54 struct tipc_bearer *primary;
55 struct tipc_bearer *secondary;
56};
57
58#define BCBEARER MAX_BEARERS
59
60/**
61 * struct tipc_bcbearer - bearer used by broadcast link
62 * @bearer: (non-standard) broadcast bearer structure
63 * @media: (non-standard) broadcast media structure
64 * @bpairs: array of bearer pairs
65 * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort()
66 * @remains: temporary node map used by tipc_bcbearer_send()
 67 * @remains_new: temporary node map used by tipc_bcbearer_send() 68 *
68 *
69 * Note: The fields labelled "temporary" are incorporated into the bearer
70 * to avoid consuming potentially limited stack space through the use of
71 * large local variables within multicast routines. Concurrent access is
72 * prevented through use of the spinlock "bclink_lock".
73 */
74struct tipc_bcbearer {
75 struct tipc_bearer bearer;
76 struct tipc_media media;
77 struct tipc_bcbearer_pair bpairs[MAX_BEARERS];
78 struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1];
79 struct tipc_node_map remains;
80 struct tipc_node_map remains_new;
81};
82 46
83/** 47int tipc_bcast_init(struct net *net);
84 * struct tipc_bclink - link used for broadcast messages 48void tipc_bcast_reinit(struct net *net);
85 * @lock: spinlock governing access to structure 49void tipc_bcast_stop(struct net *net);
86 * @link: (non-standard) broadcast link structure 50void tipc_bcast_add_peer(struct net *net, struct tipc_link *l,
87 * @node: (non-standard) node structure representing b'cast link's peer node 51 struct sk_buff_head *xmitq);
88 * @bcast_nodes: map of broadcast-capable nodes 52void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl);
89 * @retransmit_to: node that most recently requested a retransmit 53void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id);
90 * 54void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id);
91 * Handles sequence numbering, fragmentation, bundling, etc. 55int tipc_bcast_get_mtu(struct net *net);
92 */ 56int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list);
93struct tipc_bclink { 57int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
94 spinlock_t lock; 58void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked);
95 struct tipc_link link; 59void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
96 struct tipc_node node; 60 struct tipc_msg *hdr);
97 struct sk_buff_head arrvq; 61int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
98 struct sk_buff_head inputq; 62int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
99 struct tipc_node_map bcast_nodes; 63int tipc_bclink_reset_stats(struct net *net);
100 struct tipc_node *retransmit_to;
101};
102 64
103struct tipc_node; 65static inline void tipc_bcast_lock(struct net *net)
104extern const char tipc_bclink_name[]; 66{
67 spin_lock_bh(&tipc_net(net)->bclock);
68}
105 69
106/** 70static inline void tipc_bcast_unlock(struct net *net)
107 * tipc_nmap_equal - test for equality of node maps
108 */
109static inline int tipc_nmap_equal(struct tipc_node_map *nm_a,
110 struct tipc_node_map *nm_b)
111{ 71{
112 return !memcmp(nm_a, nm_b, sizeof(*nm_a)); 72 spin_unlock_bh(&tipc_net(net)->bclock);
113} 73}
114 74
115int tipc_bclink_init(struct net *net); 75static inline struct tipc_link *tipc_bc_sndlink(struct net *net)
116void tipc_bclink_stop(struct net *net); 76{
117void tipc_bclink_add_node(struct net *net, u32 addr); 77 return tipc_net(net)->bcl;
118void tipc_bclink_remove_node(struct net *net, u32 addr); 78}
119struct tipc_node *tipc_bclink_retransmit_to(struct net *tn);
120void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
121void tipc_bclink_rcv(struct net *net, struct sk_buff *buf);
122u32 tipc_bclink_get_last_sent(struct net *net);
123u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr);
124void tipc_bclink_update_link_state(struct tipc_node *node,
125 u32 last_sent);
126int tipc_bclink_reset_stats(struct net *net);
127int tipc_bclink_set_queue_limits(struct net *net, u32 limit);
128void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr,
129 u32 node, bool action);
130uint tipc_bclink_get_mtu(void);
131int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list);
132void tipc_bclink_wakeup_users(struct net *net);
133int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
134int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
135void tipc_bclink_input(struct net *net);
136void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *msg);
137 79
138#endif 80#endif
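Note: bcast.h now exposes the broadcast lock and send link directly through inline helpers. A minimal usage sketch, assuming a caller already holds a valid net pointer (the mtu update shown is illustrative, not taken from this patch; tipc_link_set_mtu() is added in link.c below):

	/* serialize an update against concurrent broadcast activity */
	tipc_bcast_lock(net);
	tipc_link_set_mtu(tipc_bc_sndlink(net), new_mtu);	/* new_mtu: caller-supplied */
	tipc_bcast_unlock(net);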
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index ce9f7bfc0b92..648f2a67f314 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -193,10 +193,8 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
 
 	rcu_read_lock();
 	b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
-	if (b_ptr) {
-		tipc_bcbearer_sort(net, &b_ptr->nodes, dest, true);
+	if (b_ptr)
 		tipc_disc_add_dest(b_ptr->link_req);
-	}
 	rcu_read_unlock();
 }
 
@@ -207,10 +205,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
 
 	rcu_read_lock();
 	b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
-	if (b_ptr) {
-		tipc_bcbearer_sort(net, &b_ptr->nodes, dest, false);
+	if (b_ptr)
 		tipc_disc_remove_dest(b_ptr->link_req);
-	}
 	rcu_read_unlock();
 }
 
@@ -362,6 +358,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr)
 	b_ptr->media->disable_media(b_ptr);
 
 	tipc_node_delete_links(net, b_ptr->identity);
+	RCU_INIT_POINTER(b_ptr->media_ptr, NULL);
 	if (b_ptr->link_req)
 		tipc_disc_delete(b_ptr->link_req);
 
@@ -399,16 +396,13 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
 
 /* tipc_disable_l2_media - detach TIPC bearer from an L2 interface
  *
- * Mark L2 bearer as inactive so that incoming buffers are thrown away,
- * then get worker thread to complete bearer cleanup.  (Can't do cleanup
- * here because cleanup code needs to sleep and caller holds spinlocks.)
+ * Mark L2 bearer as inactive so that incoming buffers are thrown away
  */
 void tipc_disable_l2_media(struct tipc_bearer *b)
 {
 	struct net_device *dev;
 
 	dev = (struct net_device *)rtnl_dereference(b->media_ptr);
-	RCU_INIT_POINTER(b->media_ptr, NULL);
 	RCU_INIT_POINTER(dev->tipc_ptr, NULL);
 	synchronize_net();
 	dev_put(dev);
@@ -420,10 +414,9 @@ void tipc_disable_l2_media(struct tipc_bearer *b)
  * @b_ptr: the bearer through which the packet is to be sent
  * @dest: peer destination address
  */
-int tipc_l2_send_msg(struct net *net, struct sk_buff *buf,
+int tipc_l2_send_msg(struct net *net, struct sk_buff *skb,
 		     struct tipc_bearer *b, struct tipc_media_addr *dest)
 {
-	struct sk_buff *clone;
 	struct net_device *dev;
 	int delta;
 
@@ -431,42 +424,48 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *buf,
 	if (!dev)
 		return 0;
 
-	clone = skb_clone(buf, GFP_ATOMIC);
-	if (!clone)
-		return 0;
-
-	delta = dev->hard_header_len - skb_headroom(buf);
+	delta = dev->hard_header_len - skb_headroom(skb);
 	if ((delta > 0) &&
-	    pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
-		kfree_skb(clone);
+	    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+		kfree_skb(skb);
 		return 0;
 	}
 
-	skb_reset_network_header(clone);
-	clone->dev = dev;
-	clone->protocol = htons(ETH_P_TIPC);
-	dev_hard_header(clone, dev, ETH_P_TIPC, dest->value,
-			dev->dev_addr, clone->len);
-	dev_queue_xmit(clone);
+	skb_reset_network_header(skb);
+	skb->dev = dev;
+	skb->protocol = htons(ETH_P_TIPC);
+	dev_hard_header(skb, dev, ETH_P_TIPC, dest->value,
+			dev->dev_addr, skb->len);
+	dev_queue_xmit(skb);
 	return 0;
 }
 
-/* tipc_bearer_send- sends buffer to destination over bearer
- *
- * IMPORTANT:
- * The media send routine must not alter the buffer being passed in
- * as it may be needed for later retransmission!
+int tipc_bearer_mtu(struct net *net, u32 bearer_id)
+{
+	int mtu = 0;
+	struct tipc_bearer *b;
+
+	rcu_read_lock();
+	b = rcu_dereference_rtnl(tipc_net(net)->bearer_list[bearer_id]);
+	if (b)
+		mtu = b->mtu;
+	rcu_read_unlock();
+	return mtu;
+}
+
+/* tipc_bearer_xmit_skb - sends buffer to destination over bearer
  */
-void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf,
-		      struct tipc_media_addr *dest)
+void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
+			  struct sk_buff *skb,
+			  struct tipc_media_addr *dest)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct tipc_bearer *b_ptr;
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_bearer *b;
 
 	rcu_read_lock();
-	b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
-	if (likely(b_ptr))
-		b_ptr->media->send_msg(net, buf, b_ptr, dest);
+	b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+	if (likely(b))
+		b->media->send_msg(net, skb, b, dest);
 	rcu_read_unlock();
 }
 
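Note: tipc_l2_send_msg() no longer clones internally; it now consumes the buffer it is given (freeing it if pskb_expand_head() fails, otherwise handing it to dev_queue_xmit()). A caller that must keep its buffer for later reuse therefore clones before transmitting. Illustrative pattern, mirroring the discover.c hunks further down (req/bearer_id/maddr stand in for the caller's own state):

	struct sk_buff *skb;

	skb = skb_clone(req->buf, GFP_ATOMIC);	/* keep req->buf for the next send */
	if (skb)
		tipc_bearer_xmit_skb(net, bearer_id, skb, &maddr);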
@@ -489,8 +488,31 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
 		skb_queue_walk_safe(xmitq, skb, tmp) {
 			__skb_dequeue(xmitq);
 			b->media->send_msg(net, skb, b, dst);
-			/* Until we remove cloning in tipc_l2_send_msg(): */
-			kfree_skb(skb);
+		}
+	}
+	rcu_read_unlock();
+}
+
+/* tipc_bearer_bc_xmit() - broadcast buffers to all destinations
+ */
+void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
+			 struct sk_buff_head *xmitq)
+{
+	struct tipc_net *tn = tipc_net(net);
+	int net_id = tn->net_id;
+	struct tipc_bearer *b;
+	struct sk_buff *skb, *tmp;
+	struct tipc_msg *hdr;
+
+	rcu_read_lock();
+	b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+	if (likely(b)) {
+		skb_queue_walk_safe(xmitq, skb, tmp) {
+			hdr = buf_msg(skb);
+			msg_set_non_seq(hdr, 1);
+			msg_set_mc_netid(hdr, net_id);
+			__skb_dequeue(xmitq);
+			b->media->send_msg(net, skb, b, &b->bcast_addr);
 		}
 	}
 	rcu_read_unlock();
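Note: tipc_bearer_bc_xmit() stamps every queued header as non-sequenced and tags it with the TIPC network id before handing it to the media layer, so callers only need to fill an xmitq. A hedged sketch of the calling pattern (the enqueue step is left abstract):

	struct sk_buff_head xmitq;

	__skb_queue_head_init(&xmitq);
	/* ... enqueue prepared broadcast buffers on xmitq ... */
	tipc_bearer_bc_xmit(net, bearer_id, &xmitq);	/* consumes the queue */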
@@ -554,7 +576,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
 	case NETDEV_CHANGE:
 		if (netif_carrier_ok(dev))
 			break;
-	case NETDEV_DOWN:
+	case NETDEV_GOING_DOWN:
 	case NETDEV_CHANGEMTU:
 		tipc_reset_bearer(net, b_ptr);
 		break;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 6426f242f626..552185bc4773 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -163,6 +163,7 @@ struct tipc_bearer {
 	u32 identity;
 	struct tipc_link_req *link_req;
 	char net_plane;
+	int node_cnt;
 	struct tipc_node_map nodes;
 };
 
@@ -215,10 +216,14 @@ struct tipc_media *tipc_media_find(const char *name);
 int tipc_bearer_setup(void);
 void tipc_bearer_cleanup(void);
 void tipc_bearer_stop(struct net *net);
-void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf,
-		      struct tipc_media_addr *dest);
+int tipc_bearer_mtu(struct net *net, u32 bearer_id);
+void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
+			  struct sk_buff *skb,
+			  struct tipc_media_addr *dest);
 void tipc_bearer_xmit(struct net *net, u32 bearer_id,
 		      struct sk_buff_head *xmitq,
 		      struct tipc_media_addr *dst);
+void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
+			 struct sk_buff_head *xmitq);
 
 #endif	/* _TIPC_BEARER_H */
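Note: tipc_bearer_mtu() lets link-level code query the bearer mtu without holding a bearer pointer. A hedged example combining it with the tipc_link_set_mtu() helper added in link.c below (l is some already-created tipc_link; the zero check is because the helper returns 0 for a missing bearer):

	int mtu = tipc_bearer_mtu(net, bearer_id);

	if (mtu)
		tipc_link_set_mtu(l, mtu);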
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 005ba5eb0ea4..03a842870c52 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -42,6 +42,7 @@
 #include "bearer.h"
 #include "net.h"
 #include "socket.h"
+#include "bcast.h"
 
 #include <linux/module.h>
 
@@ -71,8 +72,15 @@ static int __net_init tipc_init_net(struct net *net)
 	err = tipc_topsrv_start(net);
 	if (err)
 		goto out_subscr;
+
+	err = tipc_bcast_init(net);
+	if (err)
+		goto out_bclink;
+
 	return 0;
 
+out_bclink:
+	tipc_bcast_stop(net);
 out_subscr:
 	tipc_nametbl_stop(net);
 out_nametbl:
@@ -85,6 +93,7 @@ static void __net_exit tipc_exit_net(struct net *net)
 {
 	tipc_topsrv_stop(net);
 	tipc_net_stop(net);
+	tipc_bcast_stop(net);
 	tipc_nametbl_stop(net);
 	tipc_sk_rht_destroy(net);
 }
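Note: tipc_init_net() follows the usual kernel unwind idiom: each newly added subsystem gets its own error label so earlier stages are torn down in reverse order of setup. A generic sketch of the idiom with hypothetical names (not functions from this patch):

	static int example_init(struct net *net)
	{
		int err;

		err = subsys_a_start(net);	/* hypothetical */
		if (err)
			return err;

		err = subsys_b_start(net);	/* hypothetical */
		if (err)
			goto out_a;
		return 0;

	out_a:
		subsys_a_stop(net);		/* undo earlier stage on failure */
		return err;
	}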
diff --git a/net/tipc/core.h b/net/tipc/core.h
index b96b41eabf12..18e95a8020cd 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -62,8 +62,7 @@
 
 struct tipc_node;
 struct tipc_bearer;
-struct tipc_bcbearer;
-struct tipc_bclink;
+struct tipc_bc_base;
 struct tipc_link;
 struct tipc_name_table;
 struct tipc_server;
@@ -93,8 +92,8 @@ struct tipc_net {
 	struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];
 
 	/* Broadcast link */
-	struct tipc_bcbearer *bcbearer;
-	struct tipc_bclink *bclink;
+	spinlock_t bclock;
+	struct tipc_bc_base *bcbase;
 	struct tipc_link *bcl;
 
 	/* Socket hash table */
@@ -114,6 +113,11 @@ static inline struct tipc_net *tipc_net(struct net *net)
 	return net_generic(net, tipc_net_id);
 }
 
+static inline int tipc_netid(struct net *net)
+{
+	return tipc_net(net)->net_id;
+}
+
 static inline u16 mod(u16 x)
 {
 	return x & 0xffffu;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index d14e0a4aa9af..afe8c47c4085 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -89,7 +89,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
 		      MAX_H_SIZE, dest_domain);
 	msg_set_non_seq(msg, 1);
 	msg_set_node_sig(msg, tn->random);
-	msg_set_node_capabilities(msg, 0);
+	msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES);
 	msg_set_dest_domain(msg, dest_domain);
 	msg_set_bc_netid(msg, tn->net_id);
 	b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr);
@@ -167,11 +167,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
 	/* Send response, if necessary */
 	if (respond && (mtyp == DSC_REQ_MSG)) {
 		rskb = tipc_buf_acquire(MAX_H_SIZE);
-		if (rskb) {
-			tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer);
-			tipc_bearer_send(net, bearer->identity, rskb, &maddr);
-			kfree_skb(rskb);
-		}
+		if (!rskb)
+			return;
+		tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer);
+		tipc_bearer_xmit_skb(net, bearer->identity, rskb, &maddr);
 	}
 }
 
@@ -225,6 +224,7 @@ void tipc_disc_remove_dest(struct tipc_link_req *req)
 static void disc_timeout(unsigned long data)
 {
 	struct tipc_link_req *req = (struct tipc_link_req *)data;
+	struct sk_buff *skb;
 	int max_delay;
 
 	spin_lock_bh(&req->lock);
@@ -242,9 +242,9 @@ static void disc_timeout(unsigned long data)
 	 * hold at fast polling rate if don't have any associated nodes,
 	 * otherwise hold at slow polling rate
 	 */
-	tipc_bearer_send(req->net, req->bearer_id, req->buf, &req->dest);
-
-
+	skb = skb_clone(req->buf, GFP_ATOMIC);
+	if (skb)
+		tipc_bearer_xmit_skb(req->net, req->bearer_id, skb, &req->dest);
 	req->timer_intv *= 2;
 	if (req->num_nodes)
 		max_delay = TIPC_LINK_REQ_SLOW;
@@ -271,6 +271,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
 		     struct tipc_media_addr *dest)
 {
 	struct tipc_link_req *req;
+	struct sk_buff *skb;
 
 	req = kmalloc(sizeof(*req), GFP_ATOMIC);
 	if (!req)
@@ -292,7 +293,9 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
 	setup_timer(&req->timer, disc_timeout, (unsigned long)req);
 	mod_timer(&req->timer, jiffies + req->timer_intv);
 	b_ptr->link_req = req;
-	tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest);
+	skb = skb_clone(req->buf, GFP_ATOMIC);
+	if (skb)
+		tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest);
 	return 0;
 }
 
@@ -316,6 +319,7 @@ void tipc_disc_delete(struct tipc_link_req *req)
 void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr)
 {
 	struct tipc_link_req *req = b_ptr->link_req;
+	struct sk_buff *skb;
 
 	spin_lock_bh(&req->lock);
 	tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr);
@@ -325,6 +329,8 @@ void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr)
 	req->num_nodes = 0;
 	req->timer_intv = TIPC_LINK_REQ_INIT;
 	mod_timer(&req->timer, jiffies + req->timer_intv);
-	tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest);
+	skb = skb_clone(req->buf, GFP_ATOMIC);
+	if (skb)
+		tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest);
 	spin_unlock_bh(&req->lock);
 }
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 75db07c78a69..9efbdbde2b08 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -50,6 +50,7 @@
  */
 static const char *link_co_err = "Link tunneling error, ";
 static const char *link_rst_msg = "Resetting link ";
+static const char tipc_bclink_name[] = "broadcast-link";
 
 static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
 	[TIPC_NLA_LINK_UNSPEC]		= { .type = NLA_UNSPEC },
@@ -75,6 +76,14 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
 	[TIPC_NLA_PROP_WIN]		= { .type = NLA_U32 }
 };
 
+/* Send states for broadcast NACKs
+ */
+enum {
+	BC_NACK_SND_CONDITIONAL,
+	BC_NACK_SND_UNCONDITIONAL,
+	BC_NACK_SND_SUPPRESS,
+};
+
 /*
  * Interval between NACKs when packets arrive out of order
  */
@@ -110,7 +119,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
 				      struct sk_buff_head *xmitq);
 static void link_reset_statistics(struct tipc_link *l_ptr);
 static void link_print(struct tipc_link *l_ptr, const char *str);
-static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf);
+static void tipc_link_build_nack_msg(struct tipc_link *l,
+				     struct sk_buff_head *xmitq);
+static void tipc_link_build_bc_init_msg(struct tipc_link *l,
+					struct sk_buff_head *xmitq);
+static bool tipc_link_release_pkts(struct tipc_link *l, u16 to);
 
 /*
  *  Simple non-static link routines (i.e. referenced outside this file)
@@ -120,11 +133,21 @@ bool tipc_link_is_up(struct tipc_link *l)
 	return link_is_up(l);
 }
 
+bool tipc_link_peer_is_down(struct tipc_link *l)
+{
+	return l->state == LINK_PEER_RESET;
+}
+
 bool tipc_link_is_reset(struct tipc_link *l)
 {
 	return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING);
 }
 
+bool tipc_link_is_establishing(struct tipc_link *l)
+{
+	return l->state == LINK_ESTABLISHING;
+}
+
 bool tipc_link_is_synching(struct tipc_link *l)
 {
 	return l->state == LINK_SYNCHING;
@@ -140,11 +163,66 @@ bool tipc_link_is_blocked(struct tipc_link *l)
 	return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER);
 }
 
+static bool link_is_bc_sndlink(struct tipc_link *l)
+{
+	return !l->bc_sndlink;
+}
+
+static bool link_is_bc_rcvlink(struct tipc_link *l)
+{
+	return ((l->bc_rcvlink == l) && !link_is_bc_sndlink(l));
+}
+
 int tipc_link_is_active(struct tipc_link *l)
 {
-	struct tipc_node *n = l->owner;
+	return l->active;
+}
+
+void tipc_link_set_active(struct tipc_link *l, bool active)
+{
+	l->active = active;
+}
+
+void tipc_link_add_bc_peer(struct tipc_link *snd_l,
+			   struct tipc_link *uc_l,
+			   struct sk_buff_head *xmitq)
+{
+	struct tipc_link *rcv_l = uc_l->bc_rcvlink;
+
+	snd_l->ackers++;
+	rcv_l->acked = snd_l->snd_nxt - 1;
+	tipc_link_build_bc_init_msg(uc_l, xmitq);
+}
+
+void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
+			      struct tipc_link *rcv_l,
+			      struct sk_buff_head *xmitq)
+{
+	u16 ack = snd_l->snd_nxt - 1;
+
+	snd_l->ackers--;
+	tipc_link_bc_ack_rcv(rcv_l, ack, xmitq);
+	tipc_link_reset(rcv_l);
+	rcv_l->state = LINK_RESET;
+	if (!snd_l->ackers) {
+		tipc_link_reset(snd_l);
+		__skb_queue_purge(xmitq);
+	}
+}
+
+int tipc_link_bc_peers(struct tipc_link *l)
+{
+	return l->ackers;
+}
+
+void tipc_link_set_mtu(struct tipc_link *l, int mtu)
+{
+	l->mtu = mtu;
+}
 
-	return (node_active_link(n, 0) == l) || (node_active_link(n, 1) == l);
+int tipc_link_mtu(struct tipc_link *l)
+{
+	return l->mtu;
 }
 
 static u32 link_own_addr(struct tipc_link *l)
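Note: the add/remove peer helpers above maintain a reference count of broadcast receivers ("ackers") on the send link, and tipc_link_xmit() further down stamps that count into each queued buffer's control block. A toy standalone model of the per-packet release rule this implies (the actual release logic is not part of this hunk, so treat this as an assumption about intent):

	#include <stdio.h>

	struct bc_pkt {
		unsigned short seqno;
		int ackers;		/* peers that still must ack this packet */
	};

	/* returns nonzero once every peer has acked and the packet may go */
	static int bc_pkt_acked(struct bc_pkt *p)
	{
		return --p->ackers <= 0;
	}

	int main(void)
	{
		struct bc_pkt p = { .seqno = 42, .ackers = 3 };

		bc_pkt_acked(&p);			/* peer 1 acks */
		bc_pkt_acked(&p);			/* peer 2 acks */
		printf("release: %d\n", bc_pkt_acked(&p));	/* release: 1 */
		return 0;
	}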
@@ -155,57 +233,72 @@ static u32 link_own_addr(struct tipc_link *l)
 /**
  * tipc_link_create - create a new link
  * @n: pointer to associated node
- * @b: pointer to associated bearer
+ * @if_name: associated interface name
+ * @bearer_id: id (index) of associated bearer
+ * @tolerance: link tolerance to be used by link
+ * @net_plane: network plane (A,B,c..) this link belongs to
+ * @mtu: mtu to be advertised by link
+ * @priority: priority to be used by link
+ * @window: send window to be used by link
+ * @session: session to be used by link
  * @ownnode: identity of own node
- * @peer: identity of peer node
- * @maddr: media address to be used
+ * @peer: node id of peer node
+ * @peer_caps: bitmap describing peer node capabilities
+ * @bc_sndlink: the namespace global link used for broadcast sending
+ * @bc_rcvlink: the peer specific link used for broadcast reception
  * @inputq: queue to put messages ready for delivery
  * @namedq: queue to put binding table update messages ready for delivery
  * @link: return value, pointer to put the created link
  *
  * Returns true if link was created, otherwise false
  */
-bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session,
-		      u32 ownnode, u32 peer, struct tipc_media_addr *maddr,
-		      struct sk_buff_head *inputq, struct sk_buff_head *namedq,
+bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
+		      int tolerance, char net_plane, u32 mtu, int priority,
+		      int window, u32 session, u32 ownnode, u32 peer,
+		      u16 peer_caps,
+		      struct tipc_link *bc_sndlink,
+		      struct tipc_link *bc_rcvlink,
+		      struct sk_buff_head *inputq,
+		      struct sk_buff_head *namedq,
 		      struct tipc_link **link)
 {
 	struct tipc_link *l;
 	struct tipc_msg *hdr;
-	char *if_name;
 
 	l = kzalloc(sizeof(*l), GFP_ATOMIC);
 	if (!l)
 		return false;
 	*link = l;
+	l->pmsg = (struct tipc_msg *)&l->proto_msg;
+	hdr = l->pmsg;
+	tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer);
+	msg_set_size(hdr, sizeof(l->proto_msg));
+	msg_set_session(hdr, session);
+	msg_set_bearer_id(hdr, l->bearer_id);
 
 	/* Note: peer i/f name is completed by reset/activate message */
-	if_name = strchr(b->name, ':') + 1;
 	sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
 		tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode),
 		if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
+	strcpy((char *)msg_data(hdr), if_name);
 
 	l->addr = peer;
-	l->media_addr = maddr;
-	l->owner = n;
+	l->peer_caps = peer_caps;
+	l->net = net;
 	l->peer_session = WILDCARD_SESSION;
-	l->bearer_id = b->identity;
-	l->tolerance = b->tolerance;
-	l->net_plane = b->net_plane;
-	l->advertised_mtu = b->mtu;
-	l->mtu = b->mtu;
-	l->priority = b->priority;
-	tipc_link_set_queue_limits(l, b->window);
+	l->bearer_id = bearer_id;
+	l->tolerance = tolerance;
+	l->net_plane = net_plane;
+	l->advertised_mtu = mtu;
+	l->mtu = mtu;
+	l->priority = priority;
+	tipc_link_set_queue_limits(l, window);
+	l->ackers = 1;
+	l->bc_sndlink = bc_sndlink;
+	l->bc_rcvlink = bc_rcvlink;
 	l->inputq = inputq;
 	l->namedq = namedq;
 	l->state = LINK_RESETTING;
-	l->pmsg = (struct tipc_msg *)&l->proto_msg;
-	hdr = l->pmsg;
-	tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer);
-	msg_set_size(hdr, sizeof(l->proto_msg));
-	msg_set_session(hdr, session);
-	msg_set_bearer_id(hdr, l->bearer_id);
-	strcpy((char *)msg_data(hdr), if_name);
 	__skb_queue_head_init(&l->transmq);
 	__skb_queue_head_init(&l->backlogq);
 	__skb_queue_head_init(&l->deferdq);
@@ -214,27 +307,43 @@ bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session,
 	return true;
 }
 
-/* tipc_link_build_bcast_sync_msg() - synchronize broadcast link endpoints.
+/**
+ * tipc_link_bc_create - create new link to be used for broadcast
+ * @n: pointer to associated node
+ * @mtu: mtu to be used
+ * @window: send window to be used
+ * @inputq: queue to put messages ready for delivery
+ * @namedq: queue to put binding table update messages ready for delivery
+ * @link: return value, pointer to put the created link
  *
- * Give a newly added peer node the sequence number where it should
- * start receiving and acking broadcast packets.
+ * Returns true if link was created, otherwise false
  */
-void tipc_link_build_bcast_sync_msg(struct tipc_link *l,
-				    struct sk_buff_head *xmitq)
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
+			 int mtu, int window, u16 peer_caps,
+			 struct sk_buff_head *inputq,
+			 struct sk_buff_head *namedq,
+			 struct tipc_link *bc_sndlink,
+			 struct tipc_link **link)
 {
-	struct sk_buff *skb;
-	struct sk_buff_head list;
-	u16 last_sent;
+	struct tipc_link *l;
 
-	skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE,
-			      0, l->addr, link_own_addr(l), 0, 0, 0);
-	if (!skb)
-		return;
-	last_sent = tipc_bclink_get_last_sent(l->owner->net);
-	msg_set_last_bcast(buf_msg(skb), last_sent);
-	__skb_queue_head_init(&list);
-	__skb_queue_tail(&list, skb);
-	tipc_link_xmit(l, &list, xmitq);
+	if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window,
+			      0, ownnode, peer, peer_caps, bc_sndlink,
+			      NULL, inputq, namedq, link))
+		return false;
+
+	l = *link;
+	strcpy(l->name, tipc_bclink_name);
+	tipc_link_reset(l);
+	l->state = LINK_RESET;
+	l->ackers = 0;
+	l->bc_rcvlink = l;
+
+	/* Broadcast send link is always up */
+	if (link_is_bc_sndlink(l))
+		l->state = LINK_ESTABLISHED;
+
+	return true;
 }
 
 /**
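Note: in the new scheme the broadcast *send* link is simply a link created with a NULL bc_sndlink back-pointer (see link_is_bc_sndlink() above), which also forces it permanently into LINK_ESTABLISHED. A hedged creation sketch; the parameter values are illustrative, not lifted from a caller in this patch:

	struct sk_buff_head inputq, namedq;
	struct tipc_link *snd_l = NULL;

	__skb_queue_head_init(&inputq);
	__skb_queue_head_init(&namedq);

	/* bc_sndlink == NULL marks this as the broadcast send link */
	if (!tipc_link_bc_create(net, own_addr, 0, mtu, window, peer_caps,
				 &inputq, &namedq, NULL, &snd_l))
		return -ENOMEM;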
@@ -321,14 +430,15 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt)
 	switch (evt) {
 	case LINK_ESTABLISH_EVT:
 		l->state = LINK_ESTABLISHED;
-		rc |= TIPC_LINK_UP_EVT;
 		break;
 	case LINK_FAILOVER_BEGIN_EVT:
 		l->state = LINK_FAILINGOVER;
 		break;
-	case LINK_PEER_RESET_EVT:
 	case LINK_RESET_EVT:
+		l->state = LINK_RESET;
+		break;
 	case LINK_FAILURE_EVT:
+	case LINK_PEER_RESET_EVT:
 	case LINK_SYNCH_BEGIN_EVT:
 	case LINK_FAILOVER_END_EVT:
 		break;
@@ -440,12 +550,17 @@ static void link_profile_stats(struct tipc_link *l)
 
 /* tipc_link_timeout - perform periodic task as instructed from node timeout
  */
+/* tipc_link_timeout - perform periodic task as instructed from node timeout
+ */
 int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
 {
 	int rc = 0;
 	int mtyp = STATE_MSG;
 	bool xmit = false;
 	bool prb = false;
+	u16 bc_snt = l->bc_sndlink->snd_nxt - 1;
+	u16 bc_acked = l->bc_rcvlink->acked;
+	bool bc_up = link_is_up(l->bc_rcvlink);
 
 	link_profile_stats(l);
 
@@ -453,7 +568,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
 	case LINK_ESTABLISHED:
 	case LINK_SYNCHING:
 		if (!l->silent_intv_cnt) {
-			if (tipc_bclink_acks_missing(l->owner))
+			if (bc_up && (bc_acked != bc_snt))
 				xmit = true;
 		} else if (l->silent_intv_cnt <= l->abort_limit) {
 			xmit = true;
@@ -544,42 +659,8 @@ void link_prepare_wakeup(struct tipc_link *l)
 	}
 }
 
-/**
- * tipc_link_reset_fragments - purge link's inbound message fragments queue
- * @l_ptr: pointer to link
- */
-void tipc_link_reset_fragments(struct tipc_link *l_ptr)
-{
-	kfree_skb(l_ptr->reasm_buf);
-	l_ptr->reasm_buf = NULL;
-}
-
-void tipc_link_purge_backlog(struct tipc_link *l)
-{
-	__skb_queue_purge(&l->backlogq);
-	l->backlog[TIPC_LOW_IMPORTANCE].len = 0;
-	l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0;
-	l->backlog[TIPC_HIGH_IMPORTANCE].len = 0;
-	l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0;
-	l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0;
-}
-
-/**
- * tipc_link_purge_queues - purge all pkt queues associated with link
- * @l_ptr: pointer to link
- */
-void tipc_link_purge_queues(struct tipc_link *l_ptr)
-{
-	__skb_queue_purge(&l_ptr->deferdq);
-	__skb_queue_purge(&l_ptr->transmq);
-	tipc_link_purge_backlog(l_ptr);
-	tipc_link_reset_fragments(l_ptr);
-}
-
 void tipc_link_reset(struct tipc_link *l)
 {
-	tipc_link_fsm_evt(l, LINK_RESET_EVT);
-
 	/* Link is down, accept any session */
 	l->peer_session = WILDCARD_SESSION;
 
@@ -589,12 +670,16 @@ void tipc_link_reset(struct tipc_link *l)
 	/* Prepare for renewed mtu size negotiation */
 	l->mtu = l->advertised_mtu;
 
-	/* Clean up all queues: */
+	/* Clean up all queues and counters: */
 	__skb_queue_purge(&l->transmq);
 	__skb_queue_purge(&l->deferdq);
 	skb_queue_splice_init(&l->wakeupq, l->inputq);
-
-	tipc_link_purge_backlog(l);
+	__skb_queue_purge(&l->backlogq);
+	l->backlog[TIPC_LOW_IMPORTANCE].len = 0;
+	l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0;
+	l->backlog[TIPC_HIGH_IMPORTANCE].len = 0;
+	l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0;
+	l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0;
 	kfree_skb(l->reasm_buf);
 	kfree_skb(l->failover_reasm_skb);
 	l->reasm_buf = NULL;
@@ -602,81 +687,15 @@ void tipc_link_reset(struct tipc_link *l)
 	l->rcv_unacked = 0;
 	l->snd_nxt = 1;
 	l->rcv_nxt = 1;
+	l->acked = 0;
 	l->silent_intv_cnt = 0;
 	l->stats.recv_info = 0;
 	l->stale_count = 0;
+	l->bc_peer_is_up = false;
 	link_reset_statistics(l);
 }
 
 /**
- * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked
- * @link: link to use
- * @list: chain of buffers containing message
- *
- * Consumes the buffer chain, except when returning an error code,
- * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
- * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
- */
-int __tipc_link_xmit(struct net *net, struct tipc_link *link,
-		     struct sk_buff_head *list)
-{
-	struct tipc_msg *msg = buf_msg(skb_peek(list));
-	unsigned int maxwin = link->window;
-	unsigned int i, imp = msg_importance(msg);
-	uint mtu = link->mtu;
-	u16 ack = mod(link->rcv_nxt - 1);
-	u16 seqno = link->snd_nxt;
-	u16 bc_last_in = link->owner->bclink.last_in;
-	struct tipc_media_addr *addr = link->media_addr;
-	struct sk_buff_head *transmq = &link->transmq;
-	struct sk_buff_head *backlogq = &link->backlogq;
-	struct sk_buff *skb, *bskb;
-
-	/* Match msg importance against this and all higher backlog limits: */
-	for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
-		if (unlikely(link->backlog[i].len >= link->backlog[i].limit))
-			return link_schedule_user(link, list);
-	}
-	if (unlikely(msg_size(msg) > mtu))
-		return -EMSGSIZE;
-
-	/* Prepare each packet for sending, and add to relevant queue: */
-	while (skb_queue_len(list)) {
-		skb = skb_peek(list);
-		msg = buf_msg(skb);
-		msg_set_seqno(msg, seqno);
-		msg_set_ack(msg, ack);
-		msg_set_bcast_ack(msg, bc_last_in);
-
-		if (likely(skb_queue_len(transmq) < maxwin)) {
-			__skb_dequeue(list);
-			__skb_queue_tail(transmq, skb);
-			tipc_bearer_send(net, link->bearer_id, skb, addr);
-			link->rcv_unacked = 0;
-			seqno++;
-			continue;
-		}
-		if (tipc_msg_bundle(skb_peek_tail(backlogq), msg, mtu)) {
-			kfree_skb(__skb_dequeue(list));
-			link->stats.sent_bundled++;
-			continue;
-		}
-		if (tipc_msg_make_bundle(&bskb, msg, mtu, link->addr)) {
-			kfree_skb(__skb_dequeue(list));
-			__skb_queue_tail(backlogq, bskb);
-			link->backlog[msg_importance(buf_msg(bskb))].len++;
-			link->stats.sent_bundled++;
-			link->stats.sent_bundles++;
-			continue;
-		}
-		link->backlog[imp].len += skb_queue_len(list);
-		skb_queue_splice_tail_init(list, backlogq);
-	}
-	link->snd_nxt = seqno;
-	return 0;
-}
-
-/**
  * tipc_link_xmit(): enqueue buffer list according to queue situation
  * @link: link to use
  * @list: chain of buffers containing message
@@ -696,7 +715,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
 	unsigned int mtu = l->mtu;
 	u16 ack = l->rcv_nxt - 1;
 	u16 seqno = l->snd_nxt;
-	u16 bc_last_in = l->owner->bclink.last_in;
+	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
 	struct sk_buff_head *transmq = &l->transmq;
 	struct sk_buff_head *backlogq = &l->backlogq;
 	struct sk_buff *skb, *_skb, *bskb;
@@ -715,7 +734,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
 		hdr = buf_msg(skb);
 		msg_set_seqno(hdr, seqno);
 		msg_set_ack(hdr, ack);
-		msg_set_bcast_ack(hdr, bc_last_in);
+		msg_set_bcast_ack(hdr, bc_ack);
 
 		if (likely(skb_queue_len(transmq) < maxwin)) {
 			_skb = skb_clone(skb, GFP_ATOMIC);
@@ -724,6 +743,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
 			__skb_dequeue(list);
 			__skb_queue_tail(transmq, skb);
 			__skb_queue_tail(xmitq, _skb);
+			TIPC_SKB_CB(skb)->ackers = l->ackers;
 			l->rcv_unacked = 0;
 			seqno++;
 			continue;
@@ -748,62 +768,13 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
 	return 0;
 }
 
-/*
- * tipc_link_sync_rcv - synchronize broadcast link endpoints.
- * Receive the sequence number where we should start receiving and
- * acking broadcast packets from a newly added peer node, and open
- * up for reception of such packets.
- *
- * Called with node locked
- */
-static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf)
-{
-	struct tipc_msg *msg = buf_msg(buf);
-
-	n->bclink.last_sent = n->bclink.last_in = msg_last_bcast(msg);
-	n->bclink.recv_permitted = true;
-	kfree_skb(buf);
-}
-
-/*
- * tipc_link_push_packets - push unsent packets to bearer
- *
- * Push out the unsent messages of a link where congestion
- * has abated. Node is locked.
- *
- * Called with node locked
- */
-void tipc_link_push_packets(struct tipc_link *link)
-{
-	struct sk_buff *skb;
-	struct tipc_msg *msg;
-	u16 seqno = link->snd_nxt;
-	u16 ack = mod(link->rcv_nxt - 1);
-
-	while (skb_queue_len(&link->transmq) < link->window) {
-		skb = __skb_dequeue(&link->backlogq);
-		if (!skb)
-			break;
-		msg = buf_msg(skb);
-		link->backlog[msg_importance(msg)].len--;
-		msg_set_ack(msg, ack);
-		msg_set_seqno(msg, seqno);
-		seqno = mod(seqno + 1);
-		msg_set_bcast_ack(msg, link->owner->bclink.last_in);
-		link->rcv_unacked = 0;
-		__skb_queue_tail(&link->transmq, skb);
-		tipc_bearer_send(link->owner->net, link->bearer_id,
-				 skb, link->media_addr);
-	}
-	link->snd_nxt = seqno;
-}
-
 void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq)
 {
 	struct sk_buff *skb, *_skb;
 	struct tipc_msg *hdr;
 	u16 seqno = l->snd_nxt;
 	u16 ack = l->rcv_nxt - 1;
+	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
 
 	while (skb_queue_len(&l->transmq) < l->window) {
 		skb = skb_peek(&l->backlogq);
@@ -817,96 +788,35 @@ void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq)
 		l->backlog[msg_importance(hdr)].len--;
 		__skb_queue_tail(&l->transmq, skb);
 		__skb_queue_tail(xmitq, _skb);
-		msg_set_ack(hdr, ack);
+		TIPC_SKB_CB(skb)->ackers = l->ackers;
 		msg_set_seqno(hdr, seqno);
-		msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
+		msg_set_ack(hdr, ack);
+		msg_set_bcast_ack(hdr, bc_ack);
 		l->rcv_unacked = 0;
 		seqno++;
 	}
 	l->snd_nxt = seqno;
 }
 
-static void link_retransmit_failure(struct tipc_link *l_ptr,
-				    struct sk_buff *buf)
-{
-	struct tipc_msg *msg = buf_msg(buf);
-	struct net *net = l_ptr->owner->net;
-
-	pr_warn("Retransmission failure on link <%s>\n", l_ptr->name);
-
-	if (l_ptr->addr) {
-		/* Handle failure on standard link */
-		link_print(l_ptr, "Resetting link ");
-		pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
-			msg_user(msg), msg_type(msg), msg_size(msg),
-			msg_errcode(msg));
-		pr_info("sqno %u, prev: %x, src: %x\n",
-			msg_seqno(msg), msg_prevnode(msg), msg_orignode(msg));
-	} else {
-		/* Handle failure on broadcast link */
-		struct tipc_node *n_ptr;
-		char addr_string[16];
-
-		pr_info("Msg seq number: %u,  ", msg_seqno(msg));
-		pr_cont("Outstanding acks: %lu\n",
-			(unsigned long) TIPC_SKB_CB(buf)->handle);
-
-		n_ptr = tipc_bclink_retransmit_to(net);
-
-		tipc_addr_string_fill(addr_string, n_ptr->addr);
-		pr_info("Broadcast link info for %s\n", addr_string);
-		pr_info("Reception permitted: %d,  Acked: %u\n",
-			n_ptr->bclink.recv_permitted,
-			n_ptr->bclink.acked);
-		pr_info("Last in: %u,  Oos state: %u,  Last sent: %u\n",
-			n_ptr->bclink.last_in,
-			n_ptr->bclink.oos_state,
-			n_ptr->bclink.last_sent);
-
-		n_ptr->action_flags |= TIPC_BCAST_RESET;
-		l_ptr->stale_count = 0;
-	}
-}
-
-void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb,
-			  u32 retransmits)
+static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb)
 {
-	struct tipc_msg *msg;
-
-	if (!skb)
-		return;
-
-	msg = buf_msg(skb);
-
-	/* Detect repeated retransmit failures */
-	if (l_ptr->last_retransm == msg_seqno(msg)) {
-		if (++l_ptr->stale_count > 100) {
-			link_retransmit_failure(l_ptr, skb);
-			return;
-		}
-	} else {
-		l_ptr->last_retransm = msg_seqno(msg);
-		l_ptr->stale_count = 1;
-	}
+	struct tipc_msg *hdr = buf_msg(skb);
 
-	skb_queue_walk_from(&l_ptr->transmq, skb) {
-		if (!retransmits)
-			break;
-		msg = buf_msg(skb);
-		msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1));
-		msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
-		tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb,
-				 l_ptr->media_addr);
-		retransmits--;
-		l_ptr->stats.retransmitted++;
-	}
+	pr_warn("Retransmission failure on link <%s>\n", l->name);
+	link_print(l, "Resetting link ");
+	pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
+		msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
+	pr_info("sqno %u, prev: %x, src: %x\n",
+		msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
 }
 
-static int tipc_link_retransm(struct tipc_link *l, int retransm,
-			      struct sk_buff_head *xmitq)
+int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
+		      struct sk_buff_head *xmitq)
 {
 	struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
 	struct tipc_msg *hdr;
+	u16 ack = l->rcv_nxt - 1;
+	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
 
 	if (!skb)
 		return 0;
@@ -919,19 +829,25 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm,
 		link_retransmit_failure(l, skb);
 		return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
 	}
+
+	/* Move forward to where retransmission should start */
 	skb_queue_walk(&l->transmq, skb) {
-		if (!retransm)
-			return 0;
+		if (!less(buf_seqno(skb), from))
+			break;
+	}
+
+	skb_queue_walk_from(&l->transmq, skb) {
+		if (more(buf_seqno(skb), to))
+			break;
 		hdr = buf_msg(skb);
 		_skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
 		if (!_skb)
 			return 0;
 		hdr = buf_msg(_skb);
-		msg_set_ack(hdr, l->rcv_nxt - 1);
-		msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
+		msg_set_ack(hdr, ack);
+		msg_set_bcast_ack(hdr, bc_ack);
 		_skb->priority = TC_PRIO_CONTROL;
 		__skb_queue_tail(xmitq, _skb);
-		retransm--;
 		l->stats.retransmitted++;
 	}
 	return 0;
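Note: the retransmit range [from, to] relies on wraparound-safe ordering of 16-bit sequence numbers via less()/more(). A standalone demonstration of one common way such a comparison is implemented (an assumption for illustration; TIPC's own less()/more() live in its headers and may differ in detail):

	#include <stdint.h>
	#include <stdio.h>

	/* signed difference makes the comparison valid across the
	 * 16-bit wrap, for distances under half the sequence space
	 */
	static int seq_less(uint16_t a, uint16_t b)
	{
		return (int16_t)(a - b) < 0;
	}

	int main(void)
	{
		printf("%d\n", seq_less(65535, 3));	/* 1: 65535 precedes 3 across wrap */
		printf("%d\n", seq_less(10, 3));	/* 0: 10 does not precede 3 */
		return 0;
	}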
@@ -942,22 +858,20 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm,
  * Consumes buffer if message is of right type
  * Node lock must be held
  */
-static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb,
+static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
 			    struct sk_buff_head *inputq)
 {
-	struct tipc_node *node = link->owner;
-
 	switch (msg_user(buf_msg(skb))) {
 	case TIPC_LOW_IMPORTANCE:
 	case TIPC_MEDIUM_IMPORTANCE:
 	case TIPC_HIGH_IMPORTANCE:
 	case TIPC_CRITICAL_IMPORTANCE:
 	case CONN_MANAGER:
-		__skb_queue_tail(inputq, skb);
+		skb_queue_tail(inputq, skb);
 		return true;
 	case NAME_DISTRIBUTOR:
-		node->bclink.recv_permitted = true;
-		skb_queue_tail(link->namedq, skb);
+		l->bc_rcvlink->state = LINK_ESTABLISHED;
+		skb_queue_tail(l->namedq, skb);
 		return true;
 	case MSG_BUNDLER:
 	case TUNNEL_PROTOCOL:
@@ -978,10 +892,10 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb,
 static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb,
 			   struct sk_buff_head *inputq)
 {
-	struct tipc_node *node = l->owner;
 	struct tipc_msg *hdr = buf_msg(skb);
 	struct sk_buff **reasm_skb = &l->reasm_buf;
 	struct sk_buff *iskb;
+	struct sk_buff_head tmpq;
 	int usr = msg_user(hdr);
 	int rc = 0;
 	int pos = 0;
@@ -1006,23 +920,27 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb,
 	}
 
 	if (usr == MSG_BUNDLER) {
+		skb_queue_head_init(&tmpq);
 		l->stats.recv_bundles++;
 		l->stats.recv_bundled += msg_msgcnt(hdr);
 		while (tipc_msg_extract(skb, &iskb, &pos))
-			tipc_data_input(l, iskb, inputq);
+			tipc_data_input(l, iskb, &tmpq);
+		tipc_skb_queue_splice_tail(&tmpq, inputq);
 		return 0;
 	} else if (usr == MSG_FRAGMENTER) {
 		l->stats.recv_fragments++;
 		if (tipc_buf_append(reasm_skb, &skb)) {
 			l->stats.recv_fragmented++;
 			tipc_data_input(l, skb, inputq);
-		} else if (!*reasm_skb) {
+		} else if (!*reasm_skb && !link_is_bc_rcvlink(l)) {
+			pr_warn_ratelimited("Unable to build fragment list\n");
 			return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
 		}
 		return 0;
 	} else if (usr == BCAST_PROTOCOL) {
-		tipc_link_sync_rcv(node, skb);
-		return 0;
+		tipc_bcast_lock(l->net);
+		tipc_link_bc_init_rcv(l->bc_rcvlink, hdr);
+		tipc_bcast_unlock(l->net);
 	}
 drop:
 	kfree_skb(skb);
@@ -1044,49 +962,95 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked)
 	return released;
 }
 
+/* tipc_link_build_ack_msg: prepare link acknowledge message for transmission
+ *
+ * Note that sending of broadcast ack is coordinated among nodes, to reduce
+ * risk of ack storms towards the sender
+ */
+int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
+{
+	if (!l)
+		return 0;
+
+	/* Broadcast ACK must be sent via a unicast link => defer to caller */
+	if (link_is_bc_rcvlink(l)) {
+		if (((l->rcv_nxt ^ link_own_addr(l)) & 0xf) != 0xf)
+			return 0;
+		l->rcv_unacked = 0;
+		return TIPC_LINK_SND_BC_ACK;
+	}
+
+	/* Unicast ACK */
+	l->rcv_unacked = 0;
+	l->stats.sent_acks++;
+	tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+	return 0;
+}
+
+/* tipc_link_build_reset_msg: prepare link RESET or ACTIVATE message
+ */
+void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
+{
+	int mtyp = RESET_MSG;
+
+	if (l->state == LINK_ESTABLISHING)
+		mtyp = ACTIVATE_MSG;
+
+	tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq);
+}
+
+/* tipc_link_build_nack_msg: prepare link nack message for transmission
+ */
+static void tipc_link_build_nack_msg(struct tipc_link *l,
+				     struct sk_buff_head *xmitq)
+{
+	u32 def_cnt = ++l->stats.deferred_recv;
+
+	if (link_is_bc_rcvlink(l))
+		return;
+
+	if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
+		tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+}
+
 /* tipc_link_rcv - process TIPC packets/messages arriving from off-node
- * @link: the link that should handle the message
+ * @l: the link that should handle the message
  * @skb: TIPC packet
  * @xmitq: queue to place packets to be sent after this call
  */
 int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
 		  struct sk_buff_head *xmitq)
 {
-	struct sk_buff_head *arrvq = &l->deferdq;
-	struct sk_buff_head tmpq;
+	struct sk_buff_head *defq = &l->deferdq;
 	struct tipc_msg *hdr;
-	u16 seqno, rcv_nxt;
+	u16 seqno, rcv_nxt, win_lim;
 	int rc = 0;
 
-	__skb_queue_head_init(&tmpq);
-
-	if (unlikely(!__tipc_skb_queue_sorted(arrvq, skb))) {
-		if (!(skb_queue_len(arrvq) % TIPC_NACK_INTV))
-			tipc_link_build_proto_msg(l, STATE_MSG, 0,
-						  0, 0, 0, xmitq);
-		return rc;
-	}
-
-	while ((skb = skb_peek(arrvq))) {
+	do {
 		hdr = buf_msg(skb);
+		seqno = msg_seqno(hdr);
+		rcv_nxt = l->rcv_nxt;
+		win_lim = rcv_nxt + TIPC_MAX_LINK_WIN;
 
 		/* Verify and update link state */
-		if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) {
-			__skb_dequeue(arrvq);
-			rc = tipc_link_proto_rcv(l, skb, xmitq);
-			continue;
-		}
+		if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
+			return tipc_link_proto_rcv(l, skb, xmitq);
 
 		if (unlikely(!link_is_up(l))) {
-			rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
-			if (!link_is_up(l)) {
-				kfree_skb(__skb_dequeue(arrvq));
-				goto exit;
-			}
+			if (l->state == LINK_ESTABLISHING)
+				rc = TIPC_LINK_UP_EVT;
+			goto drop;
 		}
 
+		/* Don't send probe at next timeout expiration */
 		l->silent_intv_cnt = 0;
 
+		/* Drop if outside receive window */
+		if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) {
+			l->stats.duplicates++;
+			goto drop;
+		}
+
 		/* Forward queues and wake up waiting users */
 		if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) {
 			tipc_link_advance_backlog(l, xmitq);
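Note: the staggering test in tipc_link_build_ack_msg() above, ((l->rcv_nxt ^ link_own_addr(l)) & 0xf) != 0xf, makes each node ack only when the low nibble of its rcv_nxt XOR its own address hits 0xf, so nodes with different addresses ack at different packet numbers, roughly one ack per 16 delivered packets per node. A standalone demonstration of that arithmetic:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t addr;
		uint16_t rcv_nxt;

		for (addr = 1; addr <= 4; addr++) {
			printf("node %u acks at rcv_nxt:", addr);
			for (rcv_nxt = 0; rcv_nxt < 32; rcv_nxt++)
				if (((rcv_nxt ^ addr) & 0xf) == 0xf)
					printf(" %u", rcv_nxt);
			printf("\n");	/* offsets differ per address */
		}
		return 0;
	}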
@@ -1094,79 +1058,28 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
1094 link_prepare_wakeup(l); 1058 link_prepare_wakeup(l);
1095 } 1059 }
1096 1060
1097 /* Defer reception if there is a gap in the sequence */ 1061 /* Defer delivery if sequence gap */
1098 seqno = msg_seqno(hdr); 1062 if (unlikely(seqno != rcv_nxt)) {
1099 rcv_nxt = l->rcv_nxt; 1063 __tipc_skb_queue_sorted(defq, seqno, skb);
1100 if (unlikely(less(rcv_nxt, seqno))) { 1064 tipc_link_build_nack_msg(l, xmitq);
1101 l->stats.deferred_recv++; 1065 break;
1102 goto exit;
1103 }
1104
1105 __skb_dequeue(arrvq);
1106
1107 /* Drop if packet already received */
1108 if (unlikely(more(rcv_nxt, seqno))) {
1109 l->stats.duplicates++;
1110 kfree_skb(skb);
1111 goto exit;
1112 } 1066 }
1113 1067
1114 /* Packet can be delivered */ 1068 /* Deliver packet */
1115 l->rcv_nxt++; 1069 l->rcv_nxt++;
1116 l->stats.recv_info++; 1070 l->stats.recv_info++;
1117 if (unlikely(!tipc_data_input(l, skb, &tmpq))) 1071 if (!tipc_data_input(l, skb, l->inputq))
1118 rc = tipc_link_input(l, skb, &tmpq); 1072 rc |= tipc_link_input(l, skb, l->inputq);
1119 1073 if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN))
1120 /* Ack at regular intervals */ 1074 rc |= tipc_link_build_ack_msg(l, xmitq);
1121 if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) { 1075 if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK))
1122 l->rcv_unacked = 0;
1123 l->stats.sent_acks++;
1124 tipc_link_build_proto_msg(l, STATE_MSG,
1125 0, 0, 0, 0, xmitq);
1126 }
1127 }
1128exit:
1129 tipc_skb_queue_splice_tail(&tmpq, l->inputq);
1130 return rc;
1131}
1132
1133/**
1134 * tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue
1135 *
1136 * Returns increase in queue length (i.e. 0 or 1)
1137 */
1138u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb)
1139{
1140 struct sk_buff *skb1;
1141 u16 seq_no = buf_seqno(skb);
1142
1143 /* Empty queue ? */
1144 if (skb_queue_empty(list)) {
1145 __skb_queue_tail(list, skb);
1146 return 1;
1147 }
1148
1149 /* Last ? */
1150 if (less(buf_seqno(skb_peek_tail(list)), seq_no)) {
1151 __skb_queue_tail(list, skb);
1152 return 1;
1153 }
1154
1155 /* Locate insertion point in queue, then insert; discard if duplicate */
1156 skb_queue_walk(list, skb1) {
1157 u16 curr_seqno = buf_seqno(skb1);
1158
1159 if (seq_no == curr_seqno) {
1160 kfree_skb(skb);
1161 return 0;
1162 }
1163
1164 if (less(seq_no, curr_seqno))
1165 break; 1076 break;
1166 } 1077 } while ((skb = __skb_dequeue(defq)));
1167 1078
1168 __skb_queue_before(list, skb1, skb); 1079 return rc;
1169 return 1; 1080drop:
1081 kfree_skb(skb);
1082 return rc;
1170} 1083}
1171 1084
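The rewritten tipc_link_rcv() above drains the deferred queue in a do/while loop: each in-sequence packet is delivered, each out-of-sequence one is parked in deferdq, and anything outside the receive window is counted and dropped as a duplicate. A minimal user-space sketch of that window check, assuming TIPC's mod-2^16 serial-number arithmetic (the less()/more() helpers from net/tipc/msg.h) and the TIPC_MAX_LINK_WIN value from link.h:

#include <stdint.h>
#include <stdio.h>

#define TIPC_MAX_LINK_WIN 8191	/* assumed value, for illustration */

/* serial-number compare across 16-bit wraparound */
static int less(uint16_t a, uint16_t b) { return (int16_t)(a - b) < 0; }
static int more(uint16_t a, uint16_t b) { return (int16_t)(a - b) > 0; }

static int in_rcv_window(uint16_t seqno, uint16_t rcv_nxt)
{
	uint16_t win_lim = rcv_nxt + TIPC_MAX_LINK_WIN;

	/* seqno < rcv_nxt: duplicate; seqno > win_lim: far future */
	return !(less(seqno, rcv_nxt) || more(seqno, win_lim));
}

int main(void)
{
	printf("%d\n", in_rcv_window(3, 65530));     /* 1: window wraps */
	printf("%d\n", in_rcv_window(65529, 65530)); /* 0: duplicate */
	return 0;
}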
1172/* 1085/*
@@ -1184,23 +1097,17 @@ void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg,
1184 skb = __skb_dequeue(&xmitq); 1097 skb = __skb_dequeue(&xmitq);
1185 if (!skb) 1098 if (!skb)
1186 return; 1099 return;
1187 tipc_bearer_send(l->owner->net, l->bearer_id, skb, l->media_addr); 1100 tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr);
1188 l->rcv_unacked = 0; 1101 l->rcv_unacked = 0;
1189 kfree_skb(skb);
1190} 1102}
1191 1103
1192/* tipc_link_build_proto_msg: prepare link protocol message for transmission
1193 */
1194static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, 1104static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1195 u16 rcvgap, int tolerance, int priority, 1105 u16 rcvgap, int tolerance, int priority,
1196 struct sk_buff_head *xmitq) 1106 struct sk_buff_head *xmitq)
1197{ 1107{
1198 struct sk_buff *skb = NULL; 1108 struct sk_buff *skb = NULL;
1199 struct tipc_msg *hdr = l->pmsg; 1109 struct tipc_msg *hdr = l->pmsg;
1200 u16 snd_nxt = l->snd_nxt; 1110 bool node_up = link_is_up(l->bc_rcvlink);
1201 u16 rcv_nxt = l->rcv_nxt;
1202 u16 rcv_last = rcv_nxt - 1;
1203 int node_up = l->owner->bclink.recv_permitted;
1204 1111
1205 /* Don't send protocol message during reset or link failover */ 1112 /* Don't send protocol message during reset or link failover */
1206 if (tipc_link_is_blocked(l)) 1113 if (tipc_link_is_blocked(l))
@@ -1208,33 +1115,34 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1208 1115
1209 msg_set_type(hdr, mtyp); 1116 msg_set_type(hdr, mtyp);
1210 msg_set_net_plane(hdr, l->net_plane); 1117 msg_set_net_plane(hdr, l->net_plane);
1211 msg_set_bcast_ack(hdr, l->owner->bclink.last_in); 1118 msg_set_next_sent(hdr, l->snd_nxt);
1212 msg_set_last_bcast(hdr, tipc_bclink_get_last_sent(l->owner->net)); 1119 msg_set_ack(hdr, l->rcv_nxt - 1);
1120 msg_set_bcast_ack(hdr, l->bc_rcvlink->rcv_nxt - 1);
1121 msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1);
1213 msg_set_link_tolerance(hdr, tolerance); 1122 msg_set_link_tolerance(hdr, tolerance);
1214 msg_set_linkprio(hdr, priority); 1123 msg_set_linkprio(hdr, priority);
1215 msg_set_redundant_link(hdr, node_up); 1124 msg_set_redundant_link(hdr, node_up);
1216 msg_set_seq_gap(hdr, 0); 1125 msg_set_seq_gap(hdr, 0);
1217 1126
1218 /* Compatibility: created msg must not be in sequence with pkt flow */ 1127 /* Compatibility: created msg must not be in sequence with pkt flow */
1219 msg_set_seqno(hdr, snd_nxt + U16_MAX / 2); 1128 msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2);
1220 1129
1221 if (mtyp == STATE_MSG) { 1130 if (mtyp == STATE_MSG) {
1222 if (!tipc_link_is_up(l)) 1131 if (!tipc_link_is_up(l))
1223 return; 1132 return;
1224 msg_set_next_sent(hdr, snd_nxt);
1225 1133
1226 /* Override rcvgap if there are packets in deferred queue */ 1134 /* Override rcvgap if there are packets in deferred queue */
1227 if (!skb_queue_empty(&l->deferdq)) 1135 if (!skb_queue_empty(&l->deferdq))
1228 rcvgap = buf_seqno(skb_peek(&l->deferdq)) - rcv_nxt; 1136 rcvgap = buf_seqno(skb_peek(&l->deferdq)) - l->rcv_nxt;
1229 if (rcvgap) { 1137 if (rcvgap) {
1230 msg_set_seq_gap(hdr, rcvgap); 1138 msg_set_seq_gap(hdr, rcvgap);
1231 l->stats.sent_nacks++; 1139 l->stats.sent_nacks++;
1232 } 1140 }
1233 msg_set_ack(hdr, rcv_last);
1234 msg_set_probe(hdr, probe); 1141 msg_set_probe(hdr, probe);
1235 if (probe) 1142 if (probe)
1236 l->stats.sent_probes++; 1143 l->stats.sent_probes++;
1237 l->stats.sent_states++; 1144 l->stats.sent_states++;
1145 l->rcv_unacked = 0;
1238 } else { 1146 } else {
1239 /* RESET_MSG or ACTIVATE_MSG */ 1147 /* RESET_MSG or ACTIVATE_MSG */
1240 msg_set_max_pkt(hdr, l->advertised_mtu); 1148 msg_set_max_pkt(hdr, l->advertised_mtu);
@@ -1250,7 +1158,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
1250} 1158}
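The "Compatibility" stamping above (snd_nxt + U16_MAX / 2) deliberately places protocol messages half the sequence space away from the data flow, so a peer that sequence-checks them can never mistake them for in-window data. A sketch of why that offset always lands outside the window, reusing the same serial arithmetic; the window bound here is an illustrative assumption:

#include <stdint.h>

static int more(uint16_t a, uint16_t b) { return (int16_t)(a - b) > 0; }

static int outside_window(uint16_t seqno, uint16_t rcv_nxt, uint16_t win)
{
	return more(seqno, (uint16_t)(rcv_nxt + win));
}

/* With snd_nxt == rcv_nxt and any win < 32767, a message stamped
 * snd_nxt + 32767 satisfies outside_window(), so it is never
 * delivered as data by a sequence-checking receiver. */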
1251 1159
1252/* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets 1160/* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets
1253 * with contents of the link's tranmsit and backlog queues. 1161 * with contents of the link's transmit and backlog queues.
1254 */ 1162 */
1255void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, 1163void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
1256 int mtyp, struct sk_buff_head *xmitq) 1164 int mtyp, struct sk_buff_head *xmitq)
@@ -1326,21 +1234,23 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1326{ 1234{
1327 struct tipc_msg *hdr = buf_msg(skb); 1235 struct tipc_msg *hdr = buf_msg(skb);
1328 u16 rcvgap = 0; 1236 u16 rcvgap = 0;
1329 u16 nacked_gap = msg_seq_gap(hdr); 1237 u16 ack = msg_ack(hdr);
1238 u16 gap = msg_seq_gap(hdr);
1330 u16 peers_snd_nxt = msg_next_sent(hdr); 1239 u16 peers_snd_nxt = msg_next_sent(hdr);
1331 u16 peers_tol = msg_link_tolerance(hdr); 1240 u16 peers_tol = msg_link_tolerance(hdr);
1332 u16 peers_prio = msg_linkprio(hdr); 1241 u16 peers_prio = msg_linkprio(hdr);
1333 u16 rcv_nxt = l->rcv_nxt; 1242 u16 rcv_nxt = l->rcv_nxt;
1243 int mtyp = msg_type(hdr);
1334 char *if_name; 1244 char *if_name;
1335 int rc = 0; 1245 int rc = 0;
1336 1246
1337 if (tipc_link_is_blocked(l)) 1247 if (tipc_link_is_blocked(l) || !xmitq)
1338 goto exit; 1248 goto exit;
1339 1249
1340 if (link_own_addr(l) > msg_prevnode(hdr)) 1250 if (link_own_addr(l) > msg_prevnode(hdr))
1341 l->net_plane = msg_net_plane(hdr); 1251 l->net_plane = msg_net_plane(hdr);
1342 1252
1343 switch (msg_type(hdr)) { 1253 switch (mtyp) {
1344 case RESET_MSG: 1254 case RESET_MSG:
1345 1255
1346 /* Ignore duplicate RESET with old session number */ 1256 /* Ignore duplicate RESET with old session number */
@@ -1367,12 +1277,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1367 if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) 1277 if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
1368 l->priority = peers_prio; 1278 l->priority = peers_prio;
1369 1279
1370 if (msg_type(hdr) == RESET_MSG) { 1280 /* ACTIVATE_MSG serves as PEER_RESET if link is already down */
1371 rc |= tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); 1281 if ((mtyp == RESET_MSG) || !link_is_up(l))
1372 } else if (!link_is_up(l)) { 1282 rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
1373 tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); 1283
1374 rc |= tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); 1284 /* ACTIVATE_MSG takes up link if it was already locally reset */
1375 } 1285 if ((mtyp == ACTIVATE_MSG) && (l->state == LINK_ESTABLISHING))
1286 rc = TIPC_LINK_UP_EVT;
1287
1376 l->peer_session = msg_session(hdr); 1288 l->peer_session = msg_session(hdr);
1377 l->peer_bearer_id = msg_bearer_id(hdr); 1289 l->peer_bearer_id = msg_bearer_id(hdr);
1378 if (l->mtu > msg_max_pkt(hdr)) 1290 if (l->mtu > msg_max_pkt(hdr))
@@ -1389,9 +1301,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1389 l->stats.recv_states++; 1301 l->stats.recv_states++;
1390 if (msg_probe(hdr)) 1302 if (msg_probe(hdr))
1391 l->stats.recv_probes++; 1303 l->stats.recv_probes++;
1392 rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); 1304
1393 if (!link_is_up(l)) 1305 if (!link_is_up(l)) {
1306 if (l->state == LINK_ESTABLISHING)
1307 rc = TIPC_LINK_UP_EVT;
1394 break; 1308 break;
1309 }
1395 1310
1396 /* Send NACK if peer has sent pkts we haven't received yet */ 1311 /* Send NACK if peer has sent pkts we haven't received yet */
1397 if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) 1312 if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
@@ -1399,11 +1314,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
1399 if (rcvgap || (msg_probe(hdr))) 1314 if (rcvgap || (msg_probe(hdr)))
1400 tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, 1315 tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap,
1401 0, 0, xmitq); 1316 0, 0, xmitq);
1402 tipc_link_release_pkts(l, msg_ack(hdr)); 1317 tipc_link_release_pkts(l, ack);
1403 1318
1404 /* If NACK, retransmit will now start at right position */ 1319 /* If NACK, retransmit will now start at right position */
1405 if (nacked_gap) { 1320 if (gap) {
1406 rc = tipc_link_retransm(l, nacked_gap, xmitq); 1321 rc = tipc_link_retrans(l, ack + 1, ack + gap, xmitq);
1407 l->stats.recv_nacks++; 1322 l->stats.recv_nacks++;
1408 } 1323 }
1409 1324
@@ -1416,6 +1331,188 @@ exit:
1416 return rc; 1331 return rc;
1417} 1332}
1418 1333
1334/* tipc_link_build_bc_proto_msg() - create broadcast protocol message
1335 */
1336static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast,
1337 u16 peers_snd_nxt,
1338 struct sk_buff_head *xmitq)
1339{
1340 struct sk_buff *skb;
1341 struct tipc_msg *hdr;
1342 struct sk_buff *dfrd_skb = skb_peek(&l->deferdq);
1343 u16 ack = l->rcv_nxt - 1;
1344 u16 gap_to = peers_snd_nxt - 1;
1345
1346 skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE,
1347 0, l->addr, link_own_addr(l), 0, 0, 0);
1348 if (!skb)
1349 return false;
1350 hdr = buf_msg(skb);
1351 msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1);
1352 msg_set_bcast_ack(hdr, ack);
1353 msg_set_bcgap_after(hdr, ack);
1354 if (dfrd_skb)
1355 gap_to = buf_seqno(dfrd_skb) - 1;
1356 msg_set_bcgap_to(hdr, gap_to);
1357 msg_set_non_seq(hdr, bcast);
1358 __skb_queue_tail(xmitq, skb);
1359 return true;
1360}
1361
1362/* tipc_link_build_bc_init_msg() - synchronize broadcast link endpoints.
1363 *
1364 * Give a newly added peer node the sequence number where it should
1365 * start receiving and acking broadcast packets.
1366 */
1367static void tipc_link_build_bc_init_msg(struct tipc_link *l,
1368 struct sk_buff_head *xmitq)
1369{
1370 struct sk_buff_head list;
1371
1372 __skb_queue_head_init(&list);
1373 if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list))
1374 return;
1375 tipc_link_xmit(l, &list, xmitq);
1376}
1377
1378/* tipc_link_bc_init_rcv - receive initial broadcast synch data from peer
1379 */
1380void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
1381{
1382 int mtyp = msg_type(hdr);
1383 u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
1384
1385 if (link_is_up(l))
1386 return;
1387
1388 if (msg_user(hdr) == BCAST_PROTOCOL) {
1389 l->rcv_nxt = peers_snd_nxt;
1390 l->state = LINK_ESTABLISHED;
1391 return;
1392 }
1393
1394 if (l->peer_caps & TIPC_BCAST_SYNCH)
1395 return;
1396
1397 if (msg_peer_node_is_up(hdr))
1398 return;
1399
1400 /* Compatibility: accept older, less safe initial synch data */
1401 if ((mtyp == RESET_MSG) || (mtyp == ACTIVATE_MSG))
1402 l->rcv_nxt = peers_snd_nxt;
1403}
1404
1405/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state
1406 */
1407void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
1408 struct sk_buff_head *xmitq)
1409{
1410 u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
1411
1412 if (!link_is_up(l))
1413 return;
1414
1415 if (!msg_peer_node_is_up(hdr))
1416 return;
1417
1418 l->bc_peer_is_up = true;
1419
1420 /* Ignore if peers_snd_nxt goes beyond receive window */
1421 if (more(peers_snd_nxt, l->rcv_nxt + l->window))
1422 return;
1423
1424 if (!more(peers_snd_nxt, l->rcv_nxt)) {
1425 l->nack_state = BC_NACK_SND_CONDITIONAL;
1426 return;
1427 }
1428
1429 /* Don't NACK if one was recently sent or peeked */
1430 if (l->nack_state == BC_NACK_SND_SUPPRESS) {
1431 l->nack_state = BC_NACK_SND_UNCONDITIONAL;
1432 return;
1433 }
1434
1435 /* Conditionally delay NACK sending until next synch rcv */
1436 if (l->nack_state == BC_NACK_SND_CONDITIONAL) {
1437 l->nack_state = BC_NACK_SND_UNCONDITIONAL;
1438 if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN)
1439 return;
1440 }
1441
1442 /* Send NACK now but suppress next one */
1443 tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq);
1444 l->nack_state = BC_NACK_SND_SUPPRESS;
1445}
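tipc_link_bc_sync_rcv() above throttles NACK traffic with a small per-link state machine: SUPPRESS swallows exactly one NACK, CONDITIONAL delays one sync round for small gaps, UNCONDITIONAL sends immediately. A compact model of that decision, with the state names mirroring the BC_NACK_SND_* values and TIPC_MIN_LINK_WIN passed in as a parameter:

enum nack_state {
	BC_NACK_SND_CONDITIONAL,
	BC_NACK_SND_UNCONDITIONAL,
	BC_NACK_SND_SUPPRESS,
};

/* returns 1 if a NACK should be sent now; gap = peers_snd_nxt - rcv_nxt */
static int bc_nack_decide(enum nack_state *st, unsigned short gap,
			  unsigned short min_win)
{
	if (*st == BC_NACK_SND_SUPPRESS) {
		*st = BC_NACK_SND_UNCONDITIONAL; /* swallow this one */
		return 0;
	}
	if (*st == BC_NACK_SND_CONDITIONAL) {
		*st = BC_NACK_SND_UNCONDITIONAL;
		if (gap < min_win)               /* small gap: wait a round */
			return 0;
	}
	*st = BC_NACK_SND_SUPPRESS;              /* sent: throttle the next */
	return 1;
}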
1446
1447void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
1448 struct sk_buff_head *xmitq)
1449{
1450 struct sk_buff *skb, *tmp;
1451 struct tipc_link *snd_l = l->bc_sndlink;
1452
1453 if (!link_is_up(l) || !l->bc_peer_is_up)
1454 return;
1455
1456 if (!more(acked, l->acked))
1457 return;
1458
1459 /* Skip over packets peer has already acked */
1460 skb_queue_walk(&snd_l->transmq, skb) {
1461 if (more(buf_seqno(skb), l->acked))
1462 break;
1463 }
1464
1465 /* Update/release the packets peer is acking now */
1466 skb_queue_walk_from_safe(&snd_l->transmq, skb, tmp) {
1467 if (more(buf_seqno(skb), acked))
1468 break;
1469 if (!--TIPC_SKB_CB(skb)->ackers) {
1470 __skb_unlink(skb, &snd_l->transmq);
1471 kfree_skb(skb);
1472 }
1473 }
1474 l->acked = acked;
1475 tipc_link_advance_backlog(snd_l, xmitq);
1476 if (unlikely(!skb_queue_empty(&snd_l->wakeupq)))
1477 link_prepare_wakeup(snd_l);
1478}
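tipc_link_bc_ack_rcv() above releases broadcast buffers by reference count: TIPC_SKB_CB(skb)->ackers is set to the number of peers at transmit time, and each peer's ack decrements it; the buffer is freed only when the last counted peer has acked. A self-contained model of that release walk (simplified: it does not separately skip the packets this peer acked in an earlier round):

#include <stdint.h>
#include <stdlib.h>

struct bc_pkt {
	uint16_t seqno;
	uint16_t ackers;	/* peers still expected to ack */
	struct bc_pkt *next;
};

static int more(uint16_t a, uint16_t b) { return (int16_t)(a - b) > 0; }

/* release everything up to 'acked' that no peer still needs */
static void bc_ack_rcv(struct bc_pkt **transmq, uint16_t acked)
{
	struct bc_pkt **p = transmq;

	while (*p && !more((*p)->seqno, acked)) {
		if (--(*p)->ackers == 0) {
			struct bc_pkt *done = *p;

			*p = done->next;	/* unlink and free */
			free(done);
		} else {
			p = &(*p)->next;	/* another peer still owes an ack */
		}
	}
}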
1479
1480/* tipc_link_bc_nack_rcv(): receive broadcast nack message
1481 */
1482int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
1483 struct sk_buff_head *xmitq)
1484{
1485 struct tipc_msg *hdr = buf_msg(skb);
1486 u32 dnode = msg_destnode(hdr);
1487 int mtyp = msg_type(hdr);
1488 u16 acked = msg_bcast_ack(hdr);
1489 u16 from = acked + 1;
1490 u16 to = msg_bcgap_to(hdr);
1491 u16 peers_snd_nxt = to + 1;
1492 int rc = 0;
1493
1494 kfree_skb(skb);
1495
1496 if (!tipc_link_is_up(l) || !l->bc_peer_is_up)
1497 return 0;
1498
1499 if (mtyp != STATE_MSG)
1500 return 0;
1501
1502 if (dnode == link_own_addr(l)) {
1503 tipc_link_bc_ack_rcv(l, acked, xmitq);
1504 rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq);
1505 l->stats.recv_nacks++;
1506 return rc;
1507 }
1508
1509 /* Msg for other node => suppress own NACK at next sync if applicable */
1510 if (more(peers_snd_nxt, l->rcv_nxt) && !less(l->rcv_nxt, from))
1511 l->nack_state = BC_NACK_SND_SUPPRESS;
1512
1513 return 0;
1514}
1515
1419void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) 1516void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
1420{ 1517{
1421 int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); 1518 int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE);
@@ -1480,7 +1577,7 @@ static void link_reset_statistics(struct tipc_link *l_ptr)
1480static void link_print(struct tipc_link *l, const char *str) 1577static void link_print(struct tipc_link *l, const char *str)
1481{ 1578{
1482 struct sk_buff *hskb = skb_peek(&l->transmq); 1579 struct sk_buff *hskb = skb_peek(&l->transmq);
1483 u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt; 1580 u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt - 1;
1484 u16 tail = l->snd_nxt - 1; 1581 u16 tail = l->snd_nxt - 1;
1485 1582
1486 pr_info("%s Link <%s> state %x\n", str, l->name, l->state); 1583 pr_info("%s Link <%s> state %x\n", str, l->name, l->state);
@@ -1704,7 +1801,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
1704 if (tipc_link_is_up(link)) 1801 if (tipc_link_is_up(link))
1705 if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) 1802 if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP))
1706 goto attr_msg_full; 1803 goto attr_msg_full;
1707 if (tipc_link_is_active(link)) 1804 if (link->active)
1708 if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) 1805 if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE))
1709 goto attr_msg_full; 1806 goto attr_msg_full;
1710 1807
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 39ff8b6919a4..66d859b66c84 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -66,7 +66,8 @@ enum {
66 */ 66 */
67enum { 67enum {
68 TIPC_LINK_UP_EVT = 1, 68 TIPC_LINK_UP_EVT = 1,
69 TIPC_LINK_DOWN_EVT = (1 << 1) 69 TIPC_LINK_DOWN_EVT = (1 << 1),
70 TIPC_LINK_SND_BC_ACK = (1 << 2)
70}; 71};
71 72
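Adding TIPC_LINK_SND_BC_ACK turns the link return value into a proper event mask: callers accumulate with |= and test individual bits, and the receive loop in tipc_link_rcv() keeps iterating as long as no bit other than the broadcast-ack request is set (the rc & ~TIPC_LINK_SND_BC_ACK test). A small sketch of that convention:

#include <stdio.h>

enum {
	TIPC_LINK_UP_EVT     = 1,
	TIPC_LINK_DOWN_EVT   = (1 << 1),
	TIPC_LINK_SND_BC_ACK = (1 << 2)
};

int main(void)
{
	int rc = 0;

	rc |= TIPC_LINK_SND_BC_ACK;		/* accumulate, never assign */
	if (!(rc & ~TIPC_LINK_SND_BC_ACK))
		printf("benign: keep receiving\n");
	rc |= TIPC_LINK_DOWN_EVT;
	if (rc & TIPC_LINK_DOWN_EVT)
		printf("fatal: reset the link\n");
	return 0;
}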
72/* Starting value for maximum packet size negotiation on unicast links 73/* Starting value for maximum packet size negotiation on unicast links
@@ -110,7 +111,7 @@ struct tipc_stats {
110 * @name: link name character string 111 * @name: link name character string
111 * @media_addr: media address to use when sending messages over link 112 * @media_addr: media address to use when sending messages over link
112 * @timer: link timer 113 * @timer: link timer
113 * @owner: pointer to peer node 114 * @net: pointer to namespace struct
114 * @refcnt: reference counter for permanent references (owner node & timer) 115 * @refcnt: reference counter for permanent references (owner node & timer)
115 * @peer_session: link session # being used by peer end of link 116 * @peer_session: link session # being used by peer end of link
116 * @peer_bearer_id: bearer id used by link's peer endpoint 117 * @peer_bearer_id: bearer id used by link's peer endpoint
@@ -119,6 +120,7 @@ struct tipc_stats {
119 * @keepalive_intv: link keepalive timer interval 120 * @keepalive_intv: link keepalive timer interval
120 * @abort_limit: # of unacknowledged continuity probes needed to reset link 121 * @abort_limit: # of unacknowledged continuity probes needed to reset link
121 * @state: current state of link FSM 122 * @state: current state of link FSM
123 * @peer_caps: bitmap describing capabilities of peer node
122 * @silent_intv_cnt: # of timer intervals without any reception from peer 124 * @silent_intv_cnt: # of timer intervals without any reception from peer
123 * @proto_msg: template for control messages generated by link 125 * @proto_msg: template for control messages generated by link
124 * @pmsg: convenience pointer to "proto_msg" field 126 * @pmsg: convenience pointer to "proto_msg" field
@@ -134,6 +136,8 @@ struct tipc_stats {
134 * @snt_nxt: next sequence number to use for outbound messages 136 * @snt_nxt: next sequence number to use for outbound messages
135 * @last_retransmitted: sequence number of most recently retransmitted message 137 * @last_retransmitted: sequence number of most recently retransmitted message
136 * @stale_count: # of identical retransmit requests made by peer 138 * @stale_count: # of identical retransmit requests made by peer
139 * @ackers: # of peers that need to ack each packet before it can be released
140 * @acked: seqno of last packet acked by a certain peer. Used for broadcast.
137 * @rcv_nxt: next sequence number to expect for inbound messages 141 * @rcv_nxt: next sequence number to expect for inbound messages
138 * @deferred_queue: deferred queue saved OOS b'cast message received from node 142 * @deferred_queue: deferred queue saved OOS b'cast message received from node
139 * @unacked_window: # of inbound messages rx'd without ack'ing back to peer 143 * @unacked_window: # of inbound messages rx'd without ack'ing back to peer
@@ -143,13 +147,14 @@ struct tipc_stats {
143 * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate 147 * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate
144 * @long_msg_seq_no: next identifier to use for outbound fragmented messages 148 * @long_msg_seq_no: next identifier to use for outbound fragmented messages
145 * @reasm_buf: head of partially reassembled inbound message fragments 149 * @reasm_buf: head of partially reassembled inbound message fragments
150 * @bc_rcvr: marks that this is a broadcast receiver link
146 * @stats: collects statistics regarding link activity 151 * @stats: collects statistics regarding link activity
147 */ 152 */
148struct tipc_link { 153struct tipc_link {
149 u32 addr; 154 u32 addr;
150 char name[TIPC_MAX_LINK_NAME]; 155 char name[TIPC_MAX_LINK_NAME];
151 struct tipc_media_addr *media_addr; 156 struct tipc_media_addr *media_addr;
152 struct tipc_node *owner; 157 struct net *net;
153 158
154 /* Management and link supervision data */ 159 /* Management and link supervision data */
155 u32 peer_session; 160 u32 peer_session;
@@ -159,6 +164,8 @@ struct tipc_link {
159 unsigned long keepalive_intv; 164 unsigned long keepalive_intv;
160 u32 abort_limit; 165 u32 abort_limit;
161 u32 state; 166 u32 state;
167 u16 peer_caps;
168 bool active;
162 u32 silent_intv_cnt; 169 u32 silent_intv_cnt;
163 struct { 170 struct {
164 unchar hdr[INT_H_SIZE]; 171 unchar hdr[INT_H_SIZE];
@@ -185,7 +192,7 @@ struct tipc_link {
185 } backlog[5]; 192 } backlog[5];
186 u16 snd_nxt; 193 u16 snd_nxt;
187 u16 last_retransm; 194 u16 last_retransm;
188 u32 window; 195 u16 window;
189 u32 stale_count; 196 u32 stale_count;
190 197
191 /* Reception */ 198 /* Reception */
@@ -201,42 +208,50 @@ struct tipc_link {
201 /* Fragmentation/reassembly */ 208 /* Fragmentation/reassembly */
202 struct sk_buff *reasm_buf; 209 struct sk_buff *reasm_buf;
203 210
211 /* Broadcast */
212 u16 ackers;
213 u16 acked;
214 struct tipc_link *bc_rcvlink;
215 struct tipc_link *bc_sndlink;
216 int nack_state;
217 bool bc_peer_is_up;
218
204 /* Statistics */ 219 /* Statistics */
205 struct tipc_stats stats; 220 struct tipc_stats stats;
206}; 221};
207 222
208bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session, 223bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
209 u32 ownnode, u32 peer, struct tipc_media_addr *maddr, 224 int tolerance, char net_plane, u32 mtu, int priority,
210 struct sk_buff_head *inputq, struct sk_buff_head *namedq, 225 int window, u32 session, u32 ownnode, u32 peer,
226 u16 peer_caps,
227 struct tipc_link *bc_sndlink,
228 struct tipc_link *bc_rcvlink,
229 struct sk_buff_head *inputq,
230 struct sk_buff_head *namedq,
211 struct tipc_link **link); 231 struct tipc_link **link);
232bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
233 int mtu, int window, u16 peer_caps,
234 struct sk_buff_head *inputq,
235 struct sk_buff_head *namedq,
236 struct tipc_link *bc_sndlink,
237 struct tipc_link **link);
212void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, 238void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
213 int mtyp, struct sk_buff_head *xmitq); 239 int mtyp, struct sk_buff_head *xmitq);
214void tipc_link_build_bcast_sync_msg(struct tipc_link *l, 240void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
215 struct sk_buff_head *xmitq);
216int tipc_link_fsm_evt(struct tipc_link *l, int evt); 241int tipc_link_fsm_evt(struct tipc_link *l, int evt);
217void tipc_link_reset_fragments(struct tipc_link *l_ptr); 242void tipc_link_reset_fragments(struct tipc_link *l_ptr);
218bool tipc_link_is_up(struct tipc_link *l); 243bool tipc_link_is_up(struct tipc_link *l);
244bool tipc_link_peer_is_down(struct tipc_link *l);
219bool tipc_link_is_reset(struct tipc_link *l); 245bool tipc_link_is_reset(struct tipc_link *l);
246bool tipc_link_is_establishing(struct tipc_link *l);
220bool tipc_link_is_synching(struct tipc_link *l); 247bool tipc_link_is_synching(struct tipc_link *l);
221bool tipc_link_is_failingover(struct tipc_link *l); 248bool tipc_link_is_failingover(struct tipc_link *l);
222bool tipc_link_is_blocked(struct tipc_link *l); 249bool tipc_link_is_blocked(struct tipc_link *l);
223int tipc_link_is_active(struct tipc_link *l_ptr); 250void tipc_link_set_active(struct tipc_link *l, bool active);
224void tipc_link_purge_queues(struct tipc_link *l_ptr);
225void tipc_link_purge_backlog(struct tipc_link *l);
226void tipc_link_reset(struct tipc_link *l_ptr); 251void tipc_link_reset(struct tipc_link *l_ptr);
227int __tipc_link_xmit(struct net *net, struct tipc_link *link,
228 struct sk_buff_head *list);
229int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, 252int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list,
230 struct sk_buff_head *xmitq); 253 struct sk_buff_head *xmitq);
231void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, 254void tipc_link_set_queue_limits(struct tipc_link *l, u32 window);
232 u32 gap, u32 tolerance, u32 priority);
233void tipc_link_push_packets(struct tipc_link *l_ptr);
234u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *buf);
235void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window);
236void tipc_link_retransmit(struct tipc_link *l_ptr,
237 struct sk_buff *start, u32 retransmits);
238struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list,
239 const struct sk_buff *skb);
240 255
241int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); 256int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb);
242int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); 257int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info);
@@ -246,5 +261,23 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
246int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); 261int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq);
247int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, 262int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
248 struct sk_buff_head *xmitq); 263 struct sk_buff_head *xmitq);
249 264int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
265void tipc_link_add_bc_peer(struct tipc_link *snd_l,
266 struct tipc_link *uc_l,
267 struct sk_buff_head *xmitq);
268void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
269 struct tipc_link *rcv_l,
270 struct sk_buff_head *xmitq);
271int tipc_link_bc_peers(struct tipc_link *l);
272void tipc_link_set_mtu(struct tipc_link *l, int mtu);
273int tipc_link_mtu(struct tipc_link *l);
274void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
275 struct sk_buff_head *xmitq);
276void tipc_link_build_bc_sync_msg(struct tipc_link *l,
277 struct sk_buff_head *xmitq);
278void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
279void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
280 struct sk_buff_head *xmitq);
281int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
282 struct sk_buff_head *xmitq);
250#endif 283#endif
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index c5ac436235e0..8740930f0787 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -121,7 +121,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
121{ 121{
122 struct sk_buff *head = *headbuf; 122 struct sk_buff *head = *headbuf;
123 struct sk_buff *frag = *buf; 123 struct sk_buff *frag = *buf;
124 struct sk_buff *tail; 124 struct sk_buff *tail = NULL;
125 struct tipc_msg *msg; 125 struct tipc_msg *msg;
126 u32 fragid; 126 u32 fragid;
127 int delta; 127 int delta;
@@ -141,9 +141,15 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
141 if (unlikely(skb_unclone(frag, GFP_ATOMIC))) 141 if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
142 goto err; 142 goto err;
143 head = *headbuf = frag; 143 head = *headbuf = frag;
144 skb_frag_list_init(head);
145 TIPC_SKB_CB(head)->tail = NULL;
146 *buf = NULL; 144 *buf = NULL;
145 TIPC_SKB_CB(head)->tail = NULL;
146 if (skb_is_nonlinear(head)) {
147 skb_walk_frags(head, tail) {
148 TIPC_SKB_CB(head)->tail = tail;
149 }
150 } else {
151 skb_frag_list_init(head);
152 }
147 return 0; 153 return 0;
148 } 154 }
149 155
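The reworked branch above handles a first fragment that arrives already non-linear: instead of unconditionally re-initializing the frag list (which would leak the chain), it walks the existing list so TIPC_SKB_CB(head)->tail again points at the last fragment. A simplified stand-in model of that walk, using plain structs rather than kernel skbs:

#include <stddef.h>

struct frag {
	struct frag *next;
};

struct head_buf {
	struct frag *frag_list;	/* models skb_shinfo(head)->frag_list */
	struct frag *tail;	/* models TIPC_SKB_CB(head)->tail */
};

static void head_init(struct head_buf *h)
{
	struct frag *f;

	h->tail = NULL;
	if (h->frag_list) {			/* skb_is_nonlinear() case */
		for (f = h->frag_list; f; f = f->next)
			h->tail = f;		/* last iteration wins */
	}
	/* else: empty list, nothing to rebuild */
}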
@@ -176,7 +182,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
176 *buf = NULL; 182 *buf = NULL;
177 return 0; 183 return 0;
178err: 184err:
179 pr_warn_ratelimited("Unable to build fragment list\n");
180 kfree_skb(*buf); 185 kfree_skb(*buf);
181 kfree_skb(*headbuf); 186 kfree_skb(*headbuf);
182 *buf = *headbuf = NULL; 187 *buf = *headbuf = NULL;
@@ -559,18 +564,22 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
559/* tipc_msg_reassemble() - clone a buffer chain of fragments and 564/* tipc_msg_reassemble() - clone a buffer chain of fragments and
560 * reassemble the clones into one message 565 * reassemble the clones into one message
561 */ 566 */
562struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list) 567bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq)
563{ 568{
564 struct sk_buff *skb; 569 struct sk_buff *skb, *_skb;
565 struct sk_buff *frag = NULL; 570 struct sk_buff *frag = NULL;
566 struct sk_buff *head = NULL; 571 struct sk_buff *head = NULL;
567 int hdr_sz; 572 int hdr_len;
568 573
569 /* Copy header if single buffer */ 574 /* Copy header if single buffer */
570 if (skb_queue_len(list) == 1) { 575 if (skb_queue_len(list) == 1) {
571 skb = skb_peek(list); 576 skb = skb_peek(list);
572 hdr_sz = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb)); 577 hdr_len = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb));
573 return __pskb_copy(skb, hdr_sz, GFP_ATOMIC); 578 _skb = __pskb_copy(skb, hdr_len, GFP_ATOMIC);
579 if (!_skb)
580 return false;
581 __skb_queue_tail(rcvq, _skb);
582 return true;
574 } 583 }
575 584
576 /* Clone all fragments and reassemble */ 585 /* Clone all fragments and reassemble */
@@ -584,9 +593,41 @@ struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list)
584 if (!head) 593 if (!head)
585 goto error; 594 goto error;
586 } 595 }
587 return frag; 596 __skb_queue_tail(rcvq, frag);
597 return true;
588error: 598error:
589 pr_warn("Failed to clone local mcast rcv buffer\n"); 599
590 kfree_skb(head); 600 kfree_skb(head);
591 return NULL; 601 return false;
602}
603
604/* __tipc_skb_queue_sorted(): sort pkt into list according to sequence number
605 * @list: list to be appended to
606 * @seqno: sequence number of buffer to add
607 * @skb: buffer to add
608 */
609void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
610 struct sk_buff *skb)
611{
612 struct sk_buff *_skb, *tmp;
613
614 if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) {
615 __skb_queue_head(list, skb);
616 return;
617 }
618
619 if (more(seqno, buf_seqno(skb_peek_tail(list)))) {
620 __skb_queue_tail(list, skb);
621 return;
622 }
623
624 skb_queue_walk_safe(list, _skb, tmp) {
625 if (more(seqno, buf_seqno(_skb)))
626 continue;
627 if (seqno == buf_seqno(_skb))
628 break;
629 __skb_queue_before(list, _skb, skb);
630 return;
631 }
632 kfree_skb(skb);
592} 633}
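A user-space model of the insert policy above, with the head/tail fast paths collapsed into one walk: buffers land in seqno order and an exact duplicate is freed instead of being queued twice, which is what lets tipc_link_rcv() trust that skb_peek(defq) is always the lowest outstanding sequence number:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct buf {
	uint16_t seqno;
	struct buf *next;
};

static int less(uint16_t a, uint16_t b) { return (int16_t)(a - b) < 0; }

static void queue_sorted(struct buf **list, struct buf *nbuf)
{
	struct buf **p = list;

	for (; *p; p = &(*p)->next) {
		if ((*p)->seqno == nbuf->seqno) {
			free(nbuf);		/* exact duplicate: drop */
			return;
		}
		if (less(nbuf->seqno, (*p)->seqno))
			break;			/* insert before larger */
	}
	nbuf->next = *p;
	*p = nbuf;
}

int main(void)
{
	struct buf *q = NULL, *b;
	uint16_t in[] = { 7, 5, 7, 6 };		/* 7 arrives twice */
	unsigned int i;

	for (i = 0; i < 4; i++) {
		b = calloc(1, sizeof(*b));
		b->seqno = in[i];
		queue_sorted(&q, b);
	}
	for (b = q; b; b = b->next)
		printf("%u ", (unsigned)b->seqno);	/* 5 6 7 */
	printf("\n");
	return 0;
}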
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index a82c5848d4bc..55778a0aebf3 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -112,6 +112,7 @@ struct tipc_skb_cb {
112 bool wakeup_pending; 112 bool wakeup_pending;
113 u16 chain_sz; 113 u16 chain_sz;
114 u16 chain_imp; 114 u16 chain_imp;
115 u16 ackers;
115}; 116};
116 117
117#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) 118#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
@@ -357,7 +358,7 @@ static inline u32 msg_importance(struct tipc_msg *m)
357 if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m))) 358 if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m)))
358 return usr; 359 return usr;
359 if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)) 360 if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))
360 return msg_bits(m, 5, 13, 0x7); 361 return msg_bits(m, 9, 0, 0x7);
361 return TIPC_SYSTEM_IMPORTANCE; 362 return TIPC_SYSTEM_IMPORTANCE;
362} 363}
363 364
@@ -366,7 +367,7 @@ static inline void msg_set_importance(struct tipc_msg *m, u32 i)
366 int usr = msg_user(m); 367 int usr = msg_user(m);
367 368
368 if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))) 369 if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)))
369 msg_set_bits(m, 5, 13, 0x7, i); 370 msg_set_bits(m, 9, 0, 0x7, i);
370 else if (i < TIPC_SYSTEM_IMPORTANCE) 371 else if (i < TIPC_SYSTEM_IMPORTANCE)
371 msg_set_user(m, i); 372 msg_set_user(m, i);
372 else 373 else
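The importance field moves from word 5, bits 13-15, to word 9, bits 0-2, of the header; the accessors above change only their (word, position) arguments. A host-byte-order model of the generic bit-field helpers they rely on (the kernel versions additionally byte-swap the header word with ntohl()/htonl()):

#include <stdint.h>

static uint32_t msg_bits(const uint32_t *hdr, int w, int pos, uint32_t mask)
{
	return (hdr[w] >> pos) & mask;
}

static void msg_set_bits(uint32_t *hdr, int w, int pos, uint32_t mask,
			 uint32_t val)
{
	hdr[w] &= ~(mask << pos);
	hdr[w] |= (val & mask) << pos;
}

/* importance after this change: word 9, bits 0-2 */
static uint32_t msg_importance_field(const uint32_t *hdr)
{
	return msg_bits(hdr, 9, 0, 0x7);
}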
@@ -600,6 +601,11 @@ static inline u32 msg_last_bcast(struct tipc_msg *m)
600 return msg_bits(m, 4, 16, 0xffff); 601 return msg_bits(m, 4, 16, 0xffff);
601} 602}
602 603
604static inline u32 msg_bc_snd_nxt(struct tipc_msg *m)
605{
606 return msg_last_bcast(m) + 1;
607}
608
603static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n) 609static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n)
604{ 610{
605 msg_set_bits(m, 4, 16, 0xffff, n); 611 msg_set_bits(m, 4, 16, 0xffff, n);
@@ -789,7 +795,9 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
789int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, 795int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
790 int offset, int dsz, int mtu, struct sk_buff_head *list); 796 int offset, int dsz, int mtu, struct sk_buff_head *list);
791bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); 797bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
792struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); 798bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
799void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
800 struct sk_buff *skb);
793 801
794static inline u16 buf_seqno(struct sk_buff *skb) 802static inline u16 buf_seqno(struct sk_buff *skb)
795{ 803{
@@ -862,38 +870,6 @@ static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list,
862 return skb; 870 return skb;
863} 871}
864 872
865/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number
866 * @list: list to be appended to
867 * @skb: buffer to add
868 * Returns true if queue should treated further, otherwise false
869 */
870static inline bool __tipc_skb_queue_sorted(struct sk_buff_head *list,
871 struct sk_buff *skb)
872{
873 struct sk_buff *_skb, *tmp;
874 struct tipc_msg *hdr = buf_msg(skb);
875 u16 seqno = msg_seqno(hdr);
876
877 if (skb_queue_empty(list) || (msg_user(hdr) == LINK_PROTOCOL)) {
878 __skb_queue_head(list, skb);
879 return true;
880 }
881 if (likely(less(seqno, buf_seqno(skb_peek(list))))) {
882 __skb_queue_head(list, skb);
883 return true;
884 }
885 if (!more(seqno, buf_seqno(skb_peek_tail(list)))) {
886 skb_queue_walk_safe(list, _skb, tmp) {
887 if (likely(less(seqno, buf_seqno(_skb)))) {
888 __skb_queue_before(list, _skb, skb);
889 return true;
890 }
891 }
892 }
893 __skb_queue_tail(list, skb);
894 return false;
895}
896
897/* tipc_skb_queue_splice_tail - append an skb list to lock protected list 873/* tipc_skb_queue_splice_tail - append an skb list to lock protected list
898 * @list: the new list to append. Not lock protected 874 * @list: the new list to append. Not lock protected
899 * @head: target list. Lock protected. 875 * @head: target list. Lock protected.
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index e6018b7eb197..c07612bab95c 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -102,7 +102,7 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb)
102 if (!oskb) 102 if (!oskb)
103 break; 103 break;
104 msg_set_destnode(buf_msg(oskb), dnode); 104 msg_set_destnode(buf_msg(oskb), dnode);
105 tipc_node_xmit_skb(net, oskb, dnode, dnode); 105 tipc_node_xmit_skb(net, oskb, dnode, 0);
106 } 106 }
107 rcu_read_unlock(); 107 rcu_read_unlock();
108 108
@@ -223,7 +223,7 @@ void tipc_named_node_up(struct net *net, u32 dnode)
223 &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); 223 &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]);
224 rcu_read_unlock(); 224 rcu_read_unlock();
225 225
226 tipc_node_xmit(net, &head, dnode, dnode); 226 tipc_node_xmit(net, &head, dnode, 0);
227} 227}
228 228
229static void tipc_publ_subscribe(struct net *net, struct publication *publ, 229static void tipc_publ_subscribe(struct net *net, struct publication *publ,
diff --git a/net/tipc/net.c b/net/tipc/net.c
index d6d1399ae229..77bf9113c7a7 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -112,14 +112,11 @@ int tipc_net_start(struct net *net, u32 addr)
112{ 112{
113 struct tipc_net *tn = net_generic(net, tipc_net_id); 113 struct tipc_net *tn = net_generic(net, tipc_net_id);
114 char addr_string[16]; 114 char addr_string[16];
115 int res;
116 115
117 tn->own_addr = addr; 116 tn->own_addr = addr;
118 tipc_named_reinit(net); 117 tipc_named_reinit(net);
119 tipc_sk_reinit(net); 118 tipc_sk_reinit(net);
120 res = tipc_bclink_init(net); 119 tipc_bcast_reinit(net);
121 if (res)
122 return res;
123 120
124 tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, 121 tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr,
125 TIPC_ZONE_SCOPE, 0, tn->own_addr); 122 TIPC_ZONE_SCOPE, 0, tn->own_addr);
@@ -142,7 +139,6 @@ void tipc_net_stop(struct net *net)
142 tn->own_addr); 139 tn->own_addr);
143 rtnl_lock(); 140 rtnl_lock();
144 tipc_bearer_stop(net); 141 tipc_bearer_stop(net);
145 tipc_bclink_stop(net);
146 tipc_node_stop(net); 142 tipc_node_stop(net);
147 rtnl_unlock(); 143 rtnl_unlock();
148 144
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 703875fd6cde..20cddec0a43c 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -72,7 +72,6 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
72static void tipc_node_link_down(struct tipc_node *n, int bearer_id, 72static void tipc_node_link_down(struct tipc_node *n, int bearer_id,
73 bool delete); 73 bool delete);
74static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); 74static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq);
75static void node_established_contact(struct tipc_node *n_ptr);
76static void tipc_node_delete(struct tipc_node *node); 75static void tipc_node_delete(struct tipc_node *node);
77static void tipc_node_timeout(unsigned long data); 76static void tipc_node_timeout(unsigned long data);
78static void tipc_node_fsm_evt(struct tipc_node *n, int evt); 77static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
@@ -165,8 +164,10 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
165 INIT_LIST_HEAD(&n_ptr->list); 164 INIT_LIST_HEAD(&n_ptr->list);
166 INIT_LIST_HEAD(&n_ptr->publ_list); 165 INIT_LIST_HEAD(&n_ptr->publ_list);
167 INIT_LIST_HEAD(&n_ptr->conn_sks); 166 INIT_LIST_HEAD(&n_ptr->conn_sks);
168 skb_queue_head_init(&n_ptr->bclink.namedq); 167 skb_queue_head_init(&n_ptr->bc_entry.namedq);
169 __skb_queue_head_init(&n_ptr->bclink.deferdq); 168 skb_queue_head_init(&n_ptr->bc_entry.inputq1);
169 __skb_queue_head_init(&n_ptr->bc_entry.arrvq);
170 skb_queue_head_init(&n_ptr->bc_entry.inputq2);
170 hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); 171 hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
171 list_for_each_entry_rcu(temp_node, &tn->node_list, list) { 172 list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
172 if (n_ptr->addr < temp_node->addr) 173 if (n_ptr->addr < temp_node->addr)
@@ -177,6 +178,18 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
177 n_ptr->signature = INVALID_NODE_SIG; 178 n_ptr->signature = INVALID_NODE_SIG;
178 n_ptr->active_links[0] = INVALID_BEARER_ID; 179 n_ptr->active_links[0] = INVALID_BEARER_ID;
179 n_ptr->active_links[1] = INVALID_BEARER_ID; 180 n_ptr->active_links[1] = INVALID_BEARER_ID;
181 if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr,
182 U16_MAX, tipc_bc_sndlink(net)->window,
183 n_ptr->capabilities,
184 &n_ptr->bc_entry.inputq1,
185 &n_ptr->bc_entry.namedq,
186 tipc_bc_sndlink(net),
187 &n_ptr->bc_entry.link)) {
188 pr_warn("Broadcast rcv link creation failed, no memory\n");
189 kfree(n_ptr);
190 n_ptr = NULL;
191 goto exit;
192 }
180 tipc_node_get(n_ptr); 193 tipc_node_get(n_ptr);
181 setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr); 194 setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr);
182 n_ptr->keepalive_intv = U32_MAX; 195 n_ptr->keepalive_intv = U32_MAX;
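Every peer node now owns a private broadcast receive link, created together with the node itself; note that a failed tipc_link_bc_create() abandons the whole node rather than leaving it half-built. A minimal sketch of that create-or-roll-back pattern, with hypothetical stand-ins (bc_link_create(), node_create()) for the kernel helpers:

#include <stdlib.h>

struct bc_link { int window; };			/* placeholder */
struct node    { struct bc_link *bc_rcv; };

static struct bc_link *bc_link_create(void)	/* hypothetical stand-in */
{
	return calloc(1, sizeof(struct bc_link));
}

static struct node *node_create(void)
{
	struct node *n = calloc(1, sizeof(*n));

	if (!n)
		return NULL;
	n->bc_rcv = bc_link_create();
	if (!n->bc_rcv) {		/* roll back partial construction */
		free(n);
		return NULL;
	}
	return n;
}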
@@ -203,6 +216,7 @@ static void tipc_node_delete(struct tipc_node *node)
203{ 216{
204 list_del_rcu(&node->list); 217 list_del_rcu(&node->list);
205 hlist_del_rcu(&node->hash); 218 hlist_del_rcu(&node->hash);
219 kfree(node->bc_entry.link);
206 kfree_rcu(node, rcu); 220 kfree_rcu(node, rcu);
207} 221}
208 222
@@ -317,7 +331,11 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
317 struct tipc_link *ol = node_active_link(n, 0); 331 struct tipc_link *ol = node_active_link(n, 0);
318 struct tipc_link *nl = n->links[bearer_id].link; 332 struct tipc_link *nl = n->links[bearer_id].link;
319 333
320 if (!nl || !tipc_link_is_up(nl)) 334 if (!nl)
335 return;
336
337 tipc_link_fsm_evt(nl, LINK_ESTABLISH_EVT);
338 if (!tipc_link_is_up(nl))
321 return; 339 return;
322 340
323 n->working_links++; 341 n->working_links++;
@@ -328,6 +346,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
328 n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE; 346 n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE;
329 347
330 tipc_bearer_add_dest(n->net, bearer_id, n->addr); 348 tipc_bearer_add_dest(n->net, bearer_id, n->addr);
349 tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id);
331 350
332 pr_debug("Established link <%s> on network plane %c\n", 351 pr_debug("Established link <%s> on network plane %c\n",
333 nl->name, nl->net_plane); 352 nl->name, nl->net_plane);
@@ -336,8 +355,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
336 if (!ol) { 355 if (!ol) {
337 *slot0 = bearer_id; 356 *slot0 = bearer_id;
338 *slot1 = bearer_id; 357 *slot1 = bearer_id;
339 tipc_link_build_bcast_sync_msg(nl, xmitq); 358 tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT);
340 node_established_contact(n); 359 n->action_flags |= TIPC_NOTIFY_NODE_UP;
360 tipc_bcast_add_peer(n->net, nl, xmitq);
341 return; 361 return;
342 } 362 }
343 363
@@ -346,8 +366,11 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
346 pr_debug("Old link <%s> becomes standby\n", ol->name); 366 pr_debug("Old link <%s> becomes standby\n", ol->name);
347 *slot0 = bearer_id; 367 *slot0 = bearer_id;
348 *slot1 = bearer_id; 368 *slot1 = bearer_id;
369 tipc_link_set_active(nl, true);
370 tipc_link_set_active(ol, false);
349 } else if (nl->priority == ol->priority) { 371 } else if (nl->priority == ol->priority) {
350 *slot0 = bearer_id; 372 tipc_link_set_active(nl, true);
373 *slot1 = bearer_id;
351 } else { 374 } else {
352 pr_debug("New link <%s> is standby\n", nl->name); 375 pr_debug("New link <%s> is standby\n", nl->name);
353 } 376 }
@@ -416,10 +439,18 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
416 } 439 }
417 440
418 if (!tipc_node_is_up(n)) { 441 if (!tipc_node_is_up(n)) {
442 if (tipc_link_peer_is_down(l))
443 tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
444 tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
445 tipc_link_fsm_evt(l, LINK_RESET_EVT);
419 tipc_link_reset(l); 446 tipc_link_reset(l);
447 tipc_link_build_reset_msg(l, xmitq);
448 *maddr = &n->links[*bearer_id].maddr;
420 node_lost_contact(n, &le->inputq); 449 node_lost_contact(n, &le->inputq);
450 tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id);
421 return; 451 return;
422 } 452 }
453 tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id);
423 454
424 /* There is still a working link => initiate failover */ 455 /* There is still a working link => initiate failover */
425 tnl = node_active_link(n, 0); 456 tnl = node_active_link(n, 0);
@@ -428,6 +459,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
428 n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); 459 n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1);
429 tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); 460 tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq);
430 tipc_link_reset(l); 461 tipc_link_reset(l);
462 tipc_link_fsm_evt(l, LINK_RESET_EVT);
431 tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); 463 tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
432 tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); 464 tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT);
433 *maddr = &n->links[tnl->bearer_id].maddr; 465 *maddr = &n->links[tnl->bearer_id].maddr;
@@ -437,20 +469,28 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
437static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) 469static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
438{ 470{
439 struct tipc_link_entry *le = &n->links[bearer_id]; 471 struct tipc_link_entry *le = &n->links[bearer_id];
472 struct tipc_link *l = le->link;
440 struct tipc_media_addr *maddr; 473 struct tipc_media_addr *maddr;
441 struct sk_buff_head xmitq; 474 struct sk_buff_head xmitq;
442 475
476 if (!l)
477 return;
478
443 __skb_queue_head_init(&xmitq); 479 __skb_queue_head_init(&xmitq);
444 480
445 tipc_node_lock(n); 481 tipc_node_lock(n);
446 __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); 482 if (!tipc_link_is_establishing(l)) {
447 if (delete && le->link) { 483 __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
448 kfree(le->link); 484 if (delete) {
449 le->link = NULL; 485 kfree(l);
450 n->link_cnt--; 486 le->link = NULL;
487 n->link_cnt--;
488 }
489 } else {
490 /* Defuse pending tipc_node_link_up() */
491 tipc_link_fsm_evt(l, LINK_RESET_EVT);
451 } 492 }
452 tipc_node_unlock(n); 493 tipc_node_unlock(n);
453
454 tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); 494 tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
455 tipc_sk_rcv(n->net, &le->inputq); 495 tipc_sk_rcv(n->net, &le->inputq);
456} 496}
@@ -474,6 +514,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
474 bool link_up = false; 514 bool link_up = false;
475 bool accept_addr = false; 515 bool accept_addr = false;
476 bool reset = true; 516 bool reset = true;
517 char *if_name;
477 518
478 *dupl_addr = false; 519 *dupl_addr = false;
479 *respond = false; 520 *respond = false;
@@ -560,13 +601,20 @@ void tipc_node_check_dest(struct net *net, u32 onode,
560 pr_warn("Cannot establish 3rd link to %x\n", n->addr); 601 pr_warn("Cannot establish 3rd link to %x\n", n->addr);
561 goto exit; 602 goto exit;
562 } 603 }
563 if (!tipc_link_create(n, b, mod(tipc_net(net)->random), 604 if_name = strchr(b->name, ':') + 1;
564 tipc_own_addr(net), onode, &le->maddr, 605 if (!tipc_link_create(net, if_name, b->identity, b->tolerance,
565 &le->inputq, &n->bclink.namedq, &l)) { 606 b->net_plane, b->mtu, b->priority,
607 b->window, mod(tipc_net(net)->random),
608 tipc_own_addr(net), onode,
609 n->capabilities,
610 tipc_bc_sndlink(n->net), n->bc_entry.link,
611 &le->inputq,
612 &n->bc_entry.namedq, &l)) {
566 *respond = false; 613 *respond = false;
567 goto exit; 614 goto exit;
568 } 615 }
569 tipc_link_reset(l); 616 tipc_link_reset(l);
617 tipc_link_fsm_evt(l, LINK_RESET_EVT);
570 if (n->state == NODE_FAILINGOVER) 618 if (n->state == NODE_FAILINGOVER)
571 tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); 619 tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
572 le->link = l; 620 le->link = l;
@@ -579,7 +627,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
579 memcpy(&le->maddr, maddr, sizeof(*maddr)); 627 memcpy(&le->maddr, maddr, sizeof(*maddr));
580exit: 628exit:
581 tipc_node_unlock(n); 629 tipc_node_unlock(n);
582 if (reset) 630 if (reset && !tipc_link_is_reset(l))
583 tipc_node_link_down(n, b->identity, false); 631 tipc_node_link_down(n, b->identity, false);
584 tipc_node_put(n); 632 tipc_node_put(n);
585} 633}
@@ -686,10 +734,10 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt)
686 break; 734 break;
687 case SELF_ESTABL_CONTACT_EVT: 735 case SELF_ESTABL_CONTACT_EVT:
688 case PEER_LOST_CONTACT_EVT: 736 case PEER_LOST_CONTACT_EVT:
689 break;
690 case NODE_SYNCH_END_EVT: 737 case NODE_SYNCH_END_EVT:
691 case NODE_SYNCH_BEGIN_EVT:
692 case NODE_FAILOVER_BEGIN_EVT: 738 case NODE_FAILOVER_BEGIN_EVT:
739 break;
740 case NODE_SYNCH_BEGIN_EVT:
693 case NODE_FAILOVER_END_EVT: 741 case NODE_FAILOVER_END_EVT:
694 default: 742 default:
695 goto illegal_evt; 743 goto illegal_evt;
@@ -804,61 +852,36 @@ bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr)
804 return true; 852 return true;
805} 853}
806 854
807static void node_established_contact(struct tipc_node *n_ptr) 855static void node_lost_contact(struct tipc_node *n,
808{
809 tipc_node_fsm_evt(n_ptr, SELF_ESTABL_CONTACT_EVT);
810 n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP;
811 n_ptr->bclink.oos_state = 0;
812 n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net);
813 tipc_bclink_add_node(n_ptr->net, n_ptr->addr);
814}
815
816static void node_lost_contact(struct tipc_node *n_ptr,
817 struct sk_buff_head *inputq) 856 struct sk_buff_head *inputq)
818{ 857{
819 char addr_string[16]; 858 char addr_string[16];
820 struct tipc_sock_conn *conn, *safe; 859 struct tipc_sock_conn *conn, *safe;
821 struct tipc_link *l; 860 struct tipc_link *l;
822 struct list_head *conns = &n_ptr->conn_sks; 861 struct list_head *conns = &n->conn_sks;
823 struct sk_buff *skb; 862 struct sk_buff *skb;
824 struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
825 uint i; 863 uint i;
826 864
827 pr_debug("Lost contact with %s\n", 865 pr_debug("Lost contact with %s\n",
828 tipc_addr_string_fill(addr_string, n_ptr->addr)); 866 tipc_addr_string_fill(addr_string, n->addr));
829
830 /* Flush broadcast link info associated with lost node */
831 if (n_ptr->bclink.recv_permitted) {
832 __skb_queue_purge(&n_ptr->bclink.deferdq);
833 867
834 if (n_ptr->bclink.reasm_buf) { 868 /* Clean up broadcast state */
835 kfree_skb(n_ptr->bclink.reasm_buf); 869 tipc_bcast_remove_peer(n->net, n->bc_entry.link);
836 n_ptr->bclink.reasm_buf = NULL;
837 }
838
839 tipc_bclink_remove_node(n_ptr->net, n_ptr->addr);
840 tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ);
841
842 n_ptr->bclink.recv_permitted = false;
843 }
844 870
845 /* Abort any ongoing link failover */ 871 /* Abort any ongoing link failover */
846 for (i = 0; i < MAX_BEARERS; i++) { 872 for (i = 0; i < MAX_BEARERS; i++) {
847 l = n_ptr->links[i].link; 873 l = n->links[i].link;
848 if (l) 874 if (l)
849 tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT); 875 tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT);
850 } 876 }
851 877
852 /* Prevent re-contact with node until cleanup is done */
853 tipc_node_fsm_evt(n_ptr, SELF_LOST_CONTACT_EVT);
854
855 /* Notify publications from this node */ 878 /* Notify publications from this node */
856 n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; 879 n->action_flags |= TIPC_NOTIFY_NODE_DOWN;
857 880
858 /* Notify sockets connected to node */ 881 /* Notify sockets connected to node */
859 list_for_each_entry_safe(conn, safe, conns, list) { 882 list_for_each_entry_safe(conn, safe, conns, list) {
860 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, 883 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
861 SHORT_H_SIZE, 0, tn->own_addr, 884 SHORT_H_SIZE, 0, tipc_own_addr(n->net),
862 conn->peer_node, conn->port, 885 conn->peer_node, conn->port,
863 conn->peer_port, TIPC_ERR_NO_NODE); 886 conn->peer_port, TIPC_ERR_NO_NODE);
864 if (likely(skb)) 887 if (likely(skb))
@@ -920,18 +943,13 @@ void tipc_node_unlock(struct tipc_node *node)
920 publ_list = &node->publ_list; 943 publ_list = &node->publ_list;
921 944
922 node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | 945 node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
923 TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | 946 TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP);
924 TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT |
925 TIPC_BCAST_RESET);
926 947
927 spin_unlock_bh(&node->lock); 948 spin_unlock_bh(&node->lock);
928 949
929 if (flags & TIPC_NOTIFY_NODE_DOWN) 950 if (flags & TIPC_NOTIFY_NODE_DOWN)
930 tipc_publ_notify(net, publ_list, addr); 951 tipc_publ_notify(net, publ_list, addr);
931 952
932 if (flags & TIPC_WAKEUP_BCAST_USERS)
933 tipc_bclink_wakeup_users(net);
934
935 if (flags & TIPC_NOTIFY_NODE_UP) 953 if (flags & TIPC_NOTIFY_NODE_UP)
936 tipc_named_node_up(net, addr); 954 tipc_named_node_up(net, addr);
937 955
@@ -943,11 +961,6 @@ void tipc_node_unlock(struct tipc_node *node)
943 tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, 961 tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
944 link_id, addr); 962 link_id, addr);
945 963
946 if (flags & TIPC_BCAST_MSG_EVT)
947 tipc_bclink_input(net);
948
949 if (flags & TIPC_BCAST_RESET)
950 tipc_node_reset_links(node);
951} 964}
952 965
953/* Caller should hold node lock for the passed node */ 966/* Caller should hold node lock for the passed node */
@@ -1063,6 +1076,67 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
1063} 1076}
1064 1077
1065/** 1078/**
1079 * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node
1080 * @net: the applicable net namespace
1081 * @skb: TIPC packet
1082 * @bearer_id: id of bearer message arrived on
1083 *
1084 * Invoked with no locks held.
1085 */
1086static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id)
1087{
1088 int rc;
1089 struct sk_buff_head xmitq;
1090 struct tipc_bclink_entry *be;
1091 struct tipc_link_entry *le;
1092 struct tipc_msg *hdr = buf_msg(skb);
1093 int usr = msg_user(hdr);
1094 u32 dnode = msg_destnode(hdr);
1095 struct tipc_node *n;
1096
1097 __skb_queue_head_init(&xmitq);
1098
1099 /* If NACK for other node, let rcv link for that node peek into it */
1100 if ((usr == BCAST_PROTOCOL) && (dnode != tipc_own_addr(net)))
1101 n = tipc_node_find(net, dnode);
1102 else
1103 n = tipc_node_find(net, msg_prevnode(hdr));
1104 if (!n) {
1105 kfree_skb(skb);
1106 return;
1107 }
1108 be = &n->bc_entry;
1109 le = &n->links[bearer_id];
1110
1111 rc = tipc_bcast_rcv(net, be->link, skb);
1112
1113 /* Broadcast link reset may happen at reassembly failure */
1114 if (rc & TIPC_LINK_DOWN_EVT)
1115 tipc_node_reset_links(n);
1116
1117 /* Broadcast ACKs are sent on a unicast link */
1118 if (rc & TIPC_LINK_SND_BC_ACK) {
1119 tipc_node_lock(n);
1120 tipc_link_build_ack_msg(le->link, &xmitq);
1121 tipc_node_unlock(n);
1122 }
1123
1124 if (!skb_queue_empty(&xmitq))
1125 tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
1126
1127 /* Deliver. 'arrvq' is under inputq2's lock protection */
1128 if (!skb_queue_empty(&be->inputq1)) {
1129 spin_lock_bh(&be->inputq2.lock);
1130 spin_lock_bh(&be->inputq1.lock);
1131 skb_queue_splice_tail_init(&be->inputq1, &be->arrvq);
1132 spin_unlock_bh(&be->inputq1.lock);
1133 spin_unlock_bh(&be->inputq2.lock);
1134 tipc_sk_mcast_rcv(net, &be->arrvq, &be->inputq2);
1135 }
1136 tipc_node_put(n);
1137}
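Delivery above moves pending buffers from inputq1 to arrvq under both queue locks, always taken in the same order (inputq2 before inputq1), so consumers draining inputq2 never observe a partially spliced list; arrvq itself has no lock of its own and is guarded by inputq2's. A user-space model of that lock-ordered splice, with pthread mutexes standing in for the kernel spinlocks (queues assumed initialized with PTHREAD_MUTEX_INITIALIZER):

#include <pthread.h>
#include <stddef.h>

struct buf { struct buf *next; };

struct queue {
	pthread_mutex_t lock;
	struct buf *head, *tail;
};

/* move the whole chain; 'from' ends up empty
 * (models skb_queue_splice_tail_init) */
static void splice_tail_init(struct queue *from, struct queue *to)
{
	if (!from->head)
		return;
	if (to->tail)
		to->tail->next = from->head;
	else
		to->head = from->head;
	to->tail = from->tail;
	from->head = from->tail = NULL;
}

static void bc_deliver(struct queue *inputq1, struct queue *arrvq,
		       struct queue *inputq2)
{
	pthread_mutex_lock(&inputq2->lock);	/* fixed order: q2, then q1 */
	pthread_mutex_lock(&inputq1->lock);
	splice_tail_init(inputq1, arrvq);
	pthread_mutex_unlock(&inputq1->lock);
	pthread_mutex_unlock(&inputq2->lock);
}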
1138
1139/**
1066 * tipc_node_check_state - check and if necessary update node state 1140 * tipc_node_check_state - check and if necessary update node state
1067 * @skb: TIPC packet 1141 * @skb: TIPC packet
1068 * @bearer_id: identity of bearer delivering the packet 1142 * @bearer_id: identity of bearer delivering the packet
@@ -1116,7 +1190,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
1116 } 1190 }
1117 1191
1118 /* Ignore duplicate packets */ 1192 /* Ignore duplicate packets */
1119 if (less(oseqno, rcv_nxt)) 1193 if ((usr != LINK_PROTOCOL) && less(oseqno, rcv_nxt))
1120 return true; 1194 return true;
1121 1195
1122 /* Initiate or update failover mode if applicable */ 1196 /* Initiate or update failover mode if applicable */
@@ -1146,8 +1220,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
1146 if (!pl || !tipc_link_is_up(pl)) 1220 if (!pl || !tipc_link_is_up(pl))
1147 return true; 1221 return true;
1148 1222
1149 /* Initiate or update synch mode if applicable */ 1223 /* Initiate synch mode if applicable */
1150 if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) { 1224 if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) {
1151 syncpt = iseqno + exp_pkts - 1; 1225 syncpt = iseqno + exp_pkts - 1;
1152 if (!tipc_link_is_up(l)) { 1226 if (!tipc_link_is_up(l)) {
1153 tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); 1227 tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
@@ -1204,6 +1278,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
1204 int usr = msg_user(hdr); 1278 int usr = msg_user(hdr);
1205 int bearer_id = b->identity; 1279 int bearer_id = b->identity;
1206 struct tipc_link_entry *le; 1280 struct tipc_link_entry *le;
1281 u16 bc_ack = msg_bcast_ack(hdr);
1207 int rc = 0; 1282 int rc = 0;
1208 1283
1209 __skb_queue_head_init(&xmitq); 1284 __skb_queue_head_init(&xmitq);
@@ -1212,13 +1287,12 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
1212 if (unlikely(!tipc_msg_validate(skb))) 1287 if (unlikely(!tipc_msg_validate(skb)))
1213 goto discard; 1288 goto discard;
1214 1289
1215 /* Handle arrival of a non-unicast link packet */ 1290 /* Handle arrival of discovery or broadcast packet */
1216 if (unlikely(msg_non_seq(hdr))) { 1291 if (unlikely(msg_non_seq(hdr))) {
1217 if (usr == LINK_CONFIG) 1292 if (unlikely(usr == LINK_CONFIG))
1218 tipc_disc_rcv(net, skb, b); 1293 return tipc_disc_rcv(net, skb, b);
1219 else 1294 else
1220 tipc_bclink_rcv(net, skb); 1295 return tipc_node_bc_rcv(net, skb, bearer_id);
1221 return;
1222 } 1296 }
1223 1297
1224 /* Locate neighboring node that sent packet */ 1298 /* Locate neighboring node that sent packet */
@@ -1227,19 +1301,18 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
1227 goto discard; 1301 goto discard;
1228 le = &n->links[bearer_id]; 1302 le = &n->links[bearer_id];
1229 1303
1304 /* Ensure broadcast reception is in synch with peer's send state */
1305 if (unlikely(usr == LINK_PROTOCOL))
1306 tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr);
1307 else if (unlikely(n->bc_entry.link->acked != bc_ack))
1308 tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack);
1309
1230 tipc_node_lock(n); 1310 tipc_node_lock(n);
1231 1311
1232 /* Is reception permitted at the moment ? */ 1312 /* Is reception permitted at the moment ? */
1233 if (!tipc_node_filter_pkt(n, hdr)) 1313 if (!tipc_node_filter_pkt(n, hdr))
1234 goto unlock; 1314 goto unlock;
1235 1315
1236 if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
1237 tipc_bclink_sync_state(n, hdr);
1238
1239 /* Release acked broadcast packets */
1240 if (unlikely(n->bclink.acked != msg_bcast_ack(hdr)))
1241 tipc_bclink_acknowledge(n, msg_bcast_ack(hdr));
1242
1243 /* Check and if necessary update node state */ 1316 /* Check and if necessary update node state */
1244 if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) { 1317 if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) {
1245 rc = tipc_link_rcv(le->link, skb, &xmitq); 1318 rc = tipc_link_rcv(le->link, skb, &xmitq);
@@ -1254,8 +1327,8 @@ unlock:
1254 if (unlikely(rc & TIPC_LINK_DOWN_EVT)) 1327 if (unlikely(rc & TIPC_LINK_DOWN_EVT))
1255 tipc_node_link_down(n, bearer_id, false); 1328 tipc_node_link_down(n, bearer_id, false);
1256 1329
1257 if (unlikely(!skb_queue_empty(&n->bclink.namedq))) 1330 if (unlikely(!skb_queue_empty(&n->bc_entry.namedq)))
1258 tipc_named_rcv(net, &n->bclink.namedq); 1331 tipc_named_rcv(net, &n->bc_entry.namedq);
1259 1332
1260 if (!skb_queue_empty(&le->inputq)) 1333 if (!skb_queue_empty(&le->inputq))
1261 tipc_sk_rcv(net, &le->inputq); 1334 tipc_sk_rcv(net, &le->inputq);
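
The delivery step at the end of tipc_node_bc_rcv() above deserves a closer look: inputq1 is spliced into arrvq while holding inputq2's lock first and inputq1's lock second, so a consumer draining arrvq under inputq2's lock can never observe a half-finished splice. A minimal userspace sketch of that fixed lock order, using hypothetical queue and node types (nothing below is TIPC API):

#include <pthread.h>
#include <stddef.h>

struct node { struct node *next; };
struct queue { struct node *head, *tail; };

/* consumer_lock protects 'arrvq' (like inputq2.lock); producer_lock
 * protects 'inq' (like inputq1.lock). Taking them in this fixed order
 * prevents a consumer from seeing a partially spliced queue. */
static void splice_tail(struct queue *arrvq, pthread_mutex_t *consumer_lock,
			struct queue *inq, pthread_mutex_t *producer_lock)
{
	pthread_mutex_lock(consumer_lock);
	pthread_mutex_lock(producer_lock);
	if (inq->head) {
		if (arrvq->tail)
			arrvq->tail->next = inq->head;
		else
			arrvq->head = inq->head;
		arrvq->tail = inq->tail;
		inq->head = inq->tail = NULL;
	}
	pthread_mutex_unlock(producer_lock);
	pthread_mutex_unlock(consumer_lock);
}

The key design choice is that arrvq carries no lock of its own; it borrows the consumer's, which is why both locks must be held for the splice.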
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 344b3e7594fd..6734562d3c6e 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -55,36 +55,18 @@
55enum { 55enum {
56 TIPC_NOTIFY_NODE_DOWN = (1 << 3), 56 TIPC_NOTIFY_NODE_DOWN = (1 << 3),
57 TIPC_NOTIFY_NODE_UP = (1 << 4), 57 TIPC_NOTIFY_NODE_UP = (1 << 4),
58 TIPC_WAKEUP_BCAST_USERS = (1 << 5),
59 TIPC_NOTIFY_LINK_UP = (1 << 6), 58 TIPC_NOTIFY_LINK_UP = (1 << 6),
60 TIPC_NOTIFY_LINK_DOWN = (1 << 7), 59 TIPC_NOTIFY_LINK_DOWN = (1 << 7)
61 TIPC_BCAST_MSG_EVT = (1 << 9),
62 TIPC_BCAST_RESET = (1 << 10)
63}; 60};
64 61
65/** 62/* Optional capabilities supported by this code version
66 * struct tipc_node_bclink - TIPC node bclink structure
67 * @acked: sequence # of last outbound b'cast message acknowledged by node
68 * @last_in: sequence # of last in-sequence b'cast message received from node
69 * @last_sent: sequence # of last b'cast message sent by node
70 * @oos_state: state tracker for handling OOS b'cast messages
71 * @deferred_queue: deferred queue saved OOS b'cast message received from node
72 * @reasm_buf: broadcast reassembly queue head from node
73 * @inputq_map: bitmap indicating which inqueues should be kicked
74 * @recv_permitted: true if node is allowed to receive b'cast messages
75 */ 63 */
76struct tipc_node_bclink { 64enum {
77 u32 acked; 65 TIPC_BCAST_SYNCH = (1 << 1)
78 u32 last_in;
79 u32 last_sent;
80 u32 oos_state;
81 u32 deferred_size;
82 struct sk_buff_head deferdq;
83 struct sk_buff *reasm_buf;
84 struct sk_buff_head namedq;
85 bool recv_permitted;
86}; 66};
87 67
68#define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH
69
88struct tipc_link_entry { 70struct tipc_link_entry {
89 struct tipc_link *link; 71 struct tipc_link *link;
90 u32 mtu; 72 u32 mtu;
@@ -92,6 +74,14 @@ struct tipc_link_entry {
92 struct tipc_media_addr maddr; 74 struct tipc_media_addr maddr;
93}; 75};
94 76
77struct tipc_bclink_entry {
78 struct tipc_link *link;
79 struct sk_buff_head inputq1;
80 struct sk_buff_head arrvq;
81 struct sk_buff_head inputq2;
82 struct sk_buff_head namedq;
83};
84
95/** 85/**
96 * struct tipc_node - TIPC node structure 86 * struct tipc_node - TIPC node structure
97 * @addr: network address of node 87 * @addr: network address of node
@@ -104,7 +94,6 @@ struct tipc_link_entry {
104 * @active_links: bearer ids of active links, used as index into links[] array 94 * @active_links: bearer ids of active links, used as index into links[] array
105 * @links: array containing references to all links to node 95 * @links: array containing references to all links to node
106 * @action_flags: bit mask of different types of node actions 96 * @action_flags: bit mask of different types of node actions
107 * @bclink: broadcast-related info
108 * @state: connectivity state vs peer node 97 * @state: connectivity state vs peer node
109 * @sync_point: sequence number where synch/failover is finished 98 * @sync_point: sequence number where synch/failover is finished
110 * @list: links to adjacent nodes in sorted list of cluster's nodes 99 * @list: links to adjacent nodes in sorted list of cluster's nodes
@@ -124,8 +113,8 @@ struct tipc_node {
124 struct hlist_node hash; 113 struct hlist_node hash;
125 int active_links[2]; 114 int active_links[2];
126 struct tipc_link_entry links[MAX_BEARERS]; 115 struct tipc_link_entry links[MAX_BEARERS];
116 struct tipc_bclink_entry bc_entry;
127 int action_flags; 117 int action_flags;
128 struct tipc_node_bclink bclink;
129 struct list_head list; 118 struct list_head list;
130 int state; 119 int state;
131 u16 sync_point; 120 u16 sync_point;
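
The new capability word in node.h replaces per-event action flags with a negotiated feature mask: TIPC_NODE_CAPABILITIES is what this node advertises, and a peer's support for a feature such as TIPC_BCAST_SYNCH reduces to a bitwise AND. A small illustrative helper, assuming a hypothetical peer_caps field (not part of the patch):

#include <stdbool.h>
#include <stdint.h>

enum {
	TIPC_BCAST_SYNCH = (1 << 1),
};

/* True if the peer advertised every bit in 'feature'. */
static inline bool peer_supports(uint16_t peer_caps, uint16_t feature)
{
	return (peer_caps & feature) == feature;
}

/* e.g. peer_supports(n->capabilities, TIPC_BCAST_SYNCH) would gate the
 * new broadcast synchronization protocol against older peers. */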
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1060d52ff23e..552dbaba9cf3 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -689,13 +689,13 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
689 msg_set_hdr_sz(mhdr, MCAST_H_SIZE); 689 msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
690 690
691new_mtu: 691new_mtu:
692 mtu = tipc_bclink_get_mtu(); 692 mtu = tipc_bcast_get_mtu(net);
693 rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); 693 rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
694 if (unlikely(rc < 0)) 694 if (unlikely(rc < 0))
695 return rc; 695 return rc;
696 696
697 do { 697 do {
698 rc = tipc_bclink_xmit(net, pktchain); 698 rc = tipc_bcast_xmit(net, pktchain);
699 if (likely(!rc)) 699 if (likely(!rc))
700 return dsz; 700 return dsz;
701 701
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index c170d3138953..ad2719ad4c1b 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -48,10 +48,13 @@
48#include <linux/tipc_netlink.h> 48#include <linux/tipc_netlink.h>
49#include "core.h" 49#include "core.h"
50#include "bearer.h" 50#include "bearer.h"
51#include "msg.h"
51 52
52/* IANA assigned UDP port */ 53/* IANA assigned UDP port */
53#define UDP_PORT_DEFAULT 6118 54#define UDP_PORT_DEFAULT 6118
54 55
56#define UDP_MIN_HEADROOM 28
57
55static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { 58static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
56 [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC}, 59 [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
57 [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY, 60 [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
@@ -153,11 +156,12 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
153 struct udp_bearer *ub; 156 struct udp_bearer *ub;
154 struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; 157 struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value;
155 struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; 158 struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
156 struct sk_buff *clone;
157 struct rtable *rt; 159 struct rtable *rt;
158 160
159 clone = skb_clone(skb, GFP_ATOMIC); 161 if (skb_headroom(skb) < UDP_MIN_HEADROOM)
160 skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); 162 pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
163
164 skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
161 ub = rcu_dereference_rtnl(b->media_ptr); 165 ub = rcu_dereference_rtnl(b->media_ptr);
162 if (!ub) { 166 if (!ub) {
163 err = -ENODEV; 167 err = -ENODEV;
@@ -167,7 +171,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
167 struct flowi4 fl = { 171 struct flowi4 fl = {
168 .daddr = dst->ipv4.s_addr, 172 .daddr = dst->ipv4.s_addr,
169 .saddr = src->ipv4.s_addr, 173 .saddr = src->ipv4.s_addr,
170 .flowi4_mark = clone->mark, 174 .flowi4_mark = skb->mark,
171 .flowi4_proto = IPPROTO_UDP 175 .flowi4_proto = IPPROTO_UDP
172 }; 176 };
173 rt = ip_route_output_key(net, &fl); 177 rt = ip_route_output_key(net, &fl);
@@ -176,7 +180,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
176 goto tx_error; 180 goto tx_error;
177 } 181 }
178 ttl = ip4_dst_hoplimit(&rt->dst); 182 ttl = ip4_dst_hoplimit(&rt->dst);
179 err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, clone, 183 err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb,
180 src->ipv4.s_addr, 184 src->ipv4.s_addr,
181 dst->ipv4.s_addr, 0, ttl, 0, 185 dst->ipv4.s_addr, 0, ttl, 0,
182 src->udp_port, dst->udp_port, 186 src->udp_port, dst->udp_port,
@@ -199,7 +203,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
199 if (err) 203 if (err)
200 goto tx_error; 204 goto tx_error;
201 ttl = ip6_dst_hoplimit(ndst); 205 ttl = ip6_dst_hoplimit(ndst);
202 err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, clone, 206 err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
203 ndst->dev, &src->ipv6, 207 ndst->dev, &src->ipv6,
204 &dst->ipv6, 0, ttl, src->udp_port, 208 &dst->ipv6, 0, ttl, src->udp_port,
205 dst->udp_port, false); 209 dst->udp_port, false);
@@ -208,7 +212,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
208 return err; 212 return err;
209 213
210tx_error: 214tx_error:
211 kfree_skb(clone); 215 kfree_skb(skb);
212 return err; 216 return err;
213} 217}
214 218
@@ -217,6 +221,10 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
217{ 221{
218 struct udp_bearer *ub; 222 struct udp_bearer *ub;
219 struct tipc_bearer *b; 223 struct tipc_bearer *b;
224 int usr = msg_user(buf_msg(skb));
225
226 if ((usr == LINK_PROTOCOL) || (usr == NAME_DISTRIBUTOR))
227 skb_linearize(skb);
220 228
221 ub = rcu_dereference_sk_user_data(sk); 229 ub = rcu_dereference_sk_user_data(sk);
222 if (!ub) { 230 if (!ub) {
@@ -425,7 +433,6 @@ static void tipc_udp_disable(struct tipc_bearer *b)
425 } 433 }
426 if (ub->ubsock) 434 if (ub->ubsock)
427 sock_set_flag(ub->ubsock->sk, SOCK_DEAD); 435 sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
428 RCU_INIT_POINTER(b->media_ptr, NULL);
429 RCU_INIT_POINTER(ub->bearer, NULL); 436 RCU_INIT_POINTER(ub->bearer, NULL);
430 437
431 /* sock_release need to be done outside of rtnl lock */ 438 /* sock_release need to be done outside of rtnl lock */
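
The tipc_udp_send_msg() change above drops the unconditional skb_clone() and instead guarantees UDP_MIN_HEADROOM (28 bytes, enough for the outer IPv4 and UDP headers) before handing the buffer to the tunnel layer, expanding the head only when it is actually short. A userspace sketch of the same grow-headroom-in-place idea, with a hypothetical buf type:

#include <stdlib.h>
#include <string.h>

struct buf {
	unsigned char *base;   /* start of allocation */
	unsigned char *data;   /* start of payload */
	size_t len;            /* payload length */
};

static int ensure_headroom(struct buf *b, size_t need)
{
	size_t have = (size_t)(b->data - b->base);

	if (have >= need)
		return 0;               /* common case: no copy at all */

	unsigned char *nbase = malloc(need + b->len);
	if (!nbase)
		return -1;
	memcpy(nbase + need, b->data, b->len);
	free(b->base);
	b->base = nbase;
	b->data = nbase + need;
	return 0;
}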
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ef31b40ad550..955ec152cb71 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,9 +326,10 @@ found:
326 return s; 326 return s;
327} 327}
328 328
329static inline int unix_writable(struct sock *sk) 329static int unix_writable(const struct sock *sk)
330{ 330{
331 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; 331 return sk->sk_state != TCP_LISTEN &&
332 (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
332} 333}
333 334
334static void unix_write_space(struct sock *sk) 335static void unix_write_space(struct sock *sk)
@@ -440,6 +441,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
440 if (state == TCP_LISTEN) 441 if (state == TCP_LISTEN)
441 unix_release_sock(skb->sk, 1); 442 unix_release_sock(skb->sk, 1);
442 /* passed fds are erased in the kfree_skb hook */ 443 /* passed fds are erased in the kfree_skb hook */
444 UNIXCB(skb).consumed = skb->len;
443 kfree_skb(skb); 445 kfree_skb(skb);
444 } 446 }
445 447
@@ -1798,6 +1800,7 @@ alloc_skb:
1798 * this - does no harm 1800 * this - does no harm
1799 */ 1801 */
1800 consume_skb(newskb); 1802 consume_skb(newskb);
1803 newskb = NULL;
1801 } 1804 }
1802 1805
1803 if (skb_append_pagefrags(skb, page, offset, size)) { 1806 if (skb_append_pagefrags(skb, page, offset, size)) {
@@ -1810,8 +1813,11 @@ alloc_skb:
1810 skb->truesize += size; 1813 skb->truesize += size;
1811 atomic_add(size, &sk->sk_wmem_alloc); 1814 atomic_add(size, &sk->sk_wmem_alloc);
1812 1815
1813 if (newskb) 1816 if (newskb) {
1817 spin_lock(&other->sk_receive_queue.lock);
1814 __skb_queue_tail(&other->sk_receive_queue, newskb); 1818 __skb_queue_tail(&other->sk_receive_queue, newskb);
1819 spin_unlock(&other->sk_receive_queue.lock);
1820 }
1815 1821
1816 unix_state_unlock(other); 1822 unix_state_unlock(other);
1817 mutex_unlock(&unix_sk(other)->readlock); 1823 mutex_unlock(&unix_sk(other)->readlock);
@@ -2064,8 +2070,14 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
2064 goto out; 2070 goto out;
2065 } 2071 }
2066 2072
2073 if (flags & MSG_PEEK)
2074 skip = sk_peek_offset(sk, flags);
2075 else
2076 skip = 0;
2077
2067 do { 2078 do {
2068 int chunk; 2079 int chunk;
2080 bool drop_skb;
2069 struct sk_buff *skb, *last; 2081 struct sk_buff *skb, *last;
2070 2082
2071 unix_state_lock(sk); 2083 unix_state_lock(sk);
@@ -2112,7 +2124,6 @@ unlock:
2112 break; 2124 break;
2113 } 2125 }
2114 2126
2115 skip = sk_peek_offset(sk, flags);
2116 while (skip >= unix_skb_len(skb)) { 2127 while (skip >= unix_skb_len(skb)) {
2117 skip -= unix_skb_len(skb); 2128 skip -= unix_skb_len(skb);
2118 last = skb; 2129 last = skb;
@@ -2147,7 +2158,11 @@ unlock:
2147 } 2158 }
2148 2159
2149 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2160 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2161 skb_get(skb);
2150 chunk = state->recv_actor(skb, skip, chunk, state); 2162 chunk = state->recv_actor(skb, skip, chunk, state);
2163 drop_skb = !unix_skb_len(skb);
2164 /* skb is only safe to use if !drop_skb */
2165 consume_skb(skb);
2151 if (chunk < 0) { 2166 if (chunk < 0) {
2152 if (copied == 0) 2167 if (copied == 0)
2153 copied = -EFAULT; 2168 copied = -EFAULT;
@@ -2156,6 +2171,18 @@ unlock:
2156 copied += chunk; 2171 copied += chunk;
2157 size -= chunk; 2172 size -= chunk;
2158 2173
2174 if (drop_skb) {
2175 /* the skb was touched by a concurrent reader;
2176 * we should not expect anything from this skb
 2177 * anymore and assume it is invalid - we can be
2178 * sure it was dropped from the socket queue
2179 *
2180 * let's report a short read
2181 */
2182 err = 0;
2183 break;
2184 }
2185
2159 /* Mark read part of skb as used */ 2186 /* Mark read part of skb as used */
2160 if (!(flags & MSG_PEEK)) { 2187 if (!(flags & MSG_PEEK)) {
2161 UNIXCB(skb).consumed += chunk; 2188 UNIXCB(skb).consumed += chunk;
@@ -2179,14 +2206,12 @@ unlock:
2179 if (UNIXCB(skb).fp) 2206 if (UNIXCB(skb).fp)
2180 scm.fp = scm_fp_dup(UNIXCB(skb).fp); 2207 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2181 2208
2182 if (skip) { 2209 sk_peek_offset_fwd(sk, chunk);
2183 sk_peek_offset_fwd(sk, chunk);
2184 skip -= chunk;
2185 }
2186 2210
2187 if (UNIXCB(skb).fp) 2211 if (UNIXCB(skb).fp)
2188 break; 2212 break;
2189 2213
2214 skip = 0;
2190 last = skb; 2215 last = skb;
2191 last_len = skb->len; 2216 last_len = skb->len;
2192 unix_state_lock(sk); 2217 unix_state_lock(sk);
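
The unix_stream_read_generic() change pins the skb with skb_get() before calling recv_actor(), then uses the fully-consumed condition to detect that a concurrent reader raced with us; in that case the skb must not be touched again and a short read is reported. A sketch of the pin-then-check shape with a hypothetical refcounted object (the names are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct obj {
	atomic_int refs;
	int remaining;          /* bytes not yet consumed */
};

static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refs, 1) == 1)
		free(o);
}

/* Returns true if the object was drained by someone else while the
 * callback ran; the caller must then stop using it and report a short
 * read, exactly as the unix code does via 'drop_skb'. */
static bool read_with_pin(struct obj *o, int (*actor)(struct obj *))
{
	atomic_fetch_add(&o->refs, 1);  /* like skb_get() */
	actor(o);
	bool drained = (o->remaining == 0);
	obj_put(o);                     /* like consume_skb() */
	return drained;
}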
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index df5fc6b340f1..7fd1220fbfa0 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -36,19 +36,20 @@
36 * not support simultaneous connects (two "client" sockets connecting). 36 * not support simultaneous connects (two "client" sockets connecting).
37 * 37 *
38 * - "Server" sockets are referred to as listener sockets throughout this 38 * - "Server" sockets are referred to as listener sockets throughout this
39 * implementation because they are in the SS_LISTEN state. When a connection 39 * implementation because they are in the VSOCK_SS_LISTEN state. When a
40 * request is received (the second kind of socket mentioned above), we create a 40 * connection request is received (the second kind of socket mentioned above),
41 * new socket and refer to it as a pending socket. These pending sockets are 41 * we create a new socket and refer to it as a pending socket. These pending
42 * placed on the pending connection list of the listener socket. When future 42 * sockets are placed on the pending connection list of the listener socket.
43 * packets are received for the address the listener socket is bound to, we 43 * When future packets are received for the address the listener socket is
44 * check if the source of the packet is from one that has an existing pending 44 * bound to, we check if the source of the packet is from one that has an
45 * connection. If it does, we process the packet for the pending socket. When 45 * existing pending connection. If it does, we process the packet for the
46 * that socket reaches the connected state, it is removed from the listener 46 * pending socket. When that socket reaches the connected state, it is removed
47 * socket's pending list and enqueued in the listener socket's accept queue. 47 * from the listener socket's pending list and enqueued in the listener
48 * Callers of accept(2) will accept connected sockets from the listener socket's 48 * socket's accept queue. Callers of accept(2) will accept connected sockets
49 * accept queue. If the socket cannot be accepted for some reason then it is 49 * from the listener socket's accept queue. If the socket cannot be accepted
50 * marked rejected. Once the connection is accepted, it is owned by the user 50 * for some reason then it is marked rejected. Once the connection is
51 * process and the responsibility for cleanup falls with that user process. 51 * accepted, it is owned by the user process and the responsibility for cleanup
52 * falls with that user process.
52 * 53 *
53 * - It is possible that these pending sockets will never reach the connected 54 * - It is possible that these pending sockets will never reach the connected
54 * state; in fact, we may never receive another packet after the connection 55 * state; in fact, we may never receive another packet after the connection
@@ -114,8 +115,6 @@ static struct proto vsock_proto = {
114 */ 115 */
115#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) 116#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
116 117
117#define SS_LISTEN 255
118
119static const struct vsock_transport *transport; 118static const struct vsock_transport *transport;
120static DEFINE_MUTEX(vsock_register_mutex); 119static DEFINE_MUTEX(vsock_register_mutex);
121 120
@@ -887,7 +886,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
887 /* Listening sockets that have connections in their accept 886 /* Listening sockets that have connections in their accept
888 * queue can be read. 887 * queue can be read.
889 */ 888 */
890 if (sk->sk_state == SS_LISTEN 889 if (sk->sk_state == VSOCK_SS_LISTEN
891 && !vsock_is_accept_queue_empty(sk)) 890 && !vsock_is_accept_queue_empty(sk))
892 mask |= POLLIN | POLLRDNORM; 891 mask |= POLLIN | POLLRDNORM;
893 892
@@ -1144,7 +1143,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1144 err = -EALREADY; 1143 err = -EALREADY;
1145 break; 1144 break;
1146 default: 1145 default:
1147 if ((sk->sk_state == SS_LISTEN) || 1146 if ((sk->sk_state == VSOCK_SS_LISTEN) ||
1148 vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { 1147 vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
1149 err = -EINVAL; 1148 err = -EINVAL;
1150 goto out; 1149 goto out;
@@ -1256,7 +1255,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
1256 goto out; 1255 goto out;
1257 } 1256 }
1258 1257
1259 if (listener->sk_state != SS_LISTEN) { 1258 if (listener->sk_state != VSOCK_SS_LISTEN) {
1260 err = -EINVAL; 1259 err = -EINVAL;
1261 goto out; 1260 goto out;
1262 } 1261 }
@@ -1348,7 +1347,7 @@ static int vsock_listen(struct socket *sock, int backlog)
1348 } 1347 }
1349 1348
1350 sk->sk_max_ack_backlog = backlog; 1349 sk->sk_max_ack_backlog = backlog;
1351 sk->sk_state = SS_LISTEN; 1350 sk->sk_state = VSOCK_SS_LISTEN;
1352 1351
1353 err = 0; 1352 err = 0;
1354 1353
@@ -1948,13 +1947,13 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
1948 err = misc_register(&vsock_device); 1947 err = misc_register(&vsock_device);
1949 if (err) { 1948 if (err) {
1950 pr_err("Failed to register misc device\n"); 1949 pr_err("Failed to register misc device\n");
1951 return -ENOENT; 1950 goto err_reset_transport;
1952 } 1951 }
1953 1952
1954 err = proto_register(&vsock_proto, 1); /* we want our slab */ 1953 err = proto_register(&vsock_proto, 1); /* we want our slab */
1955 if (err) { 1954 if (err) {
1956 pr_err("Cannot register vsock protocol\n"); 1955 pr_err("Cannot register vsock protocol\n");
1957 goto err_misc_deregister; 1956 goto err_deregister_misc;
1958 } 1957 }
1959 1958
1960 err = sock_register(&vsock_family_ops); 1959 err = sock_register(&vsock_family_ops);
@@ -1969,8 +1968,9 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
1969 1968
1970err_unregister_proto: 1969err_unregister_proto:
1971 proto_unregister(&vsock_proto); 1970 proto_unregister(&vsock_proto);
1972err_misc_deregister: 1971err_deregister_misc:
1973 misc_deregister(&vsock_device); 1972 misc_deregister(&vsock_device);
1973err_reset_transport:
1974 transport = NULL; 1974 transport = NULL;
1975err_busy: 1975err_busy:
1976 mutex_unlock(&vsock_register_mutex); 1976 mutex_unlock(&vsock_register_mutex);
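
The __vsock_core_init() fix is the classic unwind-in-reverse-order error path: each failure after a side effect jumps to a label that undoes exactly the side effects taken so far, and the new err_reset_transport label closes the gap where a misc_register() failure used to return without clearing 'transport'. A minimal skeleton with hypothetical step functions:

int step_a(void); void undo_a(void);    /* e.g. misc_register()  */
int step_b(void); void undo_b(void);    /* e.g. proto_register() */
int step_c(void);                       /* e.g. sock_register()  */

static int init_three_steps(void)
{
	int err;

	err = step_a();
	if (err)
		return err;             /* nothing to undo yet */

	err = step_b();
	if (err)
		goto err_undo_a;

	err = step_c();
	if (err)
		goto err_undo_b;

	return 0;

err_undo_b:                             /* undo in reverse order */
	undo_b();
err_undo_a:
	undo_a();
	return err;
}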
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 1f63daff3965..0a369bb440e7 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -40,13 +40,11 @@
40 40
41static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); 41static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
42static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); 42static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
43static void vmci_transport_peer_attach_cb(u32 sub_id,
44 const struct vmci_event_data *ed,
45 void *client_data);
46static void vmci_transport_peer_detach_cb(u32 sub_id, 43static void vmci_transport_peer_detach_cb(u32 sub_id,
47 const struct vmci_event_data *ed, 44 const struct vmci_event_data *ed,
48 void *client_data); 45 void *client_data);
49static void vmci_transport_recv_pkt_work(struct work_struct *work); 46static void vmci_transport_recv_pkt_work(struct work_struct *work);
47static void vmci_transport_cleanup(struct work_struct *work);
50static int vmci_transport_recv_listen(struct sock *sk, 48static int vmci_transport_recv_listen(struct sock *sk,
51 struct vmci_transport_packet *pkt); 49 struct vmci_transport_packet *pkt);
52static int vmci_transport_recv_connecting_server( 50static int vmci_transport_recv_connecting_server(
@@ -75,6 +73,10 @@ struct vmci_transport_recv_pkt_info {
75 struct vmci_transport_packet pkt; 73 struct vmci_transport_packet pkt;
76}; 74};
77 75
76static LIST_HEAD(vmci_transport_cleanup_list);
77static DEFINE_SPINLOCK(vmci_transport_cleanup_lock);
78static DECLARE_WORK(vmci_transport_cleanup_work, vmci_transport_cleanup);
79
78static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, 80static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
79 VMCI_INVALID_ID }; 81 VMCI_INVALID_ID };
80static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; 82static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
@@ -90,8 +92,6 @@ static int PROTOCOL_OVERRIDE = -1;
90 */ 92 */
91#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) 93#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
92 94
93#define SS_LISTEN 255
94
95/* Helper function to convert from a VMCI error code to a VSock error code. */ 95/* Helper function to convert from a VMCI error code to a VSock error code. */
96 96
97static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) 97static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
@@ -791,44 +791,6 @@ out:
791 return err; 791 return err;
792} 792}
793 793
794static void vmci_transport_peer_attach_cb(u32 sub_id,
795 const struct vmci_event_data *e_data,
796 void *client_data)
797{
798 struct sock *sk = client_data;
799 const struct vmci_event_payload_qp *e_payload;
800 struct vsock_sock *vsk;
801
802 e_payload = vmci_event_data_const_payload(e_data);
803
804 vsk = vsock_sk(sk);
805
806 /* We don't ask for delayed CBs when we subscribe to this event (we
807 * pass 0 as flags to vmci_event_subscribe()). VMCI makes no
808 * guarantees in that case about what context we might be running in,
809 * so it could be BH or process, blockable or non-blockable. So we
810 * need to account for all possible contexts here.
811 */
812 local_bh_disable();
813 bh_lock_sock(sk);
814
815 /* XXX This is lame, we should provide a way to lookup sockets by
816 * qp_handle.
817 */
818 if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
819 e_payload->handle)) {
820 /* XXX This doesn't do anything, but in the future we may want
821 * to set a flag here to verify the attach really did occur and
822 * we weren't just sent a datagram claiming it was.
823 */
824 goto out;
825 }
826
827out:
828 bh_unlock_sock(sk);
829 local_bh_enable();
830}
831
832static void vmci_transport_handle_detach(struct sock *sk) 794static void vmci_transport_handle_detach(struct sock *sk)
833{ 795{
834 struct vsock_sock *vsk; 796 struct vsock_sock *vsk;
@@ -871,28 +833,38 @@ static void vmci_transport_peer_detach_cb(u32 sub_id,
871 const struct vmci_event_data *e_data, 833 const struct vmci_event_data *e_data,
872 void *client_data) 834 void *client_data)
873{ 835{
874 struct sock *sk = client_data; 836 struct vmci_transport *trans = client_data;
875 const struct vmci_event_payload_qp *e_payload; 837 const struct vmci_event_payload_qp *e_payload;
876 struct vsock_sock *vsk;
877 838
878 e_payload = vmci_event_data_const_payload(e_data); 839 e_payload = vmci_event_data_const_payload(e_data);
879 vsk = vsock_sk(sk);
880 if (vmci_handle_is_invalid(e_payload->handle))
881 return;
882
883 /* Same rules for locking as for peer_attach_cb(). */
884 local_bh_disable();
885 bh_lock_sock(sk);
886 840
887 /* XXX This is lame, we should provide a way to lookup sockets by 841 /* XXX This is lame, we should provide a way to lookup sockets by
888 * qp_handle. 842 * qp_handle.
889 */ 843 */
890 if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, 844 if (vmci_handle_is_invalid(e_payload->handle) ||
891 e_payload->handle)) 845 vmci_handle_is_equal(trans->qp_handle, e_payload->handle))
892 vmci_transport_handle_detach(sk); 846 return;
893 847
894 bh_unlock_sock(sk); 848 /* We don't ask for delayed CBs when we subscribe to this event (we
895 local_bh_enable(); 849 * pass 0 as flags to vmci_event_subscribe()). VMCI makes no
850 * guarantees in that case about what context we might be running in,
851 * so it could be BH or process, blockable or non-blockable. So we
852 * need to account for all possible contexts here.
853 */
854 spin_lock_bh(&trans->lock);
855 if (!trans->sk)
856 goto out;
857
858 /* Apart from here, trans->lock is only grabbed as part of sk destruct,
859 * where trans->sk isn't locked.
860 */
861 bh_lock_sock(trans->sk);
862
863 vmci_transport_handle_detach(trans->sk);
864
865 bh_unlock_sock(trans->sk);
866 out:
867 spin_unlock_bh(&trans->lock);
896} 868}
897 869
898static void vmci_transport_qp_resumed_cb(u32 sub_id, 870static void vmci_transport_qp_resumed_cb(u32 sub_id,
@@ -919,7 +891,7 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work)
919 vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; 891 vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
920 892
921 switch (sk->sk_state) { 893 switch (sk->sk_state) {
922 case SS_LISTEN: 894 case VSOCK_SS_LISTEN:
923 vmci_transport_recv_listen(sk, pkt); 895 vmci_transport_recv_listen(sk, pkt);
924 break; 896 break;
925 case SS_CONNECTING: 897 case SS_CONNECTING:
@@ -1181,7 +1153,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
1181 */ 1153 */
1182 err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, 1154 err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
1183 vmci_transport_peer_detach_cb, 1155 vmci_transport_peer_detach_cb,
1184 pending, &detach_sub_id); 1156 vmci_trans(vpending), &detach_sub_id);
1185 if (err < VMCI_SUCCESS) { 1157 if (err < VMCI_SUCCESS) {
1186 vmci_transport_send_reset(pending, pkt); 1158 vmci_transport_send_reset(pending, pkt);
1187 err = vmci_transport_error_to_vsock_error(err); 1159 err = vmci_transport_error_to_vsock_error(err);
@@ -1262,7 +1234,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
 1262 /* Callers of accept() will be waiting on the listening socket, not 1234 /* Callers of accept() will be waiting on the listening socket, not
1263 * the pending socket. 1235 * the pending socket.
1264 */ 1236 */
1265 listener->sk_state_change(listener); 1237 listener->sk_data_ready(listener);
1266 1238
1267 return 0; 1239 return 0;
1268 1240
@@ -1321,7 +1293,6 @@ vmci_transport_recv_connecting_client(struct sock *sk,
1321 || vmci_trans(vsk)->qpair 1293 || vmci_trans(vsk)->qpair
1322 || vmci_trans(vsk)->produce_size != 0 1294 || vmci_trans(vsk)->produce_size != 0
1323 || vmci_trans(vsk)->consume_size != 0 1295 || vmci_trans(vsk)->consume_size != 0
1324 || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID
1325 || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { 1296 || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
1326 skerr = EPROTO; 1297 skerr = EPROTO;
1327 err = -EINVAL; 1298 err = -EINVAL;
@@ -1389,7 +1360,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
1389 struct vsock_sock *vsk; 1360 struct vsock_sock *vsk;
1390 struct vmci_handle handle; 1361 struct vmci_handle handle;
1391 struct vmci_qp *qpair; 1362 struct vmci_qp *qpair;
1392 u32 attach_sub_id;
1393 u32 detach_sub_id; 1363 u32 detach_sub_id;
1394 bool is_local; 1364 bool is_local;
1395 u32 flags; 1365 u32 flags;
@@ -1399,7 +1369,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
1399 1369
1400 vsk = vsock_sk(sk); 1370 vsk = vsock_sk(sk);
1401 handle = VMCI_INVALID_HANDLE; 1371 handle = VMCI_INVALID_HANDLE;
1402 attach_sub_id = VMCI_INVALID_ID;
1403 detach_sub_id = VMCI_INVALID_ID; 1372 detach_sub_id = VMCI_INVALID_ID;
1404 1373
1405 /* If we have gotten here then we should be past the point where old 1374 /* If we have gotten here then we should be past the point where old
@@ -1444,23 +1413,15 @@ static int vmci_transport_recv_connecting_client_negotiate(
1444 goto destroy; 1413 goto destroy;
1445 } 1414 }
1446 1415
1447 /* Subscribe to attach and detach events first. 1416 /* Subscribe to detach events first.
1448 * 1417 *
1449 * XXX We attach once for each queue pair created for now so it is easy 1418 * XXX We attach once for each queue pair created for now so it is easy
1450 * to find the socket (it's provided), but later we should only 1419 * to find the socket (it's provided), but later we should only
1451 * subscribe once and add a way to lookup sockets by queue pair handle. 1420 * subscribe once and add a way to lookup sockets by queue pair handle.
1452 */ 1421 */
1453 err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH,
1454 vmci_transport_peer_attach_cb,
1455 sk, &attach_sub_id);
1456 if (err < VMCI_SUCCESS) {
1457 err = vmci_transport_error_to_vsock_error(err);
1458 goto destroy;
1459 }
1460
1461 err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, 1422 err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
1462 vmci_transport_peer_detach_cb, 1423 vmci_transport_peer_detach_cb,
1463 sk, &detach_sub_id); 1424 vmci_trans(vsk), &detach_sub_id);
1464 if (err < VMCI_SUCCESS) { 1425 if (err < VMCI_SUCCESS) {
1465 err = vmci_transport_error_to_vsock_error(err); 1426 err = vmci_transport_error_to_vsock_error(err);
1466 goto destroy; 1427 goto destroy;
@@ -1496,7 +1457,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
1496 vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 1457 vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
1497 pkt->u.size; 1458 pkt->u.size;
1498 1459
1499 vmci_trans(vsk)->attach_sub_id = attach_sub_id;
1500 vmci_trans(vsk)->detach_sub_id = detach_sub_id; 1460 vmci_trans(vsk)->detach_sub_id = detach_sub_id;
1501 1461
1502 vmci_trans(vsk)->notify_ops->process_negotiate(sk); 1462 vmci_trans(vsk)->notify_ops->process_negotiate(sk);
@@ -1504,9 +1464,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
1504 return 0; 1464 return 0;
1505 1465
1506destroy: 1466destroy:
1507 if (attach_sub_id != VMCI_INVALID_ID)
1508 vmci_event_unsubscribe(attach_sub_id);
1509
1510 if (detach_sub_id != VMCI_INVALID_ID) 1467 if (detach_sub_id != VMCI_INVALID_ID)
1511 vmci_event_unsubscribe(detach_sub_id); 1468 vmci_event_unsubscribe(detach_sub_id);
1512 1469
@@ -1607,9 +1564,11 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk,
1607 vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; 1564 vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
1608 vmci_trans(vsk)->qpair = NULL; 1565 vmci_trans(vsk)->qpair = NULL;
1609 vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; 1566 vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
1610 vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = 1567 vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
1611 VMCI_INVALID_ID;
1612 vmci_trans(vsk)->notify_ops = NULL; 1568 vmci_trans(vsk)->notify_ops = NULL;
1569 INIT_LIST_HEAD(&vmci_trans(vsk)->elem);
1570 vmci_trans(vsk)->sk = &vsk->sk;
1571 spin_lock_init(&vmci_trans(vsk)->lock);
1613 if (psk) { 1572 if (psk) {
1614 vmci_trans(vsk)->queue_pair_size = 1573 vmci_trans(vsk)->queue_pair_size =
1615 vmci_trans(psk)->queue_pair_size; 1574 vmci_trans(psk)->queue_pair_size;
@@ -1629,29 +1588,57 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk,
1629 return 0; 1588 return 0;
1630} 1589}
1631 1590
1632static void vmci_transport_destruct(struct vsock_sock *vsk) 1591static void vmci_transport_free_resources(struct list_head *transport_list)
1633{ 1592{
1634 if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { 1593 while (!list_empty(transport_list)) {
1635 vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); 1594 struct vmci_transport *transport =
1636 vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; 1595 list_first_entry(transport_list, struct vmci_transport,
1637 } 1596 elem);
1597 list_del(&transport->elem);
1638 1598
1639 if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { 1599 if (transport->detach_sub_id != VMCI_INVALID_ID) {
1640 vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); 1600 vmci_event_unsubscribe(transport->detach_sub_id);
1641 vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; 1601 transport->detach_sub_id = VMCI_INVALID_ID;
1642 } 1602 }
1643 1603
1644 if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { 1604 if (!vmci_handle_is_invalid(transport->qp_handle)) {
1645 vmci_qpair_detach(&vmci_trans(vsk)->qpair); 1605 vmci_qpair_detach(&transport->qpair);
1646 vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; 1606 transport->qp_handle = VMCI_INVALID_HANDLE;
1647 vmci_trans(vsk)->produce_size = 0; 1607 transport->produce_size = 0;
1648 vmci_trans(vsk)->consume_size = 0; 1608 transport->consume_size = 0;
1609 }
1610
1611 kfree(transport);
1649 } 1612 }
1613}
1614
1615static void vmci_transport_cleanup(struct work_struct *work)
1616{
1617 LIST_HEAD(pending);
1618
1619 spin_lock_bh(&vmci_transport_cleanup_lock);
1620 list_replace_init(&vmci_transport_cleanup_list, &pending);
1621 spin_unlock_bh(&vmci_transport_cleanup_lock);
1622 vmci_transport_free_resources(&pending);
1623}
1624
1625static void vmci_transport_destruct(struct vsock_sock *vsk)
1626{
1627 /* Ensure that the detach callback doesn't use the sk/vsk
1628 * we are about to destruct.
1629 */
1630 spin_lock_bh(&vmci_trans(vsk)->lock);
1631 vmci_trans(vsk)->sk = NULL;
1632 spin_unlock_bh(&vmci_trans(vsk)->lock);
1650 1633
1651 if (vmci_trans(vsk)->notify_ops) 1634 if (vmci_trans(vsk)->notify_ops)
1652 vmci_trans(vsk)->notify_ops->socket_destruct(vsk); 1635 vmci_trans(vsk)->notify_ops->socket_destruct(vsk);
1653 1636
1654 kfree(vsk->trans); 1637 spin_lock_bh(&vmci_transport_cleanup_lock);
1638 list_add(&vmci_trans(vsk)->elem, &vmci_transport_cleanup_list);
1639 spin_unlock_bh(&vmci_transport_cleanup_lock);
1640 schedule_work(&vmci_transport_cleanup_work);
1641
1655 vsk->trans = NULL; 1642 vsk->trans = NULL;
1656} 1643}
1657 1644
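
vmci_transport_cleanup() above uses the drain-under-lock idiom: the shared list is moved to a private one while the spinlock is held, and the actual teardown (vmci_event_unsubscribe(), which may block, and kfree()) runs with the lock dropped, in worker context rather than in the socket destructor. The same idiom in userspace, with a hypothetical singly linked list:

#include <pthread.h>
#include <stddef.h>
#include <stdlib.h>

struct item { struct item *next; };

static pthread_mutex_t cleanup_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *cleanup_list;

static void cleanup_worker(void)
{
	struct item *pending, *next;

	pthread_mutex_lock(&cleanup_lock);
	pending = cleanup_list;             /* like list_replace_init() */
	cleanup_list = NULL;
	pthread_mutex_unlock(&cleanup_lock);

	for (; pending; pending = next) {   /* free outside the lock */
		next = pending->next;
		free(pending);
	}
}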
@@ -2146,6 +2133,9 @@ module_init(vmci_transport_init);
2146 2133
2147static void __exit vmci_transport_exit(void) 2134static void __exit vmci_transport_exit(void)
2148{ 2135{
2136 cancel_work_sync(&vmci_transport_cleanup_work);
2137 vmci_transport_free_resources(&vmci_transport_cleanup_list);
2138
2149 if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { 2139 if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
2150 if (vmci_datagram_destroy_handle( 2140 if (vmci_datagram_destroy_handle(
2151 vmci_transport_stream_handle) != VMCI_SUCCESS) 2141 vmci_transport_stream_handle) != VMCI_SUCCESS)
@@ -2164,6 +2154,7 @@ module_exit(vmci_transport_exit);
2164 2154
2165MODULE_AUTHOR("VMware, Inc."); 2155MODULE_AUTHOR("VMware, Inc.");
2166MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); 2156MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
2157MODULE_VERSION("1.0.2.0-k");
2167MODULE_LICENSE("GPL v2"); 2158MODULE_LICENSE("GPL v2");
2168MODULE_ALIAS("vmware_vsock"); 2159MODULE_ALIAS("vmware_vsock");
2169MODULE_ALIAS_NETPROTO(PF_VSOCK); 2160MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index ce6c9623d5f0..2ad46f39649f 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -119,10 +119,12 @@ struct vmci_transport {
119 u64 queue_pair_size; 119 u64 queue_pair_size;
120 u64 queue_pair_min_size; 120 u64 queue_pair_min_size;
121 u64 queue_pair_max_size; 121 u64 queue_pair_max_size;
122 u32 attach_sub_id;
123 u32 detach_sub_id; 122 u32 detach_sub_id;
124 union vmci_transport_notify notify; 123 union vmci_transport_notify notify;
125 struct vmci_transport_notify_ops *notify_ops; 124 struct vmci_transport_notify_ops *notify_ops;
125 struct list_head elem;
126 struct sock *sk;
127 spinlock_t lock; /* protects sk. */
126}; 128};
127 129
128int vmci_transport_register(void); 130int vmci_transport_register(void);
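
The new sk back-pointer in struct vmci_transport is effectively a guarded weak reference: the destructor NULLs it under 'lock', and the detach callback reads it under the same lock, so the callback can no longer dereference a socket that is mid-teardown (the use-after-free this series fixes). The pattern in miniature, with hypothetical types:

#include <pthread.h>
#include <stddef.h>

struct owner;                   /* stands in for struct sock */

struct transport {
	pthread_mutex_t lock;
	struct owner *owner;    /* NULL once the owner is gone */
};

static void transport_owner_going_away(struct transport *t)
{
	pthread_mutex_lock(&t->lock);
	t->owner = NULL;        /* like vmci_trans(vsk)->sk = NULL */
	pthread_mutex_unlock(&t->lock);
}

static void async_callback(struct transport *t,
			   void (*handle)(struct owner *))
{
	pthread_mutex_lock(&t->lock);
	if (t->owner)           /* skip if already destructed */
		handle(t->owner);
	pthread_mutex_unlock(&t->lock);
}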
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 4f5543dd2524..da72ed32f143 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -174,6 +174,16 @@ config CFG80211_INTERNAL_REGDB
174 174
175 Most distributions have a CRDA package. So if unsure, say N. 175 Most distributions have a CRDA package. So if unsure, say N.
176 176
177config CFG80211_CRDA_SUPPORT
178 bool "support CRDA" if CFG80211_INTERNAL_REGDB
179 default y
180 depends on CFG80211
181 help
182 You should enable this option unless you know for sure you have no
 183 need for it, for example when using the internal regdb (above).
184
185 If unsure, say Y.
186
177config CFG80211_WEXT 187config CFG80211_WEXT
178 bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT 188 bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT
179 depends on CFG80211 189 depends on CFG80211
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3893409dee95..b0915515640e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -419,6 +419,7 @@ use_default_name:
419 device_initialize(&rdev->wiphy.dev); 419 device_initialize(&rdev->wiphy.dev);
420 rdev->wiphy.dev.class = &ieee80211_class; 420 rdev->wiphy.dev.class = &ieee80211_class;
421 rdev->wiphy.dev.platform_data = rdev; 421 rdev->wiphy.dev.platform_data = rdev;
422 device_enable_async_suspend(&rdev->wiphy.dev);
422 423
423 INIT_LIST_HEAD(&rdev->destroy_list); 424 INIT_LIST_HEAD(&rdev->destroy_list);
424 spin_lock_init(&rdev->destroy_list_lock); 425 spin_lock_init(&rdev->destroy_list_lock);
@@ -460,6 +461,9 @@ use_default_name:
460 461
461 rdev->wiphy.max_num_csa_counters = 1; 462 rdev->wiphy.max_num_csa_counters = 1;
462 463
464 rdev->wiphy.max_sched_scan_plans = 1;
465 rdev->wiphy.max_sched_scan_plan_interval = U32_MAX;
466
463 return &rdev->wiphy; 467 return &rdev->wiphy;
464} 468}
465EXPORT_SYMBOL(wiphy_new_nm); 469EXPORT_SYMBOL(wiphy_new_nm);
@@ -635,7 +639,7 @@ int wiphy_register(struct wiphy *wiphy)
635 if (WARN_ON(!sband->n_channels)) 639 if (WARN_ON(!sband->n_channels))
636 return -EINVAL; 640 return -EINVAL;
637 /* 641 /*
638 * on 60gHz band, there are no legacy rates, so 642 * on 60GHz band, there are no legacy rates, so
639 * n_bitrates is 0 643 * n_bitrates is 0
640 */ 644 */
641 if (WARN_ON(band != IEEE80211_BAND_60GHZ && 645 if (WARN_ON(band != IEEE80211_BAND_60GHZ &&
diff --git a/net/wireless/core.h b/net/wireless/core.h
index b9d5bc8c148d..a618b4b86fa4 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -137,6 +137,7 @@ struct cfg80211_internal_bss {
137 struct list_head list; 137 struct list_head list;
138 struct list_head hidden_list; 138 struct list_head hidden_list;
139 struct rb_node rbn; 139 struct rb_node rbn;
140 u64 ts_boottime;
140 unsigned long ts; 141 unsigned long ts;
141 unsigned long refcount; 142 unsigned long refcount;
142 atomic_t hold; 143 atomic_t hold;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5d8748b4c8a2..c71e274c810a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> 4 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
5 * Copyright 2013-2014 Intel Mobile Communications GmbH 5 * Copyright 2013-2014 Intel Mobile Communications GmbH
6 * Copyright 2015 Intel Deutschland GmbH
6 */ 7 */
7 8
8#include <linux/if.h> 9#include <linux/if.h>
@@ -478,6 +479,12 @@ nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {
478 [NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 }, 479 [NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },
479}; 480};
480 481
482static const struct nla_policy
483nl80211_plan_policy[NL80211_SCHED_SCAN_PLAN_MAX + 1] = {
484 [NL80211_SCHED_SCAN_PLAN_INTERVAL] = { .type = NLA_U32 },
485 [NL80211_SCHED_SCAN_PLAN_ITERATIONS] = { .type = NLA_U32 },
486};
487
481static int nl80211_prepare_wdev_dump(struct sk_buff *skb, 488static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
482 struct netlink_callback *cb, 489 struct netlink_callback *cb,
483 struct cfg80211_registered_device **rdev, 490 struct cfg80211_registered_device **rdev,
@@ -1303,7 +1310,13 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
1303 nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, 1310 nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
1304 rdev->wiphy.max_sched_scan_ie_len) || 1311 rdev->wiphy.max_sched_scan_ie_len) ||
1305 nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, 1312 nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS,
1306 rdev->wiphy.max_match_sets)) 1313 rdev->wiphy.max_match_sets) ||
1314 nla_put_u32(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS,
1315 rdev->wiphy.max_sched_scan_plans) ||
1316 nla_put_u32(msg, NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL,
1317 rdev->wiphy.max_sched_scan_plan_interval) ||
1318 nla_put_u32(msg, NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS,
1319 rdev->wiphy.max_sched_scan_plan_iterations))
1307 goto nla_put_failure; 1320 goto nla_put_failure;
1308 1321
1309 if ((rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && 1322 if ((rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) &&
@@ -2403,6 +2416,16 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
2403 } 2416 }
2404 } 2417 }
2405 2418
2419 if (rdev->ops->get_tx_power) {
2420 int dbm, ret;
2421
2422 ret = rdev_get_tx_power(rdev, wdev, &dbm);
2423 if (ret == 0 &&
2424 nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL,
2425 DBM_TO_MBM(dbm)))
2426 goto nla_put_failure;
2427 }
2428
2406 if (wdev->ssid_len) { 2429 if (wdev->ssid_len) {
2407 if (nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) 2430 if (nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
2408 goto nla_put_failure; 2431 goto nla_put_failure;
@@ -3409,12 +3432,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
3409 wdev->iftype)) 3432 wdev->iftype))
3410 return -EINVAL; 3433 return -EINVAL;
3411 3434
3412 if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
3413 params.acl = parse_acl_data(&rdev->wiphy, info);
3414 if (IS_ERR(params.acl))
3415 return PTR_ERR(params.acl);
3416 }
3417
3418 if (info->attrs[NL80211_ATTR_SMPS_MODE]) { 3435 if (info->attrs[NL80211_ATTR_SMPS_MODE]) {
3419 params.smps_mode = 3436 params.smps_mode =
3420 nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); 3437 nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]);
@@ -3438,6 +3455,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
3438 params.smps_mode = NL80211_SMPS_OFF; 3455 params.smps_mode = NL80211_SMPS_OFF;
3439 } 3456 }
3440 3457
3458 if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
3459 params.acl = parse_acl_data(&rdev->wiphy, info);
3460 if (IS_ERR(params.acl))
3461 return PTR_ERR(params.acl);
3462 }
3463
3441 wdev_lock(wdev); 3464 wdev_lock(wdev);
3442 err = rdev_start_ap(rdev, dev, &params); 3465 err = rdev_start_ap(rdev, dev, &params);
3443 if (!err) { 3466 if (!err) {
@@ -3945,10 +3968,13 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
3945 struct station_parameters *params, 3968 struct station_parameters *params,
3946 enum cfg80211_station_type statype) 3969 enum cfg80211_station_type statype)
3947{ 3970{
3948 if (params->listen_interval != -1) 3971 if (params->listen_interval != -1 &&
3972 statype != CFG80211_STA_AP_CLIENT_UNASSOC)
3949 return -EINVAL; 3973 return -EINVAL;
3974
3950 if (params->aid && 3975 if (params->aid &&
3951 !(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) 3976 !(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) &&
3977 statype != CFG80211_STA_AP_CLIENT_UNASSOC)
3952 return -EINVAL; 3978 return -EINVAL;
3953 3979
3954 /* When you run into this, adjust the code below for the new flag */ 3980 /* When you run into this, adjust the code below for the new flag */
@@ -3998,7 +4024,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
3998 params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); 4024 params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
3999 } 4025 }
4000 4026
4001 if (statype != CFG80211_STA_TDLS_PEER_SETUP) { 4027 if (statype != CFG80211_STA_TDLS_PEER_SETUP &&
4028 statype != CFG80211_STA_AP_CLIENT_UNASSOC) {
4002 /* reject other things that can't change */ 4029 /* reject other things that can't change */
4003 if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) 4030 if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD)
4004 return -EINVAL; 4031 return -EINVAL;
@@ -4010,7 +4037,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
4010 return -EINVAL; 4037 return -EINVAL;
4011 } 4038 }
4012 4039
4013 if (statype != CFG80211_STA_AP_CLIENT) { 4040 if (statype != CFG80211_STA_AP_CLIENT &&
4041 statype != CFG80211_STA_AP_CLIENT_UNASSOC) {
4014 if (params->vlan) 4042 if (params->vlan)
4015 return -EINVAL; 4043 return -EINVAL;
4016 } 4044 }
@@ -4022,6 +4050,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
4022 return -EOPNOTSUPP; 4050 return -EOPNOTSUPP;
4023 break; 4051 break;
4024 case CFG80211_STA_AP_CLIENT: 4052 case CFG80211_STA_AP_CLIENT:
4053 case CFG80211_STA_AP_CLIENT_UNASSOC:
4025 /* accept only the listed bits */ 4054 /* accept only the listed bits */
4026 if (params->sta_flags_mask & 4055 if (params->sta_flags_mask &
4027 ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | 4056 ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
@@ -4219,13 +4248,22 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
4219 4248
4220 memset(&params, 0, sizeof(params)); 4249 memset(&params, 0, sizeof(params));
4221 4250
4222 params.listen_interval = -1;
4223
4224 if (!rdev->ops->change_station) 4251 if (!rdev->ops->change_station)
4225 return -EOPNOTSUPP; 4252 return -EOPNOTSUPP;
4226 4253
4227 if (info->attrs[NL80211_ATTR_STA_AID]) 4254 /*
 4228 return -EINVAL; 4255 * AID and listen_interval properties can be set only for an unassociated
 4256 * station. Accept these parameters here; they are validated in
 4257 * cfg80211_check_station_change().
4258 */
4259 if (info->attrs[NL80211_ATTR_PEER_AID])
4260 params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]);
4261
4262 if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
4263 params.listen_interval =
4264 nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
4265 else
4266 params.listen_interval = -1;
4229 4267
4230 if (!info->attrs[NL80211_ATTR_MAC]) 4268 if (!info->attrs[NL80211_ATTR_MAC])
4231 return -EINVAL; 4269 return -EINVAL;
@@ -4252,9 +4290,6 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
4252 nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); 4290 nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]);
4253 } 4291 }
4254 4292
4255 if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
4256 return -EINVAL;
4257
4258 if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params)) 4293 if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
4259 return -EINVAL; 4294 return -EINVAL;
4260 4295
@@ -4918,56 +4953,6 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
4918 return err; 4953 return err;
4919} 4954}
4920 4955
4921static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = {
4922 [NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 },
4923 [NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 },
4924 [NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 },
4925 [NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 },
4926 [NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 },
4927 [NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 },
4928 [NL80211_ATTR_DFS_CAC_TIME] = { .type = NLA_U32 },
4929};
4930
4931static int parse_reg_rule(struct nlattr *tb[],
4932 struct ieee80211_reg_rule *reg_rule)
4933{
4934 struct ieee80211_freq_range *freq_range = &reg_rule->freq_range;
4935 struct ieee80211_power_rule *power_rule = &reg_rule->power_rule;
4936
4937 if (!tb[NL80211_ATTR_REG_RULE_FLAGS])
4938 return -EINVAL;
4939 if (!tb[NL80211_ATTR_FREQ_RANGE_START])
4940 return -EINVAL;
4941 if (!tb[NL80211_ATTR_FREQ_RANGE_END])
4942 return -EINVAL;
4943 if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW])
4944 return -EINVAL;
4945 if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP])
4946 return -EINVAL;
4947
4948 reg_rule->flags = nla_get_u32(tb[NL80211_ATTR_REG_RULE_FLAGS]);
4949
4950 freq_range->start_freq_khz =
4951 nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]);
4952 freq_range->end_freq_khz =
4953 nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]);
4954 freq_range->max_bandwidth_khz =
4955 nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]);
4956
4957 power_rule->max_eirp =
4958 nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]);
4959
4960 if (tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN])
4961 power_rule->max_antenna_gain =
4962 nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]);
4963
4964 if (tb[NL80211_ATTR_DFS_CAC_TIME])
4965 reg_rule->dfs_cac_ms =
4966 nla_get_u32(tb[NL80211_ATTR_DFS_CAC_TIME]);
4967
4968 return 0;
4969}
4970
4971static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) 4956static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
4972{ 4957{
4973 char *data = NULL; 4958 char *data = NULL;
@@ -5599,6 +5584,57 @@ out_err:
5599 return err; 5584 return err;
5600} 5585}
5601 5586
5587#ifdef CONFIG_CFG80211_CRDA_SUPPORT
5588static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = {
5589 [NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 },
5590 [NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 },
5591 [NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 },
5592 [NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 },
5593 [NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 },
5594 [NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 },
5595 [NL80211_ATTR_DFS_CAC_TIME] = { .type = NLA_U32 },
5596};
5597
5598static int parse_reg_rule(struct nlattr *tb[],
5599 struct ieee80211_reg_rule *reg_rule)
5600{
5601 struct ieee80211_freq_range *freq_range = &reg_rule->freq_range;
5602 struct ieee80211_power_rule *power_rule = &reg_rule->power_rule;
5603
5604 if (!tb[NL80211_ATTR_REG_RULE_FLAGS])
5605 return -EINVAL;
5606 if (!tb[NL80211_ATTR_FREQ_RANGE_START])
5607 return -EINVAL;
5608 if (!tb[NL80211_ATTR_FREQ_RANGE_END])
5609 return -EINVAL;
5610 if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW])
5611 return -EINVAL;
5612 if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP])
5613 return -EINVAL;
5614
5615 reg_rule->flags = nla_get_u32(tb[NL80211_ATTR_REG_RULE_FLAGS]);
5616
5617 freq_range->start_freq_khz =
5618 nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]);
5619 freq_range->end_freq_khz =
5620 nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]);
5621 freq_range->max_bandwidth_khz =
5622 nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]);
5623
5624 power_rule->max_eirp =
5625 nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]);
5626
5627 if (tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN])
5628 power_rule->max_antenna_gain =
5629 nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]);
5630
5631 if (tb[NL80211_ATTR_DFS_CAC_TIME])
5632 reg_rule->dfs_cac_ms =
5633 nla_get_u32(tb[NL80211_ATTR_DFS_CAC_TIME]);
5634
5635 return 0;
5636}
5637
5602static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) 5638static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
5603{ 5639{
5604 struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; 5640 struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1];
@@ -5675,6 +5711,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
5675 kfree(rd); 5711 kfree(rd);
5676 return r; 5712 return r;
5677} 5713}
5714#endif /* CONFIG_CFG80211_CRDA_SUPPORT */
5678 5715
5679static int validate_scan_freqs(struct nlattr *freqs) 5716static int validate_scan_freqs(struct nlattr *freqs)
5680{ 5717{
@@ -5960,14 +5997,100 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
5960 return err; 5997 return err;
5961} 5998}
5962 5999
6000static int
6001nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans,
6002 struct cfg80211_sched_scan_request *request,
6003 struct nlattr **attrs)
6004{
6005 int tmp, err, i = 0;
6006 struct nlattr *attr;
6007
6008 if (!attrs[NL80211_ATTR_SCHED_SCAN_PLANS]) {
6009 u32 interval;
6010
6011 /*
6012 * If scan plans are not specified,
6013 * %NL80211_ATTR_SCHED_SCAN_INTERVAL must be specified. In this
6014 * case one scan plan will be set with the specified scan
6015 * interval and infinite number of iterations.
6016 */
6017 if (!attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL])
6018 return -EINVAL;
6019
6020 interval = nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]);
6021 if (!interval)
6022 return -EINVAL;
6023
6024 request->scan_plans[0].interval =
6025 DIV_ROUND_UP(interval, MSEC_PER_SEC);
6026 if (!request->scan_plans[0].interval)
6027 return -EINVAL;
6028
6029 if (request->scan_plans[0].interval >
6030 wiphy->max_sched_scan_plan_interval)
6031 request->scan_plans[0].interval =
6032 wiphy->max_sched_scan_plan_interval;
6033
6034 return 0;
6035 }
6036
6037 nla_for_each_nested(attr, attrs[NL80211_ATTR_SCHED_SCAN_PLANS], tmp) {
6038 struct nlattr *plan[NL80211_SCHED_SCAN_PLAN_MAX + 1];
6039
6040 if (WARN_ON(i >= n_plans))
6041 return -EINVAL;
6042
6043 err = nla_parse(plan, NL80211_SCHED_SCAN_PLAN_MAX,
6044 nla_data(attr), nla_len(attr),
6045 nl80211_plan_policy);
6046 if (err)
6047 return err;
6048
6049 if (!plan[NL80211_SCHED_SCAN_PLAN_INTERVAL])
6050 return -EINVAL;
6051
6052 request->scan_plans[i].interval =
6053 nla_get_u32(plan[NL80211_SCHED_SCAN_PLAN_INTERVAL]);
6054 if (!request->scan_plans[i].interval ||
6055 request->scan_plans[i].interval >
6056 wiphy->max_sched_scan_plan_interval)
6057 return -EINVAL;
6058
6059 if (plan[NL80211_SCHED_SCAN_PLAN_ITERATIONS]) {
6060 request->scan_plans[i].iterations =
6061 nla_get_u32(plan[NL80211_SCHED_SCAN_PLAN_ITERATIONS]);
6062 if (!request->scan_plans[i].iterations ||
6063 (request->scan_plans[i].iterations >
6064 wiphy->max_sched_scan_plan_iterations))
6065 return -EINVAL;
6066 } else if (i < n_plans - 1) {
6067 /*
6068 * All scan plans but the last one must specify
6069 * a finite number of iterations
6070 */
6071 return -EINVAL;
6072 }
6073
6074 i++;
6075 }
6076
6077 /*
6078 * The last scan plan must not specify the number of
6079 * iterations; it is supposed to run infinitely.
6080 */
6081 if (request->scan_plans[n_plans - 1].iterations)
6082 return -EINVAL;
6083
6084 return 0;
6085}
6086
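When only the legacy NL80211_ATTR_SCHED_SCAN_INTERVAL is given, the millisecond value is rounded up to whole seconds with DIV_ROUND_UP and clamped to the wiphy's per-plan maximum. A minimal userspace sketch of that conversion, where the clamp value is an assumed example rather than any real driver's limit:

/* Sketch of the legacy-interval conversion above: milliseconds are
 * rounded up to whole seconds and clamped to the wiphy limit. The
 * clamp value here is an assumed example. */
#include <stdio.h>

#define MSEC_PER_SEC	1000U
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int max_plan_interval = 3600;	/* assumed limit, seconds */
	unsigned int ms[] = { 1, 999, 1000, 1001, 7200000 };

	for (unsigned int i = 0; i < sizeof(ms) / sizeof(ms[0]); i++) {
		unsigned int s = DIV_ROUND_UP(ms[i], MSEC_PER_SEC);

		if (s > max_plan_interval)
			s = max_plan_interval;
		printf("%u ms -> %u s\n", ms[i], s);
	}
	return 0;
}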
5963static struct cfg80211_sched_scan_request * 6087static struct cfg80211_sched_scan_request *
5964nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, 6088nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
5965 struct nlattr **attrs) 6089 struct nlattr **attrs)
5966{ 6090{
5967 struct cfg80211_sched_scan_request *request; 6091 struct cfg80211_sched_scan_request *request;
5968 struct nlattr *attr; 6092 struct nlattr *attr;
5969 int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i; 6093 int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i, n_plans = 0;
5970 u32 interval;
5971 enum ieee80211_band band; 6094 enum ieee80211_band band;
5972 size_t ie_len; 6095 size_t ie_len;
5973 struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1]; 6096 struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1];
@@ -5976,13 +6099,6 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
5976 if (!is_valid_ie_attr(attrs[NL80211_ATTR_IE])) 6099 if (!is_valid_ie_attr(attrs[NL80211_ATTR_IE]))
5977 return ERR_PTR(-EINVAL); 6100 return ERR_PTR(-EINVAL);
5978 6101
5979 if (!attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL])
5980 return ERR_PTR(-EINVAL);
5981
5982 interval = nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]);
5983 if (interval == 0)
5984 return ERR_PTR(-EINVAL);
5985
5986 if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { 6102 if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
5987 n_channels = validate_scan_freqs( 6103 n_channels = validate_scan_freqs(
5988 attrs[NL80211_ATTR_SCAN_FREQUENCIES]); 6104 attrs[NL80211_ATTR_SCAN_FREQUENCIES]);
@@ -6046,9 +6162,37 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
6046 if (ie_len > wiphy->max_sched_scan_ie_len) 6162 if (ie_len > wiphy->max_sched_scan_ie_len)
6047 return ERR_PTR(-EINVAL); 6163 return ERR_PTR(-EINVAL);
6048 6164
6165 if (attrs[NL80211_ATTR_SCHED_SCAN_PLANS]) {
6166 /*
6167 * NL80211_ATTR_SCHED_SCAN_INTERVAL must not be specified since
6168 * each scan plan already specifies its own interval
6169 */
6170 if (attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL])
6171 return ERR_PTR(-EINVAL);
6172
6173 nla_for_each_nested(attr,
6174 attrs[NL80211_ATTR_SCHED_SCAN_PLANS], tmp)
6175 n_plans++;
6176 } else {
6177 /*
6178 * The scan interval attribute is kept for backward
6179 * compatibility. If no scan plans are specified and sched scan
6180 * interval is specified, one scan plan will be set with this
6181 * scan interval and an infinite number of iterations.
6182 */
6183 if (!attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL])
6184 return ERR_PTR(-EINVAL);
6185
6186 n_plans = 1;
6187 }
6188
6189 if (!n_plans || n_plans > wiphy->max_sched_scan_plans)
6190 return ERR_PTR(-EINVAL);
6191
6049 request = kzalloc(sizeof(*request) 6192 request = kzalloc(sizeof(*request)
6050 + sizeof(*request->ssids) * n_ssids 6193 + sizeof(*request->ssids) * n_ssids
6051 + sizeof(*request->match_sets) * n_match_sets 6194 + sizeof(*request->match_sets) * n_match_sets
6195 + sizeof(*request->scan_plans) * n_plans
6052 + sizeof(*request->channels) * n_channels 6196 + sizeof(*request->channels) * n_channels
6053 + ie_len, GFP_KERNEL); 6197 + ie_len, GFP_KERNEL);
6054 if (!request) 6198 if (!request)
@@ -6076,6 +6220,18 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
6076 } 6220 }
6077 request->n_match_sets = n_match_sets; 6221 request->n_match_sets = n_match_sets;
6078 6222
6223 if (n_match_sets)
6224 request->scan_plans = (void *)(request->match_sets +
6225 n_match_sets);
6226 else if (request->ie)
6227 request->scan_plans = (void *)(request->ie + ie_len);
6228 else if (n_ssids)
6229 request->scan_plans = (void *)(request->ssids + n_ssids);
6230 else
6231 request->scan_plans = (void *)(request->channels + n_channels);
6232
6233 request->n_scan_plans = n_plans;
6234
6079 i = 0; 6235 i = 0;
6080 if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { 6236 if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
6081 /* user specified, bail out if channel not found */ 6237 /* user specified, bail out if channel not found */
@@ -6238,7 +6394,10 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
6238 request->delay = 6394 request->delay =
6239 nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_DELAY]); 6395 nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_DELAY]);
6240 6396
6241 request->interval = interval; 6397 err = nl80211_parse_sched_scan_plans(wiphy, n_plans, request, attrs);
6398 if (err)
6399 goto out_free;
6400
6242 request->scan_start = jiffies; 6401 request->scan_start = jiffies;
6243 6402
6244 return request; 6403 return request;
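Note how the request is a single kzalloc with the variable-length ssids, match_sets, scan_plans and channels arrays carved out of its tail, which is why scan_plans must be pointed just past whichever optional array actually precedes it. A standalone sketch of that single-allocation layout, with simplified stand-in types:

/* Standalone sketch of the single-allocation layout used for the
 * request: one zeroed block with the variable-length arrays placed
 * in its tail, in a fixed order. Types are simplified stand-ins. */
#include <stdio.h>
#include <stdlib.h>

struct ssid { char data[32]; };
struct plan { unsigned int interval, iterations; };

struct request {
	struct ssid *ssids;
	struct plan *scan_plans;
	/* variable-length storage follows the struct itself */
};

int main(void)
{
	int n_ssids = 2, n_plans = 3;
	struct request *req = calloc(1, sizeof(*req) +
				     sizeof(struct ssid) * n_ssids +
				     sizeof(struct plan) * n_plans);

	if (!req)
		return 1;

	/* carve the arrays out of the tail, each just past the last */
	req->ssids = (struct ssid *)(req + 1);
	req->scan_plans = (struct plan *)(req->ssids + n_ssids);

	req->scan_plans[2].interval = 300;
	printf("plan[2].interval = %u\n", req->scan_plans[2].interval);
	free(req);
	return 0;
}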
@@ -6591,6 +6750,11 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
6591 jiffies_to_msecs(jiffies - intbss->ts))) 6750 jiffies_to_msecs(jiffies - intbss->ts)))
6592 goto nla_put_failure; 6751 goto nla_put_failure;
6593 6752
6753 if (intbss->ts_boottime &&
6754 nla_put_u64(msg, NL80211_BSS_LAST_SEEN_BOOTTIME,
6755 intbss->ts_boottime))
6756 goto nla_put_failure;
6757
6594 switch (rdev->wiphy.signal_type) { 6758 switch (rdev->wiphy.signal_type) {
6595 case CFG80211_SIGNAL_TYPE_MBM: 6759 case CFG80211_SIGNAL_TYPE_MBM:
6596 if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal)) 6760 if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal))
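The boottime timestamp is only added to the BSS message when the driver actually reported one, so older userspace never sees a half-initialized attribute. A minimal sketch of that emit-only-when-set pattern; put_u64() here is an invented stand-in for nla_put_u64():

/* Sketch of the optional-attribute pattern above: a field is emitted
 * only when it carries information (nonzero), so old userspace simply
 * never sees the new attribute. put_u64() is illustrative only. */
#include <stdio.h>
#include <stdint.h>

static int put_u64(const char *name, uint64_t v)
{
	printf("%s = %llu\n", name, (unsigned long long)v);
	return 0;	/* nonzero would mean "message full" */
}

int main(void)
{
	uint64_t ts_boottime = 0;	/* driver did not report one */

	if (ts_boottime && put_u64("BSS_LAST_SEEN_BOOTTIME", ts_boottime))
		return 1;	/* bail out like nla_put_failure */

	ts_boottime = 987654321ULL;	/* now the driver did */
	if (ts_boottime && put_u64("BSS_LAST_SEEN_BOOTTIME", ts_boottime))
		return 1;
	return 0;
}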
@@ -8831,7 +8995,7 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg,
8831static int nl80211_send_wowlan_nd(struct sk_buff *msg, 8995static int nl80211_send_wowlan_nd(struct sk_buff *msg,
8832 struct cfg80211_sched_scan_request *req) 8996 struct cfg80211_sched_scan_request *req)
8833{ 8997{
8834 struct nlattr *nd, *freqs, *matches, *match; 8998 struct nlattr *nd, *freqs, *matches, *match, *scan_plans, *scan_plan;
8835 int i; 8999 int i;
8836 9000
8837 if (!req) 9001 if (!req)
@@ -8841,7 +9005,9 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg,
8841 if (!nd) 9005 if (!nd)
8842 return -ENOBUFS; 9006 return -ENOBUFS;
8843 9007
8844 if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, req->interval)) 9008 if (req->n_scan_plans == 1 &&
9009 nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL,
9010 req->scan_plans[0].interval * 1000))
8845 return -ENOBUFS; 9011 return -ENOBUFS;
8846 9012
8847 if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_DELAY, req->delay)) 9013 if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_DELAY, req->delay))
@@ -8868,6 +9034,23 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg,
8868 nla_nest_end(msg, matches); 9034 nla_nest_end(msg, matches);
8869 } 9035 }
8870 9036
9037 scan_plans = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_PLANS);
9038 if (!scan_plans)
9039 return -ENOBUFS;
9040
9041 for (i = 0; i < req->n_scan_plans; i++) {
9042 scan_plan = nla_nest_start(msg, i + 1);
9043 if (!scan_plan ||
9044 nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL,
9045 req->scan_plans[i].interval) ||
9046 (req->scan_plans[i].iterations &&
9047 nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_ITERATIONS,
9048 req->scan_plans[i].iterations)))
9049 return -ENOBUFS;
9050 nla_nest_end(msg, scan_plan);
9051 }
9052 nla_nest_end(msg, scan_plans);
9053
8871 nla_nest_end(msg, nd); 9054 nla_nest_end(msg, nd);
8872 9055
8873 return 0; 9056 return 0;
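Each plan is emitted as a 1-based nested attribute carrying its interval, with the iterations attribute deliberately omitted for the final plan since that one runs forever. The sketch below only pretty-prints the resulting attribute tree; it makes no netlink calls and the values are invented:

/* Sketch of the nesting produced above: plans are numbered nested
 * attributes (1-based), with ITERATIONS left out on the final,
 * infinitely-repeating plan. Pure illustration, no netlink calls. */
#include <stdio.h>

struct plan { unsigned int interval, iterations; };

int main(void)
{
	struct plan plans[] = { { 10, 5 }, { 60, 20 }, { 300, 0 } };
	int n = sizeof(plans) / sizeof(plans[0]);

	printf("SCHED_SCAN_PLANS\n");
	for (int i = 0; i < n; i++) {
		printf("  [%d]\n    PLAN_INTERVAL = %u\n", i + 1,
		       plans[i].interval);
		if (plans[i].iterations)	/* skipped for last plan */
			printf("    PLAN_ITERATIONS = %u\n",
			       plans[i].iterations);
	}
	return 0;
}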
@@ -9938,6 +10121,9 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
9938 if (!wdev->netdev && !wdev->p2p_started) 10121 if (!wdev->netdev && !wdev->p2p_started)
9939 return -ENETDOWN; 10122 return -ENETDOWN;
9940 } 10123 }
10124
10125 if (!vcmd->doit)
10126 return -EOPNOTSUPP;
9941 } else { 10127 } else {
9942 wdev = NULL; 10128 wdev = NULL;
9943 } 10129 }
@@ -9957,6 +10143,193 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
9957 return -EOPNOTSUPP; 10143 return -EOPNOTSUPP;
9958} 10144}
9959 10145
10146static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
10147 struct netlink_callback *cb,
10148 struct cfg80211_registered_device **rdev,
10149 struct wireless_dev **wdev)
10150{
10151 u32 vid, subcmd;
10152 unsigned int i;
10153 int vcmd_idx = -1;
10154 int err;
10155 void *data = NULL;
10156 unsigned int data_len = 0;
10157
10158 rtnl_lock();
10159
10160 if (cb->args[0]) {
10161 /* subtract the 1 again here */
10162 struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
10163 struct wireless_dev *tmp;
10164
10165 if (!wiphy) {
10166 err = -ENODEV;
10167 goto out_unlock;
10168 }
10169 *rdev = wiphy_to_rdev(wiphy);
10170 *wdev = NULL;
10171
10172 if (cb->args[1]) {
10173 list_for_each_entry(tmp, &(*rdev)->wdev_list, list) {
10174 if (tmp->identifier == cb->args[1] - 1) {
10175 *wdev = tmp;
10176 break;
10177 }
10178 }
10179 }
10180
10181 /* keep rtnl locked in successful case */
10182 return 0;
10183 }
10184
10185 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
10186 nl80211_fam.attrbuf, nl80211_fam.maxattr,
10187 nl80211_policy);
10188 if (err)
10189 goto out_unlock;
10190
10191 if (!nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID] ||
10192 !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
10193 err = -EINVAL;
10194 goto out_unlock;
10195 }
10196
10197 *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk),
10198 nl80211_fam.attrbuf);
10199 if (IS_ERR(*wdev))
10200 *wdev = NULL;
10201
10202 *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk),
10203 nl80211_fam.attrbuf);
10204 if (IS_ERR(*rdev)) {
10205 err = PTR_ERR(*rdev);
10206 goto out_unlock;
10207 }
10208
10209 vid = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID]);
10210 subcmd = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
10211
10212 for (i = 0; i < (*rdev)->wiphy.n_vendor_commands; i++) {
10213 const struct wiphy_vendor_command *vcmd;
10214
10215 vcmd = &(*rdev)->wiphy.vendor_commands[i];
10216
10217 if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd)
10218 continue;
10219
10220 if (!vcmd->dumpit) {
10221 err = -EOPNOTSUPP;
10222 goto out_unlock;
10223 }
10224
10225 vcmd_idx = i;
10226 break;
10227 }
10228
10229 if (vcmd_idx < 0) {
10230 err = -EOPNOTSUPP;
10231 goto out_unlock;
10232 }
10233
10234 if (nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]) {
10235 data = nla_data(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]);
10236 data_len = nla_len(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]);
10237 }
10238
10239 /* 0 is the first index - add 1 to parse only once */
10240 cb->args[0] = (*rdev)->wiphy_idx + 1;
10241 /* add 1 to know if it was NULL */
10242 cb->args[1] = *wdev ? (*wdev)->identifier + 1 : 0;
10243 cb->args[2] = vcmd_idx;
10244 cb->args[3] = (unsigned long)data;
10245 cb->args[4] = data_len;
10246
10247 /* keep rtnl locked in successful case */
10248 return 0;
10249 out_unlock:
10250 rtnl_unlock();
10251 return err;
10252}
10253
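The prepare helper parses the request once and caches everything in cb->args so later dump passes skip straight to the cached state; indices are stored off by one because netlink dump state starts zeroed and index 0 is itself valid. A tiny standalone sketch of that +1 encoding:

/* Sketch of the "+1" encoding used for cb->args above: dump state
 * starts zeroed, so valid indices are stored off-by-one and 0 keeps
 * meaning "not parsed yet" / "no wdev". Values are invented. */
#include <stdio.h>

int main(void)
{
	unsigned long args[2] = { 0, 0 };	/* fresh dump state */
	int wiphy_idx = 0, wdev_id = -1;	/* index 0 is valid! */

	/* first pass: encode */
	args[0] = wiphy_idx + 1;
	args[1] = (wdev_id >= 0) ? wdev_id + 1 : 0;

	/* later passes: decode */
	if (args[0])
		printf("wiphy %lu\n", args[0] - 1);
	if (!args[1])
		printf("no wdev bound to this dump\n");
	return 0;
}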
10254static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
10255 struct netlink_callback *cb)
10256{
10257 struct cfg80211_registered_device *rdev;
10258 struct wireless_dev *wdev;
10259 unsigned int vcmd_idx;
10260 const struct wiphy_vendor_command *vcmd;
10261 void *data;
10262 int data_len;
10263 int err;
10264 struct nlattr *vendor_data;
10265
10266 err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev);
10267 if (err)
10268 return err;
10269
10270 vcmd_idx = cb->args[2];
10271 data = (void *)cb->args[3];
10272 data_len = cb->args[4];
10273 vcmd = &rdev->wiphy.vendor_commands[vcmd_idx];
10274
10275 if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV |
10276 WIPHY_VENDOR_CMD_NEED_NETDEV)) {
10277 if (!wdev)
10278 return -EINVAL;
10279 if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV &&
10280 !wdev->netdev)
10281 return -EINVAL;
10282
10283 if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
10284 if (wdev->netdev &&
10285 !netif_running(wdev->netdev))
10286 return -ENETDOWN;
10287 if (!wdev->netdev && !wdev->p2p_started)
10288 return -ENETDOWN;
10289 }
10290 }
10291
10292 while (1) {
10293 void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid,
10294 cb->nlh->nlmsg_seq, NLM_F_MULTI,
10295 NL80211_CMD_VENDOR);
10296 if (!hdr)
10297 break;
10298
10299 if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
10300 (wdev && nla_put_u64(skb, NL80211_ATTR_WDEV,
10301 wdev_id(wdev)))) {
10302 genlmsg_cancel(skb, hdr);
10303 break;
10304 }
10305
10306 vendor_data = nla_nest_start(skb, NL80211_ATTR_VENDOR_DATA);
10307 if (!vendor_data) {
10308 genlmsg_cancel(skb, hdr);
10309 break;
10310 }
10311
10312 err = vcmd->dumpit(&rdev->wiphy, wdev, skb, data, data_len,
10313 (unsigned long *)&cb->args[5]);
10314 nla_nest_end(skb, vendor_data);
10315
10316 if (err == -ENOBUFS || err == -ENOENT) {
10317 genlmsg_cancel(skb, hdr);
10318 break;
10319 } else if (err) {
10320 genlmsg_cancel(skb, hdr);
10321 goto out;
10322 }
10323
10324 genlmsg_end(skb, hdr);
10325 }
10326
10327 err = skb->len;
10328 out:
10329 rtnl_unlock();
10330 return err;
10331}
10332
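The dump loop keeps appending NLM_F_MULTI messages until the vendor dumpit callback reports it could not fit more, and returning skb->len tells netlink to invoke the dump again with the preserved cb->args. A self-contained sketch of that fill-and-resume contract, using stub types rather than the netlink API:

/* Sketch of the dump-resume contract used above: a dump callback is
 * invoked repeatedly, fills as much as fits, and signals completion
 * by eventually adding nothing. Stub types, not the netlink API. */
#include <stdio.h>

static int dumpit(int *cursor, char *buf, int buflen)
{
	int n = 0;

	while (*cursor < 10 && n + 2 <= buflen) {
		buf[n++] = 'A' + *cursor;
		buf[n++] = ' ';
		(*cursor)++;
	}
	return n;	/* bytes written; 0 means "done" */
}

int main(void)
{
	char buf[8];
	int cursor = 0, n;

	/* each iteration models one recvmsg() on a netlink dump */
	while ((n = dumpit(&cursor, buf, sizeof(buf))) > 0)
		printf("chunk: %.*s\n", n, buf);
	return 0;
}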
9960struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, 10333struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy,
9961 enum nl80211_commands cmd, 10334 enum nl80211_commands cmd,
9962 enum nl80211_attrs attr, 10335 enum nl80211_attrs attr,
@@ -10533,6 +10906,7 @@ static const struct genl_ops nl80211_ops[] = {
10533 .internal_flags = NL80211_FLAG_NEED_RTNL, 10906 .internal_flags = NL80211_FLAG_NEED_RTNL,
10534 /* can be retrieved by unprivileged users */ 10907 /* can be retrieved by unprivileged users */
10535 }, 10908 },
10909#ifdef CONFIG_CFG80211_CRDA_SUPPORT
10536 { 10910 {
10537 .cmd = NL80211_CMD_SET_REG, 10911 .cmd = NL80211_CMD_SET_REG,
10538 .doit = nl80211_set_reg, 10912 .doit = nl80211_set_reg,
@@ -10540,6 +10914,7 @@ static const struct genl_ops nl80211_ops[] = {
10540 .flags = GENL_ADMIN_PERM, 10914 .flags = GENL_ADMIN_PERM,
10541 .internal_flags = NL80211_FLAG_NEED_RTNL, 10915 .internal_flags = NL80211_FLAG_NEED_RTNL,
10542 }, 10916 },
10917#endif
10543 { 10918 {
10544 .cmd = NL80211_CMD_REQ_SET_REG, 10919 .cmd = NL80211_CMD_REQ_SET_REG,
10545 .doit = nl80211_req_set_reg, 10920 .doit = nl80211_req_set_reg,
@@ -10994,6 +11369,7 @@ static const struct genl_ops nl80211_ops[] = {
10994 { 11369 {
10995 .cmd = NL80211_CMD_VENDOR, 11370 .cmd = NL80211_CMD_VENDOR,
10996 .doit = nl80211_vendor_cmd, 11371 .doit = nl80211_vendor_cmd,
11372 .dumpit = nl80211_vendor_cmd_dump,
10997 .policy = nl80211_policy, 11373 .policy = nl80211_policy,
10998 .flags = GENL_ADMIN_PERM, 11374 .flags = GENL_ADMIN_PERM,
10999 .internal_flags = NL80211_FLAG_NEED_WIPHY | 11375 .internal_flags = NL80211_FLAG_NEED_WIPHY |
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2510b231451e..2e8d6f39ed56 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -135,10 +135,7 @@ static spinlock_t reg_indoor_lock;
135/* Used to track the userspace process controlling the indoor setting */ 135/* Used to track the userspace process controlling the indoor setting */
136static u32 reg_is_indoor_portid; 136static u32 reg_is_indoor_portid;
137 137
138/* Max number of consecutive attempts to communicate with CRDA */ 138static void restore_regulatory_settings(bool reset_user);
139#define REG_MAX_CRDA_TIMEOUTS 10
140
141static u32 reg_crda_timeouts;
142 139
143static const struct ieee80211_regdomain *get_cfg80211_regdom(void) 140static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
144{ 141{
@@ -226,9 +223,6 @@ static DECLARE_DELAYED_WORK(reg_check_chans, reg_check_chans_work);
226static void reg_todo(struct work_struct *work); 223static void reg_todo(struct work_struct *work);
227static DECLARE_WORK(reg_work, reg_todo); 224static DECLARE_WORK(reg_work, reg_todo);
228 225
229static void reg_timeout_work(struct work_struct *work);
230static DECLARE_DELAYED_WORK(reg_timeout, reg_timeout_work);
231
232/* We keep a static world regulatory domain in case of the absence of CRDA */ 226/* We keep a static world regulatory domain in case of the absence of CRDA */
233static const struct ieee80211_regdomain world_regdom = { 227static const struct ieee80211_regdomain world_regdom = {
234 .n_reg_rules = 8, 228 .n_reg_rules = 8,
@@ -262,7 +256,7 @@ static const struct ieee80211_regdomain world_regdom = {
262 REG_RULE(5745-10, 5825+10, 80, 6, 20, 256 REG_RULE(5745-10, 5825+10, 80, 6, 20,
263 NL80211_RRF_NO_IR), 257 NL80211_RRF_NO_IR),
264 258
265 /* IEEE 802.11ad (60gHz), channels 1..3 */ 259 /* IEEE 802.11ad (60GHz), channels 1..3 */
266 REG_RULE(56160+2160*1-1080, 56160+2160*3+1080, 2160, 0, 0, 0), 260 REG_RULE(56160+2160*1-1080, 56160+2160*3+1080, 2160, 0, 0, 0),
267 } 261 }
268}; 262};
@@ -279,6 +273,9 @@ MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code");
279 273
280static void reg_free_request(struct regulatory_request *request) 274static void reg_free_request(struct regulatory_request *request)
281{ 275{
276 if (request == &core_request_world)
277 return;
278
282 if (request != get_last_request()) 279 if (request != get_last_request())
283 kfree(request); 280 kfree(request);
284} 281}
@@ -453,68 +450,70 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd)
453} 450}
454 451
455#ifdef CONFIG_CFG80211_INTERNAL_REGDB 452#ifdef CONFIG_CFG80211_INTERNAL_REGDB
456struct reg_regdb_search_request { 453struct reg_regdb_apply_request {
457 char alpha2[2];
458 struct list_head list; 454 struct list_head list;
455 const struct ieee80211_regdomain *regdom;
459}; 456};
460 457
461static LIST_HEAD(reg_regdb_search_list); 458static LIST_HEAD(reg_regdb_apply_list);
462static DEFINE_MUTEX(reg_regdb_search_mutex); 459static DEFINE_MUTEX(reg_regdb_apply_mutex);
463 460
464static void reg_regdb_search(struct work_struct *work) 461static void reg_regdb_apply(struct work_struct *work)
465{ 462{
466 struct reg_regdb_search_request *request; 463 struct reg_regdb_apply_request *request;
467 const struct ieee80211_regdomain *curdom, *regdom = NULL;
468 int i;
469 464
470 rtnl_lock(); 465 rtnl_lock();
471 466
472 mutex_lock(&reg_regdb_search_mutex); 467 mutex_lock(&reg_regdb_apply_mutex);
473 while (!list_empty(&reg_regdb_search_list)) { 468 while (!list_empty(&reg_regdb_apply_list)) {
474 request = list_first_entry(&reg_regdb_search_list, 469 request = list_first_entry(&reg_regdb_apply_list,
475 struct reg_regdb_search_request, 470 struct reg_regdb_apply_request,
476 list); 471 list);
477 list_del(&request->list); 472 list_del(&request->list);
478 473
479 for (i = 0; i < reg_regdb_size; i++) { 474 set_regdom(request->regdom, REGD_SOURCE_INTERNAL_DB);
480 curdom = reg_regdb[i];
481
482 if (alpha2_equal(request->alpha2, curdom->alpha2)) {
483 regdom = reg_copy_regd(curdom);
484 break;
485 }
486 }
487
488 kfree(request); 475 kfree(request);
489 } 476 }
490 mutex_unlock(&reg_regdb_search_mutex); 477 mutex_unlock(&reg_regdb_apply_mutex);
491
492 if (!IS_ERR_OR_NULL(regdom))
493 set_regdom(regdom, REGD_SOURCE_INTERNAL_DB);
494 478
495 rtnl_unlock(); 479 rtnl_unlock();
496} 480}
497 481
498static DECLARE_WORK(reg_regdb_work, reg_regdb_search); 482static DECLARE_WORK(reg_regdb_work, reg_regdb_apply);
499 483
500static void reg_regdb_query(const char *alpha2) 484static int reg_query_builtin(const char *alpha2)
501{ 485{
502 struct reg_regdb_search_request *request; 486 const struct ieee80211_regdomain *regdom = NULL;
487 struct reg_regdb_apply_request *request;
488 unsigned int i;
503 489
504 if (!alpha2) 490 for (i = 0; i < reg_regdb_size; i++) {
505 return; 491 if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) {
492 regdom = reg_regdb[i];
493 break;
494 }
495 }
496
497 if (!regdom)
498 return -ENODATA;
506 499
507 request = kzalloc(sizeof(struct reg_regdb_search_request), GFP_KERNEL); 500 request = kzalloc(sizeof(struct reg_regdb_apply_request), GFP_KERNEL);
508 if (!request) 501 if (!request)
509 return; 502 return -ENOMEM;
510 503
511 memcpy(request->alpha2, alpha2, 2); 504 request->regdom = reg_copy_regd(regdom);
505 if (IS_ERR_OR_NULL(request->regdom)) {
506 kfree(request);
507 return -ENOMEM;
508 }
512 509
513 mutex_lock(&reg_regdb_search_mutex); 510 mutex_lock(&reg_regdb_apply_mutex);
514 list_add_tail(&request->list, &reg_regdb_search_list); 511 list_add_tail(&request->list, &reg_regdb_apply_list);
515 mutex_unlock(&reg_regdb_search_mutex); 512 mutex_unlock(&reg_regdb_apply_mutex);
516 513
517 schedule_work(&reg_regdb_work); 514 schedule_work(&reg_regdb_work);
515
516 return 0;
518} 517}
519 518
520/* Feel free to add any other sanity checks here */ 519/* Feel free to add any other sanity checks here */
@@ -525,9 +524,45 @@ static void reg_regdb_size_check(void)
525} 524}
526#else 525#else
527static inline void reg_regdb_size_check(void) {} 526static inline void reg_regdb_size_check(void) {}
528static inline void reg_regdb_query(const char *alpha2) {} 527static inline int reg_query_builtin(const char *alpha2)
528{
529 return -ENODATA;
530}
529#endif /* CONFIG_CFG80211_INTERNAL_REGDB */ 531#endif /* CONFIG_CFG80211_INTERNAL_REGDB */
530 532
533#ifdef CONFIG_CFG80211_CRDA_SUPPORT
534/* Max number of consecutive attempts to communicate with CRDA */
535#define REG_MAX_CRDA_TIMEOUTS 10
536
537static u32 reg_crda_timeouts;
538
539static void crda_timeout_work(struct work_struct *work);
540static DECLARE_DELAYED_WORK(crda_timeout, crda_timeout_work);
541
542static void crda_timeout_work(struct work_struct *work)
543{
544 REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n");
545 rtnl_lock();
546 reg_crda_timeouts++;
547 restore_regulatory_settings(true);
548 rtnl_unlock();
549}
550
551static void cancel_crda_timeout(void)
552{
553 cancel_delayed_work(&crda_timeout);
554}
555
556static void cancel_crda_timeout_sync(void)
557{
558 cancel_delayed_work_sync(&crda_timeout);
559}
560
561static void reset_crda_timeouts(void)
562{
563 reg_crda_timeouts = 0;
564}
565
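With CRDA support now behind its own Kconfig symbol, the timeout machinery counts consecutive unanswered uevents and refuses further CRDA calls once the count passes REG_MAX_CRDA_TIMEOUTS, until something resets the counter. A userspace sketch of that retry accounting (the constant matches the kernel's; everything else is a stand-in):

/* Userspace sketch of the CRDA retry accounting above: each timeout
 * bumps a counter, and once too many consecutive timeouts accumulate,
 * further calls are refused until a reset (e.g. a new user hint). */
#include <stdio.h>

#define REG_MAX_CRDA_TIMEOUTS 10

static unsigned int timeouts;

static int call_helper(void)
{
	if (timeouts > REG_MAX_CRDA_TIMEOUTS) {
		printf("giving up on helper\n");
		return -1;
	}
	return 0;	/* the uevent would be sent here */
}

int main(void)
{
	for (int i = 0; i < 12; i++) {
		if (call_helper() == 0)
			timeouts++;	/* pretend every call times out */
	}
	timeouts = 0;	/* reset_crda_timeouts(): a fresh hint arrived */
	printf("after reset: %d\n", call_helper());
	return 0;
}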
531/* 566/*
532 * This lets us keep regulatory code which is updated on a regulatory 567 * This lets us keep regulatory code which is updated on a regulatory
533 * basis in userspace. 568 * basis in userspace.
@@ -536,13 +571,11 @@ static int call_crda(const char *alpha2)
536{ 571{
537 char country[12]; 572 char country[12];
538 char *env[] = { country, NULL }; 573 char *env[] = { country, NULL };
574 int ret;
539 575
540 snprintf(country, sizeof(country), "COUNTRY=%c%c", 576 snprintf(country, sizeof(country), "COUNTRY=%c%c",
541 alpha2[0], alpha2[1]); 577 alpha2[0], alpha2[1]);
542 578
543 /* query internal regulatory database (if it exists) */
544 reg_regdb_query(alpha2);
545
546 if (reg_crda_timeouts > REG_MAX_CRDA_TIMEOUTS) { 579 if (reg_crda_timeouts > REG_MAX_CRDA_TIMEOUTS) {
547 pr_debug("Exceeded CRDA call max attempts. Not calling CRDA\n"); 580 pr_debug("Exceeded CRDA call max attempts. Not calling CRDA\n");
548 return -EINVAL; 581 return -EINVAL;
@@ -554,18 +587,34 @@ static int call_crda(const char *alpha2)
554 else 587 else
555 pr_debug("Calling CRDA to update world regulatory domain\n"); 588 pr_debug("Calling CRDA to update world regulatory domain\n");
556 589
557 return kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, env); 590 ret = kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, env);
591 if (ret)
592 return ret;
593
594 queue_delayed_work(system_power_efficient_wq,
595 &crda_timeout, msecs_to_jiffies(3142));
596 return 0;
597}
598#else
599static inline void cancel_crda_timeout(void) {}
600static inline void cancel_crda_timeout_sync(void) {}
601static inline void reset_crda_timeouts(void) {}
602static inline int call_crda(const char *alpha2)
603{
604 return -ENODATA;
558} 605}
606#endif /* CONFIG_CFG80211_CRDA_SUPPORT */
559 607
560static enum reg_request_treatment 608static bool reg_query_database(struct regulatory_request *request)
561reg_call_crda(struct regulatory_request *request)
562{ 609{
563 if (call_crda(request->alpha2)) 610 /* query internal regulatory database (if it exists) */
564 return REG_REQ_IGNORE; 611 if (reg_query_builtin(request->alpha2) == 0)
612 return true;
565 613
566 queue_delayed_work(system_power_efficient_wq, 614 if (call_crda(request->alpha2) == 0)
567 &reg_timeout, msecs_to_jiffies(3142)); 615 return true;
568 return REG_REQ_OK; 616
617 return false;
569} 618}
570 619
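reg_query_database() is the new single entry point for regdomain lookups: the built-in database is consulted first and CRDA is only a fallback, with the boolean result deciding whether a hint is taken or ignored. A sketch of that two-stage fallback, with stub lookups standing in for the kernel's:

/* Sketch of the two-stage lookup: the built-in regdb first, then the
 * userspace helper, reporting whether either path took the request.
 * The stub return values stand in for -ENODATA and a sent uevent. */
#include <stdio.h>
#include <stdbool.h>

static int query_builtin(const char *alpha2)
{
	(void)alpha2;
	return -2;		/* stands in for -ENODATA: not built in */
}

static int call_helper(const char *alpha2)
{
	(void)alpha2;
	return 0;		/* helper notified successfully */
}

static bool query_database(const char *alpha2)
{
	if (query_builtin(alpha2) == 0)
		return true;	/* answered from the static database */
	if (call_helper(alpha2) == 0)
		return true;	/* answer will arrive asynchronously */
	return false;		/* caller should ignore the hint */
}

int main(void)
{
	printf("US handled: %d\n", query_database("US"));
	return 0;
}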
571bool reg_is_valid_request(const char *alpha2) 620bool reg_is_valid_request(const char *alpha2)
@@ -1040,8 +1089,8 @@ freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq,
1040 return ERR_PTR(-EINVAL); 1089 return ERR_PTR(-EINVAL);
1041} 1090}
1042 1091
1043const struct ieee80211_reg_rule *__freq_reg_info(struct wiphy *wiphy, 1092static const struct ieee80211_reg_rule *
1044 u32 center_freq, u32 min_bw) 1093__freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 min_bw)
1045{ 1094{
1046 const struct ieee80211_regdomain *regd = reg_get_regdomain(wiphy); 1095 const struct ieee80211_regdomain *regd = reg_get_regdomain(wiphy);
1047 const struct ieee80211_reg_rule *reg_rule = NULL; 1096 const struct ieee80211_reg_rule *reg_rule = NULL;
@@ -1081,11 +1130,11 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator)
1081} 1130}
1082EXPORT_SYMBOL(reg_initiator_name); 1131EXPORT_SYMBOL(reg_initiator_name);
1083 1132
1084#ifdef CONFIG_CFG80211_REG_DEBUG
1085static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd, 1133static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
1086 struct ieee80211_channel *chan, 1134 struct ieee80211_channel *chan,
1087 const struct ieee80211_reg_rule *reg_rule) 1135 const struct ieee80211_reg_rule *reg_rule)
1088{ 1136{
1137#ifdef CONFIG_CFG80211_REG_DEBUG
1089 const struct ieee80211_power_rule *power_rule; 1138 const struct ieee80211_power_rule *power_rule;
1090 const struct ieee80211_freq_range *freq_range; 1139 const struct ieee80211_freq_range *freq_range;
1091 char max_antenna_gain[32], bw[32]; 1140 char max_antenna_gain[32], bw[32];
@@ -1096,7 +1145,7 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
1096 if (!power_rule->max_antenna_gain) 1145 if (!power_rule->max_antenna_gain)
1097 snprintf(max_antenna_gain, sizeof(max_antenna_gain), "N/A"); 1146 snprintf(max_antenna_gain, sizeof(max_antenna_gain), "N/A");
1098 else 1147 else
1099 snprintf(max_antenna_gain, sizeof(max_antenna_gain), "%d", 1148 snprintf(max_antenna_gain, sizeof(max_antenna_gain), "%d mBi",
1100 power_rule->max_antenna_gain); 1149 power_rule->max_antenna_gain);
1101 1150
1102 if (reg_rule->flags & NL80211_RRF_AUTO_BW) 1151 if (reg_rule->flags & NL80211_RRF_AUTO_BW)
@@ -1110,19 +1159,12 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
1110 REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n", 1159 REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n",
1111 chan->center_freq); 1160 chan->center_freq);
1112 1161
1113 REG_DBG_PRINT("%d KHz - %d KHz @ %s), (%s mBi, %d mBm)\n", 1162 REG_DBG_PRINT("(%d KHz - %d KHz @ %s), (%s, %d mBm)\n",
1114 freq_range->start_freq_khz, freq_range->end_freq_khz, 1163 freq_range->start_freq_khz, freq_range->end_freq_khz,
1115 bw, max_antenna_gain, 1164 bw, max_antenna_gain,
1116 power_rule->max_eirp); 1165 power_rule->max_eirp);
1117}
1118#else
1119static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
1120 struct ieee80211_channel *chan,
1121 const struct ieee80211_reg_rule *reg_rule)
1122{
1123 return;
1124}
1125#endif 1166#endif
1167}
1126 1168
1127/* 1169/*
1128 * Note that right now we assume the desired channel bandwidth 1170 * Note that right now we assume the desired channel bandwidth
@@ -1311,7 +1353,8 @@ static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy)
1311 return !(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS); 1353 return !(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS);
1312} 1354}
1313#else 1355#else
1314static int reg_ignore_cell_hint(struct regulatory_request *pending_request) 1356static enum reg_request_treatment
1357reg_ignore_cell_hint(struct regulatory_request *pending_request)
1315{ 1358{
1316 return REG_REQ_IGNORE; 1359 return REG_REQ_IGNORE;
1317} 1360}
@@ -1846,7 +1889,7 @@ static void reg_set_request_processed(void)
1846 need_more_processing = true; 1889 need_more_processing = true;
1847 spin_unlock(&reg_requests_lock); 1890 spin_unlock(&reg_requests_lock);
1848 1891
1849 cancel_delayed_work(&reg_timeout); 1892 cancel_crda_timeout();
1850 1893
1851 if (need_more_processing) 1894 if (need_more_processing)
1852 schedule_work(&reg_work); 1895 schedule_work(&reg_work);
@@ -1858,19 +1901,18 @@ static void reg_set_request_processed(void)
1858 * 1901 *
1859 * The wireless subsystem can use this function to process 1902 * The wireless subsystem can use this function to process
1860 * a regulatory request issued by the regulatory core. 1903 * a regulatory request issued by the regulatory core.
1861 *
1862 * Returns one of the different reg request treatment values.
1863 */ 1904 */
1864static enum reg_request_treatment 1905static enum reg_request_treatment
1865reg_process_hint_core(struct regulatory_request *core_request) 1906reg_process_hint_core(struct regulatory_request *core_request)
1866{ 1907{
1908 if (reg_query_database(core_request)) {
1909 core_request->intersect = false;
1910 core_request->processed = false;
1911 reg_update_last_request(core_request);
1912 return REG_REQ_OK;
1913 }
1867 1914
1868 core_request->intersect = false; 1915 return REG_REQ_IGNORE;
1869 core_request->processed = false;
1870
1871 reg_update_last_request(core_request);
1872
1873 return reg_call_crda(core_request);
1874} 1916}
1875 1917
1876static enum reg_request_treatment 1918static enum reg_request_treatment
@@ -1915,8 +1957,6 @@ __reg_process_hint_user(struct regulatory_request *user_request)
1915 * 1957 *
1916 * The wireless subsystem can use this function to process 1958 * The wireless subsystem can use this function to process
1917 * a regulatory request initiated by userspace. 1959 * a regulatory request initiated by userspace.
1918 *
1919 * Returns one of the different reg request treatment values.
1920 */ 1960 */
1921static enum reg_request_treatment 1961static enum reg_request_treatment
1922reg_process_hint_user(struct regulatory_request *user_request) 1962reg_process_hint_user(struct regulatory_request *user_request)
@@ -1925,20 +1965,20 @@ reg_process_hint_user(struct regulatory_request *user_request)
1925 1965
1926 treatment = __reg_process_hint_user(user_request); 1966 treatment = __reg_process_hint_user(user_request);
1927 if (treatment == REG_REQ_IGNORE || 1967 if (treatment == REG_REQ_IGNORE ||
1928 treatment == REG_REQ_ALREADY_SET) { 1968 treatment == REG_REQ_ALREADY_SET)
1929 reg_free_request(user_request); 1969 return REG_REQ_IGNORE;
1930 return treatment;
1931 }
1932 1970
1933 user_request->intersect = treatment == REG_REQ_INTERSECT; 1971 user_request->intersect = treatment == REG_REQ_INTERSECT;
1934 user_request->processed = false; 1972 user_request->processed = false;
1935 1973
1936 reg_update_last_request(user_request); 1974 if (reg_query_database(user_request)) {
1937 1975 reg_update_last_request(user_request);
1938 user_alpha2[0] = user_request->alpha2[0]; 1976 user_alpha2[0] = user_request->alpha2[0];
1939 user_alpha2[1] = user_request->alpha2[1]; 1977 user_alpha2[1] = user_request->alpha2[1];
1978 return REG_REQ_OK;
1979 }
1940 1980
1941 return reg_call_crda(user_request); 1981 return REG_REQ_IGNORE;
1942} 1982}
1943 1983
1944static enum reg_request_treatment 1984static enum reg_request_treatment
@@ -1986,16 +2026,12 @@ reg_process_hint_driver(struct wiphy *wiphy,
1986 case REG_REQ_OK: 2026 case REG_REQ_OK:
1987 break; 2027 break;
1988 case REG_REQ_IGNORE: 2028 case REG_REQ_IGNORE:
1989 reg_free_request(driver_request); 2029 return REG_REQ_IGNORE;
1990 return treatment;
1991 case REG_REQ_INTERSECT: 2030 case REG_REQ_INTERSECT:
1992 /* fall through */
1993 case REG_REQ_ALREADY_SET: 2031 case REG_REQ_ALREADY_SET:
1994 regd = reg_copy_regd(get_cfg80211_regdom()); 2032 regd = reg_copy_regd(get_cfg80211_regdom());
1995 if (IS_ERR(regd)) { 2033 if (IS_ERR(regd))
1996 reg_free_request(driver_request);
1997 return REG_REQ_IGNORE; 2034 return REG_REQ_IGNORE;
1998 }
1999 2035
2000 tmp = get_wiphy_regdom(wiphy); 2036 tmp = get_wiphy_regdom(wiphy);
2001 rcu_assign_pointer(wiphy->regd, regd); 2037 rcu_assign_pointer(wiphy->regd, regd);
@@ -2006,8 +2042,6 @@ reg_process_hint_driver(struct wiphy *wiphy,
2006 driver_request->intersect = treatment == REG_REQ_INTERSECT; 2042 driver_request->intersect = treatment == REG_REQ_INTERSECT;
2007 driver_request->processed = false; 2043 driver_request->processed = false;
2008 2044
2009 reg_update_last_request(driver_request);
2010
2011 /* 2045 /*
2012 * Since CRDA will not be called in this case as we already 2046 * Since CRDA will not be called in this case as we already
2013 * have applied the requested regulatory domain before we just 2047 * have applied the requested regulatory domain before we just
@@ -2015,11 +2049,17 @@ reg_process_hint_driver(struct wiphy *wiphy,
2015 */ 2049 */
2016 if (treatment == REG_REQ_ALREADY_SET) { 2050 if (treatment == REG_REQ_ALREADY_SET) {
2017 nl80211_send_reg_change_event(driver_request); 2051 nl80211_send_reg_change_event(driver_request);
2052 reg_update_last_request(driver_request);
2018 reg_set_request_processed(); 2053 reg_set_request_processed();
2019 return treatment; 2054 return REG_REQ_ALREADY_SET;
2020 } 2055 }
2021 2056
2022 return reg_call_crda(driver_request); 2057 if (reg_query_database(driver_request)) {
2058 reg_update_last_request(driver_request);
2059 return REG_REQ_OK;
2060 }
2061
2062 return REG_REQ_IGNORE;
2023} 2063}
2024 2064
2025static enum reg_request_treatment 2065static enum reg_request_treatment
@@ -2085,12 +2125,11 @@ reg_process_hint_country_ie(struct wiphy *wiphy,
2085 case REG_REQ_OK: 2125 case REG_REQ_OK:
2086 break; 2126 break;
2087 case REG_REQ_IGNORE: 2127 case REG_REQ_IGNORE:
2088 /* fall through */ 2128 return REG_REQ_IGNORE;
2089 case REG_REQ_ALREADY_SET: 2129 case REG_REQ_ALREADY_SET:
2090 reg_free_request(country_ie_request); 2130 reg_free_request(country_ie_request);
2091 return treatment; 2131 return REG_REQ_ALREADY_SET;
2092 case REG_REQ_INTERSECT: 2132 case REG_REQ_INTERSECT:
2093 reg_free_request(country_ie_request);
2094 /* 2133 /*
2095 * This doesn't happen yet, not sure we 2134 * This doesn't happen yet, not sure we
2096 * ever want to support it for this case. 2135 * ever want to support it for this case.
@@ -2102,9 +2141,12 @@ reg_process_hint_country_ie(struct wiphy *wiphy,
2102 country_ie_request->intersect = false; 2141 country_ie_request->intersect = false;
2103 country_ie_request->processed = false; 2142 country_ie_request->processed = false;
2104 2143
2105 reg_update_last_request(country_ie_request); 2144 if (reg_query_database(country_ie_request)) {
2145 reg_update_last_request(country_ie_request);
2146 return REG_REQ_OK;
2147 }
2106 2148
2107 return reg_call_crda(country_ie_request); 2149 return REG_REQ_IGNORE;
2108} 2150}
2109 2151
2110/* This processes *all* regulatory hints */ 2152/* This processes *all* regulatory hints */
@@ -2118,11 +2160,11 @@ static void reg_process_hint(struct regulatory_request *reg_request)
2118 2160
2119 switch (reg_request->initiator) { 2161 switch (reg_request->initiator) {
2120 case NL80211_REGDOM_SET_BY_CORE: 2162 case NL80211_REGDOM_SET_BY_CORE:
2121 reg_process_hint_core(reg_request); 2163 treatment = reg_process_hint_core(reg_request);
2122 return; 2164 break;
2123 case NL80211_REGDOM_SET_BY_USER: 2165 case NL80211_REGDOM_SET_BY_USER:
2124 reg_process_hint_user(reg_request); 2166 treatment = reg_process_hint_user(reg_request);
2125 return; 2167 break;
2126 case NL80211_REGDOM_SET_BY_DRIVER: 2168 case NL80211_REGDOM_SET_BY_DRIVER:
2127 if (!wiphy) 2169 if (!wiphy)
2128 goto out_free; 2170 goto out_free;
@@ -2138,6 +2180,12 @@ static void reg_process_hint(struct regulatory_request *reg_request)
2138 goto out_free; 2180 goto out_free;
2139 } 2181 }
2140 2182
2183 if (treatment == REG_REQ_IGNORE)
2184 goto out_free;
2185
2186 WARN(treatment != REG_REQ_OK && treatment != REG_REQ_ALREADY_SET,
2187 "unexpected treatment value %d\n", treatment);
2188
2141 /* This is required so that the orig_* parameters are saved. 2189 /* This is required so that the orig_* parameters are saved.
2142 * NOTE: treatment must be set for any case that reaches here! 2190 * NOTE: treatment must be set for any case that reaches here!
2143 */ 2191 */
@@ -2345,7 +2393,7 @@ int regulatory_hint_user(const char *alpha2,
2345 request->user_reg_hint_type = user_reg_hint_type; 2393 request->user_reg_hint_type = user_reg_hint_type;
2346 2394
2347 /* Allow calling CRDA again */ 2395 /* Allow calling CRDA again */
2348 reg_crda_timeouts = 0; 2396 reset_crda_timeouts();
2349 2397
2350 queue_regulatory_request(request); 2398 queue_regulatory_request(request);
2351 2399
@@ -2417,7 +2465,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
2417 request->initiator = NL80211_REGDOM_SET_BY_DRIVER; 2465 request->initiator = NL80211_REGDOM_SET_BY_DRIVER;
2418 2466
2419 /* Allow calling CRDA again */ 2467 /* Allow calling CRDA again */
2420 reg_crda_timeouts = 0; 2468 reset_crda_timeouts();
2421 2469
2422 queue_regulatory_request(request); 2470 queue_regulatory_request(request);
2423 2471
@@ -2473,7 +2521,7 @@ void regulatory_hint_country_ie(struct wiphy *wiphy, enum ieee80211_band band,
2473 request->country_ie_env = env; 2521 request->country_ie_env = env;
2474 2522
2475 /* Allow calling CRDA again */ 2523 /* Allow calling CRDA again */
2476 reg_crda_timeouts = 0; 2524 reset_crda_timeouts();
2477 2525
2478 queue_regulatory_request(request); 2526 queue_regulatory_request(request);
2479 request = NULL; 2527 request = NULL;
@@ -2874,11 +2922,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
2874 } 2922 }
2875 2923
2876 request_wiphy = wiphy_idx_to_wiphy(driver_request->wiphy_idx); 2924 request_wiphy = wiphy_idx_to_wiphy(driver_request->wiphy_idx);
2877 if (!request_wiphy) { 2925 if (!request_wiphy)
2878 queue_delayed_work(system_power_efficient_wq,
2879 &reg_timeout, 0);
2880 return -ENODEV; 2926 return -ENODEV;
2881 }
2882 2927
2883 if (!driver_request->intersect) { 2928 if (!driver_request->intersect) {
2884 if (request_wiphy->regd) 2929 if (request_wiphy->regd)
@@ -2935,11 +2980,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd,
2935 } 2980 }
2936 2981
2937 request_wiphy = wiphy_idx_to_wiphy(country_ie_request->wiphy_idx); 2982 request_wiphy = wiphy_idx_to_wiphy(country_ie_request->wiphy_idx);
2938 if (!request_wiphy) { 2983 if (!request_wiphy)
2939 queue_delayed_work(system_power_efficient_wq,
2940 &reg_timeout, 0);
2941 return -ENODEV; 2984 return -ENODEV;
2942 }
2943 2985
2944 if (country_ie_request->intersect) 2986 if (country_ie_request->intersect)
2945 return -EINVAL; 2987 return -EINVAL;
@@ -2966,7 +3008,7 @@ int set_regdom(const struct ieee80211_regdomain *rd,
2966 } 3008 }
2967 3009
2968 if (regd_src == REGD_SOURCE_CRDA) 3010 if (regd_src == REGD_SOURCE_CRDA)
2969 reg_crda_timeouts = 0; 3011 reset_crda_timeouts();
2970 3012
2971 lr = get_last_request(); 3013 lr = get_last_request();
2972 3014
@@ -3123,15 +3165,6 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy)
3123 lr->country_ie_env = ENVIRON_ANY; 3165 lr->country_ie_env = ENVIRON_ANY;
3124} 3166}
3125 3167
3126static void reg_timeout_work(struct work_struct *work)
3127{
3128 REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n");
3129 rtnl_lock();
3130 reg_crda_timeouts++;
3131 restore_regulatory_settings(true);
3132 rtnl_unlock();
3133}
3134
3135/* 3168/*
3136 * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for 3169 * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for
3137 * UNII band definitions 3170 * UNII band definitions
@@ -3217,7 +3250,7 @@ void regulatory_exit(void)
3217 struct reg_beacon *reg_beacon, *btmp; 3250 struct reg_beacon *reg_beacon, *btmp;
3218 3251
3219 cancel_work_sync(&reg_work); 3252 cancel_work_sync(&reg_work);
3220 cancel_delayed_work_sync(&reg_timeout); 3253 cancel_crda_timeout_sync();
3221 cancel_delayed_work_sync(&reg_check_chans); 3254 cancel_delayed_work_sync(&reg_check_chans);
3222 3255
3223 /* Lock to suppress warnings */ 3256 /* Lock to suppress warnings */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 3a50aa2553bf..14d5369eb778 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -266,8 +266,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk)
266 spin_lock_bh(&rdev->bss_lock); 266 spin_lock_bh(&rdev->bss_lock);
267 __cfg80211_bss_expire(rdev, request->scan_start); 267 __cfg80211_bss_expire(rdev, request->scan_start);
268 spin_unlock_bh(&rdev->bss_lock); 268 spin_unlock_bh(&rdev->bss_lock);
269 request->scan_start = 269 request->scan_start = jiffies;
270 jiffies + msecs_to_jiffies(request->interval);
271 } 270 }
272 nl80211_send_sched_scan_results(rdev, request->dev); 271 nl80211_send_sched_scan_results(rdev, request->dev);
273 } 272 }
@@ -839,6 +838,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
839 found->pub.signal = tmp->pub.signal; 838 found->pub.signal = tmp->pub.signal;
840 found->pub.capability = tmp->pub.capability; 839 found->pub.capability = tmp->pub.capability;
841 found->ts = tmp->ts; 840 found->ts = tmp->ts;
841 found->ts_boottime = tmp->ts_boottime;
842 } else { 842 } else {
843 struct cfg80211_internal_bss *new; 843 struct cfg80211_internal_bss *new;
844 struct cfg80211_internal_bss *hidden; 844 struct cfg80211_internal_bss *hidden;
@@ -938,14 +938,13 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
938} 938}
939 939
940/* Returned bss is reference counted and must be cleaned up appropriately. */ 940/* Returned bss is reference counted and must be cleaned up appropriately. */
941struct cfg80211_bss* 941struct cfg80211_bss *
942cfg80211_inform_bss_width(struct wiphy *wiphy, 942cfg80211_inform_bss_data(struct wiphy *wiphy,
943 struct ieee80211_channel *rx_channel, 943 struct cfg80211_inform_bss *data,
944 enum nl80211_bss_scan_width scan_width, 944 enum cfg80211_bss_frame_type ftype,
945 enum cfg80211_bss_frame_type ftype, 945 const u8 *bssid, u64 tsf, u16 capability,
946 const u8 *bssid, u64 tsf, u16 capability, 946 u16 beacon_interval, const u8 *ie, size_t ielen,
947 u16 beacon_interval, const u8 *ie, size_t ielen, 947 gfp_t gfp)
948 s32 signal, gfp_t gfp)
949{ 948{
950 struct cfg80211_bss_ies *ies; 949 struct cfg80211_bss_ies *ies;
951 struct ieee80211_channel *channel; 950 struct ieee80211_channel *channel;
@@ -957,19 +956,21 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
957 return NULL; 956 return NULL;
958 957
959 if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && 958 if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC &&
960 (signal < 0 || signal > 100))) 959 (data->signal < 0 || data->signal > 100)))
961 return NULL; 960 return NULL;
962 961
963 channel = cfg80211_get_bss_channel(wiphy, ie, ielen, rx_channel); 962 channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan);
964 if (!channel) 963 if (!channel)
965 return NULL; 964 return NULL;
966 965
967 memcpy(tmp.pub.bssid, bssid, ETH_ALEN); 966 memcpy(tmp.pub.bssid, bssid, ETH_ALEN);
968 tmp.pub.channel = channel; 967 tmp.pub.channel = channel;
969 tmp.pub.scan_width = scan_width; 968 tmp.pub.scan_width = data->scan_width;
970 tmp.pub.signal = signal; 969 tmp.pub.signal = data->signal;
971 tmp.pub.beacon_interval = beacon_interval; 970 tmp.pub.beacon_interval = beacon_interval;
972 tmp.pub.capability = capability; 971 tmp.pub.capability = capability;
972 tmp.ts_boottime = data->boottime_ns;
973
973 /* 974 /*
974 * If we do not know here whether the IEs are from a Beacon or Probe 975 * If we do not know here whether the IEs are from a Beacon or Probe
975 * Response frame, we need to pick one of the options and only use it 976 * Response frame, we need to pick one of the options and only use it
@@ -999,7 +1000,7 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
999 } 1000 }
1000 rcu_assign_pointer(tmp.pub.ies, ies); 1001 rcu_assign_pointer(tmp.pub.ies, ies);
1001 1002
1002 signal_valid = abs(rx_channel->center_freq - channel->center_freq) <= 1003 signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
1003 wiphy->max_adj_channel_rssi_comp; 1004 wiphy->max_adj_channel_rssi_comp;
1004 res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid); 1005 res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
1005 if (!res) 1006 if (!res)
@@ -1019,15 +1020,15 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
1019 /* cfg80211_bss_update gives us a referenced result */ 1020 /* cfg80211_bss_update gives us a referenced result */
1020 return &res->pub; 1021 return &res->pub;
1021} 1022}
1022EXPORT_SYMBOL(cfg80211_inform_bss_width); 1023EXPORT_SYMBOL(cfg80211_inform_bss_data);
1023 1024
1024/* Returned bss is reference counted and must be cleaned up appropriately. */ 1025/* cfg80211_inform_bss_width_frame helper */
1025struct cfg80211_bss * 1026struct cfg80211_bss *
1026cfg80211_inform_bss_width_frame(struct wiphy *wiphy, 1027cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
1027 struct ieee80211_channel *rx_channel, 1028 struct cfg80211_inform_bss *data,
1028 enum nl80211_bss_scan_width scan_width, 1029 struct ieee80211_mgmt *mgmt, size_t len,
1029 struct ieee80211_mgmt *mgmt, size_t len, 1030 gfp_t gfp)
1030 s32 signal, gfp_t gfp) 1031
1031{ 1032{
1032 struct cfg80211_internal_bss tmp = {}, *res; 1033 struct cfg80211_internal_bss tmp = {}, *res;
1033 struct cfg80211_bss_ies *ies; 1034 struct cfg80211_bss_ies *ies;
@@ -1040,8 +1041,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
1040 BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != 1041 BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) !=
1041 offsetof(struct ieee80211_mgmt, u.beacon.variable)); 1042 offsetof(struct ieee80211_mgmt, u.beacon.variable));
1042 1043
1043 trace_cfg80211_inform_bss_width_frame(wiphy, rx_channel, scan_width, mgmt, 1044 trace_cfg80211_inform_bss_frame(wiphy, data, mgmt, len);
1044 len, signal);
1045 1045
1046 if (WARN_ON(!mgmt)) 1046 if (WARN_ON(!mgmt))
1047 return NULL; 1047 return NULL;
@@ -1050,14 +1050,14 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
1050 return NULL; 1050 return NULL;
1051 1051
1052 if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && 1052 if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC &&
1053 (signal < 0 || signal > 100))) 1053 (data->signal < 0 || data->signal > 100)))
1054 return NULL; 1054 return NULL;
1055 1055
1056 if (WARN_ON(len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable))) 1056 if (WARN_ON(len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable)))
1057 return NULL; 1057 return NULL;
1058 1058
1059 channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable, 1059 channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable,
1060 ielen, rx_channel); 1060 ielen, data->chan);
1061 if (!channel) 1061 if (!channel)
1062 return NULL; 1062 return NULL;
1063 1063
@@ -1077,12 +1077,13 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
1077 1077
1078 memcpy(tmp.pub.bssid, mgmt->bssid, ETH_ALEN); 1078 memcpy(tmp.pub.bssid, mgmt->bssid, ETH_ALEN);
1079 tmp.pub.channel = channel; 1079 tmp.pub.channel = channel;
1080 tmp.pub.scan_width = scan_width; 1080 tmp.pub.scan_width = data->scan_width;
1081 tmp.pub.signal = signal; 1081 tmp.pub.signal = data->signal;
1082 tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); 1082 tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
1083 tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); 1083 tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
1084 tmp.ts_boottime = data->boottime_ns;
1084 1085
1085 signal_valid = abs(rx_channel->center_freq - channel->center_freq) <= 1086 signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
1086 wiphy->max_adj_channel_rssi_comp; 1087 wiphy->max_adj_channel_rssi_comp;
1087 res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid); 1088 res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
1088 if (!res) 1089 if (!res)
@@ -1102,7 +1103,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
1102 /* cfg80211_bss_update gives us a referenced result */ 1103 /* cfg80211_bss_update gives us a referenced result */
1103 return &res->pub; 1104 return &res->pub;
1104} 1105}
1105EXPORT_SYMBOL(cfg80211_inform_bss_width_frame); 1106EXPORT_SYMBOL(cfg80211_inform_bss_frame_data);
1106 1107
1107void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) 1108void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
1108{ 1109{
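The _data variants collapse the growing argument lists (channel, scan width, signal, and now boottime) into one struct cfg80211_inform_bss, so future fields can be added without touching every caller. A standalone sketch of that parameter-struct style, with illustrative names:

/* Sketch of the parameter-struct refactor above: related arguments
 * move into one struct so new fields can be added without changing
 * every call site. All names here are illustrative stand-ins. */
#include <stdio.h>
#include <stdint.h>

struct inform_bss {
	int chan_mhz;
	int scan_width;
	int signal;
	uint64_t boottime_ns;
};

static void inform_bss(const struct inform_bss *data, const char *bssid)
{
	printf("%s on %d MHz, signal %d, boottime %llu ns\n",
	       bssid, data->chan_mhz, data->signal,
	       (unsigned long long)data->boottime_ns);
}

int main(void)
{
	/* designated initializers keep call sites readable as the
	 * struct grows; unset fields default to zero */
	struct inform_bss data = {
		.chan_mhz = 2412,
		.signal = -4200,
		.boottime_ns = 123456789ULL,
	};

	inform_bss(&data, "00:11:22:33:44:55");
	return 0;
}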
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index a808279a432a..0c392d36781b 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2670,30 +2670,30 @@ TRACE_EVENT(cfg80211_get_bss,
2670 __entry->privacy) 2670 __entry->privacy)
2671); 2671);
2672 2672
2673TRACE_EVENT(cfg80211_inform_bss_width_frame, 2673TRACE_EVENT(cfg80211_inform_bss_frame,
2674 TP_PROTO(struct wiphy *wiphy, struct ieee80211_channel *channel, 2674 TP_PROTO(struct wiphy *wiphy, struct cfg80211_inform_bss *data,
2675 enum nl80211_bss_scan_width scan_width, 2675 struct ieee80211_mgmt *mgmt, size_t len),
2676 struct ieee80211_mgmt *mgmt, size_t len, 2676 TP_ARGS(wiphy, data, mgmt, len),
2677 s32 signal),
2678 TP_ARGS(wiphy, channel, scan_width, mgmt, len, signal),
2679 TP_STRUCT__entry( 2677 TP_STRUCT__entry(
2680 WIPHY_ENTRY 2678 WIPHY_ENTRY
2681 CHAN_ENTRY 2679 CHAN_ENTRY
2682 __field(enum nl80211_bss_scan_width, scan_width) 2680 __field(enum nl80211_bss_scan_width, scan_width)
2683 __dynamic_array(u8, mgmt, len) 2681 __dynamic_array(u8, mgmt, len)
2684 __field(s32, signal) 2682 __field(s32, signal)
2683 __field(u64, ts_boottime)
2685 ), 2684 ),
2686 TP_fast_assign( 2685 TP_fast_assign(
2687 WIPHY_ASSIGN; 2686 WIPHY_ASSIGN;
2688 CHAN_ASSIGN(channel); 2687 CHAN_ASSIGN(data->chan);
2689 __entry->scan_width = scan_width; 2688 __entry->scan_width = data->scan_width;
2690 if (mgmt) 2689 if (mgmt)
2691 memcpy(__get_dynamic_array(mgmt), mgmt, len); 2690 memcpy(__get_dynamic_array(mgmt), mgmt, len);
2692 __entry->signal = signal; 2691 __entry->signal = data->signal;
2692 __entry->ts_boottime = data->boottime_ns;
2693 ), 2693 ),
2694 TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "(scan_width: %d) signal: %d", 2694 TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "(scan_width: %d) signal: %d, tsb:%llu",
2695 WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width, 2695 WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width,
2696 __entry->signal) 2696 __entry->signal, (unsigned long long)__entry->ts_boottime)
2697); 2697);
2698 2698
2699DECLARE_EVENT_CLASS(cfg80211_bss_evt, 2699DECLARE_EVENT_CLASS(cfg80211_bss_evt,
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 60ce7014e1b0..ad7f5b3f9b61 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -330,8 +330,10 @@ resume:
330 330
331 if (x->sel.family == AF_UNSPEC) { 331 if (x->sel.family == AF_UNSPEC) {
332 inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); 332 inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
333 if (inner_mode == NULL) 333 if (inner_mode == NULL) {
334 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
334 goto drop; 335 goto drop;
336 }
335 } 337 }
336 338
337 if (inner_mode->input(x, skb)) { 339 if (inner_mode->input(x, skb)) {
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 68ada2ca4b60..cc3676eb6239 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -19,7 +19,7 @@
19#include <net/dst.h> 19#include <net/dst.h>
20#include <net/xfrm.h> 20#include <net/xfrm.h>
21 21
22static int xfrm_output2(struct sock *sk, struct sk_buff *skb); 22static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
23 23
24static int xfrm_skb_check_space(struct sk_buff *skb) 24static int xfrm_skb_check_space(struct sk_buff *skb)
25{ 25{
@@ -131,18 +131,20 @@ out:
131 131
132int xfrm_output_resume(struct sk_buff *skb, int err) 132int xfrm_output_resume(struct sk_buff *skb, int err)
133{ 133{
134 struct net *net = xs_net(skb_dst(skb)->xfrm);
135
134 while (likely((err = xfrm_output_one(skb, err)) == 0)) { 136 while (likely((err = xfrm_output_one(skb, err)) == 0)) {
135 nf_reset(skb); 137 nf_reset(skb);
136 138
137 err = skb_dst(skb)->ops->local_out(skb); 139 err = skb_dst(skb)->ops->local_out(net, skb->sk, skb);
138 if (unlikely(err != 1)) 140 if (unlikely(err != 1))
139 goto out; 141 goto out;
140 142
141 if (!skb_dst(skb)->xfrm) 143 if (!skb_dst(skb)->xfrm)
142 return dst_output(skb); 144 return dst_output(net, skb->sk, skb);
143 145
144 err = nf_hook(skb_dst(skb)->ops->family, 146 err = nf_hook(skb_dst(skb)->ops->family,
145 NF_INET_POST_ROUTING, skb->sk, skb, 147 NF_INET_POST_ROUTING, net, skb->sk, skb,
146 NULL, skb_dst(skb)->dev, xfrm_output2); 148 NULL, skb_dst(skb)->dev, xfrm_output2);
147 if (unlikely(err != 1)) 149 if (unlikely(err != 1))
148 goto out; 150 goto out;
@@ -156,12 +158,12 @@ out:
156} 158}
157EXPORT_SYMBOL_GPL(xfrm_output_resume); 159EXPORT_SYMBOL_GPL(xfrm_output_resume);
158 160
159static int xfrm_output2(struct sock *sk, struct sk_buff *skb) 161static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
160{ 162{
161 return xfrm_output_resume(skb, 1); 163 return xfrm_output_resume(skb, 1);
162} 164}
163 165
164static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb) 166static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb)
165{ 167{
166 struct sk_buff *segs; 168 struct sk_buff *segs;
167 169
@@ -177,7 +179,7 @@ static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb)
177 int err; 179 int err;
178 180
179 segs->next = NULL; 181 segs->next = NULL;
180 err = xfrm_output2(sk, segs); 182 err = xfrm_output2(net, sk, segs);
181 183
182 if (unlikely(err)) { 184 if (unlikely(err)) {
183 kfree_skb_list(nskb); 185 kfree_skb_list(nskb);
@@ -196,7 +198,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
196 int err; 198 int err;
197 199
198 if (skb_is_gso(skb)) 200 if (skb_is_gso(skb))
199 return xfrm_output_gso(sk, skb); 201 return xfrm_output_gso(net, sk, skb);
200 202
201 if (skb->ip_summed == CHECKSUM_PARTIAL) { 203 if (skb->ip_summed == CHECKSUM_PARTIAL) {
202 err = skb_checksum_help(skb); 204 err = skb_checksum_help(skb);
@@ -207,7 +209,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
207 } 209 }
208 } 210 }
209 211
210 return xfrm_output2(sk, skb); 212 return xfrm_output2(net, sk, skb);
211} 213}
212EXPORT_SYMBOL_GPL(xfrm_output); 214EXPORT_SYMBOL_GPL(xfrm_output);
213 215
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 94af3d065785..09bfcbac63bb 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1208,7 +1208,7 @@ static inline int policy_to_flow_dir(int dir)
1208 } 1208 }
1209} 1209}
1210 1210
1211static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, 1211static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
1212 const struct flowi *fl) 1212 const struct flowi *fl)
1213{ 1213{
1214 struct xfrm_policy *pol; 1214 struct xfrm_policy *pol;
@@ -1583,8 +1583,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1583 1583
1584 memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); 1584 memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
1585 xdst->flo.ops = &xfrm_bundle_fc_ops; 1585 xdst->flo.ops = &xfrm_bundle_fc_ops;
1586 if (afinfo->init_dst)
1587 afinfo->init_dst(net, xdst);
1588 } else 1586 } else
1589 xdst = ERR_PTR(-ENOBUFS); 1587 xdst = ERR_PTR(-ENOBUFS);
1590 1588
@@ -1889,6 +1887,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
 	struct sock *sk;
 	struct dst_entry *dst;
 	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
+	struct net *net = xp_net(pol);
 	struct xfrm_policy_queue *pq = &pol->polq;
 	struct flowi fl;
 	struct sk_buff_head list;
@@ -1905,8 +1904,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
 	spin_unlock(&pq->hold_queue.lock);
 
 	dst_hold(dst->path);
-	dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
-			  sk, 0);
+	dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
 	if (IS_ERR(dst))
 		goto purge_queue;
 
@@ -1936,8 +1934,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
 
 		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
 		dst_hold(skb_dst(skb)->path);
-		dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
-				  &fl, skb->sk, 0);
+		dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
 		if (IS_ERR(dst)) {
 			kfree_skb(skb);
 			continue;
@@ -1947,7 +1944,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
 		skb_dst_drop(skb);
 		skb_dst_set(skb, dst);
 
-		dst_output(skb);
+		dst_output(net, skb->sk, skb);
 	}
 
 out:
@@ -1960,7 +1957,7 @@ purge_queue:
 	xfrm_pol_put(pol);
 }
 
-static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
+static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	unsigned long sched_next;
 	struct dst_entry *dst = skb_dst(skb);
@@ -2187,7 +2184,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family,
  */
 struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 			      const struct flowi *fl,
-			      struct sock *sk, int flags)
+			      const struct sock *sk, int flags)
 {
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	struct flow_cache_object *flo;
@@ -2335,7 +2332,7 @@ EXPORT_SYMBOL(xfrm_lookup);
  */
 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
 				    const struct flowi *fl,
-				    struct sock *sk, int flags)
+				    const struct sock *sk, int flags)
 {
 	struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
 					    flags | XFRM_LOOKUP_QUEUE |
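Two independent changes land in xfrm_policy.c: xfrm_policy_queue_process() caches xp_net(pol) in a local net once rather than recomputing it for every queued packet, and xfrm_lookup()/xfrm_lookup_route() now take a const struct sock *, so read-only callers stay const-correct without casts. A hypothetical caller sketch, assuming only the post-patch prototypes shown above; demo_route_output() is invented for illustration and is not part of this diff.

#include <net/xfrm.h>

/* With the const-qualified prototype, a caller that never writes to
 * the socket no longer needs to cast away const to do a lookup.
 */
static struct dst_entry *demo_route_output(struct net *net,
					   struct dst_entry *dst_orig,
					   const struct flowi *fl,
					   const struct sock *sk)
{
	return xfrm_lookup(net, dst_orig, fl, sk, 0);
}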
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index a8de9e300200..805681a7d356 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -31,6 +31,7 @@
 #if IS_ENABLED(CONFIG_IPV6)
 #include <linux/in6.h>
 #endif
+#include <asm/unaligned.h>
 
 static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
 {
@@ -728,7 +729,9 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
 	memcpy(&p->sel, &x->sel, sizeof(p->sel));
 	memcpy(&p->lft, &x->lft, sizeof(p->lft));
 	memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
-	memcpy(&p->stats, &x->stats, sizeof(p->stats));
+	put_unaligned(x->stats.replay_window, &p->stats.replay_window);
+	put_unaligned(x->stats.replay, &p->stats.replay);
+	put_unaligned(x->stats.integrity_failed, &p->stats.integrity_failed);
 	memcpy(&p->saddr, &x->props.saddr, sizeof(p->saddr));
 	p->mode = x->props.mode;
 	p->replay_window = x->props.replay_window;
@@ -1928,8 +1931,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct nlattr *rp = attrs[XFRMA_REPLAY_VAL];
 	struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL];
 	struct nlattr *lt = attrs[XFRMA_LTIME_VAL];
+	struct nlattr *et = attrs[XFRMA_ETIMER_THRESH];
+	struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
 
-	if (!lt && !rp && !re)
+	if (!lt && !rp && !re && !et && !rt)
 		return err;
 
 	/* pedantic mode - thou shalt sayeth replaceth */
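The xfrm_user.c hunks make two separate fixes. In copy_to_user_state(), the three replay counters are stored with put_unaligned() instead of a struct memcpy(), apparently because the destination xfrm_usersa_info lives inside a netlink payload whose alignment can be weaker than the struct's natural alignment. And xfrm_new_ae() now also accepts updates that carry only XFRMA_ETIMER_THRESH or XFRMA_REPLAY_THRESH, instead of bailing out. A self-contained sketch of the put_unaligned() idiom, using an invented demo struct rather than the real ABI types:

#include <asm/unaligned.h>
#include <linux/types.h>

struct demo_stats {
	__u32 replay_window;
	__u32 replay;
	__u32 integrity_failed;
};

/* dst may point anywhere inside a netlink payload, so it is not
 * guaranteed to be naturally aligned; put_unaligned() emits stores
 * that are safe on strict-alignment architectures.
 */
static void demo_copy_stats(const struct demo_stats *src,
			    struct demo_stats *dst)
{
	put_unaligned(src->replay_window, &dst->replay_window);
	put_unaligned(src->replay, &dst->replay);
	put_unaligned(src->integrity_failed, &dst->integrity_failed);
}

The widened condition (!lt && !rp && !re && !et && !rt) simply means an AE update carrying only a timer or replay threshold is now processed rather than silently ignored.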